Coverage for polar/organization/ai_validation.py: 33%

87 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-12-05 17:15 +0000

1import asyncio 1a

2from typing import Literal 1a

3 

4import httpx 1a

5import structlog 1a

6from pydantic import Field 1a

7from pydantic_ai import Agent 1a

8from pydantic_ai.models.openai import OpenAIChatModel 1a

9from pydantic_ai.providers.openai import OpenAIProvider 1a

10 

11from polar.config import settings 1a

12from polar.kit.schemas import Schema 1a

13from polar.models.organization import Organization 1a

14 

15log = structlog.get_logger(__name__) 1a

16 

17 

class OrganizationAIValidationVerdict(Schema):
    """Structured compliance verdict for a single organization.

    Every field is required except ``violated_sections``, which falls back
    to a fresh empty list.
    """

    # Overall outcome; only these three literal values are accepted.
    verdict: Literal["PASS", "FAIL", "UNCERTAIN"] = Field(
        description="PASS | FAIL | UNCERTAIN - indicates compliance status.",
    )

    # Bounded to the closed interval [0, 100] by the ge/le constraints.
    risk_score: float = Field(
        ge=0,
        le=100,
        description="Risk score from 0 to 100, where 0 is no risk and 100 is high risk.",
    )

    # default_factory gives each instance its own list object.
    violated_sections: list[str] = Field(
        default_factory=list,
        description="List of violated sections or bullets from the policy.",
    )

    # Per its description, this text is shown to the customer.
    reason: str = Field(
        description="A 1 or 3 line explanation of the verdict and the reasoning behind it. The reason will be shown to our customer.",
    )

36 

37 

class OrganizationAIValidationResult(Schema):
    """Envelope around a verdict, recording how the validation run ended."""

    # The verdict itself (required).
    verdict: OrganizationAIValidationVerdict = Field(
        description="AI validation verdict",
    )

    # Defaults to False when not supplied.
    timed_out: bool = Field(
        default=False,
        description="Whether the validation timed out",
    )

    # Name of the model that was used (required).
    model: str = Field(
        description="The model used for validation, e.g. 'gpt-4o-mini'.",
    )

49 

50 

# System prompt passed as `system_prompt` when the validator's Agent is built.
# It frames the model as a compliance reviewer and lists the decision
# guidelines (including when to answer UNCERTAIN versus FAIL).
SYSTEM_PROMPT = """
    You are a compliance expert analyzing organization details against Polar's acceptable use policy.
    Your task is to evaluate if an organization's intended use aligns with our acceptable use policy.
    Guidelines:
    - Be thorough but fair in your analysis
    - Consider the overall business model and intent
    - Focus on the core business activities described
    - If information is unclear or insufficient, respond with UNCERTAIN
    - Only mark as FAIL if there's clear policy violation
    - Provide specific reasoning for your decision
    - Reference specific policy sections when violations are identified
"""

63 

# Hard-coded copy of the acceptable use policy (markdown). It is what
# _fetch_policy_content() falls back to when the policy document cannot be
# retrieved from the documentation URL.
FALLBACK_POLICY = """
    As your Merchant of Record (MoR), we are the reseller of all digital goods and
    services and focus exclusively on digital products. Therefore we cannot support
    physical goods or entirely human services, e.g consultation or support. In
    addition to not accepting the sale of anything illegal, harmful, abusive,
    deceptive or sketchy.

    ## Acceptable Products & Businesses

    * Software & SaaS
    * Digital products: Templates, eBooks, PDFs, code, icons, fonts, design assets, photos, videos, audio etc
    * Premium content & access: Discord server, GitHub repositories, courses and content requiring a subscription.

    **General rule of acceptable services**

    Digital goods, software or services that can be fulfilled by…

    1. Polar on your behalf (License Keys, File Downloads, GitHub- or Discord invites or private links, e.g premium YouTube videos etc)
    2. Your site/service using our APIs to grant immediate access to digital assets
       or services for customers with a one-time purchase or subscriptions

    Combined with being something you'd proudly boast about in public, i.e nothing illegal, unfair, deceptive, abusive, harmful or shady.

    Don't hesitate to [reach out to us](/support) in advance in case you're unsure if your use case would be approved.

    ## Prohibited Businesses

    <Note>
      **Not an exhaustive list**

      We reserve the right to add to it at any time. Combined with placing your
      account under further review or suspend it in case we consider the usage
      deceptive, fraudulent, high-risk or of low quality for consumers with high
      refund/chargeback risks.
    </Note>

    * Illegal or age restricted, e.g drugs, alcohol, tobacco or vaping products
    * Violates laws in the jurisdictions where your business is located or to which your business is targeted
    * Violates any rules or regulations from payment processors & credit card networks, e.g [Stripe](https://stripe.com/en-se/legal/restricted-businesses)
    * Reselling or distributing customer data to other parties for commercial, promotional or any other reason (disclosed service providers are accepted).
    * Threatens reputation of Polar or any of our partners and payment providers
    * Causes or has a significant risk of refunds, chargebacks, fines, damages, or harm and liability
    * Services used by-, intended for or advertised towards minors
    * Physical goods of any kind. Including SaaS services offering or requiring fulfilment via physical delivery or human services.
    * Human services, e.g marketing, design, web development and consulting in general.
    * Donations or charity, i.e price is greater than product value or there is no exchange at all (pure money transfer). Open source maintainers with sponsorship can be supported - reach out.
    * Marketplaces. Selling others’ products or services using Polar against an upfront payment or with an agreed upon revenue share.
    * Adult services or content. Including by AI or proxy, e.g
      * AI Girlfriend/Boyfriend services.
      * OnlyFans related services.
      * Explicit/NSFW content generated with AI
    * Low-quality products, services or sites, e.g
      * E-books generated with AI or 4 pages sold for \\$50
      * Quickly & poorly executed websites, products or services
      * Services with a lot of bugs and issues
      * Products, services or websites we determine to have a low trust score
    * Fake testimonials, reviews, and social proof. It's deceptive to consumers which is behaviour we do not tolerate.
    * Trademark violations
    * "Get rich" schemes or content
    * Gambling & betting services
    * Regulated services or products
    * Counterfeit goods
    * Job boards
    * NFT & Crypto assets.
    * Cheating: Utilizing cheat codes, hacks, or any unauthorized modifications that alter gameplay or provide an unfair advantage.
    * Reselling Licenses: Selling, distributing, or otherwise transferring software licenses at reduced prices or without proper authorization.
    * Services to circumvent rules or terms of other services: Attempting to bypass, manipulate, or undermine any established rules, gameplay mechanics, or pricing structures of other vendors/games.
    * Financial services, e.g facilitating transactions, investments or balances for customers.
    * Financial advice, e.g content or services related to tax guidance, wealth management, investment strategies etc.
    * IPTV services
    * Virus & Spyware
    * Telecommunication & eSIM Services
    * Products you don’t own the IP of or have the required licenses to resell
    * Advertising & unsolicited marketing services. Including services to:
      * Generate, scrape or sell leads
      * Send SMS/WhatsApp messages in bulk
      * Automate outreach (spam risks)
      * Automate mass content generation & submission across sites
    * API & IP cloaking services, e.g services to circumvent IP bans, API rate limits etc.
    * Products or services associated with pseudo-science; clairvoyance, horoscopes, fortune-telling etc.
    * Travel services, reservation services, travel clubs and timeshares
    * Medical advice services or products, e.g. pharmaceutical, weight loss, muscle building.

    ## Restricted Businesses

    Requires closer review and a higher bar of quality, execution, trust and compliance
    standards to be accepted.

    * Directories & boards
    * Marketing services
    * Pre-orders & Paid waitlist
    * Ticket sales
"""

157 

# Verdict handed back when validation fails with an unexpected exception:
# a neutral UNCERTAIN outcome with a mid-range score that asks for manual review.
TECHNICAL_ERROR_VERDICT = OrganizationAIValidationVerdict(
    reason="Technical error during validation. Manual review required.",
    violated_sections=[],
    risk_score=50.0,
    verdict="UNCERTAIN",
)

164 

# Cached policy content - set by the first *successful* fetch and reused
# for the rest of the process lifetime.
_cached_policy_content: str | None = None

# How long (in seconds) FALLBACK_POLICY keeps being served after a failed
# fetch before the documentation URL is tried again.
_POLICY_RETRY_COOLDOWN_SECONDS: float = 300.0

# time.monotonic() reading before which no new fetch attempt is made.
# 0.0 means "no failure recorded yet".
_policy_retry_not_before: float = 0.0


async def _fetch_policy_content() -> str:
    """Return the acceptable use policy text, fetching it at most once on success.

    The policy is downloaded from the documentation URL. A successful
    (HTTP 200) response is cached in ``_cached_policy_content`` and returned
    on every later call without touching the network.

    If the download fails (any exception, or a non-200 status), the built-in
    ``FALLBACK_POLICY`` is returned instead. The fallback is deliberately NOT
    stored in the cache: a single transient outage would otherwise pin the
    hard-coded copy for the whole process lifetime. To avoid paying the HTTP
    timeout on every call while the docs are unreachable, new attempts are
    suppressed for ``_POLICY_RETRY_COOLDOWN_SECONDS`` after a failure.

    Returns:
        The fetched policy text, or ``FALLBACK_POLICY`` when it is unavailable.
        This function does not raise; all fetch errors are logged and absorbed.
    """
    global _cached_policy_content, _policy_retry_not_before

    # Function-scope import so the module's import block stays untouched.
    import time

    if _cached_policy_content is not None:
        return _cached_policy_content

    # Still inside the cooldown window of a previous failure: skip the network.
    if time.monotonic() < _policy_retry_not_before:
        return FALLBACK_POLICY

    try:
        # Fetch the actual policy from the documentation URL
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "https://polar.sh/docs/merchant-of-record/acceptable-use.md",
                timeout=10.0,
                follow_redirects=True,
            )
            if response.status_code == 200:
                _cached_policy_content = response.text
                log.info("Successfully fetched acceptable use policy from docs")
                return _cached_policy_content

            log.warning(
                "Failed to fetch policy, using fallback",
                status_code=response.status_code,
            )
    except Exception as e:
        # Best-effort by design: validation must still run without the live policy.
        log.warning("Error fetching policy, using fallback", error=str(e))

    # Reached only on failure: remember when the next attempt is allowed.
    _policy_retry_not_before = time.monotonic() + _POLICY_RETRY_COOLDOWN_SECONDS
    return FALLBACK_POLICY

198 

199 

class OrganizationAIValidator:
    """AI-powered organization details validator using pydantic-ai.

    Holds an OpenAI chat model and a pydantic-ai ``Agent`` whose structured
    output type is ``OrganizationAIValidationVerdict`` and whose system
    prompt is ``SYSTEM_PROMPT``.
    """

    def __init__(self) -> None:
        """Create the model and agent from ``settings.OPENAI_API_KEY`` / ``settings.OPENAI_MODEL``."""
        provider = OpenAIProvider(api_key=settings.OPENAI_API_KEY)
        self.model = OpenAIChatModel(settings.OPENAI_MODEL, provider=provider)

        self.agent = Agent(
            self.model,
            output_type=OrganizationAIValidationVerdict,
            system_prompt=SYSTEM_PROMPT,
        )

    def _validate_input(self, organization: Organization) -> None:
        """Validate organization input before AI processing.

        Raises:
            ValueError: if the organization, its details or its name are
                missing/empty, or if the stringified details exceed
                10,000 characters.
        """
        if not organization:
            raise ValueError("Organization is required")

        if not organization.details:
            raise ValueError("Organization details are required for AI validation")

        if not organization.name:
            raise ValueError("Organization name is required")

        # Check details size to prevent excessive API costs
        if len(str(organization.details)) > 10000:  # 10KB limit
            raise ValueError("Organization details too large for AI validation")

    async def validate_organization_details(
        self, organization: Organization, timeout_seconds: float = 25
    ) -> OrganizationAIValidationResult:
        """Validate organization details against acceptable use policy.

        Args:
            organization: The organization to assess. It must pass
                ``_validate_input``.
            timeout_seconds: Maximum time allowed for the agent run. The
                policy fetch that precedes it is not covered by this limit.

        Returns:
            A result wrapping the agent's verdict. If the agent run times
            out, an UNCERTAIN verdict is returned with ``timed_out=True``.
            If anything else fails, ``TECHNICAL_ERROR_VERDICT`` is returned
            with ``timed_out=False``.

        Raises:
            ValueError: propagated from ``_validate_input``, which runs
                before the error-absorbing ``try`` block.
        """
        # Validate input first
        self._validate_input(organization)

        try:
            policy_content = await _fetch_policy_content()
            org_context = self._prepare_organization_context(organization)

            # Create the validation prompt
            prompt = f"""
            Analyze this organization against our acceptable use policy:

            ORGANIZATION DETAILS:
            {org_context}

            ACCEPTABLE USE POLICY:
            {policy_content}

            Provide your compliance verdict with detailed reasoning.
            """

            timed_out = False

            # Run AI validation with timeout
            try:
                result = await asyncio.wait_for(
                    self.agent.run(prompt), timeout=timeout_seconds
                )
                verdict = result.output
            except TimeoutError:
                # asyncio.wait_for signals expiry with TimeoutError (on
                # Python 3.11+ asyncio.TimeoutError is this same builtin).
                log.warning(
                    "AI validation timed out",
                    organization_id=str(organization.id),
                    timeout_seconds=timeout_seconds,
                )
                timed_out = True
                verdict = OrganizationAIValidationVerdict(
                    verdict="UNCERTAIN",
                    risk_score=50.0,
                    violated_sections=[],
                    reason="Validation timed out. Manual review required.",
                )

            return OrganizationAIValidationResult(
                verdict=verdict, timed_out=timed_out, model=self.model.model_name
            )

        except Exception as e:
            # Catch-all boundary: the caller always gets a result object.
            # exc_info=True keeps the traceback in the log, since str(e) alone
            # is often empty or too terse to diagnose the swallowed failure.
            log.error(
                "AI validation failed",
                organization_id=str(organization.id),
                error=str(e),
                exc_info=True,
            )

            return OrganizationAIValidationResult(
                verdict=TECHNICAL_ERROR_VERDICT,
                timed_out=False,
                model=self.model.model_name,
            )

    def _prepare_organization_context(self, organization: Organization) -> str:
        """Prepare organization details for AI analysis.

        Builds one ``Label: value`` line per available piece of information:
        the name (always), then the website and the ``about``,
        ``product_description`` and ``intended_use`` detail entries, each
        only when truthy.

        Returns:
            The lines joined with newlines.
        """
        details = organization.details or {}

        context_parts = [f"Organization Name: {organization.name}"]

        if organization.website:
            context_parts.append(f"Website: {organization.website}")

        # (label, details key) pairs, in the order they appear in the prompt.
        labelled_keys = (
            ("About", "about"),
            ("Product Description", "product_description"),
            ("Intended Use", "intended_use"),
        )
        for label, key in labelled_keys:
            value = details.get(key)
            if value:
                context_parts.append(f"{label}: {value}")

        return "\n".join(context_parts)

321 

322 

# Module-level validator instance; it is constructed when this module is
# imported, which runs OrganizationAIValidator.__init__ (model + agent setup).
validator: OrganizationAIValidator = OrganizationAIValidator()