Update main.py
main.py
CHANGED
@@ -33,11 +33,10 @@ else:
 # --- Constants & Headers ---
 LLM_API_URL = "https://api.typegpt.net/v1/chat/completions"
 LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
-MAX_SOURCES_TO_PROCESS = 6
-MAX_CONCURRENT_REQUESTS = 3
+MAX_SOURCES_TO_PROCESS = 6
+MAX_CONCURRENT_REQUESTS = 3
 RESEARCH_TIMEOUT = 180  # 3 minutes maximum
-REQUEST_DELAY = 2.0
-USER_AGENT_ROTATION = True
+REQUEST_DELAY = 2.0
 
 # Initialize fake user agent generator
 try:
@@ -65,7 +64,7 @@ class DeepResearchRequest(BaseModel):
 app = FastAPI(
     title="AI Deep Research API",
     description="Provides robust, long-form, streaming deep research completions using real web searches.",
-    version="2.1.0"
+    version="2.1.0"
 )
 app.add_middleware(
     CORSMiddleware,
@@ -87,18 +86,21 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
 
 async def get_real_user_agent() -> str:
     """Get a realistic user agent string."""
-
-
-
+    try:
+        if isinstance(ua, UserAgent):
+            return ua.random
+        return ua.random()  # For our fallback class
+    except:
+        return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
 
 async def check_robots_txt(url: str) -> bool:
     """Check if scraping is allowed by robots.txt."""
     try:
-
-        if not
+        domain_match = re.search(r'https?://([^/]+)', url)
+        if not domain_match:
             return False
 
-        domain =
+        domain = domain_match.group(1)
         robots_url = f"https://{domain}/robots.txt"
 
         async with aiohttp.ClientSession() as session:
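The rewritten get_real_user_agent relies on a module-level `ua` created in the `# Initialize fake user agent generator` block, which this diff does not show. A minimal sketch of one way that block could look; the fallback class here is an assumption, not the file's actual code:

```python
import random

_UA_POOL = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
]

try:
    # Real dependency: fake-useragent's UserAgent exposes .random as a property.
    from fake_useragent import UserAgent
    ua = UserAgent()
except Exception:
    class UserAgent:  # dummy placeholder so isinstance(ua, UserAgent) still evaluates
        pass

    class _FallbackUserAgent:
        # random() is a method here, matching the ua.random() call in the diff's fallback branch
        def random(self) -> str:
            return random.choice(_UA_POOL)

    ua = _FallbackUserAgent()
```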
@@ -106,22 +108,20 @@ async def check_robots_txt(url: str) -> bool:
             async with session.get(robots_url, headers=headers, timeout=5) as response:
                 if response.status == 200:
                     robots = await response.text()
-                    # Simple check - disallow all if present
                     if "Disallow: /" in robots:
                         return False
-                    # Check for specific
+                    # Check for specific path disallows
                     path = re.sub(r'https?://[^/]+', '', url)
-                    if f"Disallow: {
+                    if any(f"Disallow: {p}" in robots for p in [path, path.rstrip('/') + '/']):
                         return False
         return True
     except Exception as e:
         logging.warning(f"Could not check robots.txt for {url}: {e}")
-        return False
+        return False
 
 async def fetch_search_results(query: str, max_results: int = 5) -> List[dict]:
     """
     Perform a real search using DuckDuckGo's HTML interface.
-    Note: This may break if DuckDuckGo changes their HTML structure.
     """
     try:
         search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
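The substring checks above only catch literal `Disallow:` matches. Not part of this commit, but for comparison, a sketch using the standard library's robots.txt parser, which also handles per-agent rules; RobotFileParser.read() is blocking, so it is pushed off the event loop here:

```python
import asyncio
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser


async def allowed_by_robots(url: str, user_agent: str = "*") -> bool:
    """Sketch of a stricter robots.txt check than the substring matching above."""
    parsed = urlparse(url)
    rp = RobotFileParser()
    rp.set_url(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    try:
        await asyncio.to_thread(rp.read)  # fetch and parse robots.txt without blocking the loop
    except Exception:
        return False                      # mirror the diff's conservative default
    return rp.can_fetch(user_agent, url)
```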
@@ -144,20 +144,18 @@ async def fetch_search_results(query: str, max_results: int = 5) -> List[dict]:
 
             results = []
             # Updated selectors for DuckDuckGo's current HTML structure
-            for result in soup.select('.
+            for result in soup.select('.result__body')[:max_results]:
                 try:
                     title_elem = result.select_one('.result__title .result__a')
                     link_elem = title_elem if title_elem else result.select_one('a')
                     snippet_elem = result.select_one('.result__snippet')
 
                     if title_elem and link_elem and snippet_elem:
-                        #
+                        # Handle DuckDuckGo's redirect URLs
                         link = link_elem['href']
                         if link.startswith('/l/'):
-
-                            # We need to follow these to get the actual URL
+                            redirect_url = f"https://duckduckgo.com{link}"
                             try:
-                                redirect_url = f"https://duckduckgo.com{link}"
                                 async with session.get(redirect_url, headers=headers, timeout=5, allow_redirects=False) as redirect_resp:
                                     if redirect_resp.status == 302:
                                         link = redirect_resp.headers.get('Location', link)
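For reference, the selectors in this hunk applied to a hand-written snippet shaped like DuckDuckGo's result markup (illustrative only; the live HTML can change):

```python
from bs4 import BeautifulSoup

# Hand-written sample, not real DuckDuckGo output.
sample_html = """
<div class="result__body">
  <h2 class="result__title"><a class="result__a" href="/l/?uddg=https%3A%2F%2Fexample.com">Example title</a></h2>
  <a class="result__snippet">Example snippet text.</a>
</div>
"""

soup = BeautifulSoup(sample_html, "html.parser")
for result in soup.select(".result__body")[:5]:
    title_elem = result.select_one(".result__title .result__a")
    snippet_elem = result.select_one(".result__snippet")
    if title_elem and snippet_elem:
        # '/l/?uddg=...' links would still need the redirect-following step shown above
        print(title_elem.get_text(strip=True), title_elem["href"], snippet_elem.get_text(strip=True))
```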
@@ -176,7 +174,6 @@ async def fetch_search_results(query: str, max_results: int = 5) -> List[dict]:
 
             logging.info(f"Found {len(results)} real search results for '{query}'")
             return results
-
     except Exception as e:
         logging.error(f"Real search failed: {e}")
         return []
@@ -223,7 +220,6 @@ async def process_web_source(session: aiohttp.ClientSession, source: dict, timeo
             tag.decompose()
 
         # Try to find main content by common patterns
-        main_content = None
         selectors_to_try = [
             'main',
             'article',
@@ -236,6 +232,7 @@ async def process_web_source(session: aiohttp.ClientSession, source: dict, timeo
             '#content'
         ]
 
+        main_content = None
         for selector in selectors_to_try:
             main_content = soup.select_one(selector)
             if main_content:
@@ -244,10 +241,8 @@ async def process_web_source(session: aiohttp.ClientSession, source: dict, timeo
         if not main_content:
             # If no main content found, try to find the largest text block
             all_elements = soup.find_all()
-            # Filter out elements that are likely not main content
            candidates = [el for el in all_elements if el.name not in ['script', 'style', 'nav', 'footer', 'header']]
             if candidates:
-                # Sort by text length
                 candidates.sort(key=lambda x: len(x.get_text()), reverse=True)
                 main_content = candidates[0] if candidates else soup
 
@@ -270,7 +265,7 @@ async def process_web_source(session: aiohttp.ClientSession, source: dict, timeo
         content = " ".join(soup.stripped_strings)
         content = re.sub(r'\s+', ' ', content).strip()
 
-        if len(content.split()) < 30:
+        if len(content.split()) < 30:
             logging.warning(f"Very little content extracted from {source['link']}")
             return source.get('snippet', ''), source_info
 
@@ -312,25 +307,22 @@ async def generate_research_plan(query: str, session: aiohttp.ClientSession) ->
                     content = result['choices'][0]['message']['content']
                     sub_questions = extract_json_from_llm_response(content)
                     if sub_questions and isinstance(sub_questions, list):
-                        # Clean up the questions
                         cleaned = []
                         for q in sub_questions:
                             if isinstance(q, str) and q.strip():
                                 cleaned_q = re.sub(r'^[^a-zA-Z0-9]*|[^a-zA-Z0-9]*$', '', q)
                                 if cleaned_q:
                                     cleaned.append(cleaned_q)
-                        return cleaned[:5]
+                        return cleaned[:5]
 
         # Fallback if we couldn't get good questions from LLM
-        default_questions = [
+        return [
             f"What is {query} and its key characteristics?",
             f"What are the main aspects or components of {query}?",
             f"What is the history and development of {query}?",
             f"What are the current trends or recent developments in {query}?",
             f"What are common challenges or controversies related to {query}?"
         ]
-        return default_questions[:4]
-
     except Exception as e:
         logging.error(f"Failed to generate research plan: {e}")
         return [
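extract_json_from_llm_response itself is outside this diff; a rough sketch of the kind of parsing such a helper needs (an assumption, not the file's implementation) is to strip Markdown fences and load the first JSON array:

```python
import json
import re
from typing import Optional


def extract_json_array(text: str) -> Optional[list]:
    """Pull the first JSON array out of an LLM reply that may be fenced or chatty."""
    text = re.sub(r"```(?:json)?", "", text)        # drop ```json / ``` fences if present
    match = re.search(r"\[.*\]", text, re.DOTALL)   # outermost [...] span
    if not match:
        return None
    try:
        data = json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
    return data if isinstance(data, list) else None
```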
@@ -416,7 +408,6 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
                 "data": f"Found {len(unique_sources)} unique sources to process."
             })
 
-            # If we have no sources, return early
             if not unique_sources:
                 yield format_sse({
                     "event": "error",
@@ -439,7 +430,7 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
             for i, source in enumerate(unique_sources):
                 # Check if we're running out of time
                 elapsed = time.time() - start_time
-                if elapsed > RESEARCH_TIMEOUT * 0.7:
+                if elapsed > RESEARCH_TIMEOUT * 0.7:
                     yield format_sse({
                         "event": "status",
                         "data": f"Approaching time limit, stopping source processing at {i}/{len(unique_sources)}"
@@ -448,12 +439,11 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
 
                 # Add delay between processing each source to be polite
                 if i > 0:
-                    await asyncio.sleep(REQUEST_DELAY * 0.5)
+                    await asyncio.sleep(REQUEST_DELAY * 0.5)
 
                 task = asyncio.create_task(process_with_semaphore(source))
                 processing_tasks.append(task)
 
-                # Yield progress updates periodically
                 if (i + 1) % 2 == 0 or (i + 1) == len(unique_sources):
                     yield format_sse({
                         "event": "status",
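process_with_semaphore is defined outside this hunk; the usual shape of that pattern, sketched here with illustrative names, is an asyncio.Semaphore sized by MAX_CONCURRENT_REQUESTS wrapped around each per-source task:

```python
import asyncio

MAX_CONCURRENT_REQUESTS = 3
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)


async def process_source_stub(source: dict) -> tuple:
    await asyncio.sleep(0.1)            # stand-in for the real fetch/extract work
    return "extracted text", source


async def process_with_semaphore_sketch(source: dict) -> tuple:
    async with semaphore:               # at most MAX_CONCURRENT_REQUESTS sources in flight
        return await process_source_stub(source)


async def main() -> None:
    sources = [{"link": f"https://example.com/{i}"} for i in range(6)]
    tasks = [asyncio.create_task(process_with_semaphore_sketch(s)) for s in sources]
    for future in asyncio.as_completed(tasks):
        content, info = await future    # consume results as they complete, like the loop above
        print(info["link"], len(content))


asyncio.run(main())
```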
@@ -465,11 +455,10 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
                 processed_sources += 1
                 content, source_info = await future
                 if content and content.strip():
-                    # Add source content to our consolidated context
                     consolidated_context += f"Source: {source_info['link']}\nContent: {content}\n\n---\n\n"
                     all_sources_used.append(source_info)
                     successful_sources += 1
-                    total_tokens += len(content.split())
+                    total_tokens += len(content.split())
                 else:
                     processing_errors += 1
 
@@ -480,14 +469,13 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
                 })
                 return
 
-            # Step 4: Synthesize report
+            # Step 4: Synthesize report
             time_remaining = max(0, RESEARCH_TIMEOUT - (time.time() - start_time))
             yield format_sse({
                 "event": "status",
                 "data": f"Synthesizing report with content from {successful_sources} sources..."
             })
 
-            # Estimate how many tokens we can generate based on remaining time
             max_output_tokens = min(1500, int(time_remaining * 5))
 
             report_prompt = f"""Compose a comprehensive research report on "{query}".
@@ -496,16 +484,15 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
 
 Key requirements:
 1. Start with an introduction that explains what {query} is and why it's important
-2. Include well-organized sections with clear headings
+2. Include well-organized sections with clear headings
 3. Cite specific information from sources where appropriate
 4. End with a conclusion that summarizes key findings and insights
 5. Keep the report concise but comprehensive
 
 Available information (summarized from {successful_sources} sources):
-{consolidated_context[:18000]}
+{consolidated_context[:18000]}
 
-Generate a report that is approximately {max_output_tokens//4} words long
-Focus on the most important and relevant information.
+Generate a report that is approximately {max_output_tokens//4} words long.
 """
 
             report_payload = {
@@ -515,11 +502,9 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
                 "max_tokens": max_output_tokens
             }
 
-            # Stream the report generation
             async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=report_payload) as response:
                 response.raise_for_status()
                 async for line in response.content:
-                    # Check if we're running out of time
                     if time.time() - start_time > RESEARCH_TIMEOUT:
                         yield format_sse({
                             "event": "warning",
@@ -539,13 +524,10 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
                                 content = choices[0].get("delta", {}).get("content")
                                 if content:
                                     yield format_sse({"event": "chunk", "data": content})
-                        except json.JSONDecodeError:
-                            continue
                         except Exception as e:
                             logging.warning(f"Error processing stream chunk: {e}")
                             continue
 
-            # Final status update
             duration = time.time() - start_time
             stats = {
                 "total_time_seconds": round(duration),
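A client-side sketch for consuming the stream. Both the /deep-research route and the assumption that format_sse emits standard `data: <json>` SSE lines are guesses; adjust to the actual route and framing in main.py:

```python
import asyncio
import json

import aiohttp


async def consume(query: str) -> None:
    url = "http://localhost:8000/deep-research"      # hypothetical route
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json={"query": query}) as resp:
            resp.raise_for_status()
            async for raw in resp.content:           # read the SSE stream line by line
                line = raw.decode().strip()
                if not line.startswith("data:"):
                    continue
                event = json.loads(line[len("data:"):])
                if event.get("event") == "chunk":
                    print(event["data"], end="", flush=True)
                else:
                    print(f"\n[{event.get('event')}] {event.get('data')}")


asyncio.run(consume("example topic"))
```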