Update main.py
main.py (CHANGED)

@@ -38,13 +38,15 @@ LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
 MAX_SOURCES_TO_PROCESS = 20  # Increased for more research
 MAX_CONCURRENT_REQUESTS = 2
 SEARCH_TIMEOUT = 300  # 5 minutes for longer research
-
+# Allow substantially longer overall time to enable large, multi-section outputs
+TOTAL_TIMEOUT = 1800
 REQUEST_DELAY = 3.0
 RETRY_ATTEMPTS = 5
 RETRY_DELAY = 5.0
 USER_AGENT_ROTATION = True
-
-
+# Context management
+CONTEXT_WINDOW_SIZE = 10_000_000
+MAX_CONTEXT_SIZE = 2_000_000

 # Initialize fake user agent generator
 try:
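
Reviewer note: the new constants act as a budget pair. TOTAL_TIMEOUT caps wall-clock time for the whole run (the same guard is re-checked before each section and inside each streaming loop later in this diff), while MAX_CONTEXT_SIZE caps how many characters of crawled evidence get spliced into a prompt. A minimal sketch of that pattern, assuming a start_time captured when the run begins; the helper names here are illustrative, not from main.py:

    import time

    TOTAL_TIMEOUT = 1800          # seconds allowed for the whole research run
    MAX_CONTEXT_SIZE = 2_000_000  # max characters of evidence per prompt

    def out_of_time(start_time: float) -> bool:
        # Same guard the diff repeats before each section and stream chunk (illustrative helper).
        return time.time() - start_time > TOTAL_TIMEOUT

    def trim_evidence(consolidated_context: str) -> str:
        # Same clamp the diff applies via consolidated_context[:MAX_CONTEXT_SIZE].
        return consolidated_context[:MAX_CONTEXT_SIZE]
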
@@ -331,13 +333,12 @@ async def generate_research_plan(query: str, session: aiohttp.ClientSession) ->
         "model": LLM_MODEL,
         "messages": [{
             "role": "user",
-            "content": f"""Generate 4-
+            "content": f"""Generate 4-8 comprehensive sub-questions for in-depth research on '{query}'.
 Focus on key aspects that would provide a complete understanding of the topic.
 Your response MUST be ONLY the raw JSON array with no additional text.
-Example: ["What is the historical background of X
+Example: [\"What is the historical background of X?\", \"What are the current trends in X?\"]"""
         }],
-        "temperature": 0.7,
-        "max_tokens": 300
+        "temperature": 0.7
     }

     async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=plan_prompt, timeout=30) as response:
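
Since the prompt insists the reply be "ONLY the raw JSON array", the response still needs defensive parsing: models sometimes wrap the array in prose or code fences anyway. The diff shows only the prompt, not the parsing side, so the following is just a sketch of one way to harden it; parse_plan_response is a hypothetical helper, not code from main.py:

    import json
    import re

    def parse_plan_response(raw_text: str) -> list:
        # Hypothetical helper: extract the JSON array of sub-questions,
        # tolerating stray prose or fences around it.
        try:
            plan = json.loads(raw_text)
        except json.JSONDecodeError:
            match = re.search(r"\[.*\]", raw_text, re.DOTALL)
            if not match:
                return []
            try:
                plan = json.loads(match.group(0))
            except json.JSONDecodeError:
                return []
        if not isinstance(plan, list):
            return []
        return [q for q in plan if isinstance(q, str)]
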
@@ -637,93 +638,149 @@ async def run_deep_research_stream(query: str, search_time: int = 300) -> AsyncG
         })
         return

-
+    # Prepare numbered citations list for the model and a references block we'll emit at the end
+    sources_catalog = []
+    for idx, s in enumerate(all_sources_used, start=1):
+        title = s.get('title') or s.get('link')
+        sources_catalog.append({
+            "id": idx,
+            "title": title,
+            "url": s.get('link')
+        })
+
+    # Section-by-section long-form synthesis (streamed)
     yield format_sse({
         "event": "status",
-        "data": f"Synthesizing
+        "data": f"Synthesizing a long multi-section report from {successful_sources} sources..."
     })

-
+    sections = [
+        {"key": "introduction", "title": "1. Introduction and Background", "target_words": 800},
+        {"key": "features", "title": "2. Key Features and Capabilities", "target_words": 900},
+        {"key": "comparative", "title": "3. Comparative Analysis with Alternatives", "target_words": 900},
+        {"key": "trends", "title": "4. Current Developments and Trends", "target_words": 900},
+        {"key": "challenges", "title": "5. Challenges and Limitations", "target_words": 900},
+        {"key": "future", "title": "6. Future Outlook", "target_words": 900},
+        {"key": "conclusion", "title": "7. Conclusion and Recommendations", "target_words": 600},
+    ]
+
+    # Common preface for all section prompts
+    preface = (
+        "You are a meticulous research assistant. Write the requested section in clear, structured markdown. "
+        "Use subheadings, bullet lists, and short paragraphs. Provide deep analysis, data points, and concrete examples. "
+        "When drawing from a listed source, include inline citations like [n] where n is the source number from the catalog. "
+        "Avoid repeating the section title at the top if already included. Do not include a references list inside the section."
+    )
+
+    catalog_md = "\n".join([f"[{s['id']}] {s['title']} — {s['url']}" for s in sources_catalog])

-
+    # Stream each section individually to achieve very long total output
+    for sec in sections:
+        if time.time() - start_time > TOTAL_TIMEOUT:
+            yield format_sse({
+                "event": "warning",
+                "data": "Time limit reached before completing all sections."
+            })
+            break

-
+        yield format_sse({"event": "section_start", "data": {"key": sec["key"], "title": sec["title"]}})

-
-
-2. Key Features and Capabilities
-3. Comparative Analysis with Alternatives
-4. Current Developments and Trends
-5. Challenges and Limitations
-6. Future Outlook
-7. Conclusion and Recommendations
+        section_prompt = f"""
+{preface}

-
-Include specific examples and data points from the sources when available.
-Compare and contrast different viewpoints from various sources.
+Write the section titled: "{sec['title']}" (aim for ~{sec['target_words']} words, it's okay to exceed if valuable).

-
-Cite sources where appropriate using inline citations like [1][2].
+Topic: "{query}"

-
+Sub-questions to consider (optional):
+{json.dumps(sub_questions, ensure_ascii=False)}
+
+Source Catalog (use inline citations like [1], [2]):
+{catalog_md}
+
+Evidence and notes from crawled sources (trimmed):
 {consolidated_context[:MAX_CONTEXT_SIZE]}
+"""
+
+        payload = {
+            "model": LLM_MODEL,
+            "messages": [
+                {"role": "system", "content": "You are an expert web research analyst and technical writer."},
+                {"role": "user", "content": section_prompt}
+            ],
+            "stream": True,
+            "temperature": 0.6
+        }

-
-
-
+        try:
+            async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=payload) as response:
+                if response.status != 200:
+                    yield format_sse({
+                        "event": "warning",
+                        "data": f"Section '{sec['title']}' failed to start (HTTP {response.status}). Skipping."
+                    })
+                    continue
+
+                buffer = ""
+                async for line in response.content:
+                    if time.time() - start_time > TOTAL_TIMEOUT:
+                        yield format_sse({
+                            "event": "warning",
+                            "data": "Time limit reached, halting section generation early."
+                        })
+                        break

-
-
-
-
-
-
+                    line_str = line.decode('utf-8', errors='ignore').strip()
+                    if line_str.startswith('data:'):
+                        line_str = line_str[5:].strip()
+                    if not line_str:
+                        continue
+                    if line_str == "[DONE]":
+                        if buffer:
+                            # Back-compat: emit raw chunk
+                            yield format_sse({"event": "chunk", "data": buffer})
+                            # New: emit section-tagged chunk
+                            yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                        break
+                    try:
+                        chunk = json.loads(line_str)
+                        choices = chunk.get("choices")
+                        if choices and isinstance(choices, list):
+                            delta = choices[0].get("delta", {})
+                            content = delta.get("content")
+                            if content:
+                                buffer += content
+                                if len(buffer) >= 400:
+                                    # Back-compat: emit raw chunk
+                                    yield format_sse({"event": "chunk", "data": buffer})
+                                    # New: emit section-tagged chunk
+                                    yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                                    buffer = ""
+                    except json.JSONDecodeError:
+                        # Some providers send keep-alives or non-JSON noise; ignore
+                        continue
+                    except Exception as e:
+                        logging.warning(f"Error processing stream chunk: {e}")
+                        continue
+
+                if buffer:
+                    yield format_sse({"event": "chunk", "data": buffer})
+                    yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})

-
-
+            yield format_sse({"event": "section_end", "data": {"key": sec["key"], "title": sec["title"]}})
+        except Exception as e:
             yield format_sse({
-                "event": "
-                "data": f"
+                "event": "warning",
+                "data": f"Section '{sec['title']}' failed: {str(e)[:160]}"
             })
-            return
-
-        buffer = ""
-        async for line in response.content:
-            if time.time() - start_time > TOTAL_TIMEOUT:
-                yield format_sse({
-                    "event": "warning",
-                    "data": "Time limit reached, ending report generation early."
-                })
-                break

-
-
-
-
-
-
-
-            if not line_str:
-                continue  # Skip empty lines
-            try:
-                chunk = json.loads(line_str)
-                choices = chunk.get("choices")
-                if choices and isinstance(choices, list) and len(choices) > 0:
-                    content = choices[0].get("delta", {}).get("content")
-                    if content:
-                        buffer += content
-                        if len(buffer) > 100:
-                            yield format_sse({"event": "chunk", "data": buffer})
-                            buffer = ""
-            except json.JSONDecodeError as e:
-                logging.warning(f"JSON decode error for line: {line_str} - {e}")
-                continue
-            except Exception as e:
-                logging.warning(f"Error processing stream chunk: {e}")
-                continue
-
-        if buffer:
-            yield format_sse({"event": "chunk", "data": buffer})
+    # Emit references as a final chunk for convenience
+    if sources_catalog:
+        refs_md_lines = ["\n\n## References"] + [
+            f"[{s['id']}] {s['title']} — {s['url']}" for s in sources_catalog
+        ]
+        refs_md = "\n".join(refs_md_lines)
+        yield format_sse({"event": "chunk", "data": {"text": refs_md, "section": "references"}})

     duration = time.time() - start_time
     stats = {
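
Every event above goes through a format_sse helper that is not part of this diff. Judging from the call sites, "data" may be either a plain string or a nested dict, so the helper presumably JSON-encodes the whole message into a single Server-Sent Events frame. A minimal sketch consistent with those call sites; the real helper in main.py may differ:

    import json

    def format_sse(message: dict) -> str:
        # One SSE frame per message: "data: <json>\n\n".
        # Sketch only: main.py's actual implementation is not shown in this diff.
        return f"data: {json.dumps(message, ensure_ascii=False)}\n\n"
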