rkihacker committed · Commit c830710 · verified · 1 Parent(s): 0d0aa07

Update main.py

Files changed (1)
  1. main.py +134 -77
main.py CHANGED
@@ -38,13 +38,15 @@ LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
  MAX_SOURCES_TO_PROCESS = 20 # Increased for more research
  MAX_CONCURRENT_REQUESTS = 2
  SEARCH_TIMEOUT = 300 # 5 minutes for longer research
- TOTAL_TIMEOUT = 600 # Increased to allow more time for generation
+ # Allow substantially longer overall time to enable large, multi-section outputs
+ TOTAL_TIMEOUT = 1800
  REQUEST_DELAY = 3.0
  RETRY_ATTEMPTS = 5
  RETRY_DELAY = 5.0
  USER_AGENT_ROTATION = True
- CONTEXT_WINDOW_SIZE = 10000000 # 10 million tokens
- MAX_CONTEXT_SIZE = 2000000 # Increased practical limit for prompt
+ # Context management
+ CONTEXT_WINDOW_SIZE = 10_000_000
+ MAX_CONTEXT_SIZE = 2_000_000

  # Initialize fake user agent generator
  try:
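
For reference, the new constants are consumed later in this same diff: the synthesis code slices the gathered context to MAX_CONTEXT_SIZE before prompting, and compares elapsed wall-clock time against TOTAL_TIMEOUT while streaming. A minimal sketch of that pattern; the helper names below are illustrative and not part of main.py:

import time

TOTAL_TIMEOUT = 1800          # overall wall-clock budget in seconds
MAX_CONTEXT_SIZE = 2_000_000  # practical cap on prompt context length

def trim_context(consolidated_context: str) -> str:
    # Mirrors consolidated_context[:MAX_CONTEXT_SIZE] used in the section prompts below.
    return consolidated_context[:MAX_CONTEXT_SIZE]

def out_of_time(start_time: float) -> bool:
    # Mirrors the time.time() - start_time > TOTAL_TIMEOUT checks in the streaming loop.
    return time.time() - start_time > TOTAL_TIMEOUT
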
@@ -331,13 +333,12 @@ async def generate_research_plan(query: str, session: aiohttp.ClientSession) ->
  "model": LLM_MODEL,
  "messages": [{
  "role": "user",
- "content": f"""Generate 4-6 comprehensive sub-questions for in-depth research on '{query}'.
+ "content": f"""Generate 4-8 comprehensive sub-questions for in-depth research on '{query}'.
  Focus on key aspects that would provide a complete understanding of the topic.
  Your response MUST be ONLY the raw JSON array with no additional text.
- Example: ["What is the historical background of X?", "What are the current trends in X?"]"""
+ Example: [\"What is the historical background of X?\", \"What are the current trends in X?\"]"""
  }],
- "temperature": 0.7,
- "max_tokens": 300
+ "temperature": 0.7
  }

  async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=plan_prompt, timeout=30) as response:
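
The plan prompt above asks the model to reply with a bare JSON array of sub-questions. A minimal sketch of one way a caller could parse such a reply defensively; the helper name and the bracket-extraction fallback are illustrative assumptions, not code from this commit:

import json
import re

def parse_sub_questions(raw_reply: str) -> list:
    # Expect a bare JSON array of strings, e.g. ["question 1", "question 2"].
    try:
        parsed = json.loads(raw_reply)
    except json.JSONDecodeError:
        # Fallback: grab the first [...] block if the model wrapped the array in prose.
        match = re.search(r"\[.*\]", raw_reply, re.DOTALL)
        if not match:
            return []
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError:
            return []
    if not isinstance(parsed, list):
        return []
    return [q for q in parsed if isinstance(q, str)]
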
@@ -637,93 +638,149 @@ async def run_deep_research_stream(query: str, search_time: int = 300) -> AsyncG
  })
  return

- time_remaining = max(0, TOTAL_TIMEOUT - (time.time() - start_time))
+ # Prepare numbered citations list for the model and a references block we'll emit at the end
+ sources_catalog = []
+ for idx, s in enumerate(all_sources_used, start=1):
+     title = s.get('title') or s.get('link')
+     sources_catalog.append({
+         "id": idx,
+         "title": title,
+         "url": s.get('link')
+     })
+
+ # Section-by-section long-form synthesis (streamed)
  yield format_sse({
      "event": "status",
-     "data": f"Synthesizing comprehensive report from {successful_sources} sources..."
+     "data": f"Synthesizing a long multi-section report from {successful_sources} sources..."
  })

- max_output_tokens = 16000 # Fixed to allow long response
-
- report_prompt = f"""Compose an in-depth analysis report on "{query}".
-
- Generate a very long, detailed report leveraging the large context window of 10 million tokens. Provide thorough, deep analysis with extensive details, examples, and insights in each section. Expand on each point with sub-sections, data, and comprehensive explanations to make the report as long and informative as possible, aiming for 5,000 to 10,000 words.
-
- Structure the report with these sections:
- 1. Introduction and Background
- 2. Key Features and Capabilities
- 3. Comparative Analysis with Alternatives
- 4. Current Developments and Trends
- 5. Challenges and Limitations
- 6. Future Outlook
- 7. Conclusion and Recommendations
-
- For each section, provide detailed analysis based on the source material.
- Include specific examples and data points from the sources when available.
- Compare and contrast different viewpoints from various sources.
-
- Use markdown formatting for headings, subheadings, lists, and emphasis.
- Cite sources where appropriate using inline citations like [1][2].
-
- Available information from {successful_sources} sources:
+ sections = [
+     {"key": "introduction", "title": "1. Introduction and Background", "target_words": 800},
+     {"key": "features", "title": "2. Key Features and Capabilities", "target_words": 900},
+     {"key": "comparative", "title": "3. Comparative Analysis with Alternatives", "target_words": 900},
+     {"key": "trends", "title": "4. Current Developments and Trends", "target_words": 900},
+     {"key": "challenges", "title": "5. Challenges and Limitations", "target_words": 900},
+     {"key": "future", "title": "6. Future Outlook", "target_words": 900},
+     {"key": "conclusion", "title": "7. Conclusion and Recommendations", "target_words": 600},
+ ]
+
+ # Common preface for all section prompts
+ preface = (
+     "You are a meticulous research assistant. Write the requested section in clear, structured markdown. "
+     "Use subheadings, bullet lists, and short paragraphs. Provide deep analysis, data points, and concrete examples. "
+     "When drawing from a listed source, include inline citations like [n] where n is the source number from the catalog. "
+     "Avoid repeating the section title at the top if already included. Do not include a references list inside the section."
+ )
+
+ catalog_md = "\n".join([f"[{s['id']}] {s['title']} — {s['url']}" for s in sources_catalog])
+
+ # Stream each section individually to achieve very long total output
+ for sec in sections:
+     if time.time() - start_time > TOTAL_TIMEOUT:
+         yield format_sse({
+             "event": "warning",
+             "data": "Time limit reached before completing all sections."
+         })
+         break
+
+     yield format_sse({"event": "section_start", "data": {"key": sec["key"], "title": sec["title"]}})
+
+     section_prompt = f"""
+ {preface}
+
+ Write the section titled: "{sec['title']}" (aim for ~{sec['target_words']} words, it's okay to exceed if valuable).
+
+ Topic: "{query}"
+
+ Sub-questions to consider (optional):
+ {json.dumps(sub_questions, ensure_ascii=False)}
+
+ Source Catalog (use inline citations like [1], [2]):
+ {catalog_md}
+
+ Evidence and notes from crawled sources (trimmed):
  {consolidated_context[:MAX_CONTEXT_SIZE]}
-
- Generate a comprehensive report of approximately 5,000 to 10,000 words.
- Focus on providing deep insights, analysis, and actionable information.
- """
-
- report_payload = {
-     "model": LLM_MODEL,
-     "messages": [{"role": "user", "content": report_prompt}],
-     "stream": True,
-     "max_tokens": max_output_tokens
- }
-
- async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=report_payload) as response:
-     if response.status != 200:
+ """
+
+     payload = {
+         "model": LLM_MODEL,
+         "messages": [
+             {"role": "system", "content": "You are an expert web research analyst and technical writer."},
+             {"role": "user", "content": section_prompt}
+         ],
+         "stream": True,
+         "temperature": 0.6
+     }
+
+     try:
+         async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=payload) as response:
+             if response.status != 200:
+                 yield format_sse({
+                     "event": "warning",
+                     "data": f"Section '{sec['title']}' failed to start (HTTP {response.status}). Skipping."
+                 })
+                 continue
+
+             buffer = ""
+             async for line in response.content:
+                 if time.time() - start_time > TOTAL_TIMEOUT:
+                     yield format_sse({
+                         "event": "warning",
+                         "data": "Time limit reached, halting section generation early."
+                     })
+                     break
+
+                 line_str = line.decode('utf-8', errors='ignore').strip()
+                 if line_str.startswith('data:'):
+                     line_str = line_str[5:].strip()
+                 if not line_str:
+                     continue
+                 if line_str == "[DONE]":
+                     if buffer:
+                         # Back-compat: emit raw chunk
+                         yield format_sse({"event": "chunk", "data": buffer})
+                         # New: emit section-tagged chunk
+                         yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                     break
+                 try:
+                     chunk = json.loads(line_str)
+                     choices = chunk.get("choices")
+                     if choices and isinstance(choices, list):
+                         delta = choices[0].get("delta", {})
+                         content = delta.get("content")
+                         if content:
+                             buffer += content
+                             if len(buffer) >= 400:
+                                 # Back-compat: emit raw chunk
+                                 yield format_sse({"event": "chunk", "data": buffer})
+                                 # New: emit section-tagged chunk
+                                 yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                                 buffer = ""
+                 except json.JSONDecodeError:
+                     # Some providers send keep-alives or non-JSON noise; ignore
+                     continue
+                 except Exception as e:
+                     logging.warning(f"Error processing stream chunk: {e}")
+                     continue
+
+             if buffer:
+                 yield format_sse({"event": "chunk", "data": buffer})
+                 yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+
+         yield format_sse({"event": "section_end", "data": {"key": sec["key"], "title": sec["title"]}})
+     except Exception as e:
          yield format_sse({
-             "event": "error",
-             "data": f"Failed to generate report: HTTP {response.status}"
+             "event": "warning",
+             "data": f"Section '{sec['title']}' failed: {str(e)[:160]}"
          })
-         return
-
-     buffer = ""
-     async for line in response.content:
-         if time.time() - start_time > TOTAL_TIMEOUT:
-             yield format_sse({
-                 "event": "warning",
-                 "data": "Time limit reached, ending report generation early."
-             })
-             break
-
-         line_str = line.decode('utf-8').strip()
-         if line_str.startswith('data:'):
-             line_str = line_str[5:].strip()
-         if line_str == "[DONE]":
-             if buffer:
-                 yield format_sse({"event": "chunk", "data": buffer})
-             break
-         if not line_str:
-             continue # Skip empty lines
-         try:
-             chunk = json.loads(line_str)
-             choices = chunk.get("choices")
-             if choices and isinstance(choices, list) and len(choices) > 0:
-                 content = choices[0].get("delta", {}).get("content")
-                 if content:
-                     buffer += content
-                     if len(buffer) > 100:
-                         yield format_sse({"event": "chunk", "data": buffer})
-                         buffer = ""
-         except json.JSONDecodeError as e:
-             logging.warning(f"JSON decode error for line: {line_str} - {e}")
-             continue
-         except Exception as e:
-             logging.warning(f"Error processing stream chunk: {e}")
-             continue
-
-     if buffer:
-         yield format_sse({"event": "chunk", "data": buffer})
+
+ # Emit references as a final chunk for convenience
+ if sources_catalog:
+     refs_md_lines = ["\n\n## References"] + [
+         f"[{s['id']}] {s['title']} {s['url']}" for s in sources_catalog
+     ]
+     refs_md = "\n".join(refs_md_lines)
+     yield format_sse({"event": "chunk", "data": {"text": refs_md, "section": "references"}})

  duration = time.time() - start_time
  stats = {
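
Taken together, the new code emits a richer event stream: status, section_start, chunk, section_chunk, section_end, warning, and error events, with section_chunk payloads carrying {"text": ..., "section": ...}. Below is a minimal client sketch for consuming that stream; the /research endpoint path and the assumption that format_sse writes standard "data: <json>" SSE lines are illustrative guesses, not details confirmed by this commit.

import asyncio
import json
import aiohttp

async def consume_research_stream(base_url: str, query: str) -> None:
    async with aiohttp.ClientSession() as session:
        # Hypothetical endpoint; adjust to however main.py actually exposes run_deep_research_stream.
        async with session.get(f"{base_url}/research", params={"query": query}) as resp:
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8", errors="ignore").strip()
                if not line.startswith("data:"):
                    continue
                try:
                    event = json.loads(line[5:].strip())
                except json.JSONDecodeError:
                    continue  # ignore keep-alives or partial lines
                name, data = event.get("event"), event.get("data")
                if name == "section_start":
                    print(f"\n\n## {data['title']}\n")
                elif name == "section_chunk":
                    print(data["text"], end="", flush=True)
                elif name in ("status", "warning", "error"):
                    print(f"\n[{name}] {data}")

asyncio.run(consume_research_stream("http://localhost:8000", "example topic"))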