rkihacker committed · Commit c830710 · verified · 1 Parent(s): 0d0aa07

Update main.py

Files changed (1)
  1. main.py +134 -77
main.py CHANGED
@@ -38,13 +38,15 @@ LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
  MAX_SOURCES_TO_PROCESS = 20 # Increased for more research
  MAX_CONCURRENT_REQUESTS = 2
  SEARCH_TIMEOUT = 300 # 5 minutes for longer research
- TOTAL_TIMEOUT = 600 # Increased to allow more time for generation
+ # Allow substantially longer overall time to enable large, multi-section outputs
+ TOTAL_TIMEOUT = 1800
  REQUEST_DELAY = 3.0
  RETRY_ATTEMPTS = 5
  RETRY_DELAY = 5.0
  USER_AGENT_ROTATION = True
- CONTEXT_WINDOW_SIZE = 10000000 # 10 million tokens
- MAX_CONTEXT_SIZE = 2000000 # Increased practical limit for prompt
+ # Context management
+ CONTEXT_WINDOW_SIZE = 10_000_000
+ MAX_CONTEXT_SIZE = 2_000_000

  # Initialize fake user agent generator
  try:
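
For reference, the new constants are consumed later in this same diff: the synthesis code slices the gathered context to MAX_CONTEXT_SIZE before prompting, and compares elapsed wall-clock time against TOTAL_TIMEOUT while streaming. A minimal sketch of that pattern; the helper names below are illustrative and not part of main.py:

import time

TOTAL_TIMEOUT = 1800          # overall wall-clock budget in seconds
MAX_CONTEXT_SIZE = 2_000_000  # practical cap on prompt context length

def trim_context(consolidated_context: str) -> str:
    # Mirrors consolidated_context[:MAX_CONTEXT_SIZE] used in the section prompts below.
    return consolidated_context[:MAX_CONTEXT_SIZE]

def out_of_time(start_time: float) -> bool:
    # Mirrors the time.time() - start_time > TOTAL_TIMEOUT checks in the streaming loop.
    return time.time() - start_time > TOTAL_TIMEOUT
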
@@ -331,13 +333,12 @@ async def generate_research_plan(query: str, session: aiohttp.ClientSession) ->
  "model": LLM_MODEL,
  "messages": [{
  "role": "user",
- "content": f"""Generate 4-6 comprehensive sub-questions for in-depth research on '{query}'.
+ "content": f"""Generate 4-8 comprehensive sub-questions for in-depth research on '{query}'.
  Focus on key aspects that would provide a complete understanding of the topic.
  Your response MUST be ONLY the raw JSON array with no additional text.
- Example: ["What is the historical background of X?", "What are the current trends in X?"]"""
+ Example: [\"What is the historical background of X?\", \"What are the current trends in X?\"]"""
  }],
- "temperature": 0.7,
- "max_tokens": 300
+ "temperature": 0.7
  }

  async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=plan_prompt, timeout=30) as response:
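
The plan prompt above asks the model to reply with a bare JSON array of sub-questions. A minimal sketch of one way a caller could parse such a reply defensively; the helper name and the bracket-extraction fallback are illustrative assumptions, not code from this commit:

import json
import re

def parse_sub_questions(raw_reply: str) -> list:
    # Expect a bare JSON array of strings, e.g. ["question 1", "question 2"].
    try:
        parsed = json.loads(raw_reply)
    except json.JSONDecodeError:
        # Fallback: grab the first [...] block if the model wrapped the array in prose.
        match = re.search(r"\[.*\]", raw_reply, re.DOTALL)
        if not match:
            return []
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError:
            return []
    if not isinstance(parsed, list):
        return []
    return [q for q in parsed if isinstance(q, str)]
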
@@ -637,93 +638,149 @@ async def run_deep_research_stream(query: str, search_time: int = 300) -> AsyncG
  })
  return

- time_remaining = max(0, TOTAL_TIMEOUT - (time.time() - start_time))
+ # Prepare numbered citations list for the model and a references block we'll emit at the end
+ sources_catalog = []
+ for idx, s in enumerate(all_sources_used, start=1):
+     title = s.get('title') or s.get('link')
+     sources_catalog.append({
+         "id": idx,
+         "title": title,
+         "url": s.get('link')
+     })
+
+ # Section-by-section long-form synthesis (streamed)
  yield format_sse({
      "event": "status",
-     "data": f"Synthesizing comprehensive report from {successful_sources} sources..."
+     "data": f"Synthesizing a long multi-section report from {successful_sources} sources..."
  })

- max_output_tokens = 16000 # Fixed to allow long response
-
- report_prompt = f"""Compose an in-depth analysis report on "{query}".
-
- Generate a very long, detailed report leveraging the large context window of 10 million tokens. Provide thorough, deep analysis with extensive details, examples, and insights in each section. Expand on each point with sub-sections, data, and comprehensive explanations to make the report as long and informative as possible, aiming for 5,000 to 10,000 words.
-
- Structure the report with these sections:
- 1. Introduction and Background
- 2. Key Features and Capabilities
- 3. Comparative Analysis with Alternatives
- 4. Current Developments and Trends
- 5. Challenges and Limitations
- 6. Future Outlook
- 7. Conclusion and Recommendations
-
- For each section, provide detailed analysis based on the source material.
- Include specific examples and data points from the sources when available.
- Compare and contrast different viewpoints from various sources.
-
- Use markdown formatting for headings, subheadings, lists, and emphasis.
- Cite sources where appropriate using inline citations like [1][2].
-
- Available information from {successful_sources} sources:
+ sections = [
+     {"key": "introduction", "title": "1. Introduction and Background", "target_words": 800},
+     {"key": "features", "title": "2. Key Features and Capabilities", "target_words": 900},
+     {"key": "comparative", "title": "3. Comparative Analysis with Alternatives", "target_words": 900},
+     {"key": "trends", "title": "4. Current Developments and Trends", "target_words": 900},
+     {"key": "challenges", "title": "5. Challenges and Limitations", "target_words": 900},
+     {"key": "future", "title": "6. Future Outlook", "target_words": 900},
+     {"key": "conclusion", "title": "7. Conclusion and Recommendations", "target_words": 600},
+ ]
+
+ # Common preface for all section prompts
+ preface = (
+     "You are a meticulous research assistant. Write the requested section in clear, structured markdown. "
+     "Use subheadings, bullet lists, and short paragraphs. Provide deep analysis, data points, and concrete examples. "
+     "When drawing from a listed source, include inline citations like [n] where n is the source number from the catalog. "
+     "Avoid repeating the section title at the top if already included. Do not include a references list inside the section."
+ )
+
+ catalog_md = "\n".join([f"[{s['id']}] {s['title']} — {s['url']}" for s in sources_catalog])
+
+ # Stream each section individually to achieve very long total output
+ for sec in sections:
+     if time.time() - start_time > TOTAL_TIMEOUT:
+         yield format_sse({
+             "event": "warning",
+             "data": "Time limit reached before completing all sections."
+         })
+         break
+
+     yield format_sse({"event": "section_start", "data": {"key": sec["key"], "title": sec["title"]}})
+
+     section_prompt = f"""
+ {preface}
+
+ Write the section titled: "{sec['title']}" (aim for ~{sec['target_words']} words, it's okay to exceed if valuable).
+
+ Topic: "{query}"
+
+ Sub-questions to consider (optional):
+ {json.dumps(sub_questions, ensure_ascii=False)}
+
+ Source Catalog (use inline citations like [1], [2]):
+ {catalog_md}
+
+ Evidence and notes from crawled sources (trimmed):
  {consolidated_context[:MAX_CONTEXT_SIZE]}
-
- Generate a comprehensive report of approximately 5,000 to 10,000 words.
- Focus on providing deep insights, analysis, and actionable information.
- """
-
- report_payload = {
-     "model": LLM_MODEL,
-     "messages": [{"role": "user", "content": report_prompt}],
-     "stream": True,
-     "max_tokens": max_output_tokens
- }
-
- async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=report_payload) as response:
-     if response.status != 200:
+ """
+
+     payload = {
+         "model": LLM_MODEL,
+         "messages": [
+             {"role": "system", "content": "You are an expert web research analyst and technical writer."},
+             {"role": "user", "content": section_prompt}
+         ],
+         "stream": True,
+         "temperature": 0.6
+     }
+
+     try:
+         async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=payload) as response:
+             if response.status != 200:
+                 yield format_sse({
+                     "event": "warning",
+                     "data": f"Section '{sec['title']}' failed to start (HTTP {response.status}). Skipping."
+                 })
+                 continue
+
+             buffer = ""
+             async for line in response.content:
+                 if time.time() - start_time > TOTAL_TIMEOUT:
+                     yield format_sse({
+                         "event": "warning",
+                         "data": "Time limit reached, halting section generation early."
+                     })
+                     break
+
+                 line_str = line.decode('utf-8', errors='ignore').strip()
+                 if line_str.startswith('data:'):
+                     line_str = line_str[5:].strip()
+                 if not line_str:
+                     continue
+                 if line_str == "[DONE]":
+                     if buffer:
+                         # Back-compat: emit raw chunk
+                         yield format_sse({"event": "chunk", "data": buffer})
+                         # New: emit section-tagged chunk
+                         yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                     break
+                 try:
+                     chunk = json.loads(line_str)
+                     choices = chunk.get("choices")
+                     if choices and isinstance(choices, list):
+                         delta = choices[0].get("delta", {})
+                         content = delta.get("content")
+                         if content:
+                             buffer += content
+                             if len(buffer) >= 400:
+                                 # Back-compat: emit raw chunk
+                                 yield format_sse({"event": "chunk", "data": buffer})
+                                 # New: emit section-tagged chunk
+                                 yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+                                 buffer = ""
+                 except json.JSONDecodeError:
+                     # Some providers send keep-alives or non-JSON noise; ignore
+                     continue
+                 except Exception as e:
+                     logging.warning(f"Error processing stream chunk: {e}")
+                     continue
+
+             if buffer:
+                 yield format_sse({"event": "chunk", "data": buffer})
+                 yield format_sse({"event": "section_chunk", "data": {"text": buffer, "section": sec["key"]}})
+
+         yield format_sse({"event": "section_end", "data": {"key": sec["key"], "title": sec["title"]}})
+     except Exception as e:
          yield format_sse({
-             "event": "error",
-             "data": f"Failed to generate report: HTTP {response.status}"
+             "event": "warning",
+             "data": f"Section '{sec['title']}' failed: {str(e)[:160]}"
          })
-         return
-
-     buffer = ""
-     async for line in response.content:
-         if time.time() - start_time > TOTAL_TIMEOUT:
-             yield format_sse({
-                 "event": "warning",
-                 "data": "Time limit reached, ending report generation early."
-             })
-             break
-
-         line_str = line.decode('utf-8').strip()
-         if line_str.startswith('data:'):
-             line_str = line_str[5:].strip()
-         if line_str == "[DONE]":
-             if buffer:
-                 yield format_sse({"event": "chunk", "data": buffer})
-             break
-         if not line_str:
-             continue # Skip empty lines
-         try:
-             chunk = json.loads(line_str)
-             choices = chunk.get("choices")
-             if choices and isinstance(choices, list) and len(choices) > 0:
-                 content = choices[0].get("delta", {}).get("content")
-                 if content:
-                     buffer += content
-                     if len(buffer) > 100:
-                         yield format_sse({"event": "chunk", "data": buffer})
-                         buffer = ""
-         except json.JSONDecodeError as e:
-             logging.warning(f"JSON decode error for line: {line_str} - {e}")
-             continue
-         except Exception as e:
-             logging.warning(f"Error processing stream chunk: {e}")
-             continue
-
-     if buffer:
-         yield format_sse({"event": "chunk", "data": buffer})
+
+ # Emit references as a final chunk for convenience
+ if sources_catalog:
+     refs_md_lines = ["\n\n## References"] + [
+         f"[{s['id']}] {s['title']} {s['url']}" for s in sources_catalog
+     ]
+     refs_md = "\n".join(refs_md_lines)
+     yield format_sse({"event": "chunk", "data": {"text": refs_md, "section": "references"}})

  duration = time.time() - start_time
  stats = {
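
Taken together, the new code emits a richer event stream: status, section_start, chunk, section_chunk, section_end, warning, and error events, with section_chunk payloads carrying {"text": ..., "section": ...}. Below is a minimal client sketch for consuming that stream; the /research endpoint path and the assumption that format_sse writes standard "data: <json>" SSE lines are illustrative guesses, not details confirmed by this commit.

import asyncio
import json
import aiohttp

async def consume_research_stream(base_url: str, query: str) -> None:
    async with aiohttp.ClientSession() as session:
        # Hypothetical endpoint; adjust to however main.py actually exposes run_deep_research_stream.
        async with session.get(f"{base_url}/research", params={"query": query}) as resp:
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8", errors="ignore").strip()
                if not line.startswith("data:"):
                    continue
                try:
                    event = json.loads(line[5:].strip())
                except json.JSONDecodeError:
                    continue  # ignore keep-alives or partial lines
                name, data = event.get("event"), event.get("data")
                if name == "section_start":
                    print(f"\n\n## {data['title']}\n")
                elif name == "section_chunk":
                    print(data["text"], end="", flush=True)
                elif name in ("status", "warning", "error"):
                    print(f"\n[{name}] {data}")

asyncio.run(consume_research_stream("http://localhost:8000", "example topic"))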