Spaces:

rkihacker
/

Scrap

Paused

App Files Files Community

rkihacker committed on Sep 15

Commit

5b2a6b6

verified ·

1 Parent(s): 0e14740

Update main.py

Browse files

Files changed (1) hide show

main.py +42 -29

main.py CHANGED Viewed

@@ -4,7 +4,7 @@ import json
 import logging
 from typing import AsyncGenerator
-from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from dotenv import load_dotenv
@@ -18,36 +18,39 @@ logger = logging.getLogger(__name__)
 load_dotenv()
 LLM_API_KEY = os.getenv("LLM_API_KEY")
-# ***** CHANGE 1: Add API Key loading confirmation *****
 if not LLM_API_KEY:
     raise RuntimeError("LLM_API_KEY must be set in a .env file.")
 else:
     logger.info(f"LLM API Key loaded successfully (starts with: {LLM_API_KEY[:4]}...).")
-# API URLs, Models, and a new constant for context size
 SNAPZION_API_URL = "https://search.snapzion.com/get-snippets"
 LLM_API_URL = "https://api.inference.net/v1/chat/completions"
-LLM_MODEL = "mistralai/mistral-nemo-12b-instruct/fp-8"
-MAX_CONTEXT_CHAR_LENGTH = 120000 # Safeguard: roughly 30k tokens
 # Headers for external services
 SNAPZION_HEADERS = { 'Content-Type': 'application/json', 'User-Agent': 'AI-Deep-Research-Agent/1.0' }
 SCRAPING_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36' }
-LLM_HEADERS = { "Authorization": f"Bearer {LLM_API_KEY}", "Content-Type": "application/json" }
-# --- Pydantic Models for Request Body ---
 class DeepResearchRequest(BaseModel):
     query: str
-# --- FastAPI App Initialization ---
 app = FastAPI(
     title="AI Deep Research API",
-    description="Provides single-shot AI search and streaming deep research completions.",
-    version="2.1.0" # Version bump for new robustness feature
 )
 # --- Core Service Functions (Unchanged) ---
 async def call_snapzion_search(session: aiohttp.ClientSession, query: str) -> list:
     try:
         async with session.post(SNAPZION_API_URL, headers=SNAPZION_HEADERS, json={"query": query}, timeout=15) as response:
@@ -76,10 +79,8 @@ async def search_and_scrape(session: aiohttp.ClientSession, query: str) -> tuple
     search_results = await call_snapzion_search(session, query)
     sources = search_results[:4]
     if not sources: return "", []
     scrape_tasks = [scrape_url(session, source["link"]) for source in sources]
     scraped_contents = await asyncio.gather(*scrape_tasks)
     context = "\n\n".join(
         f"Source Details: Title '{sources[i]['title']}', URL '{sources[i]['link']}'\nContent:\n{content}"
         for i, content in enumerate(scraped_contents) if not content.startswith("Error:")
@@ -95,20 +96,38 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
     try:
         async with aiohttp.ClientSession() as session:
-            # Step 1: Generate Sub-Questions (Unchanged)
             yield format_sse({"event": "status", "data": "Generating research plan..."})
             sub_question_prompt = {
                 "model": LLM_MODEL,
-                "messages": [{ "role": "user", "content": f"You are a research planner. For the topic '{query}', create a JSON array of 3-4 key sub-questions for a research report. Example: [\"Question 1?\", \"Question 2?\"]" }]
             }
-            async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=sub_question_prompt) as response:
-                response.raise_for_status()
-                result = await response.json()
-                sub_questions = json.loads(result['choices'][0]['message']['content'])
             yield format_sse({"event": "plan", "data": sub_questions})
-            # Step 2: Concurrently research all sub-questions (Unchanged)
             research_tasks = [search_and_scrape(session, sq) for sq in sub_questions]
             all_research_results = []
@@ -123,8 +142,6 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
             all_sources = [source for res in all_research_results for source in res[1]]
             unique_sources = list({s['link']: s for s in all_sources}.values())
-            # ***** CHANGE 2: Implement the context truncation safeguard *****
-            logger.info(f"Consolidated context size: {len(full_context)} characters.")
             if len(full_context) > MAX_CONTEXT_CHAR_LENGTH:
                 logger.warning(f"Context is too long. Truncating from {len(full_context)} to {MAX_CONTEXT_CHAR_LENGTH} characters.")
                 full_context = full_context[:MAX_CONTEXT_CHAR_LENGTH]
@@ -140,14 +157,11 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
             final_report_payload = {"model": LLM_MODEL, "messages": [{"role": "user", "content": final_report_prompt}], "stream": True}
             async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=final_report_payload) as response:
-                # ***** CHANGE 3: More robust error handling for the streaming call *****
                 if response.status != 200:
                     error_text = await response.text()
-                    logger.error(f"LLM API returned a non-200 status: {response.status} - {error_text}")
-                    raise Exception(f"LLM API Error: {response.status}, {error_text}")
                 async for line in response.content:
-                    # (Rest of the streaming logic is the same)
                     if line.strip():
                         line_str = line.decode('utf-8').strip()
                         if line_str.startswith('data:'): line_str = line_str[5:].strip()
@@ -166,7 +180,6 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
     finally:
         yield format_sse({"event": "done", "data": "Deep research complete."})
 # --- API Endpoints ---
 @app.post("/v1/deepresearch/completions")
 async def deep_research_endpoint(request: DeepResearchRequest):

 import logging
 from typing import AsyncGenerator
+from fastapi import FastAPI
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from dotenv import load_dotenv
 load_dotenv()
 LLM_API_KEY = os.getenv("LLM_API_KEY")
 if not LLM_API_KEY:
     raise RuntimeError("LLM_API_KEY must be set in a .env file.")
 else:
     logger.info(f"LLM API Key loaded successfully (starts with: {LLM_API_KEY[:4]}...).")
+# API URLs, Models, and context size limit
 SNAPZION_API_URL = "https://search.snapzion.com/get-snippets"
 LLM_API_URL = "https://api.inference.net/v1/chat/completions"
+LLM_MODEL = "mistralai/mistral-nemo-12b-instruct/fp-8" # Corrected model name from previous attempts
+MAX_CONTEXT_CHAR_LENGTH = 120000
 # Headers for external services
 SNAPZION_HEADERS = { 'Content-Type': 'application/json', 'User-Agent': 'AI-Deep-Research-Agent/1.0' }
 SCRAPING_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36' }
+# ***** CHANGE 1: Add a User-Agent to the LLM headers *****
+LLM_HEADERS = {
+    "Authorization": f"Bearer {LLM_API_KEY}",
+    "Content-Type": "application/json",
+    "User-Agent": "AI-Deep-Research-Client/2.2"
+}
+# --- Pydantic Models ---
 class DeepResearchRequest(BaseModel):
     query: str
+# --- FastAPI App ---
 app = FastAPI(
     title="AI Deep Research API",
+    description="Provides streaming deep research completions.",
+    version="2.2.0" # Version bump for critical bug fix
 )
 # --- Core Service Functions (Unchanged) ---
 async def call_snapzion_search(session: aiohttp.ClientSession, query: str) -> list:
     try:
         async with session.post(SNAPZION_API_URL, headers=SNAPZION_HEADERS, json={"query": query}, timeout=15) as response:
     search_results = await call_snapzion_search(session, query)
     sources = search_results[:4]
     if not sources: return "", []
     scrape_tasks = [scrape_url(session, source["link"]) for source in sources]
     scraped_contents = await asyncio.gather(*scrape_tasks)
     context = "\n\n".join(
         f"Source Details: Title '{sources[i]['title']}', URL '{sources[i]['link']}'\nContent:\n{content}"
         for i, content in enumerate(scraped_contents) if not content.startswith("Error:")
     try:
         async with aiohttp.ClientSession() as session:
+            # Step 1: Generate Sub-Questions
             yield format_sse({"event": "status", "data": "Generating research plan..."})
             sub_question_prompt = {
                 "model": LLM_MODEL,
+                "messages": [{ "role": "user", "content": f"You are a research planner. For the topic '{query}', create a JSON array of 3-4 key sub-questions for a research report. Respond ONLY with the JSON array. Example: [\"Question 1?\", \"Question 2?\"]" }]
             }
+            # ***** CHANGE 2: Implement robust parsing for the API call *****
+            try:
+                async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=sub_question_prompt, timeout=20) as response:
+                    if response.status != 200:
+                        error_text = await response.text()
+                        logger.error(f"LLM API for planning failed with status {response.status}: {error_text}")
+                        raise Exception(f"LLM API returned non-200 status: {response.status}")
+                    raw_response_text = await response.text()
+                    if not raw_response_text:
+                        raise Exception("LLM API returned an empty response.")
+                    result = json.loads(raw_response_text)
+                    llm_content = result['choices'][0]['message']['content']
+                    sub_questions = json.loads(llm_content)
+            except Exception as e:
+                logger.error(f"Failed to generate or parse research plan: {e}")
+                yield format_sse({"event": "error", "data": f"Could not generate research plan. Reason: {e}"})
+                return # Stop the process if planning fails
             yield format_sse({"event": "plan", "data": sub_questions})
+            # (The rest of the logic remains the same)
+            # Step 2: Concurrently research all sub-questions
             research_tasks = [search_and_scrape(session, sq) for sq in sub_questions]
             all_research_results = []
             all_sources = [source for res in all_research_results for source in res[1]]
             unique_sources = list({s['link']: s for s in all_sources}.values())
             if len(full_context) > MAX_CONTEXT_CHAR_LENGTH:
                 logger.warning(f"Context is too long. Truncating from {len(full_context)} to {MAX_CONTEXT_CHAR_LENGTH} characters.")
                 full_context = full_context[:MAX_CONTEXT_CHAR_LENGTH]
             final_report_payload = {"model": LLM_MODEL, "messages": [{"role": "user", "content": final_report_prompt}], "stream": True}
             async with session.post(LLM_API_URL, headers=LLM_HEADERS, json=final_report_payload) as response:
                 if response.status != 200:
                     error_text = await response.text()
+                    raise Exception(f"LLM API Error for final report: {response.status}, {error_text}")
                 async for line in response.content:
                     if line.strip():
                         line_str = line.decode('utf-8').strip()
                         if line_str.startswith('data:'): line_str = line_str[5:].strip()
     finally:
         yield format_sse({"event": "done", "data": "Deep research complete."})
 # --- API Endpoints ---
 @app.post("/v1/deepresearch/completions")
 async def deep_research_endpoint(request: DeepResearchRequest):