Update main.py
Browse files
main.py
CHANGED
|
@@ -13,6 +13,7 @@ from pydantic import BaseModel
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
import aiohttp
|
| 15 |
from bs4 import BeautifulSoup
|
|
|
|
| 16 |
|
| 17 |
# --- Configuration ---
|
| 18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -27,9 +28,8 @@ else:
|
|
| 27 |
logger.info("LLM API Key loaded successfully.")
|
| 28 |
|
| 29 |
# --- Constants & Headers ---
|
| 30 |
-
SEARCH_API_URL = "https://search.privateinstance.com/api/text"
|
| 31 |
LLM_API_URL = "https://api.typegpt.net/v1/chat/completions"
|
| 32 |
-
LLM_MODEL = "
|
| 33 |
MAX_SOURCES_TO_PROCESS = 15
|
| 34 |
|
| 35 |
# Real Browser User Agents for SCRAPING
|
|
@@ -46,8 +46,8 @@ class DeepResearchRequest(BaseModel):
|
|
| 46 |
|
| 47 |
app = FastAPI(
|
| 48 |
title="AI Deep Research API",
|
| 49 |
-
description="Provides robust, long-form, streaming deep research completions using the
|
| 50 |
-
version="9.
|
| 51 |
)
|
| 52 |
|
| 53 |
# Enable CORS for all origins
|
|
@@ -62,29 +62,26 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
|
|
| 62 |
return None
|
| 63 |
|
| 64 |
# --- Core Service Functions ---
|
| 65 |
-
async def
|
| 66 |
-
"""Performs a search using the
|
| 67 |
-
|
| 68 |
-
logger.info(f"Searching PrivateInstance API for: '{query}'")
|
| 69 |
try:
|
| 70 |
-
async with
|
| 71 |
-
|
| 72 |
-
data = await response.json()
|
| 73 |
-
raw_results = data if isinstance(data, list) else data.get('results', [])
|
| 74 |
|
| 75 |
-
#
|
| 76 |
results = [
|
| 77 |
{
|
| 78 |
'title': r.get('title'),
|
| 79 |
-
'link': r.get('href'),
|
| 80 |
-
'snippet': r.get('body')
|
| 81 |
}
|
| 82 |
for r in raw_results if r.get('href') and r.get('title') and r.get('body')
|
| 83 |
]
|
| 84 |
-
logger.info(f"Found {len(results)} sources from
|
| 85 |
return results
|
| 86 |
except Exception as e:
|
| 87 |
-
logger.error(f"
|
| 88 |
|
| 89 |
async def research_and_process_source(session: aiohttp.ClientSession, source: dict) -> Tuple[str, dict]:
|
| 90 |
headers = {'User-Agent': random.choice(USER_AGENTS)}
|
|
@@ -121,7 +118,7 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
|
|
| 121 |
yield format_sse({"event": "plan", "data": sub_questions})
|
| 122 |
|
| 123 |
yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
|
| 124 |
-
search_tasks = [
|
| 125 |
all_search_results = await asyncio.gather(*search_tasks)
|
| 126 |
unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
|
| 127 |
|
|
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
import aiohttp
|
| 15 |
from bs4 import BeautifulSoup
|
| 16 |
+
from duckduckgo_search import AsyncDDGS
|
| 17 |
|
| 18 |
# --- Configuration ---
|
| 19 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 28 |
logger.info("LLM API Key loaded successfully.")
|
| 29 |
|
| 30 |
# --- Constants & Headers ---
|
|
|
|
| 31 |
# Endpoint for chat-completion requests.
LLM_API_URL = "https://api.typegpt.net/v1/chat/completions"
# Model identifier sent with every completion request.
LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
# Cap on how many scraped sources are fed into a single research run.
MAX_SOURCES_TO_PROCESS = 15
|
| 34 |
|
| 35 |
# Real Browser User Agents for SCRAPING
|
|
|
|
| 46 |
|
| 47 |
# FastAPI application object; routes and middleware are attached to it below.
app = FastAPI(
    title="AI Deep Research API",
    description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Search API.",
    version="9.1.0",  # bumped for the DuckDuckGo integration
)
|
| 52 |
|
| 53 |
# Enable CORS for all origins
|
|
|
|
| 62 |
return None
|
| 63 |
|
| 64 |
# --- Core Service Functions ---
|
| 65 |
+
async def call_duckduckgo_search(query: str, max_results: int = 10) -> List[dict]:
    """Search DuckDuckGo and normalize the results for downstream processing.

    Args:
        query: Free-text search query (typically one generated sub-question).
        max_results: Upper bound on results requested from DuckDuckGo.

    Returns:
        A list of dicts with ``title``, ``link`` and ``snippet`` keys, mapped
        from DuckDuckGo's ``title``/``href``/``body`` fields. Entries missing
        any of those fields are dropped. On ANY failure an empty list is
        returned (deliberate best-effort: one failed sub-question search must
        not abort the whole research run).
    """
    logger.info("Searching DuckDuckGo API for: '%s'", query)
    try:
        async with AsyncDDGS() as ddgs:
            # NOTE(review): assumes a duckduckgo_search version where
            # AsyncDDGS.text() yields results asynchronously — confirm the
            # pinned package version supports `async for` here.
            raw_results = [r async for r in ddgs.text(query, max_results=max_results)]

        # Map the response keys to the format the rest of the app expects,
        # skipping incomplete entries.
        results = [
            {
                'title': r.get('title'),
                'link': r.get('href'),
                'snippet': r.get('body'),
            }
            for r in raw_results
            if r.get('href') and r.get('title') and r.get('body')
        ]
        logger.info("Found %d sources from DuckDuckGo for: '%s'", len(results), query)
        return results
    except Exception as e:
        # Swallow and log: callers gather many searches concurrently and
        # treat an empty list as "no sources for this sub-question".
        logger.error("DuckDuckGo search failed for query '%s': %s", query, e)
        return []
|
| 85 |
|
| 86 |
async def research_and_process_source(session: aiohttp.ClientSession, source: dict) -> Tuple[str, dict]:
|
| 87 |
headers = {'User-Agent': random.choice(USER_AGENTS)}
|
|
|
|
| 118 |
yield format_sse({"event": "plan", "data": sub_questions})
|
| 119 |
|
| 120 |
yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
|
| 121 |
+
search_tasks = [call_duckduckgo_search(sq) for sq in sub_questions]
|
| 122 |
all_search_results = await asyncio.gather(*search_tasks)
|
| 123 |
unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
|
| 124 |
|