Spaces:

rkihacker
/

Scrap

Paused

App Files Community

rkihacker committed on Sep 17

Commit

d38cf69

verified ·

1 Parent(s): 9c44d7d

Update main.py

Browse files

Files changed (1) hide show

main.py +36 -25

main.py CHANGED Viewed

@@ -13,7 +13,6 @@ from pydantic import BaseModel
 from dotenv import load_dotenv
 import aiohttp
 from bs4 import BeautifulSoup
-from ddgs import DDGS # Ensure this library is installed: pip install duckduckgo-search
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -46,8 +45,8 @@ class DeepResearchRequest(BaseModel):
 app = FastAPI(
     title="AI Deep Research API",
-    description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Search API.",
-    version="9.6.0"  # Correctly implemented DDGS library for robust searching
 )
 # Enable CORS for all origins
@@ -64,30 +63,43 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
     return None
 # --- Core Service Functions ---
-async def call_duckduckgo_search(query: str, max_results: int = 10) -> List[dict]:
     """
-    Performs a search using the DDGS library, correctly handling async operations.
-    This is the most reliable method.
     """
-    logger.info(f"Searching DuckDuckGo API via DDGS for: '{query}'")
     try:
-        results = []
-        # Use 'async with' to let the library manage its own session lifecycle
-        async with DDGS() as ddgs:
-            # The ddgs.atext() is an async generator
-            async for r in ddgs.atext(query, max_results=max_results):
-                results.append(r)
-        # The library now returns a dict with 'title', 'href', and 'body'
-        formatted_results = [
-            {'title': r.get('title'), 'link': r.get('href'), 'snippet': r.get('body')}
-            for r in results if r.get('href') and r.get('title') and r.get('body')
-        ]
-        logger.info(f"Found {len(formatted_results)} sources from DuckDuckGo for: '{query}'")
-        return formatted_results
     except Exception as e:
-        logger.error(f"DDGS search failed for query '{query}': {e}", exc_info=True)
         return []
@@ -141,8 +153,7 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
             yield format_sse({"event": "plan", "data": sub_questions})
             yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
-            # Note: We no longer pass the 'session' object to the search function
-            search_tasks = [call_duckduckgo_search(sq) for sq in sub_questions]
             all_search_results = await asyncio.gather(*search_tasks)
             unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())

 from dotenv import load_dotenv
 import aiohttp
 from bs4 import BeautifulSoup
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 app = FastAPI(
     title="AI Deep Research API",
+    description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Lite API.",
+    version="9.7.0"  # Switched to reliable DuckDuckGo Lite JSON API
 )
 # Enable CORS for all origins
     return None
 # --- Core Service Functions ---
+async def call_duckduckgo_search(session: aiohttp.ClientSession, query: str, max_results: int = 10) -> List[dict]:
     """
+    Performs a search using the DuckDuckGo Lite JSON API as defined by the OpenAPI spec.
+    This is a stable, non-scraping method.
     """
+    logger.info(f"Searching DuckDuckGo Lite API for: '{query}'")
+    search_url = "https://lite.duckduckgo.com/lite/"
+    # Parameters for the POST request's URL, including 'o=json' for JSON output
+    params = {
+        'q': query,
+        's': 0,
+        'o': 'json',
+        'kl': 'wt-wt'
+    }
+    headers = {'User-Agent': random.choice(USER_AGENTS)}
     try:
+        async with session.post(search_url, params=params, headers=headers, ssl=False) as response:
+            response.raise_for_status() # Will raise an exception for non-2xx status codes
+            # The API returns a JSON array of results
+            raw_results = await response.json()
+            # The keys in the JSON are 't' (title), 'u' (url), and 'a' (abstract/snippet)
+            results = [
+                {'title': r.get('t'), 'link': r.get('u'), 'snippet': r.get('a')}
+                for r in raw_results if r.get('u') and r.get('t') and r.get('a')
+            ]
+            # The API doesn't have a max_results param, so we slice the list
+            limited_results = results[:max_results]
+            logger.info(f"Found {len(limited_results)} sources from DuckDuckGo for: '{query}'")
+            return limited_results
     except Exception as e:
+        logger.error(f"DuckDuckGo Lite API search failed for query '{query}': {e}", exc_info=True)
         return []
             yield format_sse({"event": "plan", "data": sub_questions})
             yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
+            search_tasks = [call_duckduckgo_search(session, sq) for sq in sub_questions]
             all_search_results = await asyncio.gather(*search_tasks)
             unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())