rkihacker committed on
Commit
d38cf69
·
verified ·
1 Parent(s): 9c44d7d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +36 -25
main.py CHANGED
@@ -13,7 +13,6 @@ from pydantic import BaseModel
13
  from dotenv import load_dotenv
14
  import aiohttp
15
  from bs4 import BeautifulSoup
16
- from ddgs import DDGS # Ensure this library is installed: pip install duckduckgo-search
17
 
18
  # --- Configuration ---
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -46,8 +45,8 @@ class DeepResearchRequest(BaseModel):
46
 
47
  app = FastAPI(
48
  title="AI Deep Research API",
49
- description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Search API.",
50
- version="9.6.0" # Correctly implemented DDGS library for robust searching
51
  )
52
 
53
  # Enable CORS for all origins
@@ -64,30 +63,43 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
64
  return None
65
 
66
  # --- Core Service Functions ---
67
- async def call_duckduckgo_search(query: str, max_results: int = 10) -> List[dict]:
68
  """
69
- Performs a search using the DDGS library, correctly handling async operations.
70
- This is the most reliable method.
71
  """
72
- logger.info(f"Searching DuckDuckGo API via DDGS for: '{query}'")
 
 
 
 
 
 
 
 
 
 
 
 
73
  try:
74
- results = []
75
- # Use 'async with' to let the library manage its own session lifecycle
76
- async with DDGS() as ddgs:
77
- # The ddgs.atext() is an async generator
78
- async for r in ddgs.atext(query, max_results=max_results):
79
- results.append(r)
80
-
81
- # The library now returns a dict with 'title', 'href', and 'body'
82
- formatted_results = [
83
- {'title': r.get('title'), 'link': r.get('href'), 'snippet': r.get('body')}
84
- for r in results if r.get('href') and r.get('title') and r.get('body')
85
- ]
86
-
87
- logger.info(f"Found {len(formatted_results)} sources from DuckDuckGo for: '{query}'")
88
- return formatted_results
 
89
  except Exception as e:
90
- logger.error(f"DDGS search failed for query '{query}': {e}", exc_info=True)
91
  return []
92
 
93
 
@@ -141,8 +153,7 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
141
  yield format_sse({"event": "plan", "data": sub_questions})
142
 
143
  yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
144
- # Note: We no longer pass the 'session' object to the search function
145
- search_tasks = [call_duckduckgo_search(sq) for sq in sub_questions]
146
  all_search_results = await asyncio.gather(*search_tasks)
147
  unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
148
 
 
13
  from dotenv import load_dotenv
14
  import aiohttp
15
  from bs4 import BeautifulSoup
 
16
 
17
  # --- Configuration ---
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
45
 
46
  app = FastAPI(
47
  title="AI Deep Research API",
48
+ description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Lite API.",
49
+ version="9.7.0" # Switched to reliable DuckDuckGo Lite JSON API
50
  )
51
 
52
  # Enable CORS for all origins
 
63
  return None
64
 
65
  # --- Core Service Functions ---
66
async def call_duckduckgo_search(session: aiohttp.ClientSession, query: str, max_results: int = 10) -> List[dict]:
    """
    Search DuckDuckGo through its Lite endpoint and return normalized results.

    This is a best-effort call: any transport, HTTP-status or decoding failure
    is logged and reported as an empty list, so one failed query does not abort
    the other searches the caller gathers concurrently.

    NOTE(review): this relies on the Lite endpoint honoring ``o=json`` and
    answering with a JSON array of objects keyed 't' (title), 'u' (url) and
    'a' (abstract/snippet). That contract is not verifiable from this file --
    confirm against the live endpoint.

    Args:
        session: Shared aiohttp client session used to issue the request.
        query: The search terms.
        max_results: Upper bound on the number of results returned. The
            endpoint has no such parameter, so the list is truncated locally.
            Values below zero are treated as zero.

    Returns:
        A list of dicts with 'title', 'link' and 'snippet' keys, containing
        only entries where all three fields are present and non-empty.
        Empty when the request fails or the payload has an unexpected shape.
    """
    logger.info(f"Searching DuckDuckGo Lite API for: '{query}'")
    search_url = "https://lite.duckduckgo.com/lite/"

    # Sent as URL query parameters of the POST request; 'o=json' asks for JSON output.
    params = {
        'q': query,
        's': 0,
        'o': 'json',
        'kl': 'wt-wt',
    }
    headers = {'User-Agent': random.choice(USER_AGENTS)}

    try:
        # TLS certificate verification is left at aiohttp's default (enabled);
        # passing ssl=False would accept any certificate for the search host.
        async with session.post(search_url, params=params, headers=headers) as response:
            response.raise_for_status()  # Raises for non-2xx status codes
            # content_type=None skips aiohttp's strict 'application/json' check,
            # so a JSON body served with another Content-Type is still decoded.
            raw_results = await response.json(content_type=None)
    except Exception as e:
        # Deliberately broad: the caller gathers these tasks without
        # return_exceptions, so an escaping error would abort every search.
        logger.error(f"DuckDuckGo Lite API search failed for query '{query}': {e}", exc_info=True)
        return []

    if not isinstance(raw_results, list):
        # Report a clear message instead of failing on .get() further down.
        logger.error(
            f"DuckDuckGo Lite API search failed for query '{query}': "
            f"unexpected payload type {type(raw_results).__name__}"
        )
        return []

    results = [
        {'title': r.get('t'), 'link': r.get('u'), 'snippet': r.get('a')}
        for r in raw_results
        if isinstance(r, dict) and r.get('u') and r.get('t') and r.get('a')
    ]

    # Clamp so a negative limit yields no results rather than trimming the tail.
    limited_results = results[:max(max_results, 0)]
    logger.info(f"Found {len(limited_results)} sources from DuckDuckGo for: '{query}'")
    return limited_results
104
 
105
 
 
153
  yield format_sse({"event": "plan", "data": sub_questions})
154
 
155
  yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
156
+ search_tasks = [call_duckduckgo_search(session, sq) for sq in sub_questions]
 
157
  all_search_results = await asyncio.gather(*search_tasks)
158
  unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
159