rkihacker committed
Commit 768d891 · verified · 1 Parent(s): 0eacd1e

Update main.py

Files changed (1)
  1. main.py +46 -20
main.py CHANGED
@@ -5,6 +5,7 @@ import logging
 import random
 import re
 from typing import AsyncGenerator, Optional, Tuple, List
+from urllib.parse import unquote
 
 from fastapi import FastAPI
 from fastapi.responses import StreamingResponse
@@ -13,7 +14,6 @@ from pydantic import BaseModel
 from dotenv import load_dotenv
 import aiohttp
 from bs4 import BeautifulSoup
-from ddgs import DDGS # <-- Make sure this import is present
 
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -46,8 +46,8 @@ class DeepResearchRequest(BaseModel):
 
 app = FastAPI(
     title="AI Deep Research API",
-    description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Search API.",
-    version="9.4.0" # Reverted to reliable DDGS library search
+    description="Provides robust, long-form, streaming deep research completions using direct DuckDuckGo scraping.",
+    version="9.5.0" # Implemented direct HTML scraping
 )
 
 # Enable CORS for all origins
@@ -66,25 +66,52 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
 # --- Core Service Functions ---
 async def call_duckduckgo_search(session: aiohttp.ClientSession, query: str, max_results: int = 10) -> List[dict]:
     """
-    Performs a search using the DDGS library with an existing aiohttp session.
-    This method is more reliable than direct HTML scraping.
+    Performs a search by directly scraping the DuckDuckGo HTML interface,
+    mimicking a real browser request.
     """
-    logger.info(f"Searching DuckDuckGo API via DDGS for: '{query}'")
+    logger.info(f"Searching DuckDuckGo (HTML) for: '{query}'")
+    search_url = "https://html.duckduckgo.com/html/"
+
+    # Form data to be sent with the POST request
+    payload = {'q': query, 'b': '', 'kl': '', 'df': ''}
+
+    # Headers to mimic a browser, based on the provided curl command
+    headers = {
+        'Content-Type': 'application/x-www-form-urlencoded',
+        'Origin': 'https://html.duckduckgo.com',
+        'Referer': 'https://html.duckduckgo.com/',
+        'User-Agent': random.choice(USER_AGENTS)
+    }
+
     try:
-        ddgs = DDGS(session=session)
-        # Use ddgs.atext for asynchronous text search
-        raw_results = [r async for r in ddgs.atext(query, max_results=max_results)]
-
-        # Filter and format results to ensure they have the necessary keys
-        results = [
-            {'title': r.get('title'), 'link': r.get('href'), 'snippet': r.get('body')}
-            for r in raw_results if r.get('href') and r.get('title') and r.get('body')
-        ]
-
-        logger.info(f"Found {len(results)} sources from DuckDuckGo for: '{query}'")
-        return results
+        async with session.post(search_url, data=payload, headers=headers, ssl=False) as response:
+            if response.status != 200:
+                logger.error(f"DuckDuckGo search failed with status {response.status} for query '{query}'")
+                return []
+
+            html = await response.text()
+            soup = BeautifulSoup(html, "html.parser")
+            results = []
+
+            # Find all result containers
+            for result_div in soup.find_all('div', class_='result'):
+                title_elem = result_div.find('a', class_='result__a')
+                snippet_elem = result_div.find('a', class_='result__snippet')
+
+                if title_elem and snippet_elem:
+                    link = title_elem.get('href')
+                    title = title_elem.get_text(strip=True)
+                    snippet = snippet_elem.get_text(strip=True)
+
+                    if link and title and snippet:
+                        results.append({'title': title, 'link': link, 'snippet': snippet})
+                        if len(results) >= max_results:
+                            break
+
+            logger.info(f"Found {len(results)} sources from DuckDuckGo for: '{query}'")
+            return results
    except Exception as e:
-        logger.error(f"DDGS search failed for query '{query}': {e}", exc_info=True)
+        logger.error(f"DuckDuckGo HTML search failed for query '{query}': {e}", exc_info=True)
         return []
 
 
@@ -199,5 +226,4 @@ async def deep_research_endpoint(request: DeepResearchRequest):
 
 if __name__ == "__main__":
     import uvicorn
-    # To run this app: uvicorn your_filename:app --reload
     uvicorn.run(app, host="0.0.0.0", port=8000)
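
For a quick sanity check of the new scraper, here is a minimal usage sketch (not part of the commit). The import path main, the helper name resolve_redirect, and the query string are illustrative; the idea that the newly added urllib.parse.unquote is meant for decoding DuckDuckGo's /l/?uddg= redirect links is an assumption inferred from the import, since the shown hunks never call it.

import asyncio
from urllib.parse import unquote

import aiohttp

from main import call_duckduckgo_search  # assumes the file above is saved as main.py


def resolve_redirect(link: str) -> str:
    # DuckDuckGo's HTML frontend typically returns redirect links of the form
    # //duckduckgo.com/l/?uddg=<percent-encoded-url>&rut=...; unquote recovers
    # the target URL. (Assumption: this is what the added import is for.)
    if "uddg=" in link:
        return unquote(link.split("uddg=", 1)[1].split("&", 1)[0])
    return link


async def demo() -> None:
    async with aiohttp.ClientSession() as session:
        results = await call_duckduckgo_search(session, "fastapi streaming responses", max_results=5)
        for r in results:
            print(r["title"], "->", resolve_redirect(r["link"]))


if __name__ == "__main__":
    asyncio.run(demo())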