Update main.py
Browse files
main.py
CHANGED
|
@@ -13,6 +13,7 @@ from pydantic import BaseModel
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
import aiohttp
|
| 15 |
from bs4 import BeautifulSoup
|
|
|
|
| 16 |
|
| 17 |
# --- Configuration ---
|
| 18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -27,9 +28,8 @@ else:
|
|
| 27 |
logger.info("LLM API Key loaded successfully.")
|
| 28 |
|
| 29 |
# --- Constants & Headers ---
|
| 30 |
-
SEARCH_API_URL = "https://search.privateinstance.com/api/text"
|
| 31 |
LLM_API_URL = "https://api.typegpt.net/v1/chat/completions"
|
| 32 |
-
LLM_MODEL = "
|
| 33 |
MAX_SOURCES_TO_PROCESS = 15
|
| 34 |
|
| 35 |
# Real Browser User Agents for SCRAPING
|
|
@@ -46,8 +46,8 @@ class DeepResearchRequest(BaseModel):
|
|
| 46 |
|
| 47 |
app = FastAPI(
|
| 48 |
title="AI Deep Research API",
|
| 49 |
-
description="Provides robust, long-form, streaming deep research completions using the
|
| 50 |
-
version="9.
|
| 51 |
)
|
| 52 |
|
| 53 |
# Enable CORS for all origins
|
|
@@ -62,29 +62,26 @@ def extract_json_from_llm_response(text: str) -> Optional[list]:
|
|
| 62 |
return None
|
| 63 |
|
| 64 |
# --- Core Service Functions ---
|
| 65 |
-
async def
|
| 66 |
-
"""Performs a search using the
|
| 67 |
-
|
| 68 |
-
logger.info(f"Searching PrivateInstance API for: '{query}'")
|
| 69 |
try:
|
| 70 |
-
async with
|
| 71 |
-
|
| 72 |
-
data = await response.json()
|
| 73 |
-
raw_results = data if isinstance(data, list) else data.get('results', [])
|
| 74 |
|
| 75 |
-
#
|
| 76 |
results = [
|
| 77 |
{
|
| 78 |
'title': r.get('title'),
|
| 79 |
-
'link': r.get('href'),
|
| 80 |
-
'snippet': r.get('body')
|
| 81 |
}
|
| 82 |
for r in raw_results if r.get('href') and r.get('title') and r.get('body')
|
| 83 |
]
|
| 84 |
-
logger.info(f"Found {len(results)} sources from
|
| 85 |
return results
|
| 86 |
except Exception as e:
|
| 87 |
-
logger.error(f"
|
| 88 |
|
| 89 |
async def research_and_process_source(session: aiohttp.ClientSession, source: dict) -> Tuple[str, dict]:
|
| 90 |
headers = {'User-Agent': random.choice(USER_AGENTS)}
|
|
@@ -121,7 +118,7 @@ async def run_deep_research_stream(query: str) -> AsyncGenerator[str, None]:
|
|
| 121 |
yield format_sse({"event": "plan", "data": sub_questions})
|
| 122 |
|
| 123 |
yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
|
| 124 |
-
search_tasks = [
|
| 125 |
all_search_results = await asyncio.gather(*search_tasks)
|
| 126 |
unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
|
| 127 |
|
|
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
import aiohttp
|
| 15 |
from bs4 import BeautifulSoup
|
| 16 |
+
from duckduckgo_search import AsyncDDGS
|
| 17 |
|
| 18 |
# --- Configuration ---
|
| 19 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 28 |
logger.info("LLM API Key loaded successfully.")
|
| 29 |
|
| 30 |
# --- Constants & Headers ---
|
|
|
|
| 31 |
# Endpoint for chat-completion requests.
LLM_API_URL = "https://api.typegpt.net/v1/chat/completions"
# Model identifier sent with every completion request.
LLM_MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
# Cap on how many scraped sources are fed into a single research run.
MAX_SOURCES_TO_PROCESS = 15
|
| 34 |
|
| 35 |
# Real Browser User Agents for SCRAPING
|
|
|
|
| 46 |
|
| 47 |
# FastAPI application object; routes and middleware are attached to it below.
app = FastAPI(
    title="AI Deep Research API",
    description="Provides robust, long-form, streaming deep research completions using the DuckDuckGo Search API.",
    version="9.1.0",  # bumped for the DuckDuckGo integration
)
|
| 52 |
|
| 53 |
# Enable CORS for all origins
|
|
|
|
| 62 |
return None
|
| 63 |
|
| 64 |
# --- Core Service Functions ---
|
| 65 |
+
async def call_duckduckgo_search(query: str, max_results: int = 10) -> List[dict]:
    """Search DuckDuckGo and normalize the results for downstream processing.

    Args:
        query: Free-text search query (typically one generated sub-question).
        max_results: Upper bound on results requested from DuckDuckGo.

    Returns:
        A list of dicts with ``title``, ``link`` and ``snippet`` keys, mapped
        from DuckDuckGo's ``title``/``href``/``body`` fields. Entries missing
        any of those fields are dropped. On ANY failure an empty list is
        returned (deliberate best-effort: one failed sub-question search must
        not abort the whole research run).
    """
    logger.info("Searching DuckDuckGo API for: '%s'", query)
    try:
        async with AsyncDDGS() as ddgs:
            # NOTE(review): assumes a duckduckgo_search version where
            # AsyncDDGS.text() yields results asynchronously — confirm the
            # pinned package version supports `async for` here.
            raw_results = [r async for r in ddgs.text(query, max_results=max_results)]

        # Map the response keys to the format the rest of the app expects,
        # skipping incomplete entries.
        results = [
            {
                'title': r.get('title'),
                'link': r.get('href'),
                'snippet': r.get('body'),
            }
            for r in raw_results
            if r.get('href') and r.get('title') and r.get('body')
        ]
        logger.info("Found %d sources from DuckDuckGo for: '%s'", len(results), query)
        return results
    except Exception as e:
        # Swallow and log: callers gather many searches concurrently and
        # treat an empty list as "no sources for this sub-question".
        logger.error("DuckDuckGo search failed for query '%s': %s", query, e)
        return []
|
| 85 |
|
| 86 |
async def research_and_process_source(session: aiohttp.ClientSession, source: dict) -> Tuple[str, dict]:
|
| 87 |
headers = {'User-Agent': random.choice(USER_AGENTS)}
|
|
|
|
| 118 |
yield format_sse({"event": "plan", "data": sub_questions})
|
| 119 |
|
| 120 |
yield format_sse({"event": "status", "data": f"Searching sources for {len(sub_questions)} topics..."})
|
| 121 |
+
search_tasks = [call_duckduckgo_search(sq) for sq in sub_questions]
|
| 122 |
all_search_results = await asyncio.gather(*search_tasks)
|
| 123 |
unique_sources = list({source['link']: source for results in all_search_results for source in results}.values())
|
| 124 |
|