import os
import asyncio
from fastapi import FastAPI, HTTPException, Query
from dotenv import load_dotenv
import aiohttp
from bs4 import BeautifulSoup
# --- Configuration ---
load_dotenv()
LLM_API_KEY = os.getenv("LLM_API_KEY")
if not LLM_API_KEY:
    raise RuntimeError("LLM_API_KEY must be set in a .env file.")
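# Example .env contents (illustrative placeholder, not a real key):
#   LLM_API_KEY=sk-your-inference-api-key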
# Snapzion Search API Configuration
SNAPZION_API_URL = "https://search.snapzion.com/get-snippets"
SNAPZION_HEADERS = {
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9',
    'content-type': 'application/json',
    'origin': 'https://search.snapzion.com',
    'priority': 'u=1, i',
    'referer': 'https://search.snapzion.com/docs',
    'sec-ch-ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
}
# LLM Configuration
LLM_API_URL = "https://api.inference.net/v1/chat/completions"
LLM_MODEL = "meta-llama/llama-3.1-8b-instruct/fp-8"
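# Note: the request/response shapes used below assume an OpenAI-compatible
# chat-completions endpoint (messages in, choices[0].message.content out).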
# --- FastAPI App Initialization ---
app = FastAPI(
    title="AI Search Snippets API (Snapzion)",
    description="Provides AI-generated summaries from Snapzion search results.",
    version="1.0.1"
)
# --- Core Asynchronous Functions ---
async def call_snapzion_search(session: aiohttp.ClientSession, query: str) -> list:
    """Calls the Snapzion search API and returns a list of organic results."""
    try:
        async with session.post(SNAPZION_API_URL, headers=SNAPZION_HEADERS, json={"query": query}, timeout=15) as response:
            response.raise_for_status()
            data = await response.json()
            return data.get("organic_results", [])
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Search service (Snapzion) failed: {e}")
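# Assumed Snapzion response shape, inferred from the keys this file reads
# (organic_results / title / link / snippet), not from official docs:
#   {"organic_results": [{"title": "...", "link": "https://...", "snippet": "..."}, ...]}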
async def scrape_url(session: aiohttp.ClientSession, url: str) -> str:
    """Asynchronously scrapes the primary text content from a URL, ignoring PDFs."""
    if url.lower().endswith('.pdf'):
        return "Content is a PDF, which cannot be scraped."
    try:
        async with session.get(url, timeout=10) as response:
            if response.status != 200:
                return f"Error: Failed to fetch {url} with status {response.status}"
            html = await response.text()
            soup = BeautifulSoup(html, "html.parser")
            # Drop non-content tags before extracting the visible text.
            for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
                tag.decompose()
            return " ".join(soup.stripped_strings)
    except Exception as e:
        return f"Error: Could not scrape {url}. Reason: {e}"
async def get_ai_snippet(query: str, context: str, sources: list) -> str:
    """Generates a synthesized answer using an LLM based on the provided context."""
    headers = {"Authorization": f"Bearer {LLM_API_KEY}", "Content-Type": "application/json"}
    source_list_str = "\n".join([f"[{i+1}] {source['title']}: {source['link']}" for i, source in enumerate(sources)])
    prompt = f"""
Based *only* on the provided context from web pages, provide a concise, factual answer to the user's query. Cite every sentence with the corresponding source number(s), like `[1]`, `[2]`, or `[1, 3]`.
Sources:
{source_list_str}
Context:
---
{context}
---
User Query: "{query}"
Answer with citations:
"""
    data = {"model": LLM_MODEL, "messages": [{"role": "user", "content": prompt}], "max_tokens": 500}
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(LLM_API_URL, headers=headers, json=data, timeout=45) as response:
                response.raise_for_status()
                result = await response.json()
                return result['choices'][0]['message']['content']
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Failed to get response from LLM: {e}")
# --- API Endpoint ---
@app.get("/search")
async def ai_search(q: str = Query(..., min_length=3, description="The search query.")):
"""
Performs an AI-powered search using Snapzion. It finds relevant web pages,
scrapes their content, and generates a synthesized answer with citations.
"""
async with aiohttp.ClientSession() as session:
# 1. Search for relevant web pages using Snapzion
search_results = await call_snapzion_search(session, q)
if not search_results:
raise HTTPException(status_code=404, detail="Could not find any relevant sources for the query.")
# Limit to the top 4 results for speed and relevance
sources = search_results[:4]
# 2. Scrape all pages concurrently for speed
scrape_tasks = [scrape_url(session, source["link"]) for source in sources]
scraped_contents = await asyncio.gather(*scrape_tasks)
# 3. Combine content and snippets for a rich context
full_context = "\n\n".join(
f"Source [{i+1}] (from {sources[i]['link']}):\nOriginal Snippet: {sources[i]['snippet']}\nScraped Content: {content}"
for i, content in enumerate(scraped_contents) if not content.startswith("Error:")
)
if not full_context.strip():
raise HTTPException(status_code=500, detail="Failed to scrape content from all available sources.")
# 4. Generate the final AI snippet
ai_summary = await get_ai_snippet(q, full_context, sources)
return {"ai_summary": ai_summary, "sources": sources}
@app.get("/")
def root():
return {"message": "AI Search API is active. Use the /docs endpoint to test."} |