File size: 5,721 Bytes
4b17916
2a0098d
 
 
4b17916
2a0098d
4b17916
 
2a0098d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b17916
 
2a0098d
4b17916
2a0098d
 
 
4b17916
 
2a0098d
 
 
 
4b17916
2a0098d
4b17916
2a0098d
 
 
 
 
 
 
 
 
4b17916
2a0098d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b17916
2a0098d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b17916
2a0098d
 
4b17916
 
2a0098d
 
 
 
 
 
 
4b17916
2a0098d
 
 
4b17916
2a0098d
 
 
 
 
 
 
 
 
 
 
4b17916
2a0098d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import asyncio
import os
from urllib.parse import urlsplit

import aiohttp
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Query

# --- Configuration ---
# Load environment variables from a local .env file into os.environ.
load_dotenv()
# API key for the downstream LLM provider; required at startup.
LLM_API_KEY = os.getenv("LLM_API_KEY")

# Fail fast at import time rather than erroring on the first request.
if not LLM_API_KEY:
    raise RuntimeError("LLM_API_KEY must be set in a .env file.")

# Snapzion Search API Configuration
SNAPZION_API_URL = "https://search.snapzion.com/get-snippets"
# Browser-like headers; presumably required so Snapzion accepts the
# request as if it came from its own docs page — TODO confirm which
# headers are actually necessary.
SNAPZION_HEADERS = {
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9',
    'content-type': 'application/json',
    'origin': 'https://search.snapzion.com',
    'priority': 'u=1, i',
    'referer': 'https://search.snapzion.com/docs',
    'sec-ch-ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
}

# LLM Configuration
LLM_API_URL = "https://api.inference.net/v1/chat/completions"
LLM_MODEL = "meta-llama/llama-3.1-8b-instruct/fp-8"

# --- FastAPI App Initialization ---
app = FastAPI(
    title="AI Search Snippets API (Snapzion)",
    description="Provides AI-generated summaries from Snapzion search results.",
    version="1.0.1"
)

# --- Core Asynchronous Functions ---

async def call_snapzion_search(session: aiohttp.ClientSession, query: str) -> list:
    """Call the Snapzion search API and return its list of organic results.

    Args:
        session: Open aiohttp session to issue the POST request on.
        query: The user's search query string.

    Returns:
        The ``organic_results`` list from the JSON response (``[]`` if
        the key is absent).

    Raises:
        HTTPException: 503 when the request fails, returns a non-2xx
            status, or times out.
    """
    try:
        async with session.post(
            SNAPZION_API_URL,
            headers=SNAPZION_HEADERS,
            json={"query": query},
            # Explicit ClientTimeout: passing a bare number is a
            # deprecated form in aiohttp.
            timeout=aiohttp.ClientTimeout(total=15),
        ) as response:
            response.raise_for_status()
            data = await response.json()
            return data.get("organic_results", [])
    # Catch only transport-level failures; a bare `except Exception`
    # would also swallow programming errors (e.g. NameError) and
    # misreport them as a 503 from the search service.
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        raise HTTPException(status_code=503, detail=f"Search service (Snapzion) failed: {e}") from e

async def scrape_url(session: aiohttp.ClientSession, url: str) -> str:
    """Asynchronously scrape the primary text content from a URL.

    Args:
        session: Open aiohttp session to fetch the page with.
        url: The page to scrape.

    Returns:
        The page's visible text with boilerplate tags removed, or a
        human-readable "Error: ..." / PDF notice string on failure.
        Callers filter on the "Error:" prefix, so this never raises.
    """
    # Inspect the URL *path* so "https://x/doc.pdf?download=1" is still
    # recognized as a PDF (a bare endswith() on the full URL misses
    # query strings and fragments).
    if urlsplit(url).path.lower().endswith('.pdf'):
        return "Content is a PDF, which cannot be scraped."
    try:
        async with session.get(url, timeout=10) as response:
            if response.status != 200:
                return f"Error: Failed to fetch {url} with status {response.status}"
            html = await response.text()
            soup = BeautifulSoup(html, "html.parser")
            # Drop scripts, styles, and structural boilerplate before
            # extracting text so only the main content remains.
            for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
                tag.decompose()
            return " ".join(soup.stripped_strings)
    except Exception as e:
        # Deliberate best-effort: any failure becomes an "Error:" string
        # so one bad source never aborts the caller's gather().
        return f"Error: Could not scrape {url}. Reason: {e}"

async def get_ai_snippet(query: str, context: str, sources: list) -> str:
    """Ask the LLM for a concise, citation-annotated answer to *query*.

    Args:
        query: The original user query.
        context: Concatenated snippets/scraped text from the sources.
        sources: Search-result dicts; each must provide 'title' and 'link'.

    Returns:
        The LLM's answer text, with `[n]` citations referencing *sources*.

    Raises:
        HTTPException: 502 when the LLM request fails for any reason.
    """
    request_headers = {"Authorization": f"Bearer {LLM_API_KEY}", "Content-Type": "application/json"}
    source_list_str = "\n".join(
        f"[{idx}] {src['title']}: {src['link']}" for idx, src in enumerate(sources, start=1)
    )

    prompt = f"""
Based *only* on the provided context from web pages, provide a concise, factual answer to the user's query. Cite every sentence with the corresponding source number(s), like `[1]`, `[2]`, or `[1, 3]`.

Sources:
{source_list_str}

Context:
---
{context}
---

User Query: "{query}"

Answer with citations:
"""
    payload = {"model": LLM_MODEL, "messages": [{"role": "user", "content": prompt}], "max_tokens": 500}

    async with aiohttp.ClientSession() as llm_session:
        try:
            async with llm_session.post(LLM_API_URL, headers=request_headers, json=payload, timeout=45) as response:
                response.raise_for_status()
                body = await response.json()
                return body['choices'][0]['message']['content']
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Failed to get response from LLM: {e}")

# --- API Endpoint ---

@app.get("/search")
async def ai_search(q: str = Query(..., min_length=3, description="The search query.")):
    """
    Performs an AI-powered search using Snapzion. It finds relevant web pages,
    scrapes their content, and generates a synthesized answer with citations.

    Returns a JSON object with "ai_summary" (the cited answer) and
    "sources" (the result dicts the citations refer to). Raises 404 when
    the search yields nothing, 500 when every scrape fails, and lets the
    helpers' 502/503 HTTPExceptions propagate.
    """
    async with aiohttp.ClientSession() as session:
        # 1. Search for relevant web pages using Snapzion
        search_results = await call_snapzion_search(session, q)
        if not search_results:
            raise HTTPException(status_code=404, detail="Could not find any relevant sources for the query.")
        
        # Limit to the top 4 results for speed and relevance
        sources = search_results[:4]

        # 2. Scrape all pages concurrently for speed
        # NOTE(review): assumes each result dict carries "link" (and later
        # "snippet") keys — verify against the Snapzion response schema.
        scrape_tasks = [scrape_url(session, source["link"]) for source in sources]
        scraped_contents = await asyncio.gather(*scrape_tasks)

        # 3. Combine content and snippets for a rich context
        # Failed scrapes ("Error: ..." strings) are skipped, but the [i+1]
        # numbering follows the sources list, so citations stay aligned
        # with the "sources" array returned to the client.
        full_context = "\n\n".join(
            f"Source [{i+1}] (from {sources[i]['link']}):\nOriginal Snippet: {sources[i]['snippet']}\nScraped Content: {content}"
            for i, content in enumerate(scraped_contents) if not content.startswith("Error:")
        )

        if not full_context.strip():
            raise HTTPException(status_code=500, detail="Failed to scrape content from all available sources.")

        # 4. Generate the final AI snippet
        ai_summary = await get_ai_snippet(q, full_context, sources)

    return {"ai_summary": ai_summary, "sources": sources}

@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status_payload = {"message": "AI Search API is active. Use the /docs endpoint to test."}
    return status_payload