# Standard library
import logging
import os
import time

# Third party
import requests
from bs4 import BeautifulSoup

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
def web_search(query: str) -> str:
    """Search the web via the Google Custom Search API and return formatted results.

    For each of the top 5 results, attempts to fetch the linked page and
    extract its paragraph text (capped at 1000 characters); falls back to the
    API-provided snippet when the page fetch fails.

    Args:
        query: The search query string (may contain spaces and special characters).

    Returns:
        A human-readable string of numbered results (title, link, content),
        or an explanatory message when credentials are missing, no results
        are found, or the search fails. Never raises.
    """
    try:
        google_api_key = os.getenv("GOOGLE_API_KEY")
        google_cse_id = os.getenv("GOOGLE_CSE_ID")
        if not google_api_key or not google_cse_id:
            return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
        # Let requests build the query string so `query` is properly
        # URL-encoded; the original f-string URL broke on spaces, '&',
        # '#', '+', and non-ASCII characters.
        params = {"key": google_api_key, "cx": google_cse_id, "q": query}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
        results = response.json().get("items", [])
        if not results:
            return "No web results found."
        search_results = []
        for i, item in enumerate(results[:5]):
            title = item.get("title", "")
            snippet = item.get("snippet", "")
            link = item.get("link", "")
            try:
                # Polite fixed delay between page fetches to avoid hammering hosts.
                time.sleep(2)
                page_response = requests.get(link, headers=headers, timeout=10)
                page_response.raise_for_status()
                soup = BeautifulSoup(page_response.text, "html.parser")
                paragraphs = soup.find_all("p")
                # Cap extracted text at 1000 *characters*. The original
                # sliced the paragraph list to 1000 elements, which was
                # effectively no limit on content size.
                page_content = " ".join(p.get_text() for p in paragraphs)[:1000]
            except Exception as e:
                # Best-effort: page fetch failures degrade to the API snippet.
                logger.warning("Failed to fetch page content for %s: %s", link, e)
                page_content = snippet
            search_results.append(
                f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n"
            )
        return "\n".join(search_results)
    except Exception as e:
        # Top-level boundary: never propagate — return the error as text.
        logger.exception("Web search failed")
        return f"Web search error: {e}"