#!/usr/bin/env python
"""
Research actions for debate agents
Simplified version for Streamlit Cloud deployment
"""
from typing import Dict, List
from metagpt.actions import Action
from duckduckgo_search import DDGS
import asyncio
import aiohttp
from bs4 import BeautifulSoup


class CollectLinks(Action):
    """Collect search result links using DuckDuckGo"""

    name: str = "CollectLinks"

    async def run(self, topic: str, decomposition_nums: int = 2, url_per_query: int = 3) -> Dict[str, List[str]]:
        """
        Search for links related to the topic

        Args:
            topic: Search topic
            decomposition_nums: Number of search queries (unused in this
                simplified cloud version, which issues a single query)
            url_per_query: Number of URLs per query

        Returns:
            Dict mapping queries to lists of URLs
        """
        try:
            # Use DuckDuckGo search (free, no API key needed)
            ddgs = DDGS()
            results = ddgs.text(topic, max_results=url_per_query)
            urls = []
            for result in results:
                # The URL key varies between duckduckgo_search versions
                if 'href' in result:
                    urls.append(result['href'])
                elif 'link' in result:
                    urls.append(result['link'])
            return {topic: urls[:url_per_query]}
        except Exception:
            # Fall back to empty results if the search fails
            return {topic: []}
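

# Usage sketch (hypothetical standalone call; inside MetaGPT the action is
# normally invoked from a researcher role's run loop):
#
#   links = await CollectLinks().run("renewable energy subsidies")
#   # -> {"renewable energy subsidies": ["https://...", "https://...", ...]}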


class WebBrowseAndSummarize(Action):
    """Browse web pages and extract content"""

    name: str = "WebBrowseAndSummarize"

    async def run(self, *urls: str, query: str = "") -> Dict[str, str]:
        """
        Fetch and summarize web pages

        Args:
            urls: URLs to fetch
            query: Original search query

        Returns:
            Dict mapping URLs to their content summaries
        """
        results: Dict[str, str] = {}
        async with aiohttp.ClientSession() as session:
            for url in urls:
                try:
                    # Use a browser-like user agent; some sites reject bare clients
                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                    }
                    timeout = aiohttp.ClientTimeout(total=10)
                    async with session.get(url, headers=headers, timeout=timeout) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'lxml')
                            # Remove script and style elements
                            for tag in soup(["script", "style"]):
                                tag.decompose()
                            # Get visible text, capped at 2000 characters
                            text = soup.get_text(separator=' ', strip=True)[:2000]
                            results[url] = f"[Source: {url}]\n{text}"
                        else:
                            results[url] = f"[Source: {url}]\nFailed to fetch content (HTTP {response.status})"
                except asyncio.TimeoutError:
                    results[url] = f"[Source: {url}]\nTimeout while fetching content"
                except Exception as e:
                    results[url] = f"[Source: {url}]\nError: {str(e)}"
        return results
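

# Usage sketch: feed the URLs collected above into the browser action.
# Keys of the returned dict are the URLs; values are text excerpts.
#
#   pages = await WebBrowseAndSummarize().run(*urls, query="renewable energy subsidies")
#   # -> {"https://...": "[Source: https://...]\n<page text, capped at 2000 chars>"}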


class ConductResearch(Action):
    """Compile research from collected information"""

    name: str = "ConductResearch"

    async def run(self, topic: str, content: str) -> str:
        """
        Summarize research findings using the LLM

        Args:
            topic: Research topic
            content: Collected content from web sources

        Returns:
            Research summary
        """
        if not content or not content.strip():
            return f"No research data available for topic: {topic}"

        prompt = f"""Based on the following web search results about "{topic}", provide a concise research summary (200-300 words) with key facts, statistics, and relevant information.

Web Content:
{content[:3000]}

Research Summary:"""
        try:
            # Use the LLM to summarize
            rsp = await self._aask(prompt)
            return rsp
        except Exception:
            # Fallback: return a truncated slice of the raw content
            return f"Research on '{topic}':\n\n{content[:1000]}..."