#!/usr/bin/env python
"""
Research actions for debate agents
Simplified version for Streamlit Cloud deployment
"""
import asyncio
from typing import Dict, List

import aiohttp
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from metagpt.actions import Action


class CollectLinks(Action):
    """Collect search result links using DuckDuckGo"""

    name: str = "CollectLinks"

    async def run(self, topic: str, decomposition_nums: int = 2, url_per_query: int = 3) -> Dict[str, List[str]]:
        """
        Search for links related to the topic.

        Args:
            topic: Search topic
            decomposition_nums: Number of decomposed search queries (unused in this
                simplified cloud version, which runs a single query; kept for
                interface compatibility)
            url_per_query: Number of URLs to return per query

        Returns:
            Dict mapping queries to lists of URLs
        """
        try:
            # Use DuckDuckGo search (free, no API key needed)
            ddgs = DDGS()
            results = ddgs.text(topic, max_results=url_per_query)
            urls = []
            for result in results:
                # Current versions return the URL under 'href'; 'link' is a legacy key
                if 'href' in result:
                    urls.append(result['href'])
                elif 'link' in result:
                    urls.append(result['link'])
            return {topic: urls[:url_per_query]}
        except Exception:
            # Fall back to empty results if the search fails
            return {topic: []}


class WebBrowseAndSummarize(Action):
    """Browse web pages and extract their text content"""

    name: str = "WebBrowseAndSummarize"

    async def run(self, *urls: str, query: str = "") -> Dict[str, str]:
        """
        Fetch web pages and extract their readable text.

        Args:
            urls: URLs to fetch
            query: Original search query (currently unused; kept for interface
                compatibility)

        Returns:
            Dict mapping each URL to its extracted text, truncated to 2000 characters
        """
        results = {}
        # A desktop user agent avoids trivial bot blocking on some sites
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession() as session:
            for url in urls:
                try:
                    async with session.get(url, headers=headers, timeout=timeout) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'lxml')
                            # Remove script and style elements before extracting text
                            for script in soup(["script", "style"]):
                                script.decompose()
                            text = soup.get_text(separator=' ', strip=True)
                            # Limit text length to keep downstream prompts small
                            text = text[:2000]
                            results[url] = f"[Source: {url}]\n{text}"
                        else:
                            results[url] = f"[Source: {url}]\nFailed to fetch content (HTTP {response.status})"
                except asyncio.TimeoutError:
                    results[url] = f"[Source: {url}]\nTimeout while fetching content"
                except Exception as e:
                    results[url] = f"[Source: {url}]\nError: {str(e)}"
        return results


class ConductResearch(Action):
    """Compile research from collected information"""

    name: str = "ConductResearch"

    async def run(self, topic: str, content: str) -> str:
        """
        Summarize research findings using the LLM.

        Args:
            topic: Research topic
            content: Collected content from web sources

        Returns:
            Research summary
        """
        if not content or not content.strip():
            return f"No research data available for topic: {topic}"

        prompt = f"""Based on the following web search results about "{topic}", provide a concise research summary (200-300 words) with key facts, statistics, and relevant information.

Web Content:
{content[:3000]}

Research Summary:"""
        try:
            # Use the LLM to summarize the collected content
            rsp = await self._aask(prompt)
            return rsp
        except Exception:
            # Fallback: return a truncated slice of the raw content
            return f"Research on '{topic}':\n\n{content[:1000]}..."