"""
Research actions for debate agents.

Simplified version for Streamlit Cloud deployment.
"""

import asyncio
from typing import Dict, List

import aiohttp
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS

from metagpt.actions import Action


class CollectLinks(Action):
    """Collect search result links using DuckDuckGo."""

    name: str = "CollectLinks"

    async def run(self, topic: str, decomposition_nums: int = 2, url_per_query: int = 3) -> Dict[str, List[str]]:
        """
        Search for links related to the topic.

        Args:
            topic: Search topic.
            decomposition_nums: Number of search queries. Kept for interface
                compatibility; this simplified cloud version issues a single
                query and ignores the value.
            url_per_query: Number of URLs to return per query.

        Returns:
            Dict mapping queries to lists of URLs.
        """
        try:
            ddgs = DDGS()
            results = ddgs.text(topic, max_results=url_per_query)

            urls = []
            for result in results:
                # duckduckgo_search has exposed the URL under different keys
                # across versions, so accept either.
                if 'href' in result:
                    urls.append(result['href'])
                elif 'link' in result:
                    urls.append(result['link'])

            return {topic: urls[:url_per_query]}

        except Exception:
            # Fail soft: an empty URL list lets the debate continue without
            # research data instead of crashing the app.
            return {topic: []}
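

# Usage sketch (hypothetical topic string; assumes network access and a
# running event loop, e.g. via asyncio.run):
#
#   links = await CollectLinks().run("universal basic income", url_per_query=3)
#   # -> {"universal basic income": ["https://...", "https://...", ...]}
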

class WebBrowseAndSummarize(Action):
    """Browse web pages and extract their text content."""

    name: str = "WebBrowseAndSummarize"

    async def run(self, *urls: str, query: str = "") -> Dict[str, str]:
        """
        Fetch and summarize web pages.

        Args:
            urls: URLs to fetch.
            query: Original search query (reserved; not currently used).

        Returns:
            Dict mapping URLs to their content summaries.
        """
        results = {}

        async with aiohttp.ClientSession() as session:
            for url in urls:
                try:
                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                    }
                    timeout = aiohttp.ClientTimeout(total=10)
                    async with session.get(url, headers=headers, timeout=timeout) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'lxml')

                            # Drop non-content tags before extracting text.
                            for tag in soup(["script", "style"]):
                                tag.decompose()

                            text = soup.get_text(separator=' ', strip=True)

                            # Truncate to keep downstream prompts small.
                            text = text[:2000]

                            results[url] = f"[Source: {url}]\n{text}"
                        else:
                            results[url] = f"[Source: {url}]\nFailed to fetch content (HTTP {response.status})"

                except asyncio.TimeoutError:
                    results[url] = f"[Source: {url}]\nTimeout while fetching content"
                except Exception as e:
                    results[url] = f"[Source: {url}]\nError: {e}"

        return results
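

# Usage sketch (placeholder URL; pages are fetched sequentially, so n URLs
# can take up to ~10 s each in the worst case):
#
#   pages = await WebBrowseAndSummarize().run("https://example.com", query="ubi")
#   # -> {"https://example.com": "[Source: https://example.com]\n<first 2000 chars>"}
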

class ConductResearch(Action):
    """Compile research from collected information."""

    name: str = "ConductResearch"

    async def run(self, topic: str, content: str) -> str:
        """
        Summarize research findings using the LLM.

        Args:
            topic: Research topic.
            content: Collected content from web sources.

        Returns:
            Research summary.
        """
        if not content or not content.strip():
            return f"No research data available for topic: {topic}"

        prompt = f"""Based on the following web search results about "{topic}", provide a concise research summary (200-300 words) with key facts, statistics, and relevant information.

Web Content:
{content[:3000]}

Research Summary:"""

        try:
            return await self._aask(prompt)
        except Exception:
            # Fall back to the raw content if the LLM call fails, so the
            # debate agents still receive something usable.
            return f"Research on '{topic}':\n\n{content[:1000]}..."