#!/usr/bin/env python
"""
Research actions for debate agents
Simplified version for Streamlit Cloud deployment
"""
import asyncio
from typing import Dict, List

import aiohttp
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from metagpt.actions import Action


class CollectLinks(Action):
    """Collect search result links using DuckDuckGo"""

    name: str = "CollectLinks"

    async def run(self, topic: str, decomposition_nums: int = 2, url_per_query: int = 3) -> Dict[str, List[str]]:
        """
        Search for links related to the topic.

        Args:
            topic: Search topic
            decomposition_nums: Number of decomposed search queries (unused in this
                simplified cloud version, which runs a single query; kept for
                interface compatibility)
            url_per_query: Number of URLs to return per query

        Returns:
            Dict mapping queries to lists of URLs
        """
        try:
            # Use DuckDuckGo search (free, no API key needed)
            ddgs = DDGS()
            results = ddgs.text(topic, max_results=url_per_query)
            urls = []
            for result in results:
                # Current versions return the URL under 'href'; 'link' is a legacy key
                if 'href' in result:
                    urls.append(result['href'])
                elif 'link' in result:
                    urls.append(result['link'])
            return {topic: urls[:url_per_query]}
        except Exception:
            # Fall back to empty results if the search fails
            return {topic: []}


class WebBrowseAndSummarize(Action):
    """Browse web pages and extract their text content"""

    name: str = "WebBrowseAndSummarize"

    async def run(self, *urls: str, query: str = "") -> Dict[str, str]:
        """
        Fetch web pages and extract their readable text.

        Args:
            urls: URLs to fetch
            query: Original search query (currently unused; kept for interface
                compatibility)

        Returns:
            Dict mapping each URL to its extracted text, truncated to 2000 characters
        """
        results = {}
        # A desktop user agent avoids trivial bot blocking on some sites
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession() as session:
            for url in urls:
                try:
                    async with session.get(url, headers=headers, timeout=timeout) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'lxml')
                            # Remove script and style elements before extracting text
                            for script in soup(["script", "style"]):
                                script.decompose()
                            text = soup.get_text(separator=' ', strip=True)
                            # Limit text length to keep downstream prompts small
                            text = text[:2000]
                            results[url] = f"[Source: {url}]\n{text}"
                        else:
                            results[url] = f"[Source: {url}]\nFailed to fetch content (HTTP {response.status})"
                except asyncio.TimeoutError:
                    results[url] = f"[Source: {url}]\nTimeout while fetching content"
                except Exception as e:
                    results[url] = f"[Source: {url}]\nError: {str(e)}"
        return results


class ConductResearch(Action):
    """Compile research from collected information"""

    name: str = "ConductResearch"

    async def run(self, topic: str, content: str) -> str:
        """
        Summarize research findings using the LLM.

        Args:
            topic: Research topic
            content: Collected content from web sources

        Returns:
            Research summary
        """
        if not content or not content.strip():
            return f"No research data available for topic: {topic}"

        prompt = f"""Based on the following web search results about "{topic}", provide a concise research summary (200-300 words) with key facts, statistics, and relevant information.

Web Content:
{content[:3000]}

Research Summary:"""
        try:
            # Use the LLM to summarize the collected content
            rsp = await self._aask(prompt)
            return rsp
        except Exception:
            # Fallback: return a truncated slice of the raw content
            return f"Research on '{topic}':\n\n{content[:1000]}..."