#!/usr/bin/env python
"""
Research actions for debate agents
Simplified version for Streamlit Cloud deployment
"""
from typing import Dict, List
from metagpt.actions import Action
from duckduckgo_search import DDGS
import asyncio
import aiohttp
from bs4 import BeautifulSoup


class CollectLinks(Action):
    """Collect search result links using DuckDuckGo"""

    name: str = "CollectLinks"

    async def run(self, topic: str, decomposition_nums: int = 2, url_per_query: int = 3) -> Dict[str, List[str]]:
        """
        Search for links related to the topic

        Args:
            topic: Search topic
            decomposition_nums: Number of search queries (unused in this
                simplified cloud version, which issues a single query)
            url_per_query: Number of URLs per query

        Returns:
            Dict mapping queries to lists of URLs
        """
        try:
            # Use DuckDuckGo search (free, no API key needed)
            ddgs = DDGS()
            results = ddgs.text(topic, max_results=url_per_query)
            urls = []
            for result in results:
                # The URL key varies between duckduckgo_search versions
                if 'href' in result:
                    urls.append(result['href'])
                elif 'link' in result:
                    urls.append(result['link'])
            return {topic: urls[:url_per_query]}
        except Exception:
            # Fall back to empty results if the search fails
            return {topic: []}
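

# Usage sketch (hypothetical standalone call; inside MetaGPT the action is
# normally invoked from a researcher role's run loop):
#
#   links = await CollectLinks().run("renewable energy subsidies")
#   # -> {"renewable energy subsidies": ["https://...", "https://...", ...]}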


class WebBrowseAndSummarize(Action):
    """Browse web pages and extract content"""

    name: str = "WebBrowseAndSummarize"

    async def run(self, *urls: str, query: str = "") -> Dict[str, str]:
        """
        Fetch and summarize web pages

        Args:
            urls: URLs to fetch
            query: Original search query

        Returns:
            Dict mapping URLs to their content summaries
        """
        results: Dict[str, str] = {}
        async with aiohttp.ClientSession() as session:
            for url in urls:
                try:
                    # Use a browser-like user agent; some sites reject bare clients
                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                    }
                    timeout = aiohttp.ClientTimeout(total=10)
                    async with session.get(url, headers=headers, timeout=timeout) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'lxml')
                            # Remove script and style elements
                            for tag in soup(["script", "style"]):
                                tag.decompose()
                            # Get visible text, capped at 2000 characters
                            text = soup.get_text(separator=' ', strip=True)[:2000]
                            results[url] = f"[Source: {url}]\n{text}"
                        else:
                            results[url] = f"[Source: {url}]\nFailed to fetch content (HTTP {response.status})"
                except asyncio.TimeoutError:
                    results[url] = f"[Source: {url}]\nTimeout while fetching content"
                except Exception as e:
                    results[url] = f"[Source: {url}]\nError: {str(e)}"
        return results
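

# Usage sketch: feed the URLs collected above into the browser action.
# Keys of the returned dict are the URLs; values are text excerpts.
#
#   pages = await WebBrowseAndSummarize().run(*urls, query="renewable energy subsidies")
#   # -> {"https://...": "[Source: https://...]\n<page text, capped at 2000 chars>"}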


class ConductResearch(Action):
    """Compile research from collected information"""

    name: str = "ConductResearch"

    async def run(self, topic: str, content: str) -> str:
        """
        Summarize research findings using the LLM

        Args:
            topic: Research topic
            content: Collected content from web sources

        Returns:
            Research summary
        """
        if not content or not content.strip():
            return f"No research data available for topic: {topic}"

        prompt = f"""Based on the following web search results about "{topic}", provide a concise research summary (200-300 words) with key facts, statistics, and relevant information.

Web Content:
{content[:3000]}

Research Summary:"""
        try:
            # Use the LLM to summarize
            rsp = await self._aask(prompt)
            return rsp
        except Exception:
            # Fallback: return a truncated slice of the raw content
            return f"Research on '{topic}':\n\n{content[:1000]}..."