from crewai import Agent, Task, Crew
from llm_setup import crew_llm
from crewai_tools import (
    SerperDevTool,
    ScrapeWebsiteTool,
)
# Function provided by DataCamp
def setup_web_scraping_agent():
    """Build and return a two-agent Crew that researches a topic on the web.

    The crew runs two sequential tasks: a search agent locates the most
    relevant page for ``{topic}``, then a scraper agent extracts and
    summarizes its content.  The ``{topic}`` placeholders in the task
    descriptions are filled at run time via
    ``crew.kickoff(inputs={"topic": ...})``.

    Returns:
        Crew: the configured crewai Crew (search task followed by
        scraping task).
    """
    search_tool = SerperDevTool()         # tool for performing web searches
    scrape_website = ScrapeWebsiteTool()  # tool for extracting data from websites

    # Agent that finds the most relevant source for the topic.
    web_search_agent = Agent(
        role="Expert Web Search Agent",
        goal="Identify and retrieve relevant web data for user queries",
        backstory="An expert in identifying valuable web sources for the user's needs",
        allow_delegation=False,
        verbose=True,
        llm=crew_llm,
    )

    # Agent that extracts and summarizes content from the page found above.
    web_scraper_agent = Agent(
        role="Expert Web Scraper Agent",
        goal="Extract and analyze content from specific web pages identified by the search agent",
        backstory="A highly skilled web scraper, capable of analyzing and summarizing website content accurately",
        allow_delegation=False,
        verbose=True,
        llm=crew_llm,
    )

    # Task 1: locate the best page/article for '{topic}'.
    search_task = Task(
        description=(
            "Identify the most relevant web page or article for the topic: '{topic}'. "
            "Use all available tools to search for and provide a link to a web page "
            "that contains valuable information about the topic. Keep your response concise."
        ),
        expected_output=(
            "A concise summary of the most relevant web page or article for '{topic}', "
            "including the link to the source and key points from the content."
        ),
        tools=[search_tool],
        agent=web_search_agent,
    )

    # Task 2: scrape and summarize the page produced by the search task.
    scraping_task = Task(
        description=(
            "Extract and analyze data from the given web page or website. Focus on the key sections "
            "that provide insights into the topic: '{topic}'. Use all available tools to retrieve the content, "
            "and summarize the key findings in a concise manner."
        ),
        expected_output=(
            "A detailed summary of the content from the given web page or website, highlighting the key insights "
            "and explaining their relevance to the topic: '{topic}'. Ensure clarity and conciseness."
        ),
        tools=[scrape_website],
        agent=web_scraper_agent,
    )

    # Crew wiring both agents and their tasks together.
    crew = Crew(
        agents=[web_search_agent, web_scraper_agent],
        tasks=[search_task, scraping_task],
        # Fix: recent crewai releases validate `verbose` as a bool via
        # pydantic, so the integer 1 raises a validation error; True also
        # matches the agents' verbose=True above.
        verbose=True,
        memory=False,
    )
    return crew
# Function provided by DataCamp
def get_web_content(query):
    """Run the web-scraping crew on *query* and return its raw text output."""
    scraping_crew = setup_web_scraping_agent()
    crew_output = scraping_crew.kickoff(inputs={"topic": query})
    return crew_output.raw