# bpm-agent / app.py
# Commit bb19c62 (verified) by dkolarova: "Update app.py" -- file size 4.67 kB.
import os
import json
import arxiv
# import packages that are used in our tools
import requests
from typing import Tuple
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from pypdf import PdfReader
from smolagents import CodeAgent, HfApiModel, tool, GradioUI
@tool
def get_hugging_face_top_daily_paper() -> str:
    """
    This is a tool that returns the most upvoted paper on Hugging Face daily papers.
    It returns the title of the paper, or an empty string when the page cannot be
    fetched or no title can be extracted from it.
    """
    # Plain URL: the earlier literal was wrapped in "<...>", which requests
    # rejects as an invalid URL, so the tool could never reach the site.
    url = "https://huggingface.co/papers"
    try:
        # A timeout keeps the agent from blocking forever on a stalled connection.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the HTML: {e}")
        return ''

    soup = BeautifulSoup(response.content, "html.parser")
    # The paper list is read from the JSON-like "data-props" attribute of these divs.
    containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
    top_paper = ""
    for container in containers:
        data_props = container.get('data-props', '')
        if not data_props:
            continue
        try:
            # Parse the JSON-like string
            json_data = json.loads(data_props.replace('&quot;', '"'))
        except json.JSONDecodeError:
            continue
        if not isinstance(json_data, dict) or 'dailyPapers' not in json_data:
            continue
        try:
            # As before, a later container that carries a paper list overrides
            # an earlier one.
            top_paper = json_data['dailyPapers'][0]['title']
        except (IndexError, KeyError, TypeError):
            # Empty list or unexpected entry shape: keep whatever was found so far
            # instead of crashing the tool.
            continue
    return top_paper
@tool
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the id of the first paper matching the title, or an empty string
    when the search yields nothing.

    Args:
        title: The paper title for which to get the id.
    """
    api = HfApi()
    papers = api.list_papers(query=title)
    # The search result may be a lazy iterable, which is truthy even when it
    # yields nothing, so a truthiness test cannot detect "no match". Probing with
    # a defaulted next() covers both lists and generators without StopIteration.
    paper = next(iter(papers or ()), None)
    if paper is None:
        return ''
    return paper.id
@tool
def download_paper_by_id(paper_id: str) -> None:
    """
    This tool gets the id of a paper and downloads it from arxiv. It saves the paper locally
    in the current directory as "paper.pdf".

    Args:
        paper_id: The id of the paper to download.

    Raises:
        ValueError: If arxiv returns no paper for the given id.
    """
    search = arxiv.Search(id_list=[paper_id])
    # A bare next() would leak an unexplained StopIteration when the id matches
    # nothing; use a default and report the problem explicitly instead.
    paper = next(arxiv.Client().results(search), None)
    if paper is None:
        raise ValueError(f"No arxiv paper found for id {paper_id!r}")
    paper.download_pdf(filename="paper.pdf")
    return None
@tool
def read_pdf_file(file_path: str) -> str:
    """
    This function reads the first three pages of a PDF file and returns its content as a string.

    Args:
        file_path: The path to the PDF file.

    Returns:
        A string containing the content of the PDF file.
    """
    # Open the file the caller asked for; the earlier version ignored the
    # argument and always read 'paper.pdf'.
    reader = PdfReader(file_path)
    print(len(reader.pages))
    pages = reader.pages[:3]
    # Join once instead of repeated "+="; "or ''" guards against a page that
    # yields no text.
    return "".join(page.extract_text() or "" for page in pages)
@tool
def process_file_transfer_result(log_files_path: str, job_id: str) -> Tuple:
    """
    Process the logs sent by the server for a file transfer job.

    This is a placeholder that always reports a successful transfer.

    Args:
        log_files_path: The path to the logs.
        job_id: The job identifier of the initiated file transfer.

    Returns:
        A pair made of a success flag (True if the transfer succeeded) and the error message, which is empty when no error occurred.
    """
    succeeded = True
    error_message = ''
    return succeeded, error_message
@tool
def request_file_transfer(file_path: str, server: str) -> str:
    """
    Send a request that initiates a file transfer.

    This is a placeholder that always answers with the same fixed job identifier.

    Args:
        file_path: The path to the source file.
        server: Identifier of the file transfer server.

    Returns:
        The job identifier of the initiated file transfer.
    """
    job_id = 'trans001'
    return job_id
@tool
def encrypt_file(file_path: str) -> bool:
    """
    Encrypt the source file.

    This is a placeholder that only prints a confirmation message and reports success.

    Args:
        file_path: The path to the source file.

    Returns:
        True if encryption went well.
    """
    confirmation = f'File {file_path} encrypted'
    print(confirmation)
    return True
@tool
def validate_file(file_path: str) -> bool:
    """
    This function validates that the source file exists.

    It is currently a placeholder that reports every file as existing. The return
    annotation is bool (it used to say str) so the declared output type matches
    the value actually returned.

    Args:
        file_path: The path to the source file.

    Returns:
        A boolean value indicating the existence of a file.
    """
    return True
# Model configuration. HF_TOKEN must be set in the environment; a missing
# variable raises KeyError here, at startup.
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
hf_token = os.environ["HF_TOKEN"]
model = HfApiModel(token=hf_token, model_id=model_id)

# Only the four file-transfer tools are registered with the agent; the
# paper-related tools defined above are not part of its toolbox.
agent = CodeAgent(
    model=model,
    add_base_tools=True,
    tools=[
        validate_file,
        encrypt_file,
        request_file_transfer,
        process_file_transfer_result,
    ],
)

# Example of a one-off, non-interactive run (kept disabled):
# agent.run(
#     "Summarize today's top paper on Hugging Face daily papers by reading it.",
# )

# Start the Gradio chat interface for the agent.
GradioUI(agent).launch()