Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import arxiv | |
| # import packages that are used in our tools | |
| import requests | |
| from typing import Tuple | |
| from bs4 import BeautifulSoup | |
| from huggingface_hub import HfApi | |
| from pypdf import PdfReader | |
| from smolagents import CodeAgent, HfApiModel, tool, GradioUI | |
def _first_daily_paper_title(data_props: str) -> str:
    """Return the title of the first ``dailyPapers`` entry in a ``data-props`` JSON payload, or ''."""
    try:
        payload = json.loads(data_props)
    except json.JSONDecodeError:
        return ''
    if not isinstance(payload, dict):
        return ''
    daily_papers = payload.get('dailyPapers')
    if not isinstance(daily_papers, list) or not daily_papers:
        return ''
    first_entry = daily_papers[0]
    title = first_entry.get('title', '') if isinstance(first_entry, dict) else ''
    return title if isinstance(title, str) else ''


def get_hugging_face_top_daily_paper() -> str:
    """
    This is a tool that returns the most upvoted paper on Hugging Face daily papers.
    It returns the title of the paper, or an empty string when the page cannot be
    fetched or no paper title is found in it.
    """
    # The URL must be a bare URL: wrapping it in angle brackets makes requests
    # reject it before any network traffic happens.
    url = "https://huggingface.co/papers"
    try:
        # A timeout keeps the tool from hanging forever on a stalled connection;
        # timeouts are RequestException subclasses and are handled below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the HTML: {e}")
        return ''

    soup = BeautifulSoup(response.content, "html.parser")
    # The paper list is embedded as JSON in the "data-props" attribute of these containers.
    containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
    for container in containers:
        data_props = container.get('data-props', '')
        if not data_props:
            continue
        # NOTE(review): the first entry of 'dailyPapers' is taken as the top paper;
        # this assumes the page lists papers in ranking order - confirm.
        title = _first_daily_paper_title(data_props)
        if title:
            return title
    return ''
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the id of the first matching paper, or an empty string if nothing matches.

    Args:
        title: The paper title for which to get the id.
    """
    api = HfApi()
    papers = api.list_papers(query=title)
    # The search result may be a lazy iterable, which is truthy even when it
    # yields nothing, so fetch the first item with a default instead of testing
    # truthiness and calling a bare next() that would leak StopIteration.
    first_match = next(iter(papers), None)
    if first_match is None:
        return ''
    return first_match.id
def download_paper_by_id(paper_id: str) -> None:
    """
    This tool gets the id of a paper and downloads it from arxiv. It saves the paper locally
    in the current directory as "paper.pdf".

    Args:
        paper_id: The id of the paper to download.
    Raises:
        ValueError if the id is empty or no paper is found for it.
    """
    if not paper_id or not paper_id.strip():
        raise ValueError("paper_id must be a non-empty arxiv identifier")

    search = arxiv.Search(id_list=[paper_id])
    # Use a default so an empty result set produces a readable error instead
    # of a bare StopIteration escaping from next().
    paper = next(arxiv.Client().results(search), None)
    if paper is None:
        raise ValueError(f"No arxiv paper found for id {paper_id!r}")

    paper.download_pdf(filename="paper.pdf")
    return None
def read_pdf_file(file_path: str) -> str:
    """
    This function reads the first three pages of a PDF file and returns its content as a string.

    Args:
        file_path: The path to the PDF file.
    Returns:
        A string containing the text of the first three pages of the PDF file.
    """
    # Open the file the caller asked for rather than a hard-coded name.
    reader = PdfReader(file_path)
    # Report the total page count; only the first three pages are read below.
    print(len(reader.pages))
    # "or ''" guards against a page for which no text could be extracted.
    return "".join(page.extract_text() or "" for page in reader.pages[:3])
def process_file_transfer_result(log_files_path: str, job_id: str) -> Tuple:
    """
    This function processes the logs sent by the server.

    Args:
        log_files_path: The path to the logs.
        job_id: The job identifier of the initiated file transfer.
    Returns:
        True if the transfer succeeded, together with the corresponding message if an error occurred.
    """
    # Placeholder implementation: neither argument is inspected and success
    # with an empty message is always reported.
    succeeded = True
    error_message = ''
    return succeeded, error_message
def request_file_transfer(file_path: str, server: str) -> str:
    """
    This function sends a request for a file transfer initiation.

    Args:
        file_path: The path to the source file.
        server: The identifier of the file transfer server.
    Returns:
        The job identifier of the initiated file transfer.
    """
    # Placeholder implementation: no request is sent and the same job
    # identifier is returned for every call.
    job_id = 'trans001'
    return job_id
def encrypt_file(file_path: str) -> bool:
    """
    This function encrypts the source file.

    Args:
        file_path: The path to the source file.
    Returns:
        True if the encryption went well.
    """
    # Placeholder implementation: the file is not touched; a confirmation line
    # is printed and success is always reported.
    confirmation = f'File {file_path} encrypted'
    print(confirmation)
    return True
def validate_file(file_path: str) -> bool:
    """
    This function validates that the source file exists.

    Args:
        file_path: The path to the source file.
    Returns:
        A boolean value indicating the existence of the file.
    """
    # Placeholder implementation: the filesystem is not consulted and every
    # path is reported as existing. The return annotation is bool so that it
    # matches both the documented contract and the value actually returned.
    return True
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
# A KeyError is raised here when the HF_TOKEN environment variable is not set.
hf_token = os.environ["HF_TOKEN"]
model = HfApiModel(model_id=model_id, token=hf_token)


def _as_tool(candidate):
    """Return *candidate* as a smolagents tool, converting plain functions with ``tool``."""
    import inspect  # local import: only needed for this one check

    # Agents are given tool objects rather than bare functions; anything that
    # is not a plain function (e.g. an already-built tool) is passed through.
    return tool(candidate) if inspect.isfunction(candidate) else candidate


# Only the file-transfer helpers are registered with this agent.
agent = CodeAgent(
    tools=[
        _as_tool(candidate)
        for candidate in (
            validate_file,
            encrypt_file,
            request_file_transfer,
            process_file_transfer_result,
        )
    ],
    model=model,
    add_base_tools=True,
)

#agent.run(
# "Summarize today's top paper on Hugging Face daily papers by reading it.",
#)

GradioUI(agent).launch()