Spaces:

dkolarova
/

bpm-agent

Sleeping

App Files Files Community

dkolarova committed on Feb 3

Commit

0a59f74

verified ·

1 Parent(s): 9191308

main app

Browse files

Files changed (1) hide show

app.py +108 -0

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import json
+import arxiv
+# import packages that are used in our tools
+import requests
+from bs4 import BeautifulSoup
+from huggingface_hub import HfApi
+from pypdf import PdfReader
+from smolagents import CodeAgent, HfApiModel, tool
+@tool
+def get_hugging_face_top_daily_paper() -> str:
+    """
+    This is a tool that returns the most upvoted paper on Hugging Face daily papers.
+    It returns the title of the paper
+    """
+    try:
+        url = "<https://huggingface.co/papers>"
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+        soup = BeautifulSoup(response.content, "html.parser")
+        # Extract the title element from the JSON-like data in the "data-props" attribute
+        containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
+        top_paper = ""
+        for container in containers:
+            data_props = container.get('data-props', '')
+            if data_props:
+                try:
+                    # Parse the JSON-like string
+                    json_data = json.loads(data_props.replace('&quot;', '"'))
+                    if 'dailyPapers' in json_data:
+                        top_paper = json_data['dailyPapers'][0]['title']
+                except json.JSONDecodeError:
+                    continue
+        return top_paper
+    except requests.exceptions.RequestException as e:
+        print(f"Error occurred while fetching the HTML: {e}")
+        return ''
+@tool
+def get_paper_id_by_title(title: str) -> str:
+    """
+    This is a tool that returns the arxiv paper id by its title.
+    It returns the title of the paper
+    Args:
+        title: The paper title for which to get the id.
+    """
+    api = HfApi()
+    papers = api.list_papers(query=title)
+    if papers:
+        paper = next(iter(papers))
+        return paper.id
+    else:
+        return ''
+@tool
+def download_paper_by_id(paper_id: str) -> None:
+    """
+    This tool gets the id of a paper and downloads it from arxiv. It saves the paper locally
+    in the current directory as "paper.pdf".
+    Args:
+        paper_id: The id of the paper to download.
+    """
+    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+    paper.download_pdf(filename="paper.pdf")
+    return None
+@tool
+def read_pdf_file(file_path: str) -> str:
+    """
+    This function reads the first three pages of a PDF file and returns its content as a string.
+    Args:
+        file_path: The path to the PDF file.
+    Returns:
+        A string containing the content of the PDF file.
+    """
+    content = ""
+    reader = PdfReader('paper.pdf')
+    print(len(reader.pages))
+    pages = reader.pages[:3]
+    for page in pages:
+        content += page.extract_text()
+    return content
+model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model = HfApiModel(model_id=model_id, token='')
+agent = CodeAgent(tools=[get_hugging_face_top_daily_paper,
+                         get_paper_id_by_title,
+                         download_paper_by_id,
+                         read_pdf_file],
+                  model=model,
+                  add_base_tools=True)
+agent.run(
+    "Summarize today's top paper on Hugging Face daily papers by reading it.",
+)