dkolarova committed on
Commit
0a59f74
·
verified ·
1 Parent(s): 9191308
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import arxiv

# import packages that are used in our tools
import requests
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from pypdf import PdfReader
from smolagents import CodeAgent, HfApiModel, tool
11
+
12
+
13
@tool
def get_hugging_face_top_daily_paper() -> str:
    """
    This is a tool that returns the top paper on Hugging Face daily papers.
    It returns the title of the first paper listed in the page's embedded
    "dailyPapers" data, or an empty string if the page cannot be fetched or
    no paper title can be found.
    """
    # Plain URL: Markdown autolink angle brackets are not part of the address
    # and make the request fail before it is ever sent.
    url = "https://huggingface.co/papers"
    try:
        # A timeout keeps the tool from hanging forever on a stalled connection;
        # timeouts are reported as RequestException like any other fetch error.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the HTML: {e}")
        return ''

    soup = BeautifulSoup(response.content, "html.parser")

    # The paper list is read from the JSON-like "data-props" attribute of the
    # hydration containers; containers without usable data are skipped.
    for container in soup.find_all('div', class_='SVELTE_HYDRATER contents'):
        data_props = container.get('data-props', '')
        if not data_props:
            continue
        try:
            json_data = json.loads(data_props.replace('&quot;', '"'))
        except json.JSONDecodeError:
            continue
        if not isinstance(json_data, dict):
            continue
        daily_papers = json_data.get('dailyPapers')
        if not daily_papers:
            # Key missing or list empty: nothing to report from this container.
            continue
        try:
            return daily_papers[0]['title']
        except (KeyError, IndexError, TypeError):
            # Unexpected entry shape; keep looking in the remaining containers.
            continue

    return ''
44
+
45
+
46
@tool
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the id of the first paper found for the title, or an empty string
    if the search yields no paper.

    Args:
        title: The paper title for which to get the id.
    """
    api = HfApi()
    papers = api.list_papers(query=title)
    # The search result may be a lazy iterable, which is truthy even when it
    # yields nothing; asking for the first item with a default avoids leaking
    # StopIteration on an empty result.
    first_paper = next(iter(papers), None)
    if first_paper is None:
        return ''
    return first_paper.id
62
+
63
+
64
@tool
def download_paper_by_id(paper_id: str) -> None:
    """
    This tool gets the id of a paper and downloads it from arxiv. It saves the paper locally
    in the current directory as "paper.pdf". It fails with a ValueError if the search
    returns no paper for the given id.

    Args:
        paper_id: The id of the paper to download.
    """
    search = arxiv.Search(id_list=[paper_id])
    # Use a default so an empty result surfaces as a clear error instead of a
    # bare StopIteration escaping from the tool.
    paper = next(arxiv.Client().results(search), None)
    if paper is None:
        raise ValueError(f"No arxiv paper found for id {paper_id!r}")
    paper.download_pdf(filename="paper.pdf")
76
+
77
+
78
@tool
def read_pdf_file(file_path: str) -> str:
    """
    This function reads the first three pages of a PDF file and returns its content as a string.
    Args:
        file_path: The path to the PDF file.
    Returns:
        A string containing the text extracted from the first three pages of the PDF file.
    """
    # Open the file the caller asked for rather than a fixed file name.
    reader = PdfReader(file_path)
    # Slicing tolerates documents shorter than three pages; a page with no
    # extractable text contributes an empty string.
    return "".join(page.extract_text() or "" for page in reader.pages[:3])
94
+
95
+
96
# Identifier of the model that drives the agent.
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Take the access token from the HF_TOKEN environment variable instead of
# hard-coding an empty string as the credential. When the variable is unset or
# empty, None is passed so token resolution is left to the library's defaults.
model = HfApiModel(model_id=model_id, token=os.environ.get("HF_TOKEN") or None)

# The agent gets the four paper tools defined in this file plus the base tools.
agent = CodeAgent(
    tools=[
        get_hugging_face_top_daily_paper,
        get_paper_id_by_title,
        download_paper_by_id,
        read_pdf_file,
    ],
    model=model,
    add_base_tools=True,
)

agent.run(
    "Summarize today's top paper on Hugging Face daily papers by reading it.",
)