Spaces:
Sleeping
Sleeping
Merge pull request #13 from AutoLLM/fixes
Browse files- README.md +12 -1
- readme_images/hf_example.png +0 -0
- src/app.py +52 -27
- src/utils.py +1 -0
README.md
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
# ArxivDigest
|
| 2 |
This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
## π Contents
|
| 5 |
|
| 6 |
- [What this repo does](#π-what-this-repo-does)
|
|
@@ -24,8 +28,15 @@ This repository offers a method to curate a daily digest, sorted by relevance, u
|
|
| 24 |
* The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
|
| 25 |
* The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
### Some examples:
|
| 29 |
|
| 30 |
#### Digest Configuration:
|
| 31 |
- Subject/Topic: Computer Science
|
|
|
|
| 1 |
# ArxivDigest
|
| 2 |
This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
|
| 3 |
|
| 4 |
+
You can try it out at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest) using your own OpenAI API key.
|
| 5 |
+
|
| 6 |
+
You can also create a daily subscription pipeline to email you the results.
|
| 7 |
+
|
| 8 |
## π Contents
|
| 9 |
|
| 10 |
- [What this repo does](#π-what-this-repo-does)
|
|
|
|
| 28 |
* The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
|
| 29 |
* The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
|
| 30 |
|
| 31 |
+
### Testing it out with Hugging Face:
|
| 32 |
+
|
| 33 |
+
We provide a demo at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest). Simply enter your [OpenAI API key](https://platform.openai.com/account/api-keys) and then fill in the configuration on the right. Note that we do not store your key.
|
| 34 |
+
|
| 35 |
+

|
| 36 |
+
|
| 37 |
+
You can also send yourself an email of the digest by creating a SendGrid account and an [API key](https://app.SendGrid.com/settings/api_keys).
|
| 38 |
|
| 39 |
+
### Some examples of results:
|
| 40 |
|
| 41 |
#### Digest Configuration:
|
| 42 |
- Subject/Topic: Computer Science
|
readme_images/hf_example.png
ADDED
|
src/app.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from download_new_papers import get_papers
|
|
|
|
| 3 |
from relevancy import generate_relevance_score, process_subject_fields
|
| 4 |
from sendgrid.helpers.mail import Mail, Email, To, Content
|
| 5 |
import sendgrid
|
| 6 |
import os
|
|
|
|
| 7 |
|
| 8 |
topics = {
|
| 9 |
"Physics": "",
|
|
@@ -57,7 +59,9 @@ categories_map = {
|
|
| 57 |
|
| 58 |
|
| 59 |
def sample(email, topic, physics_topic, categories, interest):
|
| 60 |
-
if
|
|
|
|
|
|
|
| 61 |
if isinstance(physics_topic, list):
|
| 62 |
raise gr.Error("You must choose a physics topic.")
|
| 63 |
topic = physics_topic
|
|
@@ -72,6 +76,7 @@ def sample(email, topic, physics_topic, categories, interest):
|
|
| 72 |
else:
|
| 73 |
papers = get_papers(abbr, limit=4)
|
| 74 |
if interest:
|
|
|
|
| 75 |
relevancy, _ = generate_relevance_score(
|
| 76 |
papers,
|
| 77 |
query={"interest": interest},
|
|
@@ -86,7 +91,6 @@ def change_subsubject(subject, physics_subject):
|
|
| 86 |
if subject != "Physics":
|
| 87 |
return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
|
| 88 |
else:
|
| 89 |
-
print(physics_subject)
|
| 90 |
if physics_subject and not isinstance(physics_subject, list):
|
| 91 |
return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
|
| 92 |
else:
|
|
@@ -100,7 +104,9 @@ def change_physics(subject):
|
|
| 100 |
return gr.Dropdown.update(physics_topics, visible=True)
|
| 101 |
|
| 102 |
|
| 103 |
-
def test(email, topic, physics_topic, categories, interest):
|
|
|
|
|
|
|
| 104 |
if topic == "Physics":
|
| 105 |
if isinstance(physics_topic, list):
|
| 106 |
raise gr.Error("You must choose a physics topic.")
|
|
@@ -116,19 +122,19 @@ def test(email, topic, physics_topic, categories, interest):
|
|
| 116 |
else:
|
| 117 |
papers = get_papers(abbr, limit=4)
|
| 118 |
if interest:
|
|
|
|
| 119 |
relevancy, hallucination = generate_relevance_score(
|
| 120 |
papers,
|
| 121 |
query={"interest": interest},
|
| 122 |
threshold_score=7,
|
| 123 |
num_paper_in_prompt=8)
|
| 124 |
-
print(relevancy[0].keys())
|
| 125 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
|
| 126 |
if hallucination:
|
| 127 |
body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
|
| 128 |
else:
|
| 129 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
|
| 130 |
-
sg = sendgrid.SendGridAPIClient(api_key=
|
| 131 |
-
from_email = Email(
|
| 132 |
to_email = To(email)
|
| 133 |
subject = "arXiv digest"
|
| 134 |
content = Content("text/html", body)
|
|
@@ -138,33 +144,52 @@ def test(email, topic, physics_topic, categories, interest):
|
|
| 138 |
# Send an HTTP POST request to /mail/send
|
| 139 |
response = sg.client.mail.send.post(request_body=mail_json)
|
| 140 |
if response.status_code >= 200 and response.status_code <= 300:
|
| 141 |
-
return "
|
| 142 |
else:
|
| 143 |
-
return
|
|
|
|
| 144 |
|
|
|
|
|
|
|
| 145 |
|
| 146 |
with gr.Blocks() as demo:
|
| 147 |
-
with gr.
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 166 |
physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 167 |
subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 168 |
interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 169 |
|
| 170 |
-
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from download_new_papers import get_papers
|
| 3 |
+
import utils
|
| 4 |
from relevancy import generate_relevance_score, process_subject_fields
|
| 5 |
from sendgrid.helpers.mail import Mail, Email, To, Content
|
| 6 |
import sendgrid
|
| 7 |
import os
|
| 8 |
+
import openai
|
| 9 |
|
| 10 |
topics = {
|
| 11 |
"Physics": "",
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
def sample(email, topic, physics_topic, categories, interest):
|
| 62 |
+
if not topic:
|
| 63 |
+
raise gr.Error("You must choose a topic.")
|
| 64 |
+
if topic == "Physics":
|
| 65 |
if isinstance(physics_topic, list):
|
| 66 |
raise gr.Error("You must choose a physics topic.")
|
| 67 |
topic = physics_topic
|
|
|
|
| 76 |
else:
|
| 77 |
papers = get_papers(abbr, limit=4)
|
| 78 |
if interest:
|
| 79 |
+
if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
|
| 80 |
relevancy, _ = generate_relevance_score(
|
| 81 |
papers,
|
| 82 |
query={"interest": interest},
|
|
|
|
| 91 |
if subject != "Physics":
|
| 92 |
return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
|
| 93 |
else:
|
|
|
|
| 94 |
if physics_subject and not isinstance(physics_subject, list):
|
| 95 |
return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
|
| 96 |
else:
|
|
|
|
| 104 |
return gr.Dropdown.update(physics_topics, visible=True)
|
| 105 |
|
| 106 |
|
| 107 |
+
def test(email, topic, physics_topic, categories, interest, key):
|
| 108 |
+
if not email: raise gr.Error("Set your email")
|
| 109 |
+
if not key: raise gr.Error("Set your SendGrid key")
|
| 110 |
if topic == "Physics":
|
| 111 |
if isinstance(physics_topic, list):
|
| 112 |
raise gr.Error("You must choose a physics topic.")
|
|
|
|
| 122 |
else:
|
| 123 |
papers = get_papers(abbr, limit=4)
|
| 124 |
if interest:
|
| 125 |
+
if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
|
| 126 |
relevancy, hallucination = generate_relevance_score(
|
| 127 |
papers,
|
| 128 |
query={"interest": interest},
|
| 129 |
threshold_score=7,
|
| 130 |
num_paper_in_prompt=8)
|
|
|
|
| 131 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
|
| 132 |
if hallucination:
|
| 133 |
body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
|
| 134 |
else:
|
| 135 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
|
| 136 |
+
sg = sendgrid.SendGridAPIClient(api_key=key)
|
| 137 |
+
from_email = Email(email)
|
| 138 |
to_email = To(email)
|
| 139 |
subject = "arXiv digest"
|
| 140 |
content = Content("text/html", body)
|
|
|
|
| 144 |
# Send an HTTP POST request to /mail/send
|
| 145 |
response = sg.client.mail.send.post(request_body=mail_json)
|
| 146 |
if response.status_code >= 200 and response.status_code <= 300:
|
| 147 |
+
return "Success!"
|
| 148 |
else:
|
| 149 |
+
return "Failure: ({response.status_code})"
|
| 150 |
+
|
| 151 |
|
| 152 |
+
def register_openai_token(token):
|
| 153 |
+
openai.api_key = token
|
| 154 |
|
| 155 |
with gr.Blocks() as demo:
|
| 156 |
+
with gr.Row():
|
| 157 |
+
with gr.Column(scale=1):
|
| 158 |
+
token = gr.Textbox(label="OpenAI API Key", type="password")
|
| 159 |
+
subject = gr.Radio(
|
| 160 |
+
list(topics.keys()), label="Topic"
|
| 161 |
+
)
|
| 162 |
+
physics_subject = gr.Dropdown(physics_topics, value=[], multiselect=False, label="Physics category", visible=False, info="")
|
| 163 |
+
subsubject = gr.Dropdown(
|
| 164 |
+
[], value=[], multiselect=True, label="Subtopic", info="Optional. Leaving it empty will use all subtopics.", visible=False)
|
| 165 |
+
subject.change(fn=change_physics, inputs=[subject], outputs=physics_subject)
|
| 166 |
+
subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
|
| 167 |
+
physics_subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
|
| 168 |
+
|
| 169 |
+
interest = gr.Textbox(label="A natural language description of what you are interested in. We will generate relevancy scores (1-10) and explanations for the papers in the selected topics according to this statement.", info="Press shift-enter or click the button below to update.", lines=7)
|
| 170 |
+
sample_btn = gr.Button("Generate Digest")
|
| 171 |
+
sample_output = gr.Textbox(label="Results for your configuration.", info="For runtime purposes, this is only done on a small subset of recent papers in the topic you have selected. Papers will not be filtered by relevancy, only sorted on a scale of 1-10.")
|
| 172 |
+
with gr.Column(scale=0.40):
|
| 173 |
+
with gr.Box():
|
| 174 |
+
title = gr.Markdown(
|
| 175 |
+
"""
|
| 176 |
+
# Email Setup, Optional
|
| 177 |
+
Send an email to the below address using the configuration on the right. Requires a sendgrid token. These values are not needed to use the right side of this page.
|
| 178 |
+
|
| 179 |
+
To create a scheduled job for this, see our [Github Repository](https://github.com/AutoLLM/ArxivDigest)
|
| 180 |
+
""",
|
| 181 |
+
interactive=False, show_label=False)
|
| 182 |
+
email = gr.Textbox(label="Email address", type="email", placeholder="")
|
| 183 |
+
sendgrid_token = gr.Textbox(label="SendGrid API Key", type="password")
|
| 184 |
+
with gr.Row():
|
| 185 |
+
test_btn = gr.Button("Send email")
|
| 186 |
+
output = gr.Textbox(show_label=False, placeholder="email status")
|
| 187 |
+
test_btn.click(fn=test, inputs=[email, subject, physics_subject, subsubject, interest, sendgrid_token], outputs=output)
|
| 188 |
+
token.change(fn=register_openai_token, inputs=[token])
|
| 189 |
+
sample_btn.click(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 190 |
subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 191 |
physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 192 |
subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 193 |
interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
| 194 |
|
| 195 |
+
demo.launch(show_api=False)
|
src/utils.py
CHANGED
|
@@ -15,6 +15,7 @@ import copy
|
|
| 15 |
|
| 16 |
StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
|
| 17 |
|
|
|
|
| 18 |
openai_org = os.getenv("OPENAI_ORG")
|
| 19 |
if openai_org is not None:
|
| 20 |
openai.organization = openai_org
|
|
|
|
| 15 |
|
| 16 |
StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
|
| 17 |
|
| 18 |
+
|
| 19 |
openai_org = os.getenv("OPENAI_ORG")
|
| 20 |
if openai_org is not None:
|
| 21 |
openai.organization = openai_org
|