# Spaces: Runtime error
| import re | |
| import urllib | |
| import urllib.request | |
| import xml.etree.ElementTree as ET | |
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
# Hugging Face Hub id of the checkpoint used for topic classification.
MODEL_NAME = "Yozhikoff/arxiv-topics-distilbert-base-cased"

# Module-level pipeline; its tokenizer and model are used by classify_paper.
classifier = pipeline(model=MODEL_NAME)
def get_arxiv_title_and_abstract(link):
    """Fetch the title and abstract of an arXiv paper from its URL.

    The link must look like ``http(s)://arxiv.org/abs/<id>`` or
    ``http(s)://arxiv.org/pdf/<id>[.pdf]`` where ``<id>`` is a new-style
    identifier (``YYMM.NNNNN``), optionally followed by a version suffix
    such as ``v2``. The metadata is read from the Atom feed returned by
    ``export.arxiv.org/api/query``.

    Args:
        link: URL of the paper's abstract or PDF page.

    Returns:
        A ``(title, summary)`` tuple with the text of the first feed entry's
        ``title`` and ``summary`` elements, exactly as returned by the API.

    Raises:
        gr.Error: If the link is malformed, the request fails, the response
            is not valid XML, or the feed holds no entry with a title and
            summary.
    """
    atom = '{http://www.w3.org/2005/Atom}'

    # Dots are escaped and fullmatch is used so that look-alike hosts
    # (e.g. "arxivXorg") and a trailing newline are rejected.
    pattern = r'https?://arxiv\.org/(?:abs|pdf)/(\d{4}\.\d{4,5}(?:v\d+)?)(?:\.pdf)?'
    match = re.fullmatch(pattern, (link or '').strip())
    if not match:
        raise gr.Error('Invalid arXiv URL!')

    arxiv_id = match.group(1)
    api_url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'

    try:
        # The context manager closes the connection; the timeout keeps a
        # stalled request from blocking the caller indefinitely.
        with urllib.request.urlopen(api_url, timeout=30) as response:
            xml_data = response.read()
        root = ET.fromstring(xml_data)
    except (OSError, ET.ParseError) as err:
        # URLError/HTTPError and socket timeouts are all OSError subclasses.
        raise gr.Error('Could not retrieve paper data from arXiv') from err

    entry = root.find(f'{atom}entry')
    title_el = entry.find(f'{atom}title') if entry is not None else None
    summary_el = entry.find(f'{atom}summary') if entry is not None else None
    if title_el is None or summary_el is None:
        # No usable entry in the feed for this identifier.
        raise gr.Error('Invalid arXiv URL!')

    return title_el.text, summary_el.text
def classify_paper(title, abstract):
    """Predict topic labels for a paper from its title and/or abstract.

    The two fields are combined into one prompt, tokenized with the
    module-level ``classifier`` pipeline's tokenizer and scored by its
    model. Each logit is passed through a sigmoid independently.

    Args:
        title: Paper title; may be empty when ``abstract`` is provided.
        abstract: Paper abstract; may be empty when ``title`` is provided.

    Returns:
        A dict mapping label name to probability (``float``) for every
        class whose probability is at least 0.25. When several class ids
        share a label name, the highest probability is kept.

    Raises:
        gr.Error: If both ``title`` and ``abstract`` are empty or contain
            only whitespace.
    """
    if not (title or '').strip() and not (abstract or '').strip():
        raise gr.Error('Fill Title or/and Abstract')

    text = f"TITLE\n{title}\n\nABSTRACT\n{abstract}"
    model = classifier.model

    # Truncate to the model's position-embedding limit; without this a long
    # abstract yields more tokens than the model can index.
    encoded = classifier.tokenizer(
        text,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )
    input_ids = torch.tensor([encoded['input_ids']], device=model.device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_ids).logits[0]
        probs = torch.sigmoid(logits).cpu().tolist()

    id2label = model.config.id2label
    result = {}
    for class_id, prob in enumerate(probs):
        if prob < 0.25:
            continue
        label = id2label[class_id]
        # Distinct class ids may share a label; keep the best score.
        result[label] = max(prob, result.get(label, 0.0))
    return result
# Gradio UI: the left column collects an arXiv link or a manually entered
# title/abstract, the right column shows the predicted topics.
# NOTE(review): the original indentation was lost, so the nesting of the
# components below is reconstructed -- confirm against the intended layout.
# The UI was previously defined and launched twice; it is defined once here.
with gr.Blocks(title='Paper classifier') as demo:
    gr.Markdown('# Paper Topic Classifier')
    with gr.Row():
        with gr.Column():
            gr.Markdown('## Inputs')
            gr.Markdown('#### Please enter an arXiv link **OR** fill title and abstract manually')
            arxiv_link = gr.Textbox(label="Arxiv link", placeholder="https://arxiv.org/abs/1706.03762")
            b1 = gr.Button("Parse Link")
            title = gr.Textbox(label="Paper title", placeholder="Title text")
            abstract = gr.Textbox(label="Paper abstract", placeholder="Abstract text")
            b2 = gr.Button("Classify Paper", variant='primary')
            # "Parse Link" fills the title and abstract boxes from the link.
            b1.click(fn=get_arxiv_title_and_abstract, inputs=arxiv_link, outputs=[title, abstract], api_name="parse")
        with gr.Column():
            gr.Markdown('## Topics')
            # Empty headings act as vertical spacers above the label output.
            gr.Markdown('## ')
            gr.Markdown('## ')
            out = gr.Label(label="Topics")
            b2.click(classify_paper, inputs=[title, abstract], outputs=out)
    gr.Markdown('## Examples')
    gr.Examples(
        examples=[['https://arxiv.org/abs/1706.03762'], ['https://arxiv.org/abs/2304.06718'], ['https://arxiv.org/abs/1307.0058']],
        inputs=arxiv_link,
        outputs=[title, abstract],
        fn=get_arxiv_title_and_abstract,
        # Caching would call the arXiv API for every example while the app
        # is being built, so a network failure would abort startup. Run the
        # parser when an example is clicked instead.
        cache_examples=False,
        run_on_click=True,
    )

if __name__ == "__main__":
    demo.launch()