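# NOTE: the next three lines are non-code header residue; kept as comments so the module parses.
# ssocean's picture
# Update app.py
# 66a05f8 verified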
import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import AutoPeftModelForSequenceClassification
import torch.nn.functional as F
import re
# Device string that predict_v1 moves its tokenized inputs to.
device = "cuda:0"

# ===== v1 section =====
# Both stay None until predict_v1 loads them on its first call.
model_v1 = None
tokenizer_v1 = None
model_path_v1 = "ssocean/NAIP"
@spaces.GPU(duration=60, enable_queue=True)
def predict_v1(title, abstract):
    """Score a paper with the NAIP v1 model.

    The tokenizer and model are loaded lazily on the first call and cached in
    the module-level ``tokenizer_v1`` / ``model_v1`` globals.

    Args:
        title: Paper title.
        abstract: Paper abstract.

    Returns:
        A ``(raw_score, final_score)`` tuple, both rounded to 4 decimals.
        ``raw_score`` is the model's single output logit; ``final_score`` is
        ``sigmoid(logit) + 0.05`` capped at 1.0.
    """
    global model_v1, tokenizer_v1
    # Check both globals: if a previous call loaded one but failed on the
    # other, retry instead of crashing on a None object forever after.
    if model_v1 is None or tokenizer_v1 is None:
        tokenizer_v1 = AutoTokenizer.from_pretrained(model_path_v1)
        # .eval() returns the module itself, so the model is stored already
        # switched to inference mode.
        model_v1 = AutoModelForSequenceClassification.from_pretrained(
            model_path_v1,
            num_labels=1,
            load_in_8bit=True,
        ).eval()

    text = f"Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):"
    inputs = tokenizer_v1(text, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model_v1(**inputs).logits

    raw_score = logits.item()  # raw (unbounded) logit
    probability = torch.sigmoid(logits).item()
    final_score = min(1.0, probability + 0.05)  # normalization rule: +0.05, capped at 1
    return round(raw_score, 4), round(final_score, 4)
# ===== v2 section =====
# Shared scorer instance; stays None until predict_v2 builds it.
scorer_v2 = None
model_path_v2 = "ssocean/NAIPv2"
class PaperScorer:
    """Scores a paper (title + abstract) with a sequence-classification model.

    The model emits a single logit; ``score`` returns that raw logit together
    with a rescaled value in [0, 1] obtained by applying a sigmoid followed by
    the piecewise-linear mapping in ``_rescale_score``.
    """

    # (sigmoid score -> rescaled score) anchor points used by _rescale_score.
    # Consecutive pairs define the linear segments; x values must be strictly
    # increasing and span [0, 1].
    _ANCHORS = (
        (0.0, 0.0),
        (0.233, 0.465),
        (0.372, 0.608),
        (0.423, 0.714),
        (0.496, 0.786),
        (1.0, 1.0),
    )

    def __init__(self, model_path: str, device: str = 'cuda', max_length: int = 512):
        """Load the model and tokenizer from ``model_path``.

        Args:
            model_path: Identifier or path passed to ``from_pretrained``.
            device: Requested device; replaced by ``'cpu'`` when CUDA is not
                available.
            max_length: Maximum token length used when tokenizing prompts.
        """
        self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
        self.max_length = max_length
        # NOTE(review): the original comment labelled this a PEFT (LoRA)
        # model, but it is loaded through AutoModelForSequenceClassification
        # rather than the PEFT loader — confirm which is intended.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=1,
            load_in_8bit=True,
        ).eval()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # Reuse EOS as the padding token and keep the model config in sync.
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.config.pad_token_id = self.tokenizer.pad_token_id
        self.prompt_template = (
            "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
        )

    def _rescale_score(self, s: float) -> float:
        """Rescale a [0, 1] score with a piecewise-linear map.

        Mapping examples:
            0.233 -> 0.465
            0.372 -> 0.608
            0.423 -> 0.714
            0.496 -> 0.786

        Args:
            s: Input score; values outside [0, 1] are clamped first.

        Returns:
            The interpolated score in [0, 1], rounded to 4 decimals.
        """
        s = max(0.0, min(1.0, s))
        for (x1, y1), (x2, y2) in zip(self._ANCHORS, self._ANCHORS[1:]):
            if x1 <= s <= x2:
                # Linear interpolation inside the matching segment.
                t = (s - x1) / (x2 - x1)
                return round(y1 + t * (y2 - y1), 4)
        # Defensive default; the anchors cover the whole clamped range.
        return 1.0

    def score(self, title: str, abstract: str) -> "tuple[float, float]":
        """Score one paper.

        Args:
            title: Paper title (surrounding whitespace is stripped).
            abstract: Paper abstract (surrounding whitespace is stripped).

        Returns:
            ``(raw_score, final_score)``, both rounded to 4 decimals, where
            ``raw_score`` is the model logit and ``final_score`` is the
            rescaled sigmoid of that logit.
        """
        prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
        inputs = self.tokenizer(
            prompt,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=self.max_length
        ).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        raw_score = logits.view(-1).item()  # raw logit
        sigmoid_score = torch.sigmoid(torch.tensor(raw_score)).item()  # squashed into 0-1
        final_score = self._rescale_score(sigmoid_score)  # piecewise map on top of the sigmoid
        return round(raw_score, 4), round(final_score, 4)
@spaces.GPU(duration=60, enable_queue=True)
def predict_v2(title, abstract):
    """Score a paper with the v2 model, creating the shared scorer on first use.

    Returns whatever ``PaperScorer.score`` returns for the given title and
    abstract.
    """
    global scorer_v2
    scorer = scorer_v2
    if scorer is None:
        # First call: build the scorer once and cache it at module level.
        scorer = scorer_v2 = PaperScorer(model_path_v2)
    return scorer.score(title, abstract)
def predict(title, abstract, model_version):
    """Run the selected model on a paper and return rows for the score table.

    Args:
        title: Paper title as typed by the user.
        abstract: Paper abstract as typed by the user.
        model_version: ``"v1"`` selects ``predict_v1``; any other value
            selects ``predict_v2``.

    Returns:
        ``[["Raw Score", raw], ["Final Score", final]]`` with both numbers
        rounded to 4 decimals.
    """
    # Flatten newlines and normalize the typographic apostrophe to ASCII.
    title = title.replace("\n", " ").strip().replace('’', "'")
    abstract = abstract.replace("\n", " ").strip().replace('’', "'")

    print(f'-------------------------------------------------------------------------------')
    print(f'Model Version: {model_version}')
    print(f'Title: {title}')
    print(f'Abstract: {abstract}')

    # Both versions share the same post-processing, so only the scorer differs.
    scorer = predict_v1 if model_version == "v1" else predict_v2
    raw, final = scorer(title, abstract)

    print(f'Raw Score: {raw}, Normalized Score: {final}')
    print(f'-------------------------------------------------------------------------------\n\n')
    return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
def validate_input(title, abstract):
    """Check that a title/abstract pair is acceptable for scoring.

    Checks run in this order: minimum title length, minimum abstract length,
    non-ASCII characters in the title, non-ASCII characters in the abstract,
    and finally a (non-blocking) warning for very long inputs.

    Args:
        title: Paper title; ``None`` is treated as an empty string.
        abstract: Paper abstract; ``None`` is treated as an empty string.

    Returns:
        ``(is_valid, message)``. ``is_valid`` is ``True`` for valid input,
        including the long-input warning case.
    """
    # Flatten newlines and normalize the typographic apostrophe to ASCII.
    title = (title or "").replace("\n", " ").strip().replace('’', "'")
    abstract = (abstract or "").replace("\n", " ").strip().replace('’', "'")

    # split() with no argument collapses runs of whitespace, so repeated
    # spaces (e.g. from replaced newlines) are not counted as extra words.
    title_words = title.split()
    abstract_words = abstract.split()
    if len(title_words) < 3:
        return False, "The title must be at least 3 words long."
    if len(abstract_words) < 50:
        return False, "The abstract must be at least 50 words long."

    # Character checks come before the length warning so that long inputs
    # cannot bypass them through the warning's early "valid" return.
    non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
    for field_name, text in (("title", title), ("abstract", abstract)):
        # dict.fromkeys de-duplicates while keeping first-seen order.
        offending = list(dict.fromkeys(non_latin_pattern.findall(text)))
        if offending:
            return False, f"The {field_name} contains invalid characters: {', '.join(offending)}. Only English letters and special symbols are allowed."

    if len(title_words) + len(abstract_words) > 600:
        return True, "Warning, the input length is approaching tokenization limits (512) and may be truncated without further warning!"
    return True, "Inputs are valid! Good to go!"
def update_button_status(title, abstract):
    """Produce the UI updates for the current title/abstract text.

    Returns a pair of ``gr.update`` objects: the first sets the status
    message (prefixed with ``"Error: "`` for invalid input), the second sets
    ``interactive`` to match the validation result.
    """
    is_valid, status_text = validate_input(title, abstract)
    if is_valid:
        return gr.update(value=status_text), gr.update(interactive=True)
    return gr.update(value="Error: " + status_text), gr.update(interactive=False)
# Sample [title, abstract] pairs; passed to gr.Examples below to fill the
# title and abstract inputs.
examples = [
[
"Mean Flows for One-step Generative Modeling",
('''We propose a principled and effective framework for one-step generative modeling. We introduce the notion of average velocity to characterize flow fields, in contrast to instantaneous velocity modeled by Flow Matching methods. A well-defined identity between average and instantaneous velocities is derived and used to guide neural network training. Our method, termed the MeanFlow model, is self-contained and requires no pre-training, distillation, or curriculum learning. MeanFlow demonstrates strong empirical performance: it achieves an FID of 3.43 with a single function evaluation (1-NFE) on ImageNet 256x256 trained from scratch, significantly outperforming previous state-of-the-art one-step diffusion/flow models. Our study substantially narrows the gap between one-step diffusion/flow models and their multi-step predecessors, and we hope it will motivate future research to revisit the foundations of these powerful models.''')
],
[
"SARDet-100K: Towards Open-Source Benchmark and ToolKit for Large-Scale SAR Object Detection",
('''Synthetic Aperture Radar (SAR) object detection has gained significant attention recently due to its irreplaceable all-weather imaging capabilities. However, this research field suffers from both limited public datasets (mostly comprising <2K images with only mono-category objects) and inaccessible source code. To tackle these challenges, we establish a new benchmark dataset and an open-source method for large-scale SAR object detection. Our dataset, SARDet-100K, is a result of intense surveying, collecting, and standardizing 10 existing SAR detection datasets, providing a large-scale and diverse dataset for research purposes. To the best of our knowledge, SARDet-100K is the first COCO-level large-scale multi-class SAR object detection dataset ever created. With this high-quality dataset, we conducted comprehensive experiments and uncovered a crucial challenge in SAR object detection: the substantial disparities between the pretraining on RGB datasets and finetuning on SAR datasets in terms of both data domain and model structure. To bridge these gaps, we propose a novel Multi-Stage with Filter Augmentation (MSFA) pretraining framework that tackles the problems from the perspective of data input, domain transition, and model migration. The proposed MSFA method significantly enhances the performance of SAR object detection models while demonstrating exceptional generalizability and flexibility across diverse models. This work aims to pave the way for further advancements in SAR object detection. The dataset and code is available at this https URL.''')
],
[
"Enhanced ZSSR for Super-resolution Reconstruction of the Historical Tibetan Document Images",
"Due to the poor preservation and imaging conditions, the image quality of historical Tibetan document images is relatively unsatisfactory. In this paper, we adopt super-resolution technology to reconstruct high quality images of historical Tibetan document. To address the problem of low quantity and poor quality of historical Tibetan document images, we propose the EZSSR network based on the Zero-Shot Super-resolution Network (ZSSR), which borrows the idea of feature pyramid in Deep Laplacian Pyramid Networks (LapSRN) to extract different levels of features while alleviating the ringing artifacts. EZSSR neither requires paired training datasets nor preprocessing stage. The computational complexity of EZSSR is low, and thus, EZSSR can also reconstruct image within the acceptable time frame. Experimental results show that EZSSR reconstructs images with better visual effects and higher PSNR and SSIM values."
]
]
# ===== Gradio interface =====
# NOTE(review): leading indentation is missing in this copy, so which widgets
# sit inside which Row/Column/Blocks context cannot be read off the text below.
# Restore the nesting from the original file before running; the statements
# themselves are left untouched here.
with gr.Blocks() as iface:
# Intro text: page title and a short guide to choosing between the models.
gr.Markdown("""
# 📈 Predict Impact & Quality of Newborn Papers
### LLM-powered estimates from a paper’s title and abstract.
#### Which model should I use?
- [**NAIPv1**](https://arxiv.org/abs/2408.03934) — predicts **academic impact**
- [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*
*See the papers for methodology and evaluation details.*
> ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce significant disk-I/O delays (typically <30 s).
> For **NAIPv2**, the output **Normalized score** may not be comparable across different domains. It is recommended to use the **Raw score** magnitude for quality estimation within the same domain.
""")
with gr.Row():
with gr.Column():
# The selected value is passed to predict() as model_version; predict() sends
# "v1" to the v1 model and every other value (here "v2.1") to the v2 model.
model_selector = gr.Dropdown(
choices=["v1", "v2.1"],
value="v2.1", # default: v2
label="Select Model Version"
)
title_input = gr.Textbox(
lines=2,
placeholder="Enter Paper Title Here...",
label="Paper Title"
)
abstract_input = gr.Textbox(
lines=5,
placeholder="Enter Paper Abstract Here... (Tip: For v2, remove sentences like 'Our code is released at xxx' for more accurate results)",
label="Paper Abstract"
)
# Read-only field showing the message produced by update_button_status.
validation_status = gr.Textbox(label="Validation Status", interactive=False)
# Starts disabled; update_button_status toggles it as the inputs change.
submit_button = gr.Button("Predict Impact", interactive=False)
with gr.Column():
# Two-row table receiving the [label, score] rows returned by predict().
output = gr.Dataframe(
headers=["Type", "Score"],
datatype=["str", "number"],
row_count=2,
col_count=2,
interactive=False,
label="Predicted Scores"
)
# Usage caveats and guidance on interpreting the scores.
gr.Markdown("""
## Important Notes
- The reported performance reflects aggregate statistical outcomes, rather than guarantees for individual instances.
- It is intended as a tool **for research and educational purposes only**.
- Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
- This demo is an early exploration of using LLMs for paper quality estimation and is not optimized against prompt injection attacks.
- The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.
- For **NAIPv1**, a normalized score greater than **0.60** is considered to indicate a potentially impactful paper.
- For **NAIPv2**, a normalized score above **0.60** corresponds to the statistical mean of NeurIPS accepted papers (Poster).
""")
# Re-validate whenever either text field changes; both handlers pass the same
# pair of inputs and update the status box and the submit button.
title_input.change(
update_button_status,
inputs=[title_input, abstract_input],
outputs=[validation_status, submit_button]
)
abstract_input.change(
update_button_status,
inputs=[title_input, abstract_input],
outputs=[validation_status, submit_button]
)
# Clicking the button runs predict() and writes its rows into the score table.
submit_button.click(
predict,
inputs=[title_input, abstract_input, model_selector],
outputs=output
)
# Examples populate the title and abstract inputs; caching is disabled.
gr.Examples(
examples=examples,
inputs=[title_input, abstract_input],
outputs=[validation_status, output],
cache_examples=False
)
iface.launch()