import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import AutoPeftModelForSequenceClassification
import torch.nn.functional as F
import re

device = 'cuda:0'
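
# Note (assumption): on Hugging Face ZeroGPU, a GPU is attached only while a
# @spaces.GPU-decorated call runs, so inference happens inside predict_v1 / predict_v2 below.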

# ===== v1 section =====
model_v1, tokenizer_v1 = None, None
model_path_v1 = r'ssocean/NAIP'

@spaces.GPU(duration=60, enable_queue=True)
def predict_v1(title, abstract):
    global model_v1, tokenizer_v1
    if model_v1 is None:
        model_v1 = AutoModelForSequenceClassification.from_pretrained(
            model_path_v1,
            num_labels=1,
            load_in_8bit=True,
        ).eval()
        tokenizer_v1 = AutoTokenizer.from_pretrained(model_path_v1)
        model_v1.eval()

    text = f"Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):"
    inputs = tokenizer_v1(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_v1(**inputs)
    
    raw_score = outputs.logits.item()                  # raw absolute value (logit)
    final_score = torch.sigmoid(outputs.logits).item() # probability
    final_score = min(1.0, final_score + 0.05)         # normalization rule: +0.05 offset, capped at 1.0
    
    return round(raw_score, 4), round(final_score, 4)
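
# Usage sketch (hedged, not part of the app flow): a direct call would look roughly like
#   raw, final = predict_v1("An Example Paper Title", "An example abstract ...")
# where `raw` is the unbounded logit and `final` is min(1.0, sigmoid(raw) + 0.05).
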
# ===== v2 section =====
scorer_v2 = None
model_path_v2 = r'ssocean/NAIPv2'

class PaperScorer:
    def __init__(self, model_path: str, device: str = 'cuda', max_length: int = 512):
        self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
        self.max_length = max_length

        # PEFT model (LoRA)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=1,
            load_in_8bit=True,
        ).eval()

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
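        # Decoder-style checkpoints often ship without a pad token; reuse EOS so
        # padding in the tokenizer call inside score() works.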
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.config.pad_token_id = self.tokenizer.pad_token_id

        self.prompt_template = (
            "Given a certain paper, Title: {title}\nAbstract: {abstract}\nEvaluate the quality of this paper:"
        )
    # def _rescale_score(self, s: float) -> float:
    #     """
    #     Piecewise linear rescaling:
    #       0.0 ~ 0.2  ->  0.0 ~ 0.5
    #       0.2 ~ 0.6  ->  0.5 ~ 1.0
    #       >0.6       ->  1.0
    #     """
    #     s = max(0.0, min(1.0, s))  # clamp input to [0,1]
    
    #     if s <= 0.2:
    #         # scale [0,0.2] to [0,0.5]
    #         y = (s / 0.2) * 0.5
    #     elif s <= 0.6:
    #         # scale (0.2,0.6] to (0.5,1.0]
    #         y = 0.5 + ((s - 0.2) / 0.4) * 0.5
    #     else:
    #         # everything above 0.6 maps to 1.0
    #         y = 1.0
    
    #     return round(y, 4)
    def _rescale_score(self, s: float) -> float:
        """
        Piecewise linear rescaling with custom anchors.
        Mapping examples:
          0.233 -> 0.465
          0.372 -> 0.608
          0.423 -> 0.714
          0.496 -> 0.786
        Entire range still in [0,1].
        """
        # clamp input
        s = max(0.0, min(1.0, s))
    
        # define anchors (x -> y)
        anchors = [
            (0.0,   0.0),
            (0.233, 0.465),
            (0.372, 0.608),
            (0.423, 0.714),
            (0.496, 0.786),
            (1.0,   1.0),
        ]
    
        # find interval
        for (x1, y1), (x2, y2) in zip(anchors, anchors[1:]):
            if x1 <= s <= x2:
                # linear interpolation
                t = (s - x1) / (x2 - x1)
                y = y1 + t * (y2 - y1)
                return round(y, 4)
    
        return 1.0
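
    # Worked example (illustrative only, not executed): s = 0.3 falls in the
    # (0.233, 0.465) -> (0.372, 0.608) segment, so t = (0.3 - 0.233) / (0.372 - 0.233)
    # ~= 0.482 and y ~= 0.465 + 0.482 * (0.608 - 0.465) ~= 0.534.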

    def score(self, title: str, abstract: str) -> tuple[float, float]:
        prompt = self.prompt_template.format(title=title.strip(), abstract=abstract.strip())
        inputs = self.tokenizer(
            prompt,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=self.max_length
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**inputs).logits
            raw_score = logits.view(-1).item()                  # raw logit
        
        sigmoid_score = torch.sigmoid(torch.tensor(raw_score)).item()  # squash to [0, 1]
        final_score = self._rescale_score(sigmoid_score)               # piecewise rescaling on top of the sigmoid
        return round(raw_score, 4), round(final_score, 4)
        
@spaces.GPU(duration=60, enable_queue=True)
def predict_v2(title, abstract):
    global scorer_v2
    if scorer_v2 is None:
        scorer_v2 = PaperScorer(model_path_v2)
    return scorer_v2.score(title, abstract)
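
# Usage sketch (hedged): the scorer can also be driven standalone, e.g.
#   scorer = PaperScorer(model_path_v2)
#   raw, final = scorer.score("An Example Paper Title", "An example abstract ...")
# where `raw` is the logit and `final` is the sigmoid score after piecewise rescaling.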



def predict(title, abstract, model_version):
    title = title.replace("\n", " ").strip().replace('’', "'")
    abstract = abstract.replace("\n", " ").strip().replace('’', "'")
    print(f'-------------------------------------------------------------------------------')
    print(f'Model Version: {model_version}')
    print(f'Title: {title}')
    print(f'Abstract: {abstract}')
    
    if model_version == "v1":
        raw, final = predict_v1(title, abstract)
    else:
        raw, final = predict_v2(title, abstract)

    print(f'Raw Score: {raw}, Normalized Score: {final}')
    print(f'-------------------------------------------------------------------------------\n\n')
    return [["Raw Score", round(raw, 4)], ["Final Score", round(final, 4)]]
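
# Illustrative return shape (values made up): predict(title, abstract, "v2.1") yields
# something like [["Raw Score", 1.2345], ["Final Score", 0.7123]], which matches the
# two-row "Predicted Scores" Dataframe defined in the UI below.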

# def predict(title, abstract, model_version):
#     title = title.replace("\n", " ").strip().replace('’', "'")
#     abstract = abstract.replace("\n", " ").strip().replace('’', "'")

#     if model_version == "v1":
#         raw, final = predict_v1(title, abstract)
#     else:
#         raw, final = predict_v2(title, abstract)

#     print(f"Raw: {raw}, Final: {final}")
#     return {"Raw Score": raw, "Final Score": final}

def validate_input(title, abstract):
    title = title.replace("\n", " ").strip().replace('’', "'")
    abstract = abstract.replace("\n", " ").strip().replace('’', "'")

    non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
    non_latin_in_title = non_latin_pattern.findall(title)
    non_latin_in_abstract = non_latin_pattern.findall(abstract)

    if len(title.strip().split(' ')) < 3:
        return False, "The title must be at least 3 words long."
    if len(abstract.strip().split(' ')) < 50:
        return False, "The abstract must be at least 50 words long."
    if non_latin_in_title:
        return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
    if non_latin_in_abstract:
        return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."
    if len((title + abstract).split(' ')) > 600:
        return True, "Warning: the input length is approaching the tokenization limit (512 tokens) and may be truncated without further warning!"

    return True, "Inputs are valid! Good to go!"
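
# Illustrative checks: validate_input("Two Words", "<a 50+ word abstract>") returns
# (False, "The title must be at least 3 words long."); inputs over ~600 words still
# pass, but with a truncation warning.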


def update_button_status(title, abstract):
    valid, message = validate_input(title, abstract)
    if not valid:
        return gr.update(value="Error: " + message), gr.update(interactive=False)
    return gr.update(value=message), gr.update(interactive=True)



examples = [

    [
        "Mean Flows for One-step Generative Modeling",
        ('''We propose a principled and effective framework for one-step generative modeling. We introduce the notion of average velocity to characterize flow fields, in contrast to instantaneous velocity modeled by Flow Matching methods. A well-defined identity between average and instantaneous velocities is derived and used to guide neural network training. Our method, termed the MeanFlow model, is self-contained and requires no pre-training, distillation, or curriculum learning. MeanFlow demonstrates strong empirical performance: it achieves an FID of 3.43 with a single function evaluation (1-NFE) on ImageNet 256x256 trained from scratch, significantly outperforming previous state-of-the-art one-step diffusion/flow models. Our study substantially narrows the gap between one-step diffusion/flow models and their multi-step predecessors, and we hope it will motivate future research to revisit the foundations of these powerful models.''')
    ],
    [
        "SARDet-100K: Towards Open-Source Benchmark and ToolKit for Large-Scale SAR Object Detection",
        ('''Synthetic Aperture Radar (SAR) object detection has gained significant attention recently due to its irreplaceable all-weather imaging capabilities. However, this research field suffers from both limited public datasets (mostly comprising <2K images with only mono-category objects) and inaccessible source code. To tackle these challenges, we establish a new benchmark dataset and an open-source method for large-scale SAR object detection. Our dataset, SARDet-100K, is a result of intense surveying, collecting, and standardizing 10 existing SAR detection datasets, providing a large-scale and diverse dataset for research purposes. To the best of our knowledge, SARDet-100K is the first COCO-level large-scale multi-class SAR object detection dataset ever created. With this high-quality dataset, we conducted comprehensive experiments and uncovered a crucial challenge in SAR object detection: the substantial disparities between the pretraining on RGB datasets and finetuning on SAR datasets in terms of both data domain and model structure. To bridge these gaps, we propose a novel Multi-Stage with Filter Augmentation (MSFA) pretraining framework that tackles the problems from the perspective of data input, domain transition, and model migration. The proposed MSFA method significantly enhances the performance of SAR object detection models while demonstrating exceptional generalizability and flexibility across diverse models. This work aims to pave the way for further advancements in SAR object detection. The dataset and code is available at this https URL.''')
    ],
    [
        "Enhanced ZSSR for Super-resolution Reconstruction of the Historical Tibetan Document Images",
        "Due to the poor preservation and imaging conditions, the image quality of historical Tibetan document images is relatively unsatisfactory. In this paper, we adopt super-resolution technology to reconstruct high quality images of historical Tibetan document. To address the problem of low quantity and poor quality of historical Tibetan document images, we propose the EZSSR network based on the Zero-Shot Super-resolution Network (ZSSR), which borrows the idea of feature pyramid in Deep Laplacian Pyramid Networks (LapSRN) to extract different levels of features while alleviating the ringing artifacts. EZSSR neither requires paired training datasets nor preprocessing stage. The computational complexity of EZSSR is low, and thus, EZSSR can also reconstruct image within the acceptable time frame. Experimental results show that EZSSR reconstructs images with better visual effects and higher PSNR and SSIM values."
    ]
    
]

# ===== Gradio UI =====
with gr.Blocks() as iface:
    gr.Markdown("""
    # 📈 Predict Impact & Quality of Newborn Papers
    
    ### LLM-powered estimates from a paper’s title and abstract.
    
    #### Which model should I use?
    - [**NAIPv1**](https://arxiv.org/abs/2408.03934) — predicts **academic impact**  
    - [**NAIPv2**](https://arxiv.org/abs/2509.25179) — predicts **paper quality** *(default)*  
    *See the papers for methodology and evaluation details.*
    
    > ⚡ **Note:** Local inference is instantaneous. On **Hugging Face ZeroGPU**, the quantized model is reloaded **from disk** on each prediction, which can introduce significant disk-I/O delays (typically <30 s).

    > For **NAIPv2**, the output **Normalized score** may not be comparable across different domains.  It is recommended to use the **Raw score** magnitude for quality estimation within the same domain.

    """)

    with gr.Row():
        with gr.Column():
            model_selector = gr.Dropdown(
                choices=["v1", "v2.1"],
                value="v2.1",   # 默认 v2
                label="Select Model Version"
            )
            title_input = gr.Textbox(
                lines=2,
                placeholder="Enter Paper Title Here...",
                label="Paper Title"
            )
            abstract_input = gr.Textbox(
                lines=5,
                placeholder="Enter Paper Abstract Here... (Tip: For v2, remove sentences like 'Our code is released at xxx' for more accurate results)",
                label="Paper Abstract"
            )
            validation_status = gr.Textbox(label="Validation Status", interactive=False)
            submit_button = gr.Button("Predict Impact", interactive=False)

        with gr.Column():
            output = gr.Dataframe(
                headers=["Type", "Score"],
                datatype=["str", "number"],
                row_count=2,
                col_count=2,
                interactive=False,
                label="Predicted Scores"
            )
            gr.Markdown("""
            ## Important Notes 
            - The reported performance reflects aggregate statistical outcomes, rather than guarantees for individual instances.  
            - It is intended as a tool **for research and educational purposes only**.
            - Please refrain from deliberately embellishing the title and abstract to boost scores, and **avoid making false claims**.
            - This demo is an early exploration of using LLMs for paper quality estimation and is not optimized against prompt injection attacks.
            - The **predicted value** is a probability generated by the model and **does NOT reflect the paper's true quality or novelty**.  
            - For **NAIPv1**, a normalized score greater than **0.60** is considered to indicate a potentially impactful paper.  
            - For **NAIPv2**, a normalized score above **0.60** corresponds to the statistical mean of NeurIPS accepted papers (Poster).  
            """)

    title_input.change(
        update_button_status,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, submit_button]
    )
    abstract_input.change(
        update_button_status,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, submit_button]
    )

    submit_button.click(
        predict,
        inputs=[title_input, abstract_input, model_selector],
        outputs=output
    )

    gr.Examples(
        examples=examples,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, output],
        cache_examples=False
    )

iface.launch()