cdminix committed on
Commit 9e7cbd8 · 1 Parent(s): 108916e

init commit

Files changed (9)
  1. .gitignore +1 -1
  2. .gradio/certificate.pem +31 -0
  3. README.md +62 -10
  4. app.py +295 -256
  5. dataset_card_template.py +0 -40
  6. env.example +3 -0
  7. packages.txt +0 -1
  8. pitfalls.json +92 -0
  9. requirements.in +2 -4
.gitignore CHANGED
@@ -1,2 +1,2 @@
  __pycache__/
-

  __pycache__/
+ .env
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,18 +1,70 @@
  ---
- title: PDF to Page Images Dataset
- emoji: 📂🤗
- colorFrom: pink
- colorTo: red
  sdk: gradio
  sdk_version: 4.44.0
  app_file: app.py
  pinned: false
- hf_oauth: true
- hf_oauth_scopes:
- - read-repos
- - write-repos
- - manage-repos
  license: agpl-3.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
  ---
+ title: Research Paper Pitfall Checker
+ emoji: 🔍📄
+ colorFrom: blue
+ colorTo: purple
  sdk: gradio
  sdk_version: 4.44.0
  app_file: app.py
  pinned: false
  license: agpl-3.0
  ---

+ # Research Paper Pitfall Checker
+
+ A Gradio application that analyzes research papers to identify potential evaluation pitfalls using AI-powered analysis.
+
+ ## Features
+
+ - **PDF Text Extraction**: Extracts text content from uploaded research papers
+ - **AI-Powered Analysis**: Uses the OpenRouter API with a Grok model for intelligent pitfall detection
+ - **Comprehensive Pitfall Detection**: Identifies common evaluation pitfalls, including:
+   - 🔒 The Lock-In Effect
+   - 🍎🍊 Apples-to-Oranges Comparisons
+   - 💧 Contamination Leak
+   - 🤖❓ Unvalidated Automation
+   - 🧐 Vague Scales
+
+ ## How to Use
+
+ 1. **Get an API Key**: Sign up at [OpenRouter.ai](https://openrouter.ai) to get your API key
+ 2. **Set Environment Variable**: Set your API key as an environment variable
+ 3. **Upload a PDF**: Upload your research paper PDF file
+ 4. **Analyze**: Click "Analyze Paper for Pitfalls" to get a detailed analysis
+ 5. **Review Results**: Review the analysis report for potential issues and improvement suggestions
+
+ ## Setup
+
+ 1. Install dependencies:
+    ```bash
+    pip install -r requirements.in
+    ```
+
+ 2. Set your OpenRouter API key as an environment variable:
+    ```bash
+    export OPENROUTER_API_KEY="your-api-key-here"
+    ```
+
+    Or create a `.env` file (copy from `env.example`):
+    ```
+    OPENROUTER_API_KEY=your-api-key-here
+    ```
+
+ 3. Run the application:
+    ```bash
+    python app.py
+    ```
+
+ ## Configuration
+
+ The pitfalls are defined in `pitfalls.json`. You can modify this file to add or remove specific pitfalls to check for, as in the sketch below.
+
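+ As an illustrative sketch, a new entry follows the same schema as the entries shipped in `pitfalls.json` (all field values here are placeholders, not a real pitfall definition):
+
+ ```json
+ {
+   "name": "My Custom Pitfall",
+   "emoji": "⚠️",
+   "category": "General",
+   "description": "One-sentence description of the problematic evaluation practice.",
+   "subjective_objective": "Both",
+   "actors_most_affected": ["Academic researcher"],
+   "evaluation_use": "Compare models",
+   "modalities": ["General"],
+   "sources": []
+ }
+ ```
+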
62
+ ## API Requirements
63
+
64
+ - OpenRouter API key (get one from [OpenRouter.ai](https://openrouter.ai))
65
+ - Uses the `qwen/qwen2.5-next-80b-a3b-instruct` model via OpenRouter
66
+ - API key must be set as the `OPENROUTER_API_KEY` environment variable
67
+
68
+ ## License
69
+
70
+ AGPL-3.0
app.py CHANGED
@@ -1,303 +1,342 @@
- import multiprocessing
  import os
- import random
- import shutil
  import tempfile
- import zipfile
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from datetime import datetime

  import fitz  # PyMuPDF
  import gradio as gr
- from huggingface_hub import DatasetCard, DatasetCardData, HfApi
- from PIL import Image

- from dataset_card_template import DATASET_CARD_TEMPLATE

- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

- CPU_COUNT = multiprocessing.cpu_count()
- MAX_WORKERS = min(32, CPU_COUNT)  # Use CPU count directly for processes


- def process_pdf(pdf_file, sample_percentage, temp_dir):
-     try:
-         pdf_path = pdf_file.name
-         doc = fitz.open(pdf_path)
-         total_pages = len(doc)
-
-         pages_to_convert = int(total_pages * (sample_percentage / 100))
-         pages_to_convert = max(
-             1, min(pages_to_convert, total_pages)
-         )  # Ensure at least one page and not more than total pages
-
-         selected_pages = (
-             sorted(random.sample(range(total_pages), pages_to_convert))
-             if 0 < sample_percentage < 100
-             else range(total_pages)
-         )
-
-         images = []
-         for page_num in selected_pages:
-             page = doc[page_num]
-             pix = page.get_pixmap()  # Remove the Matrix scaling
-             image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-             image_path = os.path.join(
-                 temp_dir, f"{os.path.basename(pdf_path)}_page_{page_num+1}.jpg"
-             )
-             image.save(image_path, "JPEG", quality=85, optimize=True)
-             images.append(image_path)
-
-         doc.close()
-         return images, None, len(images)
      except Exception as e:
-         return [], f"Error processing {pdf_file.name}: {str(e)}", 0


- def pdf_to_images(pdf_files, sample_percentage, temp_dir, progress=gr.Progress()):
-     if not os.path.exists(temp_dir):
-         os.makedirs(temp_dir)
-
-     progress(0, desc="Starting conversion")
-     all_images = []
-     skipped_pdfs = []
-
-     total_pages = sum(len(fitz.open(pdf.name)) for pdf in pdf_files)
-     processed_pages = 0
-
-     with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-         future_to_pdf = {
-             executor.submit(process_pdf, pdf, sample_percentage, temp_dir): pdf
-             for pdf in pdf_files
          }
-
-         for future in as_completed(future_to_pdf):
-             pdf = future_to_pdf[future]
-             images, error, pages_processed = future.result()
-             if error:
-                 skipped_pdfs.append(error)
-                 gr.Info(error)
-             else:
-                 all_images.extend(images)
-
-             processed_pages += pages_processed
-             progress((processed_pages / total_pages), desc=f"Processing {pdf.name}")
-
-     message = f"Saved {len(all_images)} images to temporary directory"
-     if skipped_pdfs:
-         message += f"\nSkipped {len(skipped_pdfs)} PDFs due to errors: {', '.join(skipped_pdfs)}"
-     return all_images, message
-
-
- def get_size_category(num_images):
-     if num_images < 1000:
-         return "n<1K"
-     elif num_images < 10000:
-         return "1K<n<10K"
-     elif num_images < 100000:
-         return "10K<n<100K"
-     elif num_images < 1000000:
-         return "100K<n<1M"
-     else:
-         return "n>1M"
-
-
- def process_pdfs(
-     pdf_files,
-     sample_percentage,
-     hf_repo,
-     create_zip,
-     private_repo,
-     oauth_token: gr.OAuthToken | None,
-     progress=gr.Progress(),
- ):
-     if not pdf_files:
-         return (
-             None,
-             None,
-             gr.Markdown(
-                 "⚠️ No PDF files uploaded. Please upload at least one PDF file."
-             ),
          )

-     # if oauth_token is None:
-     #     return (
-     #         None,
-     #         None,
-     #         gr.Markdown(
-     #             "⚠️ Not logged in to Hugging Face. Please log in to upload to a Hugging Face dataset."
-     #         ),
-     #     )

      try:
-         temp_dir = tempfile.mkdtemp()
-         images_dir = os.path.join(temp_dir, "images")
-         os.makedirs(images_dir)
-
-         progress(0, desc="Starting PDF processing")
-         images, message = pdf_to_images(pdf_files, sample_percentage, images_dir)
-
-         # Create a new directory for sampled images
-         sampled_images_dir = os.path.join(temp_dir, "sampled_images")
-         os.makedirs(sampled_images_dir)
-
-         # Move sampled images to the new directory and update paths
-         updated_images = []
-         for image in images:
-             new_path = os.path.join(sampled_images_dir, os.path.basename(image))
-             shutil.move(image, new_path)
-             updated_images.append(new_path)
-
-         # Update the images list with new paths
-         images = updated_images
-
-         zip_path = None
-         if create_zip:
-             # Create a zip file of the sampled images
-             zip_path = os.path.join(temp_dir, "converted_images.zip")
-             with zipfile.ZipFile(zip_path, "w") as zipf:
-                 progress(0, desc="Zipping images")
-                 for image in progress.tqdm(images, desc="Zipping images"):
-                     zipf.write(
-                         os.path.join(sampled_images_dir, os.path.basename(image)),
-                         os.path.basename(image),
-                     )
-             message += f"\nCreated zip file with {len(images)} images"
-
-         if hf_repo:
-             if oauth_token is None:
-                 raise gr.Error(
-                     "Not logged in to Hugging Face. Please log in to upload to a Hugging Face dataset."
-                 )
-             try:
-                 hf_api = HfApi(token=oauth_token.token)
-                 hf_api.create_repo(
-                     hf_repo,
-                     repo_type="dataset",
-                     private=private_repo,
-                 )
-                 # Upload only the sampled images directory
-                 hf_api.upload_folder(
-                     folder_path=sampled_images_dir,
-                     repo_id=hf_repo,
-                     repo_type="dataset",
-                     path_in_repo="images",
-                 )
-
-                 # Determine size category
-                 size_category = get_size_category(len(images))
-
-                 # Create DatasetCardData instance
-                 card_data = DatasetCardData(
-                     tags=["created-with-pdfs-to-page-images-converter", "pdf-to-image"],
-                     size_categories=[size_category],
-                 )
-
-                 # Create and populate the dataset card
-                 card = DatasetCard.from_template(
-                     card_data,
-                     template_path=None,  # Use default template
-                     hf_repo=hf_repo,
-                     num_images=len(images),
-                     num_pdfs=len(pdf_files),
-                     sample_size=sample_percentage
-                     if sample_percentage > 0
-                     else "All pages",
-                     creation_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                 )
-
-                 # Add our custom content to the card
-                 card.text = DATASET_CARD_TEMPLATE.format(
-                     hf_repo=hf_repo,
-                     num_images=len(images),
-                     num_pdfs=len(pdf_files),
-                     sample_size=sample_percentage
-                     if sample_percentage > 0
-                     else "All pages",
-                     creation_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                     size_category=size_category,
-                 )
-
-                 repo_url = f"https://huggingface.co/datasets/{hf_repo}"
-                 message += f"\nUploaded dataset card to Hugging Face repo: [{hf_repo}]({repo_url})"
-
-                 card.push_to_hub(hf_repo, token=oauth_token.token)
-             except Exception as e:
-                 message += f"\nFailed to upload to Hugging Face: {str(e)}"
-
-         return images, zip_path, message
      except Exception as e:
-         if "temp_dir" in locals():
-             shutil.rmtree(temp_dir)
-         return None, None, f"An error occurred: {str(e)}"


  # Define the Gradio interface
- with gr.Blocks() as demo:
      gr.HTML(
-         """<h1 style='text-align: center;'> PDFs to Page Images Converter</h1>
-         <center><i> &#128193; Convert PDFs to an image dataset, splitting pages into individual images &#128193; </i></center>"""
      )
      gr.HTML(
          """
-     <div style="display: flex; justify-content: center; align-items: center; max-width: 1000px; margin: 0 auto;">
-         <div style="flex: 1; padding-right: 20px;">
-         <p>This app allows you to:</p>
-         <ol>
-             <li>Upload one or more PDF files</li>
-             <li>Convert each page of the PDFs into separate image files</li>
-             <li>(Optionally) sample a specific number of pages from each PDF</li>
-             <li>(Optionally) Create a downloadable ZIP file of the converted images</li>
-             <li>(Optionally) Upload the images to a Hugging Face dataset repository</li>
          </ol>
-         </div>
-         <div style="flex: 1;">
-             <img src="https://huggingface.co/spaces/Dataset-Creation-Tools/pdf-to-page-images-dataset/resolve/main/assets/PDF%20page%20split%20illustration.png"
-                 alt="PDF page split illustration"
-                 style="max-width: 50%; height: auto;">
-         </div>
      </div>
      """
      )

      with gr.Row():
-         pdf_files = gr.File(
-             file_count="multiple", label="Upload PDF(s)", file_types=["*.pdf"]
-         )

      with gr.Row():
-         sample_percentage = gr.Slider(
-             minimum=0,
-             maximum=100,
-             value=100,
-             step=1,
-             label="Percentage of pages to sample per PDF",
-             info="0% for no sampling (all pages), 100% for all pages",
          )
-         create_zip = gr.Checkbox(label="Create ZIP file of images?", value=False)
-
-     with gr.Accordion("Hugging Face Upload Options", open=True):
-         gr.LoginButton(size="sm")
-         with gr.Row():
-             hf_repo = gr.Textbox(
-                 label="Hugging Face Repo",
-                 placeholder="username/repo-name",
-                 info="Enter the Hugging Face repository name in the format 'username/repo-name'",
-             )
-             private_repo = gr.Checkbox(label="Make repository private?", value=False)
-
-     with gr.Accordion("View converted images", open=False):
-         output_gallery = gr.Gallery(label="Converted Images")
-
-     status_text = gr.Markdown(label="Status")
-     download_button = gr.File(label="Download Converted Images")
-
-     submit_button = gr.Button("Convert PDFs to page images")
-     submit_button.click(
-         process_pdfs,
-         inputs=[pdf_files, sample_percentage, hf_repo, create_zip, private_repo],
-         outputs=[output_gallery, download_button, status_text],
      )
- demo.launch()

+ import json
  import os
  import tempfile
+ from typing import List, Dict, Any

  import fitz  # PyMuPDF
  import gradio as gr
+ from openai import OpenAI

+ # Load API key from environment variable
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")


+ def load_pitfalls() -> List[Dict[str, Any]]:
+     """Load pitfalls from the JSON file."""
+     try:
+         with open("pitfalls.json", "r") as f:
+             data = json.load(f)
+         return data.get("pitfalls", [])
+     except FileNotFoundError:
+         gr.Warning("pitfalls.json file not found!")
+         return []
+     except json.JSONDecodeError:
+         gr.Warning("Invalid JSON in pitfalls.json file!")
+         return []
+
+
+ def extract_text_from_pdf(pdf_file) -> str:
+     """Extract text content from a PDF file."""
+     try:
+         pdf_path = pdf_file.name
+         doc = fitz.open(pdf_path)
+         text_content = ""
+
+         for page_num in range(len(doc)):
+             page = doc[page_num]
+             text_content += f"\n--- Page {page_num + 1} ---\n"
+             text_content += page.get_text()
+
+         doc.close()
+         return text_content
+     except Exception as e:
+         raise gr.Error(f"Error extracting text from {pdf_file.name}: {str(e)}")


+ def format_paper_text(paper_text: str) -> Dict[str, Any]:
+     """First stage: Format the paper text to make it more readable and suitable for analysis."""
+
+     # Check if API key is available
+     if not OPENROUTER_API_KEY:
+         return {
+             "formatted_text": None,
+             "success": False,
+             "error": "OpenRouter API key not found. Please set the OPENROUTER_API_KEY environment variable.",
+         }
+
+     # Initialize OpenAI client with OpenRouter
+     client = OpenAI(
+         base_url="https://openrouter.ai/api/v1",
+         api_key=OPENROUTER_API_KEY,
+     )
+
+     format_prompt = f"""You are an expert academic text processor. Your task is to clean and format the following research paper text to make it more readable and suitable for detailed analysis.
+
+ Please:
+ 1. Remove excessive whitespace and formatting artifacts
+ 2. Organize the text into clear sections (Abstract, Introduction, Methods, Results, Discussion, Conclusion, References, Appendix)
+ 3. Preserve all important content including figures, tables, and equations
+ 4. Make the text flow better while maintaining academic integrity
+ 5. Ensure all evaluation-related content is clearly identifiable
+ 6. Keep the text under 8000 characters while preserving key information
+
+ Original paper text:
+ {paper_text}
+
+ Please provide the cleaned and formatted text:"""
+
+     try:
+         completion = client.chat.completions.create(
+             extra_headers={
+                 "HTTP-Referer": "https://github.com/paper-eval-checker",
+                 "X-Title": "Paper Evaluation Pitfall Checker",
+             },
+             model="x-ai/grok-4-fast:free",
+             messages=[{"role": "user", "content": format_prompt}],
+             temperature=0.1,  # Very low temperature for consistent formatting
+             max_tokens=3000,
+         )
+
+         return {
+             "formatted_text": completion.choices[0].message.content,
+             "success": True,
+             "error": None,
+         }
      except Exception as e:
+         return {"formatted_text": None, "success": False, "error": str(e)}
+
+
+ def analyze_paper_for_pitfalls(
+     formatted_text: str, pitfalls: List[Dict[str, Any]]
+ ) -> Dict[str, Any]:
+     """Second stage: Use OpenRouter API with Grok model to analyze the formatted paper for potential pitfalls."""
+
+     # Check if API key is available
+     if not OPENROUTER_API_KEY:
+         return {
+             "analysis": None,
+             "success": False,
+             "error": "OpenRouter API key not found. Please set the OPENROUTER_API_KEY environment variable.",
+         }
+
+     # Initialize OpenAI client with OpenRouter
+     client = OpenAI(
+         base_url="https://openrouter.ai/api/v1",
+         api_key=OPENROUTER_API_KEY,
+     )
+
+     # Create the prompt for pitfall analysis
+     pitfalls_description = "\n\n".join(
+         [
+             f"**{pitfall['name']}** {pitfall['emoji']}\n"
+             f"Category: {pitfall['category']}\n"
+             f"Description: {pitfall['description']}\n"
+             f"Subjective/Objective: {pitfall['subjective_objective']}\n"
+             f"Actors Most Affected: {', '.join(pitfall['actors_most_affected'])}\n"
+             f"Evaluation Use: {pitfall['evaluation_use']}\n"
+             f"Modalities: {', '.join(pitfall['modalities'])}"
+             for pitfall in pitfalls
+         ]
+     )
+
+     analysis_prompt = f"""You are an expert research paper reviewer specializing in identifying evaluation pitfalls in academic papers.
+
+ Your task is to analyze the provided formatted research paper text and identify any potential pitfalls from the following list:
+
+ {pitfalls_description}
+
+ Please analyze the paper carefully and provide:
+ 1. A list of potential pitfalls found (if any)
+ 2. For each pitfall found, provide:
+    - The pitfall name
+    - Specific evidence from the paper that suggests this pitfall
+    - The section/page where this evidence appears
+    - A confidence level (High/Medium/Low) for your assessment
+    - Suggestions for improvement
+ 3. Be concise, and use markdown formatting.
+ 4. If you find evidence of a pitfall, make sure to look at ALL of the paper to see if it is mitigated elsewhere -- make sure to check the appendix of the paper as well.
+
+ The output format:
+
+ # Overall
+ <img src="https://img.shields.io/badge/severity-high-red" alt="Severity: High"> (for low use green, for medium use yellow, for high use red)
+ <img src="https://img.shields.io/badge/evaluation-objective-blue" alt="Objective evaluation"> (either write 'subjective', 'objective', or include two images in case both are present in the paper)
+ [One sentence summary of evaluation use]
+
+ # Pitfall
+
+ ## Evidence
+ "specific evidence from the paper"
+
+ If no pitfalls are found, please state that clearly.
+
+ Formatted paper text to analyze:
+ {formatted_text}
+
+ Please provide your analysis in a structured format."""
+
+     try:
+         completion = client.chat.completions.create(
+             extra_headers={
+                 "HTTP-Referer": "https://github.com/paper-eval-checker",
+                 "X-Title": "Paper Evaluation Pitfall Checker",
+             },
+             model="x-ai/grok-4-fast:free",
+             messages=[{"role": "user", "content": analysis_prompt}],
+             temperature=0.3,  # Lower temperature for more consistent analysis
+             max_tokens=2000,
+         )
+
+         return {
+             "analysis": completion.choices[0].message.content,
+             "success": True,
+             "error": None,
          }
+     except Exception as e:
+         return {"analysis": None, "success": False, "error": str(e)}
+
+
+ def process_paper(pdf_file, progress=gr.Progress()):
+     """Main function to process a research paper for pitfall detection using two-stage approach."""

+     if not pdf_file:
+         return gr.Markdown(
+             "⚠️ No PDF file uploaded. Please upload a research paper PDF."
          )

+     if not OPENROUTER_API_KEY:
+         return gr.Markdown(
+             "⚠️ OpenRouter API key not found. Please set the OPENROUTER_API_KEY environment variable."
+         )

      try:
+         # Step 1: Load pitfalls
+         progress(0.1, desc="Loading pitfalls definitions...")
+         pitfalls = load_pitfalls()
+
+         if not pitfalls:
+             return gr.Markdown(
+                 "⚠️ No pitfalls definitions found. Please check pitfalls.json file."
+             )
+
+         # Step 2: Extract text from PDF
+         progress(0.2, desc="Extracting text from PDF...")
+         paper_text = extract_text_from_pdf(pdf_file)
+
+         if not paper_text.strip():
+             return gr.Markdown(
+                 "⚠️ No text content found in the PDF. Please check if the PDF contains readable text."
+             )
+
+         # Step 3: Format paper text (First AI call)
+         progress(0.3, desc="Formatting paper text for analysis...")
+         format_result = format_paper_text(paper_text)
+
+         if not format_result["success"]:
+             return gr.Markdown(
+                 f"❌ Error during text formatting: {format_result['error']}"
+             )
+
+         # Step 4: Analyze for pitfalls (Second AI call)
+         progress(0.7, desc="Analyzing paper for potential pitfalls...")
+         analysis_result = analyze_paper_for_pitfalls(
+             format_result["formatted_text"], pitfalls
+         )
+
+         if not analysis_result["success"]:
+             return gr.Markdown(f"❌ Error during analysis: {analysis_result['error']}")
+
+         # Step 5: Format final results
+         progress(0.9, desc="Preparing final report...")
+         analysis_text = analysis_result["analysis"]
+
+         # Create a formatted markdown report
+         report = f"""# Research Paper Pitfall Analysis Report
+
+ ## Analysis Results
+
+ {analysis_text}
+
+ ---
+ *Analysis completed using OpenRouter API with Grok model (two-stage processing)*
+ """
+
+         progress(1.0, desc="Analysis complete!")
+         return gr.Markdown(report)
+
      except Exception as e:
+         return gr.Markdown(f"❌ An error occurred: {str(e)}")


  # Define the Gradio interface
+ with gr.Blocks(title="Research Paper Pitfall Checker") as demo:
      gr.HTML(
+         """<h1 style='text-align: center;'>🔍 Research Paper Pitfall Checker</h1>
+         <center><i>Identify potential evaluation pitfalls in academic research papers</i></center>"""
      )
+
      gr.HTML(
          """
+     <div style="max-width: 800px; margin: 0 auto; padding: 20px;">
+         <h3>How it works:</h3>
+         <ol>
+             <li><strong>Upload a PDF</strong> of your research paper</li>
+             <li><strong>Click "Analyze Paper"</strong> to scan for potential pitfalls</li>
+             <li><strong>Review the analysis</strong> to identify areas for improvement</li>
          </ol>
+
+         <h3>Supported Pitfalls:</h3>
+         <ul>
+             <li>🔒 The Lock-In Effect</li>
+             <li>🍎🍊 Apples-to-Oranges Comparisons</li>
+             <li>💧 Contamination Leak</li>
+             <li>🤖❓ Unvalidated Automation</li>
+             <li>🧐 Vague Scales</li>
+         </ul>
      </div>
      """
      )

      with gr.Row():
+         with gr.Column(scale=3):
+             pdf_file = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
+
+         with gr.Column(scale=1):
+             analyze_button = gr.Button(
+                 "🔍 Analyze Paper for Pitfalls",
+                 variant="primary",
+                 size="lg",
+                 elem_id="analyze-btn",
+             )

      with gr.Row():
+         results = gr.Markdown(
+             value="Upload a PDF to get started with pitfall analysis.",
+             elem_id="results",
          )

+     # Add loading animation CSS
+     demo.css = """
+     #analyze-btn {
+         background: linear-gradient(45deg, #ff6b6b, #4ecdc4, #45b7d1, #96ceb4, #feca57);
+         background-size: 400% 400%;
+         animation: gradient 3s ease infinite;
+         border: none;
+         color: white;
+         font-weight: bold;
+     }
+
+     @keyframes gradient {
+         0% { background-position: 0% 50%; }
+         50% { background-position: 100% 50%; }
+         100% { background-position: 0% 50%; }
+     }
+
+     #results {
+         min-height: 200px;
+         padding: 20px;
+         border: 1px solid #e0e0e0;
+         border-radius: 8px;
+         background-color: #fafafa;
+     }
+     """
+
+     # Connect the button to the processing function
+     analyze_button.click(
+         fn=process_paper,
+         inputs=[pdf_file],
+         outputs=[results],
      )
+
+ if __name__ == "__main__":
+     demo.launch(share=True, server_name="localhost", server_port=9090)
dataset_card_template.py DELETED
@@ -1,40 +0,0 @@
- DATASET_CARD_TEMPLATE = """
- # Dataset Card for {hf_repo}
-
- ## Dataset Description
-
- This dataset contains images converted from PDFs using the PDFs to Page Images Converter Space.
-
- - **Number of images:** {num_images}
- - **Number of PDFs processed:** {num_pdfs}
- - **Sample size per PDF:** {sample_size}
- - **Created on:** {creation_date}
-
- ## Dataset Creation
-
- ### Source Data
-
- The images in this dataset were generated from user-uploaded PDF files.
-
- ### Processing Steps
-
- 1. PDF files were uploaded to the PDFs to Page Images Converter.
- 2. Each PDF was processed, converting selected pages to images.
- 3. The resulting images were saved and uploaded to this dataset.
-
- ## Dataset Structure
-
- The dataset consists of JPEG images, each representing a single page from the source PDFs.
-
- ### Data Fields
-
- - `images/`: A folder containing all the converted images.
-
- ### Data Splits
-
- This dataset does not have specific splits.
-
- ## Additional Information
-
- - **Contributions:** Thanks to the PDFs to Page Images Converter for creating this dataset.
- """
env.example ADDED
@@ -0,0 +1,3 @@
+ # OpenRouter API Key
+ # Get your API key from https://openrouter.ai
+ OPENROUTER_API_KEY=your-api-key-here
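Note that app.py reads the key with `os.getenv("OPENROUTER_API_KEY")` and never loads a `.env` file itself, so the variable has to reach the process environment some other way. A minimal sketch of loading an `env.example`-style file at startup, assuming the `python-dotenv` package is installed (it is not listed in `requirements.in`):

```python
# Sketch only: python-dotenv is an assumed extra dependency, not part of this commit.
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # copies KEY=value pairs from ./.env into os.environ
if not os.getenv("OPENROUTER_API_KEY"):
    raise RuntimeError("OPENROUTER_API_KEY is not set; see env.example")
```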
packages.txt DELETED
@@ -1 +0,0 @@
- poppler-utils
pitfalls.json ADDED
@@ -0,0 +1,92 @@
+ {
+   "pitfalls": [
+     {
+       "name": "The Lock-In Effect",
+       "emoji": "🔒",
+       "category": "General",
+       "description": "Practices known to be problematic remain widespread simply because they are already widespread, making it difficult for new, better methods to be adopted.",
+       "subjective_objective": "Both",
+       "actors_most_affected": [
+         "Academic researcher",
+         "Model creator"
+       ],
+       "evaluation_use": "Compare models",
+       "modalities": [
+         "General"
+       ],
+       "sources": []
+     },
+     {
+       "name": "Apples-to-Oranges",
+       "emoji": "🍎🍊",
+       "category": "General",
+       "description": "Models or data are compared on an unequal footing, such as evaluating models using a different number of examples or under different conditions.",
+       "subjective_objective": "Both",
+       "actors_most_affected": [
+         "Academic researcher",
+         "Model deployer"
+       ],
+       "evaluation_use": "Compare models",
+       "modalities": [
+         "General",
+         "NLP",
+         "Speech"
+       ],
+       "sources": []
+     },
+     {
+       "name": "Contamination Leak",
+       "emoji": "💧",
+       "category": "General",
+       "description": "The model has already been exposed to the evaluation data during its training phase, which invalidates the results. This is a widespread and subtle problem.",
+       "subjective_objective": "Both",
+       "actors_most_affected": [
+         "Academic researcher",
+         "Model creator"
+       ],
+       "evaluation_use": "Compare models, assess system reliability",
+       "modalities": [
+         "General"
+       ],
+       "sources": []
+     },
+     {
+       "name": "Unvalidated Automation",
+       "emoji": "🤖❓",
+       "category": "NLP",
+       "description": "Using an LLM-as-a-judge to evaluate outputs without first validating the judge LLM's performance against human experts or established criteria. While LLMs can scale evaluation, they are not yet reliable enough to be the sole evaluators.",
+       "subjective_objective": "Both",
+       "actors_most_affected": [
+         "Academic researcher",
+         "Model creator",
+         "Model deployer"
+       ],
+       "evaluation_use": "Assess system reliability",
+       "modalities": [
+         "Text",
+         "General"
+       ],
+       "sources": [
+         "The LLM Evaluation guidebook"
+       ]
+     },
+     {
+       "name": "Vague Scales",
+       "emoji": "🧐",
+       "category": "TTS",
+       "description": "Papers on synthetic speech fail to report crucial details, such as whether they are evaluating 'quality' or 'naturalness,' or do not disclose the labels used in their Mean Opinion Score (MOS) scale.",
+       "subjective_objective": "Subjective",
+       "actors_most_affected": [
+         "Academic researcher"
+       ],
+       "evaluation_use": "Compare models, assess system reliability",
+       "modalities": [
+         "Speech"
+       ],
+       "sources": [
+         "Good practices for evaluation of synthesized speech",
+         "Hot topics in speech synthesis evaluation"
+       ]
+     }
+   ]
+ }
requirements.in CHANGED
@@ -1,6 +1,4 @@
- Pillow
- gradio[oauth]==4.44.0
- huggingface_hub[hf_transfer]
- pdf2image
  PyMuPDF

+ gradio==4.44.0
+ openai
  PyMuPDF