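# app.py -- Gradio app for the NAMAA OCR competition Space.
# Hosting-page residue preserved as a comment: "oddadmix's picture",
# commit message "Update app.py", commit a7ac470 (verified).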
import gradio as gr
import os
import json
from huggingface_hub import upload_file
import pandas as pd
from datasets import load_dataset
# Hugging Face token taken from the HF_TOKEN environment variable; None when
# the variable is unset. Only the disabled upload code in submit() refers to it.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hub repositories used by the app: the target named in the disabled upload
# code, and the results dataset that show_results() loads.
SUBMISSIONS_REPO = "NAMAA-Space/ocr-competition-submissions"
RESULTS_REPO = "NAMAA-Space/ocr-competition-results"
def validate_fields(team_name, email, model_name, hf_model_id, hf_token, code):
    """Check that every submission field is filled in, then hand off to submit().

    A field counts as missing when it is empty, ``None`` or contains only
    whitespace.

    Args:
        team_name: Team name entered in the form.
        email: Contact e-mail entered in the form.
        model_name: Model name entered in the form.
        hf_model_id: Hugging Face model ID entered in the form.
        hf_token: Hugging Face access token entered in the form.
        code: Run instructions or Colab link entered in the form.

    Returns:
        The "all fields are required" message when any field is missing,
        otherwise whatever ``submit`` returns for the same arguments.
    """
    fields = (team_name, email, model_name, hf_model_id, hf_token, code)
    # str() keeps the whitespace check safe for non-string truthy values.
    if any(not value or not str(value).strip() for value in fields):
        return "All fields are required. Please fill in all fields."
    # Values are forwarded unchanged; only the emptiness check strips.
    return submit(team_name, email, model_name, hf_model_id, hf_token, code)
def submit(team_name, email, model_name, hf_model_id, hf_token, code):
    """Handle a submission; at present it only reports that submissions are closed.

    The arguments mirror the form fields and are currently unused because the
    upload logic is disabled.

    Returns:
        str: The fixed notice "Submission is closed".
    """
    # Disabled upload logic, kept for reference. It serialized the form fields
    # to a timestamped JSON file and uploaded it to SUBMISSIONS_REPO.
    # NOTE(review): as written it would store the participant's hf_token in
    # plain text inside the uploaded file -- reconsider before re-enabling.
    #
    # entry = {
    #     "team_name": team_name,
    #     "email": email,
    #     "model_name": model_name,
    #     "hf_model_id": hf_model_id,
    #     "hf_token": hf_token,
    #     "code": code
    # }
    # filename = f"{team_name}_{model_name}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S')}.json"
    # filename = filename.replace("/", "-")
    # with open(filename, "w") as f:
    #     json.dump(entry, f)
    # upload_file(path_or_fileobj=filename,
    #             path_in_repo=filename,
    #             repo_id=SUBMISSIONS_REPO,
    #             repo_type="dataset",
    #             token=HF_TOKEN)
    closed_notice = "Submission is closed"
    return closed_notice
def show_results():
    """Load the competition results and build the ranked leaderboard table.

    Reads the ``train`` split of ``RESULTS_REPO``, keeps the ``team_name``,
    ``model_name``, ``WER``, ``CER`` and ``BLEU`` columns and adds:

    * ``Score`` -- ``(100 - WER) * 0.35 + (100 - CER) * 0.35 + BLEU * 0.30``,
      rounded to 2 decimals (higher is better).
    * ``Rank`` -- 1-based position after sorting by ``Score`` descending.

    Returns:
        pandas.DataFrame: Columns ``Rank, Score, team_name, model_name, WER,
        CER, BLEU`` on success. On any failure, a one-row frame with a single
        ``Error`` column holding the message. A DataFrame is returned in both
        cases because the value feeds a ``gr.Dataframe`` output, which treats
        a plain string as a path to a CSV file rather than as text.
    """
    try:
        ds = load_dataset(RESULTS_REPO, split="train")
        # .copy() so the column assignment below works on an independent frame.
        leaderboard = ds.to_pandas()[
            ["team_name", "model_name", "WER", "CER", "BLEU"]
        ].copy()

        # Composite score: (100 - WER) × 0.35 + (100 - CER) × 0.35 + BLEU × 0.30
        # NOTE(review): the formula presumes all three metrics are on a
        # 0-100 scale -- confirm against the results dataset.
        leaderboard["Score"] = (
            (100 - leaderboard["WER"]) * 0.35
            + (100 - leaderboard["CER"]) * 0.35
            + leaderboard["BLEU"] * 0.30
        ).round(2)

        # Show the score first, best (highest) score on top.
        leaderboard = leaderboard[
            ["Score", "team_name", "model_name", "WER", "CER", "BLEU"]
        ]
        leaderboard = leaderboard.sort_values(
            by="Score", ascending=False
        ).reset_index(drop=True)
        leaderboard.insert(0, "Rank", range(1, len(leaderboard) + 1))
        return leaderboard
    except Exception as e:
        # UI boundary: surface the failure in the table instead of crashing
        # the page load.
        return pd.DataFrame(
            {"Error": [f"An error occurred while loading the results: {e}"]}
        )
# Gradio UI: a welcome banner followed by four tabs (rules, prizes, evaluation
# details, submission form + leaderboard). The leaderboard is filled by
# show_results() when the page loads.
# NOTE(review): the nesting below was reconstructed from syntax because the
# original indentation was lost -- confirm the layout against the live app.
with gr.Blocks() as demo:
    # Welcome message. <b> is used for emphasis because Markdown markers such
    # as ** are not interpreted inside a raw HTML block.
    gr.Markdown("""
    <h2 style="font-size:28px;">👋 Welcome to the VLM OCR Competition!</h2>
    <p style="font-size:18px;">
    This competition aims to improve <b>open-source Arabic OCR models</b>.
    It's part of the NAMAA Community mission to strengthen the Arabic presence in the ML space.
    This competition is designed to <b>push the boundaries</b> of OCR performance on diverse Arabic documents.
    </p>
    """)
    with gr.Tabs():
        with gr.Tab("📜 Rules"):
            # Text instructions
            gr.Markdown("""
            <h3 style="font-size:22px;">QARI OCR Competition Rules</h3>
            <p style="font-size:18px;">
            Welcome to the <b>QARI OCR Competition</b> organized by the <b>NAMAA Community</b> and sponsored by <b>KANDCA</b>!
            The competition runs from <b>September 15 to October 15</b>.
            Join the <a href="https://discord.gg/GDTpeHZt" target="_blank">Discord server</a> for support and discussion.
            Full rules and submission portal: <a href="https://huggingface.co/spaces/NAMAA-Space/QARI-Competition" target="_blank">Hugging Face Space</a>.
            </p>
            <h4 style="font-size:20px;">📜 Submission Rules</h4>
            <ul style="font-size:18px;">
            <li>Each team can submit <b>one model evaluation per week</b>.</li>
            <li>Provide with your submission:
            <ol>
            <li>Team name (must stay consistent across submissions)</li>
            <li>Model name & Hugging Face Model ID</li>
            <li>A valid Hugging Face token with access</li>
            <li><b>The inference code</b> and any <b>dependency installation instructions</b></li>
            <li><b>The OCR output must be a single-page structured HTML</b> using the following tags:
            <ul>
            <li>&lt;header&gt;, &lt;footer&gt;, &lt;main&gt;, &lt;section id="1"&gt;, &lt;section id="2"&gt;</li>
            <li>&lt;p&gt;, &lt;h1&gt;-&lt;h5&gt;, &lt;b&gt;, &lt;i&gt;, &lt;u&gt;</li>
            <li>&lt;img&gt;, &lt;table&gt;, &lt;hr&gt;, &lt;ul&gt;, &lt;ol&gt;</li>
            </ul>
            </li>
            <li><b>Submitting only unstructured output will result in a 5-point deduction</b> from your final score.</li>
            </ol>
            </li>
            <li><b>The submitted code is the responsibility of the submitting team.</b></li>
            <li>Ideally, provide a working <b>Google Colab link</b> with all details and dependencies.</li>
            </ul>
            <h4 style="font-size:20px;">📆 Evaluation Schedule</h4>
            <ul style="font-size:18px;">
            <li>Submissions received by <strong>Sunday at midnight</strong> will be evaluated on <strong>Monday</strong>.</li>
            <li>The leaderboard will be updated by <strong>Wednesday or Thursday</strong> of the same week.</li>
            </ul>
            """)
        with gr.Tab("🎁 Prizes"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Prize Distribution (bank transfer or API credits)</h3>
            <ul style="font-size:18px;">
            <li>1st Place: 🥇 250 USD</li>
            <li>2nd Place: 🥈 125 USD</li>
            <li>3rd Place: 🥉 75 USD</li>
            <li>4th Place: 🎖️ 50 USD</li>
            <li>5th Place: 🎖️ 25 USD</li>
            </ul>
            """)
        with gr.Tab("📊 Evaluation"):
            gr.Markdown("""
            <h3 style="font-size:22px;">Evaluation Details</h3>
            <ul style="font-size:18px;">
            <li>The evaluation dataset will remain <b>private</b> and is not shared with participants.</li>
            <li>It will include:
            <ul>
            <li>Historical documents</li>
            <li>Scanned pages</li>
            <li>Different layouts</li>
            <li>Handwritten pages</li>
            </ul>
            </li>
            <li>Models will be evaluated on <b>accuracy metrics</b> such as:
            <ul>
            <li>Word Error Rate (WER)</li>
            <li>Character Error Rate (CER)</li>
            <li>BLEU score</li>
            </ul>
            </li>
            <li><b>Evaluation schedule:</b>
            <ul>
            <li>Submissions received by <b>Sunday at midnight</b> will be evaluated on <b>Monday</b>.</li>
            <li>The leaderboard will be updated by <b>Wednesday or Thursday</b> of the same week.</li>
            </ul>
            </li>
            </ul>
            """)
        with gr.Tab("🚀 Submit & Leaderboard"):
            gr.Markdown("<h3 style='font-size:22px;'>Submit Your Model</h3>")
            # NOTE(review): which fields belong inside the Row cannot be told
            # from the de-indented source; the four short fields are assumed.
            with gr.Row():
                team = gr.Textbox(label="Team Name", placeholder="Enter your team name")
                email = gr.Textbox(label="Email", placeholder="Enter your email")
                model = gr.Textbox(label="Model Name", placeholder="Enter your model name")
                hf_model = gr.Textbox(label="Hugging Face Model ID", placeholder="Enter your HF Model ID")
            hf_token = gr.Textbox(label="Hugging Face Access Token", type="password", placeholder="Enter your HF token")
            code = gr.Textbox(label="Code (instructions to run your model) or colab link", lines=6, placeholder="Paste your run code here...")
            submit_btn = gr.Button("Submit")
            status = gr.Textbox(label="Status")
            # The validation result (or submit()'s reply) is shown in the status box.
            submit_btn.click(
                fn=validate_fields,
                inputs=[team, email, model, hf_model, hf_token, code],
                outputs=status,
            )
            gr.Markdown("<h3 style='font-size:22px;'>Leaderboard Results</h3>")
            # Headers mirror the columns that show_results() produces.
            results = gr.Dataframe(
                headers=["Rank", "Score", "team_name", "model_name", "WER", "CER", "BLEU"]
            )
    # Populate the leaderboard each time the page is loaded.
    demo.load(fn=show_results, outputs=results)

demo.launch()