Spaces:
Running
on
Zero
Running
on
Zero
Roll back interruption change
Browse files
app.py
CHANGED
|
@@ -10,7 +10,6 @@ from utils.models import generate_summaries, model_names
|
|
| 10 |
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html, toggle_reference_answer
|
| 11 |
from utils.leaderboard import load_leaderboard_data, submit_vote_with_elo, generate_leaderboard_html
|
| 12 |
from utils.vote_logger import save_vote_details
|
| 13 |
-
from utils.shared import generation_interrupt
|
| 14 |
|
| 15 |
feedback_options = {
|
| 16 |
"left": [
|
|
@@ -99,13 +98,8 @@ def weighted_sample_without_replacement(population, weights, k=2):
|
|
| 99 |
|
| 100 |
return selected
|
| 101 |
|
| 102 |
-
def load_context(set_interrupt=False):
|
| 103 |
-
|
| 104 |
-
generation_interrupt.set()
|
| 105 |
-
time.sleep(0.5) # Give more time for interrupt to take effect
|
| 106 |
-
|
| 107 |
-
# DON'T clear the interrupt here - let the new inference clear it
|
| 108 |
-
# generation_interrupt.clear() # REMOVED THIS LINE
|
| 109 |
example = get_random_example()
|
| 110 |
|
| 111 |
context_desc = example.get('processed_context_desc', '')
|
|
@@ -117,10 +111,10 @@ def load_context(set_interrupt=False):
|
|
| 117 |
|
| 118 |
return [
|
| 119 |
example,
|
| 120 |
-
gr.update(value=example['question'], elem_classes="query-text"),
|
| 121 |
gr.update(value=context_desc, visible=bool(context_desc)),
|
| 122 |
gr.update(value=context_html),
|
| 123 |
-
gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),
|
| 124 |
show_full
|
| 125 |
]
|
| 126 |
|
|
@@ -149,13 +143,6 @@ def generate_model_summaries(example):
|
|
| 149 |
"completed": False
|
| 150 |
}
|
| 151 |
|
| 152 |
-
# Clear the interrupt flag when NEW inference starts
|
| 153 |
-
generation_interrupt.clear()
|
| 154 |
-
print("Interrupt flag cleared for new inference")
|
| 155 |
-
|
| 156 |
-
if generation_interrupt.is_set():
|
| 157 |
-
return result
|
| 158 |
-
|
| 159 |
try:
|
| 160 |
# Get current leaderboard data to determine model usage counts
|
| 161 |
leaderboard_data = load_leaderboard_data()
|
|
@@ -188,13 +175,10 @@ def generate_model_summaries(example):
|
|
| 188 |
print(f"Starting generation with models: {m_a_name} vs {m_b_name}")
|
| 189 |
s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
print("Generation completed successfully")
|
| 196 |
-
else:
|
| 197 |
-
print("Generation was interrupted")
|
| 198 |
except Exception as e:
|
| 199 |
print(f"Error in generation: {e}")
|
| 200 |
|
|
@@ -208,8 +192,8 @@ def process_generation_result(result):
|
|
| 208 |
result.get("summary_a", ""),
|
| 209 |
result.get("summary_b", ""),
|
| 210 |
None, [], False, load_leaderboard_data(),
|
| 211 |
-
gr.update(value=result.get("summary_a", "Generation was interrupted
|
| 212 |
-
gr.update(value=result.get("summary_b", "Generation was interrupted
|
| 213 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 214 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 215 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
|
@@ -218,7 +202,7 @@ def process_generation_result(result):
|
|
| 218 |
gr.update(visible=False),
|
| 219 |
gr.update(interactive=False, visible=True),
|
| 220 |
gr.update(visible=False),
|
| 221 |
-
gr.update(interactive=True),
|
| 222 |
gr.update(elem_classes=[])
|
| 223 |
]
|
| 224 |
|
|
@@ -239,7 +223,7 @@ def process_generation_result(result):
|
|
| 239 |
gr.update(visible=False),
|
| 240 |
gr.update(interactive=False, visible=True),
|
| 241 |
gr.update(visible=False),
|
| 242 |
-
gr.update(interactive=True),
|
| 243 |
gr.update(elem_classes=[])
|
| 244 |
]
|
| 245 |
|
|
@@ -295,12 +279,12 @@ def show_loading_state():
|
|
| 295 |
gr.update(visible=False), # feedback_section
|
| 296 |
gr.update(interactive=False), # submit_button
|
| 297 |
gr.update(visible=False), # results_reveal_area
|
| 298 |
-
gr.update(interactive=
|
| 299 |
None # Reset selected_winner
|
| 300 |
]
|
| 301 |
|
| 302 |
def handle_new_example_click():
|
| 303 |
-
return load_context(
|
| 304 |
|
| 305 |
def update_ui_for_new_context(example):
|
| 306 |
context_desc = example.get('processed_context_desc', '')
|
|
@@ -308,10 +292,10 @@ def update_ui_for_new_context(example):
|
|
| 308 |
context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
|
| 309 |
|
| 310 |
return [
|
| 311 |
-
gr.update(value=example['question'], elem_classes="query-text"),
|
| 312 |
gr.update(value=context_desc, visible=bool(context_desc)),
|
| 313 |
gr.update(value=get_context_html(example, False)),
|
| 314 |
-
gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),
|
| 315 |
False
|
| 316 |
]
|
| 317 |
|
|
@@ -326,7 +310,7 @@ def reset_reference_section():
|
|
| 326 |
|
| 327 |
def cleanup_on_disconnect():
|
| 328 |
print(f"Browser disconnected. Cleaning up resources...")
|
| 329 |
-
|
| 330 |
|
| 331 |
# Helper functions for showing/hiding UI elements
|
| 332 |
def initialize_empty_app():
|
|
@@ -343,7 +327,7 @@ def show_all_after_loading():
|
|
| 343 |
gr.update(visible=True), # model_section
|
| 344 |
gr.update(visible=True), # voting_section
|
| 345 |
gr.update(visible=True), # submit_button
|
| 346 |
-
gr.update(value="🔄 Try a New Question", elem_classes=["query-button"], interactive=True) #
|
| 347 |
]
|
| 348 |
|
| 349 |
with gr.Blocks(theme=gr.themes.Default(
|
|
@@ -377,8 +361,8 @@ with gr.Blocks(theme=gr.themes.Default(
|
|
| 377 |
show_results_state = gr.State(False)
|
| 378 |
results_agg = gr.State(load_leaderboard_data())
|
| 379 |
show_full_context = gr.State(False)
|
| 380 |
-
show_reference_answer = gr.State(False)
|
| 381 |
-
faq_expanded = gr.State(False)
|
| 382 |
|
| 383 |
with gr.Tabs() as tabs:
|
| 384 |
with gr.TabItem("Arena", id="arena-tab"):
|
|
@@ -430,7 +414,6 @@ with gr.Blocks(theme=gr.themes.Default(
|
|
| 430 |
with gr.Column(visible=False, elem_id="model-section") as model_section:
|
| 431 |
gr.Markdown("---")
|
| 432 |
|
| 433 |
-
# NEW: Model comparison header (simple)
|
| 434 |
gr.Markdown("### 🔍 Compare Models - Are these Grounded, Complete Answers or Correct Rejections?", elem_classes="section-heading")
|
| 435 |
|
| 436 |
with gr.Row(elem_id="summary-containers"):
|
|
@@ -455,12 +438,11 @@ with gr.Blocks(theme=gr.themes.Default(
|
|
| 455 |
elem_id="summary-b-display"
|
| 456 |
)
|
| 457 |
|
| 458 |
-
|
| 459 |
-
# NEW: Reference Answer Toggle (exactly like FAQ style)
|
| 460 |
with gr.Row(elem_id="reference-toggle-row"):
|
| 461 |
reference_toggle_btn = gr.Button("▶ Show Reference Answer", elem_classes=["faq-toggle-button"])
|
| 462 |
|
| 463 |
-
# Reference Answer Content - initially hidden
|
| 464 |
with gr.Row(visible=False, elem_id="reference-content") as reference_content:
|
| 465 |
reference_answer_display = gr.Markdown("", elem_classes="faq-text")
|
| 466 |
|
|
@@ -527,7 +509,7 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 527 |
outputs=[show_full_context, context_display, context_toggle_btn]
|
| 528 |
)
|
| 529 |
|
| 530 |
-
#
|
| 531 |
reference_toggle_btn.click(
|
| 532 |
fn=toggle_reference_answer,
|
| 533 |
inputs=[show_reference_answer, current_example],
|
|
@@ -573,12 +555,10 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 573 |
outputs=[query_display, context_description, context_display,
|
| 574 |
context_toggle_btn, show_full_context]
|
| 575 |
).then(
|
| 576 |
-
# NEW: Reset reference section when loading new question
|
| 577 |
fn=reset_reference_section,
|
| 578 |
inputs=[],
|
| 579 |
outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
|
| 580 |
).then(
|
| 581 |
-
# IMPORTANT: Explicitly hide FAQ here
|
| 582 |
fn=hide_faq_section,
|
| 583 |
inputs=[],
|
| 584 |
outputs=[faq_container, faq_content]
|
|
@@ -622,12 +602,10 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 622 |
outputs=[query_display, context_description, context_display,
|
| 623 |
context_toggle_btn, show_full_context]
|
| 624 |
).then(
|
| 625 |
-
# NEW: Reset reference section when trying another question
|
| 626 |
fn=reset_reference_section,
|
| 627 |
inputs=[],
|
| 628 |
outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
|
| 629 |
).then(
|
| 630 |
-
# IMPORTANT: Explicitly hide FAQ here too
|
| 631 |
fn=hide_faq_section,
|
| 632 |
inputs=[],
|
| 633 |
outputs=[faq_container, faq_content]
|
|
|
|
| 10 |
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html, toggle_reference_answer
|
| 11 |
from utils.leaderboard import load_leaderboard_data, submit_vote_with_elo, generate_leaderboard_html
|
| 12 |
from utils.vote_logger import save_vote_details
|
|
|
|
| 13 |
|
| 14 |
feedback_options = {
|
| 15 |
"left": [
|
|
|
|
| 98 |
|
| 99 |
return selected
|
| 100 |
|
| 101 |
+
def load_context():
|
| 102 |
+
# Simplified - no interrupt logic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
example = get_random_example()
|
| 104 |
|
| 105 |
context_desc = example.get('processed_context_desc', '')
|
|
|
|
| 111 |
|
| 112 |
return [
|
| 113 |
example,
|
| 114 |
+
gr.update(value=example['question'], elem_classes="query-text"),
|
| 115 |
gr.update(value=context_desc, visible=bool(context_desc)),
|
| 116 |
gr.update(value=context_html),
|
| 117 |
+
gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),
|
| 118 |
show_full
|
| 119 |
]
|
| 120 |
|
|
|
|
| 143 |
"completed": False
|
| 144 |
}
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
try:
|
| 147 |
# Get current leaderboard data to determine model usage counts
|
| 148 |
leaderboard_data = load_leaderboard_data()
|
|
|
|
| 175 |
print(f"Starting generation with models: {m_a_name} vs {m_b_name}")
|
| 176 |
s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
|
| 177 |
|
| 178 |
+
result["summary_a"] = s_a
|
| 179 |
+
result["summary_b"] = s_b
|
| 180 |
+
result["completed"] = bool(s_a and s_b)
|
| 181 |
+
print("Generation completed successfully")
|
|
|
|
|
|
|
|
|
|
| 182 |
except Exception as e:
|
| 183 |
print(f"Error in generation: {e}")
|
| 184 |
|
|
|
|
| 192 |
result.get("summary_a", ""),
|
| 193 |
result.get("summary_b", ""),
|
| 194 |
None, [], False, load_leaderboard_data(),
|
| 195 |
+
gr.update(value=result.get("summary_a", "Generation failed or was interrupted.")),
|
| 196 |
+
gr.update(value=result.get("summary_b", "Generation failed or was interrupted.")),
|
| 197 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 198 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 199 |
gr.update(interactive=False, elem_classes=["vote-button"]),
|
|
|
|
| 202 |
gr.update(visible=False),
|
| 203 |
gr.update(interactive=False, visible=True),
|
| 204 |
gr.update(visible=False),
|
| 205 |
+
gr.update(interactive=True), # Re-enable button
|
| 206 |
gr.update(elem_classes=[])
|
| 207 |
]
|
| 208 |
|
|
|
|
| 223 |
gr.update(visible=False),
|
| 224 |
gr.update(interactive=False, visible=True),
|
| 225 |
gr.update(visible=False),
|
| 226 |
+
gr.update(interactive=True), # Re-enable button
|
| 227 |
gr.update(elem_classes=[])
|
| 228 |
]
|
| 229 |
|
|
|
|
| 279 |
gr.update(visible=False), # feedback_section
|
| 280 |
gr.update(interactive=False), # submit_button
|
| 281 |
gr.update(visible=False), # results_reveal_area
|
| 282 |
+
gr.update(interactive=False), # DISABLE button during inference
|
| 283 |
None # Reset selected_winner
|
| 284 |
]
|
| 285 |
|
| 286 |
def handle_new_example_click():
|
| 287 |
+
return load_context()[0]
|
| 288 |
|
| 289 |
def update_ui_for_new_context(example):
|
| 290 |
context_desc = example.get('processed_context_desc', '')
|
|
|
|
| 292 |
context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
|
| 293 |
|
| 294 |
return [
|
| 295 |
+
gr.update(value=example['question'], elem_classes="query-text"),
|
| 296 |
gr.update(value=context_desc, visible=bool(context_desc)),
|
| 297 |
gr.update(value=get_context_html(example, False)),
|
| 298 |
+
gr.update(value="Show Full Context", elem_classes=["context-toggle-button"], visible=True),
|
| 299 |
False
|
| 300 |
]
|
| 301 |
|
|
|
|
| 310 |
|
| 311 |
def cleanup_on_disconnect():
|
| 312 |
print(f"Browser disconnected. Cleaning up resources...")
|
| 313 |
+
# Remove interrupt logic
|
| 314 |
|
| 315 |
# Helper functions for showing/hiding UI elements
|
| 316 |
def initialize_empty_app():
|
|
|
|
| 327 |
gr.update(visible=True), # model_section
|
| 328 |
gr.update(visible=True), # voting_section
|
| 329 |
gr.update(visible=True), # submit_button
|
| 330 |
+
gr.update(value="🔄 Try a New Question", elem_classes=["query-button"], interactive=True) # RE-ENABLE button
|
| 331 |
]
|
| 332 |
|
| 333 |
with gr.Blocks(theme=gr.themes.Default(
|
|
|
|
| 361 |
show_results_state = gr.State(False)
|
| 362 |
results_agg = gr.State(load_leaderboard_data())
|
| 363 |
show_full_context = gr.State(False)
|
| 364 |
+
show_reference_answer = gr.State(False)
|
| 365 |
+
faq_expanded = gr.State(False)
|
| 366 |
|
| 367 |
with gr.Tabs() as tabs:
|
| 368 |
with gr.TabItem("Arena", id="arena-tab"):
|
|
|
|
| 414 |
with gr.Column(visible=False, elem_id="model-section") as model_section:
|
| 415 |
gr.Markdown("---")
|
| 416 |
|
|
|
|
| 417 |
gr.Markdown("### 🔍 Compare Models - Are these Grounded, Complete Answers or Correct Rejections?", elem_classes="section-heading")
|
| 418 |
|
| 419 |
with gr.Row(elem_id="summary-containers"):
|
|
|
|
| 438 |
elem_id="summary-b-display"
|
| 439 |
)
|
| 440 |
|
| 441 |
+
# Reference Answer Toggle
|
|
|
|
| 442 |
with gr.Row(elem_id="reference-toggle-row"):
|
| 443 |
reference_toggle_btn = gr.Button("▶ Show Reference Answer", elem_classes=["faq-toggle-button"])
|
| 444 |
|
| 445 |
+
# Reference Answer Content - initially hidden
|
| 446 |
with gr.Row(visible=False, elem_id="reference-content") as reference_content:
|
| 447 |
reference_answer_display = gr.Markdown("", elem_classes="faq-text")
|
| 448 |
|
|
|
|
| 509 |
outputs=[show_full_context, context_display, context_toggle_btn]
|
| 510 |
)
|
| 511 |
|
| 512 |
+
# Reference answer toggle functionality
|
| 513 |
reference_toggle_btn.click(
|
| 514 |
fn=toggle_reference_answer,
|
| 515 |
inputs=[show_reference_answer, current_example],
|
|
|
|
| 555 |
outputs=[query_display, context_description, context_display,
|
| 556 |
context_toggle_btn, show_full_context]
|
| 557 |
).then(
|
|
|
|
| 558 |
fn=reset_reference_section,
|
| 559 |
inputs=[],
|
| 560 |
outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
|
| 561 |
).then(
|
|
|
|
| 562 |
fn=hide_faq_section,
|
| 563 |
inputs=[],
|
| 564 |
outputs=[faq_container, faq_content]
|
|
|
|
| 602 |
outputs=[query_display, context_description, context_display,
|
| 603 |
context_toggle_btn, show_full_context]
|
| 604 |
).then(
|
|
|
|
| 605 |
fn=reset_reference_section,
|
| 606 |
inputs=[],
|
| 607 |
outputs=[show_reference_answer, reference_content, reference_toggle_btn, reference_answer_display]
|
| 608 |
).then(
|
|
|
|
| 609 |
fn=hide_faq_section,
|
| 610 |
inputs=[],
|
| 611 |
outputs=[faq_container, faq_content]
|