# NOTE(review): the text that preceded the imports here was file-viewer page
# residue, not Python source: a "Spaces: Sleeping" status banner, the line
# "File size: 29,179 Bytes", a run of short commit hashes (6a5434e, c95447d,
# 428030b, becbb53, b568a13, d609ac9, 0f21577, a1f0616) and a 1-644
# line-number gutter.
import streamlit as st
import pandas as pd
import random
import time
import string
import gspread
import os
import json
import datetime
import re
from oauth2client.service_account import ServiceAccountCredentials
# Set page config at the very beginning, before any other Streamlit call in
# this script: browser-tab title plus the wide page layout.
st.set_page_config(page_title="LLM Output Evaluation", layout="wide")
# Primary highlight color (hex), kept consistent with previous apps. It is
# interpolated into the inline CSS of the HTML snippets rendered by the page
# functions in this file.
HIGHLIGHT_COLOR = "#2c7be5"
# --- ALL UTILITY FUNCTIONS DEFINED AT THE TOP (Solving NameError) ---
def highlight_keyword(sentence, keyword, color=HIGHLIGHT_COLOR):
    """Wrap every whole-word occurrence of ``keyword`` in a colored ``<strong>`` tag.

    Matching ignores case; the matched text keeps the casing it has in
    ``sentence``.

    Args:
        sentence: Text to search.
        keyword: Word or phrase to highlight. Regex metacharacters in it are
            escaped, so it is always matched literally.
        color: CSS color written into the tag's inline ``style`` attribute.

    Returns:
        ``sentence`` with each match wrapped as
        ``<strong style='color:<color>;'>match</strong>``. The sentence is
        returned unchanged when ``keyword`` is empty.
    """
    if not keyword:
        # An empty pattern matches at every word boundary and would scatter
        # empty <strong> tags through the sentence.
        return sentence

    # Lookarounds instead of \b: they behave the same for keywords made of
    # word characters, and still work when the keyword starts or ends with a
    # non-word character (e.g. "C++"), where \b can never match.
    pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"

    def _wrap(match):
        # A function replacement keeps any backslash in `color` literal; a
        # replacement template string would interpret it as an escape.
        return f"<strong style='color:{color};'>{match.group(0)}</strong>"

    return re.sub(pattern, _wrap, sentence, flags=re.IGNORECASE)
def generate_passcode(worker_id):
    """Build a completion code of the form ``EXP2-pilot-W<id>-<suffix>``.

    ``worker_id`` is formatted as an integer zero-padded to at least two
    digits. The suffix is six characters drawn at random from the uppercase
    ASCII letters and the digits.
    """
    alphabet = string.ascii_uppercase + string.digits
    random_chars = random.choices(alphabet, k=6)
    return "EXP2-pilot-W{:02d}-{}".format(worker_id, "".join(random_chars))
def get_google_creds():
    """Authorize a gspread client from the SERVICE_ACCOUNT_JSON environment variable.

    Returns:
        The object returned by ``gspread.authorize``, or ``None`` after
        showing an ``st.error`` message when the variable is unset or empty,
        does not hold valid JSON, or credential loading/authorization raises.
    """
    raw_credentials = os.getenv("SERVICE_ACCOUNT_JSON")

    # Guard clause: nothing to parse when the variable is missing or empty.
    if not raw_credentials:
        st.error("Google service account credentials (SERVICE_ACCOUNT_JSON) not found in environment variables. Please configure your Streamlit app secrets or local environment.")
        return None

    try:
        key_info = json.loads(raw_credentials)
        api_scopes = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(key_info, api_scopes)
        return gspread.authorize(credentials)
    except json.JSONDecodeError:
        # Reported separately so the operator knows the secret itself is malformed.
        st.error("Invalid JSON format in SERVICE_ACCOUNT_JSON environment variable. Please ensure it's a single, valid JSON string.")
        return None
    except Exception as e:
        st.error(f"Error loading Google credentials: {e}")
        return None
def upload_to_google_drive(response_df):
    """Append the collected responses to the "EXP2-pilot" Google Sheet.

    The spreadsheet is opened by name and created if it does not exist. A
    header row is written only when the first row of the sheet is empty; an
    existing header that differs from the DataFrame's columns is left
    untouched and the rows are appended regardless. On success the responses
    held in ``st.session_state`` are cleared so a rerun cannot upload them a
    second time.

    Args:
        response_df: DataFrame with one row per recorded rating.

    Returns:
        None. The outcome is reported through Streamlit status messages, and
        any exception raised during the upload is caught and shown instead
        of propagating.
    """
    if response_df.empty:
        st.warning("No responses to upload.")
        return

    try:
        client = get_google_creds()
        if client is None:
            st.error("❌ Google credentials not loaded. Cannot upload results.")
            return

        sheet_name = "EXP2-pilot"  # Sheet name for Experiment 2
        try:
            sheet = client.open(sheet_name).sheet1
        except gspread.exceptions.SpreadsheetNotFound:
            st.info(f"Creating new Google Sheet: {sheet_name}")
            sheet = client.create(sheet_name).sheet1

        # Write the header only into a sheet whose first row is empty.
        current_sheet_headers = sheet.row_values(1) if sheet.row_count > 0 else []
        if not current_sheet_headers:
            sheet.append_row(list(response_df.columns))

        # Blank out NaN and +/-inf before upload. Infinities are mapped to
        # NaN explicitly rather than to None: with a list `to_replace`, a
        # None value has meant "forward-fill" in some pandas versions, which
        # would silently copy the previous row's value into the cell.
        response_df_clean = response_df.replace(
            [float("inf"), float("-inf")], float("nan")
        ).fillna("")

        # Append all rows in one call. The DataFrame is known to be non-empty
        # here, so there is always at least one row to send.
        sheet.append_rows(response_df_clean.values.tolist())
        st.success("✅ Your responses have been recorded successfully.")

        # Clear responses after successful upload to prevent re-uploading on rerun.
        st.session_state.responses = []
    except Exception as e:
        st.error("❌ Error uploading to Google Drive:")
        st.error(f"Details: {e}")
# Function to record responses for the current section
def record_section_responses(idx, sec_idx, current_sample_data, current_section_title, acc_score, comp_score, interp_score):
    """Append the three ratings for one section to ``st.session_state.responses``.

    One record is stored per metric (Accuracy, Completeness, Interpretability),
    in that order. All three share the same timestamp, participant fields,
    sample fields and response time.

    Args:
        idx: Index of the sample being rated.
        sec_idx: Index of the section within that sample.
        current_sample_data: Sample dict; its "text" and "keyword" entries are stored.
        current_section_title: Title of the section being rated.
        acc_score: Accuracy rating.
        comp_score: Completeness rating.
        interp_score: Interpretability rating.
    """
    state = st.session_state
    participant = state.get("worker_id", "N/A")
    completion_code = state.get("passcode", "N/A")
    recorded_at = datetime.datetime.now().isoformat()

    # Elapsed time is measured once, up front, so all three rows carry the
    # same value. If no start time is stored, "now" is used as the start.
    section_started = state.get("response_start_time", time.time())
    elapsed_sec = time.time() - section_started

    shared_fields = {
        "timestamp": recorded_at,
        "worker_id": participant,
        "passcode": completion_code,
        "sample_index": idx,
        "section_index_within_sample": sec_idx,
        "section_title": current_section_title,
        "original_text": current_sample_data["text"],
        "keyword": current_sample_data["keyword"],
        "response_time_sec": elapsed_sec,
    }

    ratings = (
        ("Accuracy", acc_score),
        ("Completeness", comp_score),
        ("Interpretability", interp_score),
    )
    for metric_name, rating in ratings:
        row = dict(shared_fields)
        row["metric"] = metric_name
        row["score"] = rating
        state.responses.append(row)
def generate_rating_prompt(section_title: str) -> str:
    """Return the lead-in question shown above the ratings for a section.

    The section name is the part of ``section_title`` before the first colon,
    with any leading "N." enumeration removed, compared case-insensitively.

    Args:
        section_title: Title such as
            "1. Engaged Events: What is happening in the situation?".

    Returns:
        A dedicated prompt for the "engaged event", "generalizable propert..."
        and "evoked emotion" sections, or a generic prompt that names the
        section for anything else. Every prompt ends with
        " More specifically: ".
    """
    # Strip only a leading enumeration ("1. "). Splitting on the first ". "
    # anywhere in the title would cut an unnumbered title at a sentence break
    # inside its subtitle and lose the section name.
    title = re.sub(r"^\s*\d+\.\s*", "", section_title)

    # Everything before the first colon is the section name; a title without
    # a colon is used whole.
    section_name = title.split(":", 1)[0].strip().lower()

    if "engaged event" in section_name:
        return "How well does this capture the events involving the keyword in this situation? More specifically: "
    if "generalizable propert" in section_name:  # 'propert' for 'property' or 'properties'
        return "How well does this reflect the relevant properties of the keyword in this situation? More specifically: "
    if "evoked emotion" in section_name:
        return "How well does this capture the emotions evoked by the keyword in this situation? More specifically: "
    return f"How well does this describe the {section_name}? More specifically: "
# --- Data Definition for Samples (defined after the utility functions) ---
# Each entry is one stimulus:
#   "text"         - the sentence shown to the participant
#   "keyword"      - the word of interest within that sentence
#   "scene_output" - mapping of section title -> list of bullet strings.
#                    Titles follow the pattern "<n>. <Section Name>: <question>";
#                    other code in this file splits them on ":" and on the
#                    leading number, so keep that shape when adding entries.
stimuli_list = [
    {
        "text": "The mournful cry of a pair of crows and a single lost lamb added an eeriness to the scene.",
        "keyword": "crow",
        "scene_output": {
            "1. Engaged Events: What is happening in the situation?": [
                "They emit a mournful cry",
                "Their presence adds eeriness to the scene"
            ],
            "2. Generalizable Properties: What are the relevant properties of crow in the situation?": [
                "They are often associated with foreboding or ominous situations",
                "Their vocalizations can enhance the emotional tone of a setting"
            ],
            "3. Evoked Emotions: Which emotions do you observe in the situation?": [
                "Eerie: Their cries contribute to a haunting atmosphere.",
                "Mourning: Their sound suggests themes of loss and sorrow."
            ]
        }
    },
    {
        "text": "Not knowing what else to do, I got up. Tea, I told myself. Chamomile. Or white. White tea is soothing, and there's nothing in it that sets me off.",
        "keyword": "tea",
        "scene_output": {
            "1. Engaged Events: What is happening in the situation?": [
                "PersonX considers chamomile tea",
                "PersonX considers white tea",
                "PersonX plans to prepare tea"
            ],
            "2. Generalizable Properties: What are the relevant properties of tea in the situation?": [
                "It is associated with comfort and relaxation",
                "It has various types that can cater to different needs"
            ],
            "3. Evoked Emotions: Which emotions do you observe in the situation?": [
                "Comfort: The choice of tea is aimed at providing solace.",
                "Uncertainty: The initial indecision reflects a search for clarity."
            ]
        }
    },
    {
        "text": "One morning when Tessie lifted the lid of the crate, she found a beautiful monarch butterfly clinging upside down from the broken cocoon.",
        "keyword": "butterfly",
        "scene_output": {
            "1. Engaged Events: What is happening in the situation?": [
                "AnimalX clings to ObjectY",
                "AnimalX emerges from ObjectY"
            ],
            "2. Generalizable Properties: What are the relevant properties of butterfly in the situation?": [
                "It symbolizes transformation and beauty",
                "It represents new beginnings after a period of change"
            ],
            # This section has a single bullet, unlike the others.
            "3. Evoked Emotions: Which emotions do you observe in the situation?": [
                "Wonder: The discovery of a butterfly can evoke feelings of awe and appreciation for nature."
            ]
        }
    }
]
# --- Page Functions ---
def instructions_1():
    """Render instructions page 1 of 2: the task overview and rating dimensions.

    Clicking "Next" sets ``st.session_state.step`` to "instructions_2" and
    reruns the script. Otherwise the script run is stopped after the page has
    been drawn.
    """
    st.title("Experiment 2: LLM Scene Abstraction Evaluation")
    # NOTE(review): the header icon was mis-decoded beyond recovery in the
    # reviewed copy; a book emoji is substituted - confirm the intended glyph.
    st.header("📘 Instructions (1/2)")
    # Mis-decoded punctuation in the body (apostrophe, dashes) is restored.
    st.write(f"""
Welcome to Experiment 2! Here’s how it works:
- You will read a sentence that contains a specific <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**keyword**</span>.
- You will then see <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**scene-level information about the keyword** in the given situation</span>, generated by a large language model (LLM).
- The information is organized into three sections:
1. **Engaged Events** – What is happening to the keyword in this situation?
2. **Generalizable Properties** – What context-relevant properties of the keyword are revealed through this situation?
3. **Evoked Emotions** – What emotions are associated with the keyword in this scene, and why?
<br>
Your task is to **evaluate each section** based on how well it reflects the information conveyed in the original sentence.
- For each section, please rate the following dimensions on a 1–5 scale:
- **Accuracy** – How accurate is it? Is the content factually consistent with the sentence?
- **Completeness** – How complete and rich is it? Does it fully capture the relevant aspects of the keyword?
- **Interpretability** – How interpretable is it? Is it easy to understand?
<br>
If you have questions or feedback, please feel free to let us know via email.
<br><br>
""", unsafe_allow_html=True)
    if st.button("Next ➡️"):
        st.session_state.step = "instructions_2"
        st.rerun()
    st.stop()
def instructions_2():
    """Render instructions page 2 of 2: the placeholder-notation guide.

    Clicking the start button sets ``st.session_state.step`` to "training"
    and reruns the script. Otherwise the script run is stopped after the page
    has been drawn.
    """
    st.title("Experiment 2: LLM Scene Abstraction Evaluation")
    # NOTE(review): the header icon was mis-decoded beyond recovery in the
    # reviewed copy; a book emoji is substituted - confirm the intended glyph.
    st.header("📘 Instructions (2/2)")
    st.write(f"""
<b>Placeholder notation guide</b><br>
In the scene descriptions, you will encounter placeholder labels like <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>PersonX</span> and <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>AnimalX</span>. These can be interpreted as follows:
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>PersonX</span>: someone in the scene
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>PersonY</span>: another individual in the scene
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>AnimalX</span>: some animal in the scene
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>ObjectX</span>: some non-living object in the scene
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>PersonGroupX</span>: a group of people
- <span style='color:{HIGHLIGHT_COLOR}; font-weight:600;'>AnimalGroupX</span>: a group of animals (e.g., a flock of birds, a pack of wolves)
These labels are used instead of specific names to help you focus on the roles and actions of each entity in the scene, rather than their exact names or identities.
When you're ready, click below to begin!
<br>
<br>
""", unsafe_allow_html=True)
    if st.button("Start practicing ▶️"):
        st.session_state.step = "training"
        # The section timer is not started here; training() starts it when
        # the first section is actually displayed.
        st.rerun()
    st.stop()
def _show_training_complete():
    """Draw the practice-complete screen, upload the responses and show the completion code."""
    st.session_state.training_complete = True
    # NOTE(review): the header icon and the icon on the completion-code line
    # were mis-decoded beyond recovery in the reviewed copy; party-popper and
    # key emoji are substituted - confirm the intended glyphs.
    st.header("🎉 Practice Complete!")
    st.markdown("""
<div style='font-size:18px; line-height:1.6;'>
You've successfully completed the training phase of the experiment. Great work! 🎯<br><br>
If you have any questions, suggestions, or feedback about the task, please let us know.<br>
If everything is clear, just let us know that you're ready to proceed to the main experiment.
</div>
""", unsafe_allow_html=True)

    # Generate a passcode if none exists yet (e.g. training started directly).
    if st.session_state.passcode is None:
        # `worker_id` is stored as None until the participant enters an ID,
        # so .get("worker_id", 0) would hand None to the integer formatting
        # in generate_passcode; `or 0` covers both "missing" and "None".
        st.session_state.passcode = generate_passcode(st.session_state.get("worker_id") or 0)

    # Fixed column order so rows line up with the Google Sheet header.
    upload_columns = [
        "timestamp", "worker_id", "passcode", "sample_index", "section_index_within_sample",
        "section_title", "original_text", "keyword", "metric", "score", "response_time_sec"
    ]
    response_df = pd.DataFrame(st.session_state.responses)
    response_df = response_df[[col for col in upload_columns if col in response_df.columns]]
    upload_to_google_drive(response_df)

    st.markdown("#### 🔑 Your Unique Completion Code")
    st.code(st.session_state.passcode)


def _render_stimulus(idx, total_samples, current_sample_data, current_section_title):
    """Draw the sentence, its keyword and the scene bullets for the current section."""
    st.markdown(f"<p style='color: gray; font-size: 15px;'>Sentence {idx + 1} of {total_samples}</p>", unsafe_allow_html=True)

    # Keyword display
    st.markdown(
        f"<p style='font-size:18px; font-weight: bold; color: {HIGHLIGHT_COLOR};'>Keyword: {current_sample_data.get('keyword', 'N/A')}</p>",
        unsafe_allow_html=True
    )

    # Sentence box, with the first case-insensitive occurrence of the keyword in bold.
    st.markdown("Text:")
    text = current_sample_data['text']
    keyword = current_sample_data['keyword']
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    text_with_bold = pattern.sub(r"<b>\g<0></b>", text, count=1)
    st.markdown(
        f"""
<div style='border: 1px solid #ccc; border-radius: 6px;
padding: 12px; background-color: #ffffff;
color: #000000; font-size: 16px; line-height: 1.6;
margin-bottom: 1.2rem;'>
{text_with_bold}
</div>
""",
        unsafe_allow_html=True
    )

    # Section title box. Split on the first colon only, so a subtitle that
    # itself contains a colon is kept instead of being dropped.
    st.markdown("Scene information:")
    bold_title, _, subtitle = current_section_title.partition(":")
    bold_title = bold_title.strip()
    subtitle = subtitle.strip()
    st.markdown(
        f"""
<div style='border-left: 4px solid {HIGHLIGHT_COLOR}; background-color: #ffffff;
color: #000000; padding: 12px 16px; border-radius: 6px;
margin: 1rem 0 0.8rem 0; font-size: 16px; font-weight: 600;'>
<div><b>{bold_title}</b></div>
<div style='font-weight: normal; font-size: 15px; margin-top: 4px;'>( {subtitle} )</div>
</div>
""",
        unsafe_allow_html=True
    )

    # Scene output bullets
    bullets = current_sample_data['scene_output'][current_section_title]
    st.markdown(
        "<ul style='margin-bottom: 0.2rem; padding-left: 1.2rem;'>" +
        "".join(f"<li style='margin-bottom: 0.2rem; font-size:17px;'>{b}</li>" for b in bullets) +
        "</ul>", unsafe_allow_html=True
    )


def _rating_question(question, label, key, scale_labels):
    """Draw one 1-5 radio question with its scale legend and return the selected value (or None)."""
    st.markdown(f"<p style='font-size:16px;'>{question}</p>", unsafe_allow_html=True)
    # Pre-select whatever this widget key already holds in session state.
    previous = st.session_state.get(key)
    choice = st.radio(
        label=label,
        options=[1, 2, 3, 4, 5],
        index=previous - 1 if previous else None,  # Convert value (1-5) to index (0-4)
        key=key,
        horizontal=True,
        label_visibility="collapsed"
    )
    legend = ",\n".join(f"{value} = {text}" for value, text in enumerate(scale_labels, start=1))
    st.markdown(f"\n<div class='radio-description'>\n{legend}\n</div>\n", unsafe_allow_html=True)
    return choice


def training():
    """Run the practice phase: one form per section of each sample in ``stimuli_list``.

    Each submission is validated (all three ratings chosen and at least five
    seconds spent on the section), recorded through
    ``record_section_responses`` and followed by a rerun that shows the next
    section or sample. Once every sample is done, the completion screen is
    shown and the responses are uploaded. The script run is always stopped or
    rerun from inside this function.
    """
    min_seconds_per_section = 5  # Minimum time for careful evaluation per section

    st.title("Experiment 2: LLM Scene Abstraction Evaluation")
    stimuli = stimuli_list  # Using the predefined stimuli_list for training
    idx = st.session_state.training_index

    # --- Handle Training Completion ---
    if idx >= len(stimuli):
        _show_training_complete()
        st.stop()

    # --- Display Current Sample and Section ---
    current_sample_data = stimuli[idx]
    total_samples = len(stimuli)
    section_keys = list(current_sample_data['scene_output'].keys())

    # Start from the first section, and restart the timer, when no valid
    # section index is stored for this sample.
    if "section_index" not in st.session_state or st.session_state.section_index >= len(section_keys):
        st.session_state.section_index = 0
        st.session_state.response_start_time = time.time()
    sec_idx = st.session_state.section_index
    current_section_title = section_keys[sec_idx]

    # A start time of 0 is treated as "never started": start the timer now.
    if "response_start_time" not in st.session_state or st.session_state.response_start_time == 0:
        st.session_state.response_start_time = time.time()

    with st.form(key=f"form_{idx}_{sec_idx}"):
        left_col, right_col = st.columns([1, 1])

        # --- Left Column: Sentence and Section Summary ---
        with left_col:
            _render_stimulus(idx, total_samples, current_sample_data, current_section_title)

        # --- Right Column: Evaluation ---
        with right_col:
            highlight = "the keyword"
            prompt_text = generate_rating_prompt(current_section_title).replace(
                highlight,
                f"<b style='color:{HIGHLIGHT_COLOR};'>{highlight}</b>"
            )
            st.markdown(
                f"<p style='font-size:16px; font-weight:normal; margin-bottom: 1.2rem;'>{prompt_text}</p>",
                unsafe_allow_html=True
            )

            acc = _rating_question(
                "How accurate is it? Is the content factually consistent with the sentence?",
                "Accuracy",
                f"rating_acc_{idx}_{sec_idx}",
                ("Very inaccurate", "Inaccurate", "Neutral", "Accurate", "Very accurate"),
            )
            st.markdown("<hr style='margin: 1rem 0;'/>", unsafe_allow_html=True)
            comp = _rating_question(
                "How complete and rich is it? Does it fully capture the relevant aspects of the keyword?",
                "Completeness",
                f"rating_comp_{idx}_{sec_idx}",
                ("Very incomplete", "Incomplete", "Moderate", "Mostly complete", "Very complete"),
            )
            st.markdown("<hr style='margin: 1rem 0;'/>", unsafe_allow_html=True)
            interp = _rating_question(
                "How interpretable is it? Is it easy to understand?",
                "Interpretability",
                f"rating_interp_{idx}_{sec_idx}",
                ("Very difficult to interpret", "Difficult", "Moderate", "Easy", "Very easy to interpret"),
            )

            # Navigation button within the form.
            # NOTE(review): indentation was lost in the reviewed copy, so
            # placing the button in the right column rather than directly
            # under the form is an assumption - confirm the intended layout.
            st.markdown("<div style='margin-top: 2rem;'></div>", unsafe_allow_html=True)
            submit_button = st.form_submit_button("Next ➡️")

    # --- Form submission handler ---
    if submit_button:
        # Validate all ratings are selected
        if acc is None or comp is None or interp is None:
            st.warning("⚠️ Please complete all ratings before proceeding.")
            st.stop()

        # Validate response time
        response_time = time.time() - st.session_state.response_start_time
        if response_time < min_seconds_per_section:
            st.warning("⚠️ Please take enough time to read and evaluate carefully before proceeding.")
            st.stop()

        # If all validations pass, record responses for this section
        record_section_responses(
            idx=idx,
            sec_idx=sec_idx,
            current_sample_data=current_sample_data,
            current_section_title=current_section_title,
            acc_score=acc,
            comp_score=comp,
            interp_score=interp
        )

        # Move to the next section, or to the first section of the next sample.
        if st.session_state.section_index < len(section_keys) - 1:
            st.session_state.section_index += 1
        else:
            st.session_state.section_index = 0
            st.session_state.training_index += 1
        st.session_state.response_start_time = time.time()  # Restart timer for what is shown next
        st.rerun()

    st.stop()
# --- Main App Flow Manager ---
def instructions_page_manager():
    """Dispatch to the page that matches ``st.session_state.step``.

    Steps handled: "worker_id_input" (participant-ID form), "instructions_1",
    "instructions_2", "training" and "training_complete" (upload the
    responses and show the completion code). Any other value draws nothing.
    """
    step = st.session_state.step

    # Page 0: Worker ID Input (first logical step)
    if step == "worker_id_input":
        st.title("Welcome to Experiment 2")
        st.write("Please enter your participant ID to begin the pilot run:")
        with st.form(key='worker_id_form'):
            participant_input = st.text_input("Participant ID (e.g., 4)")
            submit_btn = st.form_submit_button("Submit")

        if submit_btn:
            # Only the parse sits inside the try, so a ValueError raised by
            # any later call is not mistaken for bad input.
            try:
                worker_id = int(participant_input)
            except ValueError:
                worker_id = None
            # Negative numbers parse as integers but would produce a
            # malformed completion code (e.g. "W-1"), so reject them too.
            if worker_id is None or worker_id < 0:
                st.error("Please enter a valid numeric ID.")
            else:
                st.session_state.worker_id = worker_id
                st.session_state.passcode = generate_passcode(worker_id)
                st.session_state.step = "instructions_1"  # Move to instructions page 1
                st.rerun()
        st.stop()

    # Page 1: Instructions (1/2)
    elif step == "instructions_1":
        instructions_1()

    # Page 2: Instructions (2/2)
    elif step == "instructions_2":
        instructions_2()

    # Training Phase
    elif step == "training":
        training()

    # Training Complete Page
    elif step == "training_complete":
        # NOTE(review): both icons on this page were mis-decoded beyond
        # recovery in the reviewed copy; party-popper and key emoji are
        # substituted - confirm the intended glyphs.
        st.header("🎉 Practice Complete!")
        st.markdown("""
You have completed the practice phase! Please let us know if you had any questions or comments on the task/experiment. If everything is clear, we will provide you the link for the main experiment.
""", unsafe_allow_html=True)

        # Fixed column order so rows line up with the Google Sheet header.
        expected_upload_cols = [
            "timestamp", "worker_id", "passcode", "sample_index", "section_index_within_sample",
            "section_title", "original_text", "keyword", "metric", "score", "response_time_sec"
        ]
        response_df = pd.DataFrame(st.session_state.responses)
        final_response_df = response_df[[col for col in expected_upload_cols if col in response_df.columns]]
        upload_to_google_drive(final_response_df)

        st.markdown("#### 🔑 Your Unique Completion Code")
        st.code(st.session_state.passcode)
        st.stop()
# --- Main App Entry Point ---
# --- Main App Entry Point ---
if __name__ == "__main__":
    # Seed every session-state key the app reads, without overwriting values
    # that already exist from an earlier run of the script in this session.
    session_defaults = {
        "step": "worker_id_input",   # Initial step
        "worker_id": None,
        "passcode": None,
        "training_index": 0,
        "section_index": 0,          # Current section within a sample
        "responses": [],             # All collected individual rating records
        "response_start_time": 0,    # 0 = not started; set to time.time() when a section is displayed
    }
    for state_key, default_value in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    # Global CSS styling, injected on every run before any page is drawn.
    st.markdown("""
<style>
.stApp {
font-size: 17px !important;
font-family: 'Verdana', sans-serif !important;
}
/* Standardize radio button label size */
div[data-testid="stRadio"] label,
div[data-testid="stRadio"] label > div > span,
div[data-testid="stRadio"] p { /* Target the paragraph inside label for consistent sizing */
font-family: 'Verdana', sans-serif !important;
font-size: 17px !important; /* Main content font size */
}
div[data-testid="stMarkdownContainer"] p,
div[data-testid="stVerticalBlock"] p { /* General paragraph text */
font-size: 17px !important;
font-family: 'Verdana', sans-serif !important;
}
/* Specific highlight for instructions */
.highlight-blue {
color: #2c7be5; /* Using direct color for this specific style */
font-weight: 500;
}
/* General highlight red (if used for warnings/important text) */
.highlight-red {
color: #D9534F;
font-weight: bold;
}
/* For smaller descriptive text near radio buttons */
.radio-description {
font-size: 14px !important; /* Smaller font for descriptions */
color: gray !important;
line-height: 1.5 !important;
}
</style>
""", unsafe_allow_html=True)

    # Hand control to the step dispatcher. The stray trailing "|" that
    # followed this call was not valid Python and has been removed.
    instructions_page_manager()