jhauret commited on
Commit
85bf837
·
verified ·
1 Parent(s): ea95078

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -28
app.py CHANGED
@@ -1,29 +1,26 @@
1
  import gradio as gr
2
- from datasets import load_dataset, Audio
3
 
4
  # --- Configuration ---
5
  DATASET_NAME = "Cnam-LMSSC/vibravox-test"
6
  DATASET_CONFIG = "speech_clean"
7
  DATASET_SPLIT = "train"
8
  TEXT_COLUMN = "raw_text"
 
 
9
  AUDIO_COLUMNS = [
10
- "audio.headset_microphone",
11
- "audio.throat_microphone",
12
- "audio.soft_in_ear_microphone",
13
- "audio.rigid_in_ear_microphone",
14
  "audio.forehead_accelerometer",
15
  "audio.temple_vibration_pickup"
16
  ]
17
 
18
  # --- Load Dataset ---
19
  try:
 
20
  dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, split=DATASET_SPLIT)
21
-
22
- new_features = dataset.features.copy()
23
- for col in AUDIO_COLUMNS:
24
- new_features[col] = Audio(decode=False)
25
- dataset = dataset.cast(new_features)
26
-
27
  except Exception as e:
28
  dataset = None
29
  app_error = e
@@ -31,27 +28,23 @@ except Exception as e:
31
  # --- App Logic ---
32
  def get_audio_row(index: int):
33
  """
34
- Retrieves a row and returns the text and the full URLs to the audio files.
35
  """
36
  row_index = int(index)
37
  sample = dataset[row_index]
38
 
39
  sentence = sample[TEXT_COLUMN]
40
 
41
- # This is the URL structure that *should* be correct.
42
- base_url = f"https://huggingface.co/datasets/{DATASET_NAME}/resolve/main/{DATASET_CONFIG}"
 
 
 
 
 
 
43
 
44
- # --- !! THIS IS THE DEBUGGING STEP !! ---
45
- # We will print the first URL to the logs to see exactly what is being generated.
46
- # This will help us find any hidden typos or path errors.
47
- first_file_path = sample[AUDIO_COLUMNS[0]]['path']
48
- first_full_url = f"{base_url}/{first_file_path}"
49
- print(f"DEBUGGING URL: '{first_full_url}'")
50
- # ----------------------------------------
51
-
52
- audio_urls = [f"{base_url}/{sample[col]['path']}" for col in AUDIO_COLUMNS]
53
-
54
- return [sentence] + audio_urls
55
 
56
  # --- Build the Gradio Interface ---
57
  with gr.Blocks(css="footer {display: none !important}") as demo:
@@ -61,15 +54,23 @@ with gr.Blocks(css="footer {display: none !important}") as demo:
61
  gr.Markdown("## 💥 Application Error")
62
  gr.Markdown(f"Could not load or process the dataset. Error: `{app_error}`")
63
  else:
64
- # The UI part remains the same
65
  gr.Markdown("Select a row to listen to all corresponding audio sensor recordings.")
 
66
  slider = gr.Slider(minimum=0, maximum=len(dataset) - 1, step=1, value=0, label="Select Data Row")
 
67
  sentence_output = gr.Textbox(label="Raw Text", interactive=False)
 
68
  with gr.Row():
69
- audio1, audio2, audio3 = gr.Audio(label="Headset Microphone"), gr.Audio(label="Laryngophone"), gr.Audio(label="Soft In-Ear Microphone")
 
 
70
  with gr.Row():
71
- audio4, audio5, audio6 = gr.Audio(label="Rigid In-Ear Microphone"), gr.Audio(label="Forehead Accelerometer"), gr.Audio(label="Temple Vibration Pickup")
 
 
 
72
  outputs = [sentence_output, audio1, audio2, audio3, audio4, audio5, audio6]
 
73
  demo.load(fn=get_audio_row, inputs=gr.State(0), outputs=outputs)
74
  slider.change(fn=get_audio_row, inputs=slider, outputs=outputs)
75
 
 
import gradio as gr
from datasets import load_dataset

# --- Configuration ---
# Dataset coordinates on the Hugging Face Hub.
DATASET_NAME = "Cnam-LMSSC/vibravox-test"
DATASET_CONFIG = "speech_clean"
DATASET_SPLIT = "train"
# Column holding the spoken sentence for each row.
TEXT_COLUMN = "raw_text"

# One column per body-conduction / air sensor; names match the dataset schema.
AUDIO_COLUMNS = [
    "audio.headset_mic",
    "audio.laryngophone",
    "audio.soft_in_ear_mic",
    "audio.rigid_in_ear_mic",
    "audio.forehead_accelerometer",
    "audio.temple_vibration_pickup",
]

# --- Load Dataset ---
# Plain load (no feature cast): audio columns arrive decoded as
# {'array': ..., 'sampling_rate': ...} dicts, which is what the app consumes.
try:
    dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, split=DATASET_SPLIT)
except Exception as e:
    # Keep the app importable even when loading fails; the UI reports the
    # error stored here instead of crashing at startup.
    dataset = None
    app_error = e
 
28
  # --- App Logic ---
29
  def get_audio_row(index: int):
30
  """
31
+ Retrieves a row and returns the text and the RAW audio data.
32
  """
33
  row_index = int(index)
34
  sample = dataset[row_index]
35
 
36
  sentence = sample[TEXT_COLUMN]
37
 
38
+ # --- THE FIX IS HERE ---
39
+ # We now extract the raw audio (NumPy array) and sampling rate directly.
40
+ # We return a list of tuples: (sampling_rate, audio_array).
41
+ # This is the most robust way and avoids all URL/path errors.
42
+ raw_audio_data = [
43
+ (sample[col]['sampling_rate'], sample[col]['array']) for col in AUDIO_COLUMNS
44
+ ]
45
+ # --------------------
46
 
47
+ return [sentence] + raw_audio_data
 
 
 
 
 
 
 
 
 
 
48
 
49
  # --- Build the Gradio Interface ---
50
  with gr.Blocks(css="footer {display: none !important}") as demo:
 
54
  gr.Markdown("## 💥 Application Error")
55
  gr.Markdown(f"Could not load or process the dataset. Error: `{app_error}`")
56
  else:
 
57
  gr.Markdown("Select a row to listen to all corresponding audio sensor recordings.")
58
+
59
  slider = gr.Slider(minimum=0, maximum=len(dataset) - 1, step=1, value=0, label="Select Data Row")
60
+
61
  sentence_output = gr.Textbox(label="Raw Text", interactive=False)
62
+
63
  with gr.Row():
64
+ audio1 = gr.Audio(label="Headset Mic")
65
+ audio2 = gr.Audio(label="Laryngophone")
66
+ audio3 = gr.Audio(label="Soft In-Ear Mic")
67
  with gr.Row():
68
+ audio4 = gr.Audio(label="Rigid In-Ear Mic")
69
+ audio5 = gr.Audio(label="Forehead Accelerometer")
70
+ audio6 = gr.Audio(label="Temple Pickup")
71
+
72
  outputs = [sentence_output, audio1, audio2, audio3, audio4, audio5, audio6]
73
+
74
  demo.load(fn=get_audio_row, inputs=gr.State(0), outputs=outputs)
75
  slider.change(fn=get_audio_row, inputs=slider, outputs=outputs)
76