Spaces:

chrisjay
/

afro-speech

Build error

App Files Files Community

chrisjay committed on May 19, 2022

Commit

0724b59

1 Parent(s): d370dc9

dashboard

Browse files

Files changed (3) hide show

app.py +42 -4
data +1 -1
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -5,9 +5,12 @@ import os
 import csv
 import random
 import pandas as pd
 import gradio as gr
 from article import ARTICLE
 from utils import *
 import scipy.io.wavfile as wavf
 from huggingface_hub import Repository, upload_file
@@ -169,6 +172,40 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
         next_number_image = f'number/best.gif'
         return output_string,next_number_image,number_history,next_number,done_recording,default_record
 def display_records():
     repo.git_pull()
     REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
@@ -261,16 +298,17 @@ with block:
             save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
-        with gr.TabItem('Listen') as listen_tab:
             gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
             display_html =  gr.HTML("""<div style="color: green">
-                <p> ⌛ Please wait. Loading dataset... </p>
                 </div>
                """)
             #listen = gr.Button("Listen")
-            listen_tab.select(display_records,inputs=[],outputs=display_html)
     gr.Markdown(ARTICLE)
 block.launch()

 import csv
 import random
 import pandas as pd
+import numpy as np
 import gradio as gr
+from collections import Counter
 from article import ARTICLE
 from utils import *
+import matplotlib.pyplot as plt
 import scipy.io.wavfile as wavf
 from huggingface_hub import Repository, upload_file
         next_number_image = f'number/best.gif'
         return output_string,next_number_image,number_history,next_number,done_recording,default_record
+def show_records():
+    """Build the dashboard: a headline HTML snippet and a per-language bar chart.
+
+    Pulls the latest state of the dataset repository, reads the first
+    metadata record of every entry found under its ``data`` directory and
+    counts the samples per ``language_name``.
+
+    Returns:
+        A ``(html, plt)`` tuple: an HTML string announcing how many samples
+        were collected, and the ``matplotlib.pyplot`` module whose current
+        figure holds the horizontal bar chart.
+    """
+    repo.git_pull()
+    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
+    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR,f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []
+    metadata_all = [read_json_lines(os.path.join(f,'metadata.jsonl'))[0] for f in repo_recordings]
+    lang_dict = Counter(m['language_name'] for m in metadata_all)
+    # Keep an explicit 'All others' row in the chart; nothing is counted into it here.
+    lang_dict.update({'All others':0})
+    all_langs = list(lang_dict)
+    langs_count = [lang_dict[k] for k in all_langs]
+    # pyplot draws on a process-wide "current figure". Clear it first so the
+    # bars, labels and title from earlier calls do not pile up on every call.
+    plt.clf()
+    plt.barh(all_langs, langs_count)
+    plt.ylabel("Language")
+    plt.xlabel('Number of audio samples')
+    plt.title('Distribution of audio samples over languages')
+    # The opening <div> is closed explicitly so the snippet is well-formed HTML.
+    html = f"""<div class="infoPoint">
+                <h1> Hooray! We have collected {len(metadata_all)} samples!</h1>
+                </div>
+                """
+    return html,plt
 def display_records():
     repo.git_pull()
     REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
             save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
+        with gr.TabItem('Dashboard') as listen_tab:
             gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
             display_html =  gr.HTML("""<div style="color: green">
+                <p> ⌛ Please wait. Loading dashboard... </p>
                 </div>
                """)
+            plot = gr.Plot(type="matplotlib")
             #listen = gr.Button("Listen")
+            listen_tab.select(show_records,inputs=[],outputs=[display_html,plot])
     gr.Markdown(ARTICLE)
 block.launch()

data CHANGED Viewed

	@@ -1 +1 @@
1	- Subproject commit c10367e2eb0d27b88a70eeba0258400ea0e22469


1	+ Subproject commit c252b9acd77ce54411e803ecf5c66bfeafb1b887

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 pandas
 scipy
-pycountry

 pandas
 scipy
+pycountry
+numpy
+matplotlib