khateeb_standalone

Sleeping

App Files Community

Bisher committed on Apr 17

Commit

17ddd97

verified ·

1 Parent(s): 7b8a9a4

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -65

app.py CHANGED Viewed

@@ -4,6 +4,10 @@ import jiwer
 import os
 import time
 import warnings
 # Suppress specific UserWarnings from jiwer related to empty strings
 warnings.filterwarnings("ignore", message="Reference is empty.*", category=UserWarning)
@@ -13,16 +17,29 @@ warnings.filterwarnings("ignore", message="Hypothesis is empty.*", category=User
 DIACRITIZATION_API_URL = "Bisher/CATT.diacratization"
 TRANSCRIPTION_API_URL = "gh-kaka22/diacritic_level_arabic_transcription"
-# --- Gradio API Clients ---
-# It's good practice to initialize clients outside the functions
-# if the app runs continuously, but be mindful of potential state issues
-# or connection timeouts in long-running deployments. For simplicity here,
-# we might re-initialize, though a single initialization is often preferred.
 def get_diacritization_client():
     """Initializes and returns the client for the text diacritization API."""
     try:
-        # Added timeout for robustness
         return Client(DIACRITIZATION_API_URL, download_files=True)
     except Exception as e:
         print(f"Error initializing diacritization client: {e}")
@@ -31,7 +48,6 @@ def get_diacritization_client():
 def get_transcription_client():
     """Initializes and returns the client for the audio transcription API."""
     try:
-        # Added timeout for robustness
         return Client(TRANSCRIPTION_API_URL, download_files=True)
     except Exception as e:
         print(f"Error initializing transcription client: {e}")
@@ -52,31 +68,26 @@ def diacritize_text_api(text_to_diacritize):
     """
     if not text_to_diacritize or not text_to_diacritize.strip():
         error_msg = "Please enter some text to diacritize."
-        # Return the error message twice
         return error_msg, error_msg
     client = get_diacritization_client()
     if not client:
         error_msg = "Error: Could not connect to the diacritization service."
-        # Return the error message twice
         return error_msg, error_msg
     try:
         print(f"Sending text to diacritization API: {text_to_diacritize}")
         result = client.predict(
-            model_type="Encoder-Only",  # Or 'Encoder-Decoder' if preferred
             input_text=text_to_diacritize,
             api_name="/predict"
         )
         print(f"Received diacritized text: {result}")
-        # Ensure result is a string before returning
         result_str = str(result) if result is not None else "Error: Received empty response from diacritization service."
-        # Return the result twice
         return result_str, result_str
     except Exception as e:
         print(f"Error during text diacritization API call: {e}")
         error_msg = f"Error during diacritization: {e}"
-        # Return the error message twice
         return error_msg, error_msg
 def transcribe_audio_api(audio_filepath):
@@ -92,7 +103,6 @@ def transcribe_audio_api(audio_filepath):
     if not audio_filepath:
         return "Error: Please provide an audio recording or file."
-    # Check if file exists and is accessible
     if not os.path.exists(audio_filepath):
          return f"Error: Audio file not found at {audio_filepath}"
@@ -102,14 +112,11 @@ def transcribe_audio_api(audio_filepath):
     try:
         print(f"Sending audio file to transcription API: {audio_filepath}")
-        # Use handle_file to manage the audio file for the API call
         result = client.predict(
             audio=handle_file(audio_filepath),
             api_name="/predict"
         )
         print(f"Received transcript: {result}")
-        # The API might return more structure, adapt if needed. Assuming it returns the text directly.
-        # Example: if result is {'text': '...'}, use result['text']
         if isinstance(result, dict) and 'text' in result:
              transcript = result['text']
         elif isinstance(result, str):
@@ -118,17 +125,40 @@ def transcribe_audio_api(audio_filepath):
              print(f"Unexpected transcription result format: {result}")
              return "Error: Unexpected format received from transcription service."
-        # Ensure transcript is a string
         return str(transcript) if transcript is not None else "Error: Received empty response from transcription service."
     except Exception as e:
         print(f"Error during audio transcription API call: {e}")
-        # Provide more specific error feedback if possible
         return f"Error during transcription: {e}"
 def calculate_metrics(reference, hypothesis):
     """
     Calculates Word Error Rate (WER) and Diacritic Error Rate (DER).
     Args:
         reference (str): The original diacritized text.
@@ -140,45 +170,53 @@ def calculate_metrics(reference, hypothesis):
     # Ensure inputs are strings before proceeding
     if not isinstance(reference, str):
         print(f"Error: Reference input is not a string (type: {type(reference)}). Value: {reference}")
-        reference = "" # Default to empty string to avoid downstream errors
     if not isinstance(hypothesis, str):
         print(f"Error: Hypothesis input is not a string (type: {type(hypothesis)}). Value: {hypothesis}")
-        hypothesis = "" # Default to empty string
-    # Handle empty strings to avoid jiwer warnings/errors if not suppressed
     ref_strip = reference.strip()
     hyp_strip = hypothesis.strip()
-    if not ref_strip and not hyp_strip:
-        return 0.0, 0.0 # Both empty, 0% error
-    if not ref_strip:
-        print("Warning: Reference text is empty.")
-        # WER/DER are typically 1.0 (or inf) if reference is empty and hypothesis is not.
-        return 1.0, 1.0
-    # Note: If hypothesis is empty but reference is not, jiwer calculates WER=1.0, which is correct.
     try:
         # 1. Calculate Word Error Rate (WER)
-        wer = jiwer.wer(reference, hypothesis)
-        # 2. Calculate Diacritic Error Rate (DER)
-        #    - Treat each character (including diacritics) as a token.
-        #    - Join characters with spaces to make jiwer treat them as "words".
-        ref_chars = ' '.join(list(reference))
-        hyp_chars = ' '.join(list(hypothesis))
-        # Need to handle potential empty strings after join for jiwer
-        if not ref_chars.strip() and not hyp_chars.strip():
-             der = 0.0
-        elif not ref_chars.strip():
-             der = 1.0
         else:
-             der = jiwer.wer(ref_chars, hyp_chars)
-        return round(wer, 4), round(der, 4)
     except Exception as e:
         print(f"Error calculating metrics: {e}")
         return None, None
@@ -194,30 +232,23 @@ def process_audio_and_compare(audio_input, original_diacritized_text):
                der (float | None): Diacritic Error Rate or None if error.
     """
     print("Processing audio and comparing...")
-    # Check if original_diacritized_text is valid
     if not original_diacritized_text or not isinstance(original_diacritized_text, str) or original_diacritized_text.startswith("Error:"):
         error_msg = "Error: Valid reference diacritized text not available. Please diacritize text first."
         print(error_msg)
-        # Return default/error values for all outputs
         return error_msg, None, None
-    # --- 1. Transcribe Audio ---
-    # Gradio provides the audio data (e.g., filepath for upload/mic)
     transcript = transcribe_audio_api(audio_input)
     if not isinstance(transcript, str) or transcript.startswith("Error:"):
-        # If transcription failed, return the error and None for metrics
         error_msg = transcript if isinstance(transcript, str) else "Error: Transcription failed with non-string output."
         print(error_msg)
         return error_msg, None, None
-    # --- 2. Calculate Metrics ---
     wer, der = calculate_metrics(original_diacritized_text, transcript)
     if wer is None or der is None:
         print("Metrics calculation failed.")
-        # Return transcript but indicate metric failure
-        return transcript, None, None
     print(f"Comparison complete. WER: {wer}, DER: {der}")
     return transcript, wer, der
@@ -231,11 +262,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         1.  Enter undiacritized Arabic text and click **Diacritize Text**.
         2.  Read the generated **Diacritized Text** aloud and record it using the microphone or upload an audio file.
         3.  Click **Transcribe and Compare** to get the transcript and see the WER/DER scores compared to the original diacritized text.
         """
     )
-    # Store the original diacritized text for comparison later
-    original_diacritized_state = gr.State("") # Initialize state
     with gr.Row():
         with gr.Column(scale=1):
@@ -243,20 +275,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                 label="1. Enter Undiacritized Arabic Text",
                 placeholder="مثال: السلام عليكم",
                 lines=3,
-                text_align="right", # Align text right for Arabic
             )
             diacritize_button = gr.Button("Diacritize Text")
             diacritized_text_output = gr.Textbox(
                 label="2. Diacritized Text (Reference)",
                 lines=3,
-                interactive=False, # User shouldn't edit this directly
                 text_align="right",
             )
         with gr.Column(scale=1):
             audio_input = gr.Audio(
                 sources=["microphone", "upload"],
-                type="filepath", # Get the path to the saved audio file
                 label="3. Record or Upload Audio of Reading Diacritized Text",
             )
             transcribe_button = gr.Button("Transcribe and Compare")
@@ -267,26 +299,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                 text_align="right",
             )
             with gr.Row():
-                 # Set precision for number outputs
                  wer_output = gr.Number(label="Word Error Rate (WER)", interactive=False, precision=4)
                  der_output = gr.Number(label="Diacritic Error Rate (DER)", interactive=False, precision=4)
     # --- Connect Components ---
-    # Action for Diacritize Button
     diacritize_button.click(
         fn=diacritize_text_api,
         inputs=[text_input],
-        # Expects two outputs now from the modified function
         outputs=[diacritized_text_output, original_diacritized_state]
     )
-    # Action for Transcribe Button
     transcribe_button.click(
         fn=process_audio_and_compare,
-        inputs=[audio_input, original_diacritized_state], # Pass audio and stored text
-        outputs=[transcript_output, wer_output, der_output] # Update transcript and metrics
     )
-app.launch(debug=True, share=True)

 import os
 import time
 import warnings
+# Import pyarabic for diacritic identification
+import pyarabic.araby as araby
 # Suppress specific UserWarnings from jiwer related to empty strings
 warnings.filterwarnings("ignore", message="Reference is empty.*", category=UserWarning)
 DIACRITIZATION_API_URL = "Bisher/CATT.diacratization"
 TRANSCRIPTION_API_URL = "gh-kaka22/diacritic_level_arabic_transcription"
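+# Define the set of Arabic diacritic (tashkeel) characters from pyarabic's constants.
+# NOTE: `import pyarabic.araby as araby` above is unconditional, so `araby` is always a
+# truthy module object here and the `else` fallback below is never taken as written.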
+if araby:
+    ARABIC_DIACRITICS = {
+        araby.FATHA,      # U+064E
+        araby.FATHATAN,   # U+064B
+        araby.DAMMA,      # U+064F
+        araby.DAMMATAN,   # U+064C
+        araby.KASRA,      # U+0650
+        araby.KASRATAN,   # U+064D
+        araby.SUKUN,      # U+0652
+        araby.SHADDA,     # U+0651
+        # Consider adding others if needed, e.g., araby.MADDA (U+0653), araby.HAMZA_ABOVE (U+0654), etc.
+        # Sticking to the main 8 Tashkeel for now.
+    }
+else:
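+    # Intended fallback for when pyarabic is unavailable (same 8 tashkeel code points).
+    # NOTE: unreachable as written - a failed `import pyarabic.araby` raises ImportError
+    # at import time; the import would need a try/except ImportError that sets
+    # `araby = None` for this branch to ever run.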
+    ARABIC_DIACRITICS = {'\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652'}
+# --- Gradio API Clients ---
 def get_diacritization_client():
     """Initializes and returns the client for the text diacritization API."""
     try:
         return Client(DIACRITIZATION_API_URL, download_files=True)
     except Exception as e:
         print(f"Error initializing diacritization client: {e}")
 def get_transcription_client():
     """Initializes and returns the client for the audio transcription API."""
     try:
         return Client(TRANSCRIPTION_API_URL, download_files=True)
     except Exception as e:
         print(f"Error initializing transcription client: {e}")
     """
     if not text_to_diacritize or not text_to_diacritize.strip():
         error_msg = "Please enter some text to diacritize."
         return error_msg, error_msg
     client = get_diacritization_client()
     if not client:
         error_msg = "Error: Could not connect to the diacritization service."
         return error_msg, error_msg
     try:
         print(f"Sending text to diacritization API: {text_to_diacritize}")
         result = client.predict(
+            model_type="Encoder-Only",
             input_text=text_to_diacritize,
             api_name="/predict"
         )
         print(f"Received diacritized text: {result}")
         result_str = str(result) if result is not None else "Error: Received empty response from diacritization service."
         return result_str, result_str
     except Exception as e:
         print(f"Error during text diacritization API call: {e}")
         error_msg = f"Error during diacritization: {e}"
         return error_msg, error_msg
 def transcribe_audio_api(audio_filepath):
     if not audio_filepath:
         return "Error: Please provide an audio recording or file."
     if not os.path.exists(audio_filepath):
          return f"Error: Audio file not found at {audio_filepath}"
     try:
         print(f"Sending audio file to transcription API: {audio_filepath}")
         result = client.predict(
             audio=handle_file(audio_filepath),
             api_name="/predict"
         )
         print(f"Received transcript: {result}")
         if isinstance(result, dict) and 'text' in result:
              transcript = result['text']
         elif isinstance(result, str):
              print(f"Unexpected transcription result format: {result}")
              return "Error: Unexpected format received from transcription service."
         return str(transcript) if transcript is not None else "Error: Received empty response from transcription service."
     except Exception as e:
         print(f"Error during audio transcription API call: {e}")
         return f"Error during transcription: {e}"
+def get_diacritics_sequence(text):
+    """
+    Extracts only the Arabic diacritic characters from a string.
+    Args:
+        text (str): The input string potentially containing diacritics.
+    Returns:
+        str: A space-separated string of diacritics found in the text.
+             Returns an empty string if no diacritics are found or input is not a string.
+    """
+    if not isinstance(text, str):
+        return "" # Return empty string for non-string input
+    # Check if pyarabic was imported successfully
+    if not araby and not ARABIC_DIACRITICS:
+         print("Warning: pyarabic not loaded, cannot reliably extract diacritics.")
+         return "" # Cannot proceed without diacritic definitions
+    diacritics_only = [char for char in text if char in ARABIC_DIACRITICS]
+    # Return as a space-separated string for jiwer.wer
+    return ' '.join(diacritics_only)
 def calculate_metrics(reference, hypothesis):
     """
     Calculates Word Error Rate (WER) and Diacritic Error Rate (DER).
+    DER is calculated based *only* on the sequence of diacritic marks.
     Args:
         reference (str): The original diacritized text.
     # Ensure inputs are strings before proceeding
     if not isinstance(reference, str):
         print(f"Error: Reference input is not a string (type: {type(reference)}). Value: {reference}")
+        reference = ""
     if not isinstance(hypothesis, str):
         print(f"Error: Hypothesis input is not a string (type: {type(hypothesis)}). Value: {hypothesis}")
+        hypothesis = ""
     ref_strip = reference.strip()
     hyp_strip = hypothesis.strip()
+    wer = None
+    der = None
     try:
         # 1. Calculate Word Error Rate (WER)
+        if not ref_strip and not hyp_strip:
+            wer = 0.0
+        elif not ref_strip:
+            wer = 1.0 # Reference empty, hypothesis not
         else:
+            # Jiwer handles hyp_strip being empty if ref_strip is not
+            wer = jiwer.wer(reference, hypothesis)
+        # 2. Calculate Diacritic Error Rate (DER) based *only* on diacritics
+        ref_diacritics = get_diacritics_sequence(reference)
+        hyp_diacritics = get_diacritics_sequence(hypothesis)
+        ref_diacritics_strip = ref_diacritics.strip()
+        hyp_diacritics_strip = hyp_diacritics.strip()
+        if not ref_diacritics_strip and not hyp_diacritics_strip:
+            der = 0.0 # No diacritics in either reference or hypothesis
+        elif not ref_diacritics_strip:
+            # Reference has no diacritics, but hypothesis does. DER is 1.0 (all hyp diacritics are insertions).
+            der = 1.0
+            print("Warning: No diacritics found in reference text for DER calculation.")
+        else:
+            # Reference has diacritics. Jiwer calculates WER on the diacritic sequences.
+            # If hypothesis has no diacritics, jiwer.wer will be 1.0 (all ref diacritics deleted).
+            der = jiwer.wer(ref_diacritics, hyp_diacritics) # Use the space-separated strings
+        # Round the results if they were calculated successfully
+        wer_rounded = round(wer, 4) if wer is not None else None
+        der_rounded = round(der, 4) if der is not None else None
+        return wer_rounded, der_rounded
     except Exception as e:
         print(f"Error calculating metrics: {e}")
+        # Return None if any exception occurred during calculation
         return None, None
                der (float | None): Diacritic Error Rate or None if error.
     """
     print("Processing audio and comparing...")
     if not original_diacritized_text or not isinstance(original_diacritized_text, str) or original_diacritized_text.startswith("Error:"):
         error_msg = "Error: Valid reference diacritized text not available. Please diacritize text first."
         print(error_msg)
         return error_msg, None, None
     transcript = transcribe_audio_api(audio_input)
     if not isinstance(transcript, str) or transcript.startswith("Error:"):
         error_msg = transcript if isinstance(transcript, str) else "Error: Transcription failed with non-string output."
         print(error_msg)
         return error_msg, None, None
     wer, der = calculate_metrics(original_diacritized_text, transcript)
     if wer is None or der is None:
         print("Metrics calculation failed.")
+        return transcript, None, None # Return transcript but None for metrics
     print(f"Comparison complete. WER: {wer}, DER: {der}")
     return transcript, wer, der
         1.  Enter undiacritized Arabic text and click **Diacritize Text**.
         2.  Read the generated **Diacritized Text** aloud and record it using the microphone or upload an audio file.
         3.  Click **Transcribe and Compare** to get the transcript and see the WER/DER scores compared to the original diacritized text.
+        **Note:** Requires `pyarabic` library (`pip install pyarabic`) for accurate Diacritic Error Rate (DER) calculation.
         """
     )
+    original_diacritized_state = gr.State("")
     with gr.Row():
         with gr.Column(scale=1):
                 label="1. Enter Undiacritized Arabic Text",
                 placeholder="مثال: السلام عليكم",
                 lines=3,
+                text_align="right",
             )
             diacritize_button = gr.Button("Diacritize Text")
             diacritized_text_output = gr.Textbox(
                 label="2. Diacritized Text (Reference)",
                 lines=3,
+                interactive=False,
                 text_align="right",
             )
         with gr.Column(scale=1):
             audio_input = gr.Audio(
                 sources=["microphone", "upload"],
+                type="filepath",
                 label="3. Record or Upload Audio of Reading Diacritized Text",
             )
             transcribe_button = gr.Button("Transcribe and Compare")
                 text_align="right",
             )
             with gr.Row():
                  wer_output = gr.Number(label="Word Error Rate (WER)", interactive=False, precision=4)
                  der_output = gr.Number(label="Diacritic Error Rate (DER)", interactive=False, precision=4)
     # --- Connect Components ---
     diacritize_button.click(
         fn=diacritize_text_api,
         inputs=[text_input],
         outputs=[diacritized_text_output, original_diacritized_state]
     )
     transcribe_button.click(
         fn=process_audio_and_compare,
+        inputs=[audio_input, original_diacritized_state],
+        outputs=[transcript_output, wer_output, der_output]
     )
+app.launch(debug=True, share=True)