Spaces:

raygiles3
/

speech_analyzer

Runtime error

App Files Files Community

raygiles3 committed on Jul 19, 2024

Commit

419f5db

verified ·

1 Parent(s): 6fe1666

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -71

app.py CHANGED Viewed

@@ -1,73 +1,49 @@
-import torch
-import os
 import gradio as gr
-#from langchain.llms import OpenAI
-from langchain.llms import HuggingFaceHub
-from transformers import pipeline
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-# Connection details for the watsonx.ai service; only the regional endpoint is set here.
-my_credentials = {"url": "https://us-south.ml.cloud.ibm.com"}
-# Text-generation settings handed to the foundation model.
-params = {
-    # The maximum number of tokens that the model can generate in a single run.
-    GenParams.MAX_NEW_TOKENS: 800,
-    # Controls the randomness of token generation: lower values are more
-    # deterministic, higher values introduce more randomness.
-    GenParams.TEMPERATURE: 0.1,
-}
-# Llama 2 70B chat model, wrapped so LangChain can drive it as an LLM.
-LLAMA2_model = Model(
-    model_id="meta-llama/llama-2-70b-chat",
-    credentials=my_credentials,
-    params=params,
-    project_id="skills-network",
-)
-llm = WatsonxLLM(LLAMA2_model)
-#######------------- Prompt Template-------------####
-# Llama 2 chat layout: the system instruction sits between <<SYS>> and
-# <</SYS>> *inside* the [INST] ... [/INST] turn, and the user message follows
-# the system block. The earlier template nested [INST] inside <<SYS>>.
-temp = """<s>[INST] <<SYS>>
-List the key points with details from the context:
-<</SYS>>
-
-The context : {context} [/INST]"""
-# {context} is filled with the transcript text at run time.
-pt = PromptTemplate(
-    input_variables=["context"],
-    template=temp,
-)
-prompt_to_LLAMA2 = LLMChain(llm=llm, prompt=pt)
-#######------------- Speech2text-------------####
-# Shared speech-recognition pipeline; built on first use and then reused so
-# the Whisper weights are not reloaded for every request.
-_asr_pipeline = None
-def _get_asr_pipeline():
-    """Return the shared Whisper pipeline, creating it on first use."""
-    global _asr_pipeline
-    if _asr_pipeline is None:
-        _asr_pipeline = pipeline(
-            "automatic-speech-recognition",
-            model="openai/whisper-tiny.en",
-            chunk_length_s=30,
-        )
-    return _asr_pipeline
-def transcript_audio(audio_file):
-    """Transcribe an uploaded audio file and list its key points.
-
-    Args:
-        audio_file: Path of the uploaded audio file, or None when nothing
-            was uploaded.
-
-    Returns:
-        The text produced by the Llama 2 chain for the transcript.
-
-    Raises:
-        gr.Error: If no audio file was provided.
-    """
-    if audio_file is None:
-        raise gr.Error("Please upload an audio file first.")
-    # Transcribe the audio file, then pass the transcript to the LLM chain.
-    transcript_txt = _get_asr_pipeline()(audio_file, batch_size=8)["text"]
-    return prompt_to_LLAMA2.run(transcript_txt)
-#######------------- Gradio-------------####
-# One uploaded audio file in, one text box out.
-audio_input = gr.Audio(sources="upload", type="filepath")
-output_text = gr.Textbox()
-iface = gr.Interface(
-    fn=transcript_audio,
-    inputs=audio_input,
-    outputs=output_text,
-    title="Audio Transcription App",
-    description="Upload the audio file",
-)
-# Listen on all interfaces on port 7860.
-iface.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import torch
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
+# Speech-to-text: Whisper processor (feature extractor + tokenizer) and model.
+whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
+whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
+# Summarization: AutoModelForSeq2SeqLM only loads encoder-decoder checkpoints.
+# Llama 2 is decoder-only, so loading it through this class fails at startup.
+# T5 is an encoder-decoder model trained with the "summarize: " task prefix
+# that summarize_text() prepends to its input.
+SUMMARIZATION_CHECKPOINT = "t5-base"
+summarization_model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZATION_CHECKPOINT)
+summarization_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_CHECKPOINT)
+# Function to transcribe audio
+def transcribe_audio(audio_file):
+    """Transcribe an uploaded audio file to text with the Whisper model.
+
+    Args:
+        audio_file: Path to the audio file, or a file-like object whose
+            ``name`` attribute holds that path.
+
+    Returns:
+        The transcribed text.
+
+    Raises:
+        gr.Error: If no audio file was provided.
+    """
+    if audio_file is None:
+        raise gr.Error("Please upload an audio file first.")
+    # Accept both a plain path and a temp-file object.
+    audio_path = getattr(audio_file, "name", audio_file)
+    # WhisperProcessor expects decoded waveform samples and exposes its output
+    # as ``input_features``, so it cannot be handed a file path directly.
+    # The ASR pipeline decodes the file itself and splits long recordings
+    # into 30-second chunks. It reuses the already-loaded model and processor
+    # parts, so no weights are loaded again here.
+    asr = pipeline(
+        "automatic-speech-recognition",
+        model=whisper_model,
+        tokenizer=whisper_processor.tokenizer,
+        feature_extractor=whisper_processor.feature_extractor,
+        chunk_length_s=30,
+    )
+    return asr(audio_path)["text"].strip()
+# Function to summarize text
+def summarize_text(text):
+    """Return a summary of ``text`` generated by the summarization model.
+
+    The input is prefixed with "summarize: " and truncated to 512 tokens.
+    Generation uses 4-beam search with early stopping and an output length
+    between 40 and 150 tokens.
+    """
+    prompt_ids = summarization_tokenizer.encode(
+        "summarize: " + text,
+        return_tensors="pt",
+        max_length=512,
+        truncation=True,
+    )
+    generation_options = {
+        "max_length": 150,
+        "min_length": 40,
+        "length_penalty": 2.0,
+        "num_beams": 4,
+        "early_stopping": True,
+    }
+    generated = summarization_model.generate(prompt_ids, **generation_options)
+    # Only the first (best) sequence is decoded.
+    return summarization_tokenizer.decode(generated[0], skip_special_tokens=True)
+# Gradio callback
+def process_audio(audio_file):
+    """Transcribe ``audio_file`` and summarize it.
+
+    Returns:
+        A ``(transcription, summary)`` tuple, one value per output textbox.
+    """
+    transcript = transcribe_audio(audio_file)
+    return transcript, summarize_text(transcript)
+# Gradio UI
+iface = gr.Interface(
+    fn=process_audio,
+    # The component takes ``sources`` (a list) and only supports the "numpy"
+    # and "filepath" value types; "filepath" hands the callback the path of
+    # the uploaded file.
+    inputs=gr.Audio(sources=["upload"], type="filepath"),
+    outputs=[
+        gr.Textbox(label="Transcription"),
+        gr.Textbox(label="Summary"),
+    ],
+    title="Audio Transcription and Summarization",
+    description="Upload an audio file to transcribe and summarize the conversation.",
+)
+# Launch the app
+iface.launch()