Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,16 +21,16 @@ summarization_model_names = [
|
|
| 21 |
|
| 22 |
# Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
|
| 23 |
summarizer = None
|
| 24 |
-
|
| 25 |
max_tokens = None
|
| 26 |
|
| 27 |
|
| 28 |
# Function to load the selected model
|
| 29 |
def load_summarization_model(model_name):
|
| 30 |
-
global summarizer, tokenizer, max_tokens
|
| 31 |
try:
|
| 32 |
summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
|
| 33 |
-
|
| 34 |
config = AutoConfig.from_pretrained(model_name)
|
| 35 |
|
| 36 |
if hasattr(config, 'max_position_embeddings'):
|
|
@@ -51,7 +51,7 @@ def summarize_text(input, min_length, max_length):
|
|
| 51 |
if summarizer is None:
|
| 52 |
return "No model loaded!"
|
| 53 |
|
| 54 |
-
input_tokens = tokenizer.encode(input, return_tensors="pt")
|
| 55 |
num_tokens = input_tokens.shape[1]
|
| 56 |
if num_tokens > max_tokens:
|
| 57 |
return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
|
|
@@ -79,11 +79,9 @@ tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_di
|
|
| 79 |
translation_model = M2M100ForConditionalGeneration.from_pretrained(
|
| 80 |
pretrained_model, cache_dir=cache_dir)
|
| 81 |
|
| 82 |
-
transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
|
| 83 |
-
clasification = pipeline(
|
| 84 |
-
"audio-classification",
| 85 |
-
model="anton-l/xtreme_s_xlsr_300m_minds14",
|
| 86 |
-
)
|
| 87 |
def language_names(json_path):
|
| 88 |
with open(json_path, 'r') as json_file:
|
| 89 |
data = json.load(json_file)
|
|
@@ -97,13 +95,13 @@ def get_name(label):
|
|
| 97 |
name = label2name[iso_3]
|
| 98 |
return name
|
| 99 |
|
| 100 |
-
def audio_a_text(audio):
|
| 101 |
-
text = transcription(audio)["text"]
| 102 |
-
return text
|
| 103 |
|
| 104 |
-
def text_to_sentimient(audio):
|
| 105 |
-
#text = transcription(audio)["text"]
| 106 |
-
return clasification(audio)
| 107 |
|
| 108 |
lang_id = {
|
| 109 |
"Afrikaans": "af",
|
|
@@ -229,6 +227,7 @@ with demo:
|
|
| 229 |
text = gr.Textbox()
|
| 230 |
#gr.Markdown("Speech analyzer")
|
| 231 |
#audio = gr.Audio(type="filepath", label = "Upload a file")
|
|
|
|
| 232 |
model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
|
| 233 |
load_message = gr.Textbox(label="Load Status", interactive=False)
|
| 234 |
b1 = gr.Button("Load Model")
|
|
@@ -249,6 +248,7 @@ with demo:
|
|
| 249 |
# inputs=[
|
| 250 |
# source_lang])
|
| 251 |
#b1 = gr.Button("convert to text")
|
|
|
|
| 252 |
b3 = gr.Button("translate")
|
| 253 |
b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
|
| 254 |
#b1.click(audio_a_text, inputs=audio, outputs=text)
|
|
|
|
| 21 |
|
| 22 |
# Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
|
| 23 |
summarizer = None
|
| 24 |
+
tokenizer_sum = None
|
| 25 |
max_tokens = None
|
| 26 |
|
| 27 |
|
| 28 |
# Function to load the selected model
|
| 29 |
def load_summarization_model(model_name):
|
| 30 |
+
global summarizer, tokenizer_sum, max_tokens
|
| 31 |
try:
|
| 32 |
summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
|
| 33 |
+
tokenizer_sum = AutoTokenizer.from_pretrained(model_name)
|
| 34 |
config = AutoConfig.from_pretrained(model_name)
|
| 35 |
|
| 36 |
if hasattr(config, 'max_position_embeddings'):
|
|
|
|
| 51 |
if summarizer is None:
|
| 52 |
return "No model loaded!"
|
| 53 |
|
| 54 |
+
input_tokens = tokenizer_sum.encode(input, return_tensors="pt")
|
| 55 |
num_tokens = input_tokens.shape[1]
|
| 56 |
if num_tokens > max_tokens:
|
| 57 |
return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
|
|
|
|
| 79 |
translation_model = M2M100ForConditionalGeneration.from_pretrained(
|
| 80 |
pretrained_model, cache_dir=cache_dir)
|
| 81 |
|
| 82 |
+
#transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
|
| 83 |
+
#clasification = pipeline("audio-classification",model="anton-l/xtreme_s_xlsr_300m_minds14",)
|
| 84 |
+
|
|
|
|
|
|
|
| 85 |
def language_names(json_path):
|
| 86 |
with open(json_path, 'r') as json_file:
|
| 87 |
data = json.load(json_file)
|
|
|
|
| 95 |
name = label2name[iso_3]
|
| 96 |
return name
|
| 97 |
|
| 98 |
+
#def audio_a_text(audio):
|
| 99 |
+
# text = transcription(audio)["text"]
|
| 100 |
+
#return text
|
| 101 |
|
| 102 |
+
#def text_to_sentimient(audio):
|
| 103 |
+
# #text = transcription(audio)["text"]
|
| 104 |
+
# return clasification(audio)
|
| 105 |
|
| 106 |
lang_id = {
|
| 107 |
"Afrikaans": "af",
|
|
|
|
| 227 |
text = gr.Textbox()
|
| 228 |
#gr.Markdown("Speech analyzer")
|
| 229 |
#audio = gr.Audio(type="filepath", label = "Upload a file")
|
| 230 |
+
|
| 231 |
model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
|
| 232 |
load_message = gr.Textbox(label="Load Status", interactive=False)
|
| 233 |
b1 = gr.Button("Load Model")
|
|
|
|
| 248 |
# inputs=[
|
| 249 |
# source_lang])
|
| 250 |
#b1 = gr.Button("convert to text")
|
| 251 |
+
|
| 252 |
b3 = gr.Button("translate")
|
| 253 |
b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
|
| 254 |
#b1.click(audio_a_text, inputs=audio, outputs=text)
|