Spaces: Build error
import torch
import torchaudio
from transformers import pipeline
import soundfile as sf
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import argparse
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import librosa
from huggingface_hub.hf_api import HfFolder
from transformers import MarianMTModel, MarianTokenizer
from elevenlabs import set_api_key
from elevenlabs import clone, generate, play

HfFolder.save_token('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')
access_token = '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'

# Hindi -> English translation model
tokenizer_en_hn = AutoTokenizer.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")
model_translate_en_hm = AutoModelForSeq2SeqLM.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")

# Hindi speech recognition (wav2vec2 CTC) model
processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
model_speech = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
def parse_transcription(wav_file):
    # load audio
    audio_input, sample_rate = librosa.load(wav_file, sr=16000)
    # pad input values and return pt tensor
    input_values = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_values
    # INFERENCE
    # retrieve logits & take argmax
    logits = model_speech(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    # transcribe
    transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
    return transcription
def hindi_to_english(text):
    inputs = tokenizer_en_hn.encode(
        text, return_tensors="pt", padding=True, max_length=512, truncation=True)
    outputs = model_translate_en_hm.generate(
        inputs, max_length=128, num_beams=4, early_stopping=True
    )
    # skip_special_tokens drops <pad>, <s>, </s> and the language-code tokens in one step
    translated = tokenizer_en_hn.decode(outputs[0], skip_special_tokens=True).strip().lower()
    return translated
def translate_english_to_hindi(input_text):
    # Load the pre-trained English to Hindi translation model and tokenizer
    model_name = "Helsinki-NLP/opus-mt-en-hi"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    # Tokenize the input text and generate translation
    inputs = tokenizer(input_text, return_tensors="pt", padding=True)
    translated_ids = model.generate(inputs.input_ids)
    # Decode the translated output
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
    return translated_text
def hindi_tts(text):
    audio = generate(
        text=text,
        voice="Customer Service",
        model="eleven_monolingual_v1"
    )
    return audio
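
For context, a minimal sketch of how these functions could be chained end to end. It assumes the pre-1.0 elevenlabs client (which exposes set_api_key/generate), an ELEVEN_API_KEY environment variable, and a hypothetical input file sample.wav; note that set_api_key is imported above but never called, so hindi_tts would fail at runtime without a step like this.

import os

if __name__ == "__main__":
    # Assumption: register the ElevenLabs key before calling generate()
    set_api_key(os.environ["ELEVEN_API_KEY"])

    # Hindi speech -> Hindi text -> English text
    hindi_text = parse_transcription("sample.wav")  # hypothetical audio file
    english_text = hindi_to_english(hindi_text)
    print("Transcription:", hindi_text)
    print("Translation:", english_text)

    # English text -> Hindi text -> Hindi speech (mp3 bytes from ElevenLabs)
    hindi_reply = translate_english_to_hindi(english_text)
    audio = hindi_tts(hindi_reply)
    with open("reply.mp3", "wb") as f:
        f.write(audio)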