# Spaces: Running on Zero
| import gradio as gr | |
| from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor | |
| import librosa | |
| import spaces | |
# Hub checkpoint identifier shared by the processor and the model.
MODEL_ID = "Qwen/Qwen2-Audio-7B-Instruct"

# Both objects are created once at import time and reused by every request.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Qwen2AudioForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",
)
# On ZeroGPU hardware a GPU is only attached while a function decorated with
# ``spaces.GPU`` is running, so the inference entry point must carry it.
@spaces.GPU
def run_qwen2audio(audio_path: str, instruction: str) -> str:
    """Run Qwen2-Audio on one audio file and return the decoded reply.

    Args:
        audio_path: Filesystem path of the uploaded audio. A falsy value
            (``None`` or ``""``) means nothing was uploaded.
        instruction: Text prompt sent together with the audio. When it is
            ``None`` or blank, the audio is sent on its own.

    Returns:
        The generated text, or a short message asking for an upload when
        ``audio_path`` is empty.
    """
    if not audio_path:
        return "Please upload an audio file."

    # The audio entry only makes the chat template emit its audio
    # placeholder; the waveform itself is handed to the processor below.
    content = [{"type": "audio", "audio_url": audio_path}]
    if instruction and instruction.strip():
        content.append({"type": "text", "text": instruction})
    conversation = [{"role": "user", "content": content}]

    text = processor.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=False,
    )

    # Resample to the rate the feature extractor is configured for.
    target_sr = processor.feature_extractor.sampling_rate
    audio, _ = librosa.load(audio_path, sr=target_sr)

    inputs = processor(
        text=text,
        audio=[audio],
        # Stating the rate lets the feature extractor check it instead of
        # silently assuming the waveform already matches.
        sampling_rate=target_sr,
        return_tensors="pt",
        padding=True,
    )
    inputs = inputs.to(model.device)

    output_ids = model.generate(**inputs, max_new_tokens=4096)
    # Drop the prompt tokens so only the newly generated part is decoded.
    output_ids = output_ids[:, inputs.input_ids.size(1):]
    decoded = processor.batch_decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]
# UI: one row with two columns -- inputs plus the run button in the first,
# the model response in the second.
with gr.Blocks(title="Qwen2-Audio Demo") as demo:
    gr.Markdown("# Qwen2-Audio Demo")
    gr.Markdown("Upload audio and run an instruction with Qwen2-Audio.")

    with gr.Row():
        with gr.Column():
            # type="filepath" hands the handler a path string.
            audio_input = gr.Audio(type="filepath", label="Upload Audio")
            instruction = gr.Textbox(
                label="Instruction",
                value="Transcribe the audio.",
            )
            submit_btn = gr.Button("Run", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(label="Response", lines=12)

    # Clicking "Run" sends (audio path, instruction) to the model and shows
    # the returned string in the response box.
    submit_btn.click(
        fn=run_qwen2audio,
        inputs=[audio_input, instruction],
        outputs=output_text,
    )
if __name__ == "__main__":
    # Enable the request queue first, then start the server with no public
    # share link and with server-side rendering switched off.
    queued_demo = demo.queue()
    queued_demo.launch(share=False, ssr_mode=False)