harshith1411 committed on
Commit
92f18c3
·
verified ·
1 Parent(s): f3cc838

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -56
app.py CHANGED
@@ -2,95 +2,94 @@ import gradio as gr
2
  import torch
3
  import yt_dlp
4
  import os
5
- import subprocess
6
- import json
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
  import moviepy.editor as mp
9
- import langdetect
10
  import uuid
 
 
11
 
12
- # Load model
13
  model_path = "Qwen/Qwen2.5-7B-Instruct"
14
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
15
- model = AutoModelForCausalLM.from_pretrained(
16
- model_path, torch_dtype=torch.float16, trust_remote_code=True
17
- ).cuda().eval()
18
 
 
19
  def generate_unique_filename(extension):
20
  return f"{uuid.uuid4()}{extension}"
21
 
 
22
  def download_youtube_audio(url):
23
- output_path = generate_unique_filename(".wav")
24
  ydl_opts = {
25
  'format': 'bestaudio/best',
26
- 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
27
  'outtmpl': output_path,
28
- 'keepvideo': False,
29
  }
30
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
31
  ydl.download([url])
32
- return output_path
 
 
 
 
 
 
 
33
 
34
- def transcribe_audio(file_path):
35
- output_file = generate_unique_filename(".json")
36
- command = [
37
- "insanely-fast-whisper",
38
- "--file-name", file_path,
39
- "--device-id", "0",
40
- "--model-name", "openai/whisper-large-v3",
41
- "--task", "transcribe",
42
- "--timestamp", "chunk",
43
- "--transcript-path", output_file
44
- ]
45
- subprocess.run(command, check=True)
46
- with open(output_file, "r") as f:
47
- transcription = json.load(f)
48
- os.remove(output_file)
49
- return transcription.get("text", "")
50
 
51
- def generate_summary(transcription):
52
- if not transcription.strip():
53
- return "Error: No transcription available to summarize."
54
- detected_language = langdetect.detect(transcription)
55
- prompt = f"Summarize the following text in the detected language ({detected_language}):\n{transcription[:1000]}"
56
  response, _ = model.chat(tokenizer, prompt, history=[])
57
  return response
58
 
 
59
  def process_youtube(url):
60
  if not url:
61
- return "Error: Please enter a valid YouTube URL.", ""
62
- try:
63
- audio_file = download_youtube_audio(url)
64
- transcription = transcribe_audio(audio_file)
65
- os.remove(audio_file)
66
- return transcription, ""
67
- except Exception as e:
68
- return f"Error processing YouTube: {str(e)}", ""
69
 
70
- def process_uploaded_video(video_path):
71
- try:
72
- transcription = transcribe_audio(video_path)
73
- return transcription, ""
74
- except Exception as e:
75
- return f"Error processing video: {str(e)}", ""
 
 
 
76
 
77
- demo = gr.Blocks()
78
- with demo:
79
- gr.Markdown("## 🎥 Video Summarization Tool")
 
80
  with gr.Tabs():
81
  with gr.TabItem("📤 Upload Video"):
82
- video_input = gr.File()
83
  video_button = gr.Button("Process Video")
 
84
  with gr.TabItem("🔗 YouTube Link"):
85
- url_input = gr.Textbox()
86
  url_button = gr.Button("Process URL")
87
 
88
- transcription_output = gr.Textbox(label="Transcription", lines=10)
89
- summary_output = gr.Textbox(label="Summary", lines=10)
90
- summary_button = gr.Button("Generate Summary")
91
 
92
  video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
93
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
94
- summary_button.click(generate_summary, inputs=[transcription_output], outputs=[summary_output])
95
 
96
  demo.launch()
 
2
  import torch
3
  import yt_dlp
4
  import os
 
 
 
5
  import moviepy.editor as mp
6
+ import whisper
7
  import uuid
8
+ import langdetect
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
 
11
+ # Load LLM Model
12
  model_path = "Qwen/Qwen2.5-7B-Instruct"
13
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
14
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
15
+ model.eval()
 
16
 
17
+ # Function to generate a unique filename
18
  def generate_unique_filename(extension):
19
  return f"{uuid.uuid4()}{extension}"
20
 
21
+ # Function to download audio from a YouTube video
22
  def download_youtube_audio(url):
23
+ output_path = generate_unique_filename(".mp3")
24
  ydl_opts = {
25
  'format': 'bestaudio/best',
26
+ 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
27
  'outtmpl': output_path,
 
28
  }
29
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
30
  ydl.download([url])
31
+ return output_path if os.path.exists(output_path) else None
32
+
33
+ # Function to extract audio from a video file
34
+ def extract_audio(video_path):
35
+ video = mp.VideoFileClip(video_path)
36
+ audio_path = generate_unique_filename(".mp3")
37
+ video.audio.write_audiofile(audio_path)
38
+ return audio_path
39
 
40
+ # Function to transcribe audio using Whisper
41
+ def transcribe_audio(audio_path):
42
+ model = whisper.load_model("base")
43
+ result = model.transcribe(audio_path)
44
+ return result["text"]
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ # Function to summarize text using LLM
47
+ def summarize_text(text):
48
+ detected_language = langdetect.detect(text)
49
+ prompt = f"Summarize the following text in 150-300 words in {detected_language}: {text[:300000]}..."
 
50
  response, _ = model.chat(tokenizer, prompt, history=[])
51
  return response
52
 
53
+ # Gradio function to process YouTube links
54
  def process_youtube(url):
55
  if not url:
56
+ return "Please enter a YouTube URL.", ""
57
+ audio_path = download_youtube_audio(url)
58
+ if not audio_path:
59
+ return "Error downloading YouTube audio.", ""
60
+ transcription = transcribe_audio(audio_path)
61
+ summary = summarize_text(transcription)
62
+ os.remove(audio_path)
63
+ return transcription, summary
64
 
65
+ # Gradio function to process uploaded videos
66
+ def process_uploaded_video(video):
67
+ if not video:
68
+ return "No video uploaded.", ""
69
+ audio_path = extract_audio(video)
70
+ transcription = transcribe_audio(audio_path)
71
+ summary = summarize_text(transcription)
72
+ os.remove(audio_path)
73
+ return transcription, summary
74
 
75
+ # Gradio UI
76
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
77
+ gr.Markdown("# 🎥 Video Summarizer")
78
+
79
  with gr.Tabs():
80
  with gr.TabItem("📤 Upload Video"):
81
+ video_input = gr.Video(label="Upload Video")
82
  video_button = gr.Button("Process Video")
83
+
84
  with gr.TabItem("🔗 YouTube Link"):
85
+ url_input = gr.Textbox(label="Enter YouTube URL")
86
  url_button = gr.Button("Process URL")
87
 
88
+ with gr.Row():
89
+ transcription_output = gr.Textbox(label="📝 Transcription", lines=10)
90
+ summary_output = gr.Textbox(label="📊 Summary", lines=10)
91
 
92
  video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
93
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
 
94
 
95
  demo.launch()