# Spaces: Running
import json
import os
import re
import shutil
import tempfile
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse, parse_qs

import cv2
import emoji
import gradio as gr
import librosa
import moviepy.editor as mp
import numpy as np
import requests
import torch
import whisper
import yt_dlp
from moviepy.video.fx import resize
from textblob import TextBlob
from transformers import pipeline, AutoTokenizer, AutoModel
class AIVideoClipper:
    """Find and render short, high-engagement clips from a longer video.

    The pipeline is: transcribe with Whisper, score sliding windows of the
    transcript (sentiment, emotion, keyword and hook heuristics, audio
    features, length), keep the best non-overlapping windows, and render each
    one as a 1080x1920 clip with optional captions.
    """

    # Word -> emoji appended after that word in captions. Written as escapes
    # so the table survives being passed through non-UTF-8 tooling.
    _EMOJI_MAP = {
        'money': '\U0001F4B0', 'rich': '\U0001F4B0', 'dollar': '\U0001F4B5',    # money bag, banknote
        'love': '\u2764\uFE0F', 'heart': '\u2764\uFE0F', 'like': '\U0001F44D',  # red heart, thumbs up
        'fire': '\U0001F525', 'hot': '\U0001F525', 'amazing': '\U0001F525',     # fire
        'laugh': '\U0001F602', 'funny': '\U0001F602', 'lol': '\U0001F602',      # tears of joy
        'wow': '\U0001F631', 'omg': '\U0001F631', 'shocking': '\U0001F631',     # screaming face
        'cool': '\U0001F60E', 'awesome': '\U0001F60E', 'great': '\U0001F60E',   # sunglasses face
        'think': '\U0001F914', 'question': '\u2753', 'why': '\U0001F914',       # thinking, question mark
        'warning': '\u26A0\uFE0F', 'careful': '\u26A0\uFE0F', 'danger': '\u26A0\uFE0F',  # warning sign
        'success': '\u2705', 'win': '\U0001F3C6', 'winner': '\U0001F3C6',       # check mark, trophy
        'music': '\U0001F3B5', 'song': '\U0001F3B5', 'sound': '\U0001F50A',     # musical note, speaker
    }

    def __init__(self):
        """Load the Whisper and text-classification models and the scoring vocabularies."""
        print("Loading models...")
        self.whisper_model = whisper.load_model("base")  # Using base model for free tier
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        self.emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
        )
        # Viral keywords and patterns
        self.viral_keywords = [
            "wow", "amazing", "incredible", "unbelievable", "shocking", "surprise",
            "secret", "trick", "hack", "tip", "mistake", "fail", "success",
            "breakthrough", "discovery", "reveal", "expose", "truth", "lie",
            "before", "after", "transformation", "change", "upgrade", "improve",
            "money", "rich", "poor", "expensive", "cheap", "free", "save",
            "love", "hate", "angry", "happy", "sad", "funny", "laugh", "cry",
            "first time", "last time", "never", "always", "everyone", "nobody",
            "finally", "suddenly", "immediately", "instantly", "quickly",
        ]
        self.hook_patterns = [
            r"you won't believe",
            r"this will change",
            r"nobody talks about",
            r"the truth about",
            r"what happens when",
            r"here's what",
            r"this is why",
            r"the secret",
            r"watch this",
            r"wait for it",
        ]

    def download_youtube_video(self, url: str, temp_dir: str) -> Tuple[str, Dict]:
        """Download a YouTube video into ``temp_dir``.

        Args:
            url: YouTube video URL.
            temp_dir: Existing directory the file is written to.

        Returns:
            ``(video_path, metadata)`` where metadata holds ``title``,
            ``duration``, ``uploader``, ``view_count`` and ``upload_date``.

        Raises:
            ValueError: If ``url`` does not look like a YouTube URL.
            Exception: If extraction/download fails or the video is longer
                than one hour (the original error is chained as the cause).
        """
        print(f"Downloading YouTube video: {url}")
        # Validate YouTube URL
        if not self.is_valid_youtube_url(url):
            raise ValueError("Invalid YouTube URL. Please provide a valid YouTube video link.")
        # Configure yt-dlp options for free tier optimization
        ydl_opts = {
            'format': 'best[height<=720][ext=mp4]/best[ext=mp4]/best',  # Limit to 720p for performance
            'outtmpl': os.path.join(temp_dir, '%(title)s.%(ext)s'),
            'noplaylist': True,
            'extractaudio': False,
            'audioformat': 'mp3',
            'ignoreerrors': False,
            'no_warnings': False,
            'extract_flat': False,
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                # Extract info first so over-long videos are rejected before downloading.
                info = ydl.extract_info(url, download=False)
                # 'duration' can be present but None, so coerce before comparing.
                duration = info.get('duration') or 0
                if duration > 3600:  # 1 hour limit
                    raise ValueError("Video too long. Please use videos shorter than 1 hour.")
                ydl.download([url])
                # Ask yt-dlp for the name it actually wrote (it sanitizes titles).
                video_path = ydl.prepare_filename(info)

            video_title = info.get('title', 'video')
            if not os.path.exists(video_path):
                # Fall back to scanning the directory; sorted for a deterministic pick.
                downloaded_files = sorted(
                    f for f in os.listdir(temp_dir)
                    if f.endswith(('.mp4', '.mkv', '.webm'))
                )
                if downloaded_files:
                    video_path = os.path.join(temp_dir, downloaded_files[0])

            metadata = {
                'title': video_title,
                'duration': duration,
                'uploader': info.get('uploader', 'Unknown'),
                'view_count': info.get('view_count', 0),
                'upload_date': info.get('upload_date', 'Unknown'),
            }
            print(f"Successfully downloaded: {video_title}")
            return video_path, metadata
        except Exception as e:
            raise Exception(f"Failed to download YouTube video: {str(e)}") from e

    def is_valid_youtube_url(self, url: str) -> bool:
        """Return True if ``url`` matches the YouTube URL pattern (anchored at the start)."""
        youtube_regex = re.compile(
            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
        )
        return youtube_regex.match(url) is not None

    def extract_video_id(self, url: str) -> Optional[str]:
        """Return the 11-character video ID found in ``url``, or None.

        The ID must follow ``v=`` or a ``/`` and must not be followed by
        another ID character; otherwise the first 11 characters of a host such
        as ``youtube-nocookie.com`` would be mistaken for an ID.
        """
        match = re.search(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])', url)
        return match.group(1) if match else None

    def extract_audio_features(self, audio_path: str) -> Dict:
        """Extract audio features for engagement analysis.

        Args:
            audio_path: Path of the media file handed to ``librosa.load``.

        Returns:
            Dict of floats: ``tempo``, ``spectral_centroid_mean``,
            ``spectral_rolloff_mean``, ``mfcc_mean`` and ``energy_variance``.
        """
        y, sr = librosa.load(audio_path)
        # Extract features
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return {
            # tempo may be a scalar or a 1-element array depending on the
            # librosa version; normalise before converting.
            'tempo': float(np.atleast_1d(tempo)[0]),
            'spectral_centroid_mean': float(np.mean(spectral_centroids)),
            'spectral_rolloff_mean': float(np.mean(spectral_rolloff)),
            'mfcc_mean': float(np.mean(mfccs)),
            'energy_variance': float(np.var(librosa.feature.rms(y=y)[0])),
        }

    def transcribe_video(self, video_path: str) -> List[Dict]:
        """Transcribe the video and return segments with timestamps.

        Returns:
            One dict per Whisper segment with ``start``, ``end``, stripped
            ``text`` and ``words`` (empty list when absent).
        """
        print("Transcribing video...")
        result = self.whisper_model.transcribe(video_path, word_timestamps=True)
        return [
            {
                'start': segment['start'],
                'end': segment['end'],
                'text': segment['text'].strip(),
                'words': segment.get('words', []),
            }
            for segment in result["segments"]
        ]

    def calculate_virality_score(self, text: str, audio_features: Dict,
                                 segment_duration: float) -> float:
        """Score a transcript window for engagement potential.

        Args:
            text: Transcript text of the window.
            audio_features: Dict providing ``tempo`` and ``energy_variance``.
            segment_duration: Window length in seconds.

        Returns:
            Heuristic score, capped at 10.0.
        """
        score = 0.0
        text_lower = text.lower()

        # Sentiment analysis. Label casing differs between sentiment models,
        # so compare case-insensitively. truncation=True keeps long windows
        # within the model's input limit.
        sentiment = self.sentiment_analyzer(text, truncation=True)[0]
        sentiment_label = sentiment['label'].upper()
        if sentiment_label == 'POSITIVE' and sentiment['score'] > 0.8:
            score += 2.0
        elif sentiment_label == 'NEGATIVE' and sentiment['score'] > 0.8:
            score += 1.5

        # Emotion analysis
        emotion = self.emotion_analyzer(text, truncation=True)[0]
        high_engagement_emotions = ['surprise', 'excitement', 'anger', 'joy']
        if emotion['label'].lower() in high_engagement_emotions and emotion['score'] > 0.7:
            score += 2.0

        # Viral keywords (substring match, so inflected forms also count)
        score += 1.0 * sum(1 for keyword in self.viral_keywords if keyword in text_lower)

        # Hook patterns
        score += 3.0 * sum(1 for pattern in self.hook_patterns if re.search(pattern, text_lower))

        # Audio engagement features
        if audio_features['tempo'] > 120:  # Higher tempo = more engaging
            score += 1.0
        if audio_features['energy_variance'] > 0.01:  # Energy variation
            score += 1.0

        # Segment duration (30-60 seconds ideal for clips)
        if 25 <= segment_duration <= 65:
            score += 2.0
        elif 15 <= segment_duration <= 90:
            score += 1.0

        # Text length (not too short, not too long)
        word_count = len(text.split())
        if 20 <= word_count <= 100:
            score += 1.0

        return min(score, 10.0)  # Cap at 10

    def find_best_moments(self, segments: List[Dict], audio_features: Dict,
                          clip_duration: int = 30, max_clips: int = 5) -> List[Dict]:
        """Pick the highest-scoring, non-overlapping candidate clips.

        A candidate window starts at every segment and is extended with
        following segments until it reaches ``clip_duration`` seconds, never
        exceeding ``1.5 * clip_duration``.

        Args:
            segments: Transcript segments with ``start``, ``end`` and ``text``.
            audio_features: Passed through to ``calculate_virality_score``.
            clip_duration: Target clip length in seconds.
            max_clips: Maximum number of clips returned.

        Returns:
            Up to ``max_clips`` dicts (``start``, ``end``, ``text``,
            ``duration``, ``virality_score``, ``segments``), best first.
        """
        print("Analyzing segments for viral potential...")
        scored_segments = []
        for i, segment in enumerate(segments):
            # Group segments into potential clips
            clip_segments = [segment]
            current_duration = segment['end'] - segment['start']
            # Extend clip to reach desired duration
            for next_segment in segments[i + 1:]:
                if current_duration >= clip_duration:
                    break
                extended_duration = next_segment['end'] - segment['start']
                if extended_duration > clip_duration * 1.5:
                    break
                clip_segments.append(next_segment)
                current_duration = extended_duration

            combined_text = " ".join(s['text'] for s in clip_segments)
            virality_score = self.calculate_virality_score(
                combined_text, audio_features, current_duration
            )
            scored_segments.append({
                'start': segment['start'],
                'end': clip_segments[-1]['end'],
                'text': combined_text,
                'duration': current_duration,
                'virality_score': virality_score,
                'segments': clip_segments,
            })

        # Best first; the sort is stable so earlier windows win ties.
        scored_segments.sort(key=lambda x: x['virality_score'], reverse=True)

        # Greedily keep windows that do not overlap an already chosen one.
        final_segments = []
        for candidate in scored_segments:
            if len(final_segments) >= max_clips:
                break
            overlaps = any(
                candidate['start'] < chosen['end'] and candidate['end'] > chosen['start']
                for chosen in final_segments
            )
            if not overlaps:
                final_segments.append(candidate)
        return final_segments

    def add_emojis_to_text(self, text: str) -> str:
        """Append a matching emoji after each known word in ``text``.

        Matching is case-insensitive on whole words. The original casing and
        punctuation are preserved and each occurrence gets exactly one emoji.
        """
        def _with_emoji(match):
            word = match.group(0)
            symbol = self._EMOJI_MAP.get(word.lower())
            return f"{word} {symbol}" if symbol else word

        # Single pass, so repeated words are never decorated twice.
        return re.sub(r"\w+", _with_emoji, text)

    def create_clip(self, video_path: str, start_time: float, end_time: float,
                    text: str, output_path: str, add_subtitles: bool = True) -> str:
        """Render ``[start_time, end_time]`` of the video as a 1080x1920 clip.

        The source is scaled to fit inside the frame and, when it does not
        fill it, centred over a blurred, stretched copy of itself. If
        requested, ``text`` is overlaid as a caption for the whole clip.

        Returns:
            ``output_path``.
        """
        print(f"Creating clip: {start_time:.1f}s - {end_time:.1f}s")
        # Resize to 9:16 aspect ratio (1080x1920)
        target_width = 1080
        target_height = 1920

        video = mp.VideoFileClip(video_path).subclip(start_time, end_time)
        final_video = None
        try:
            # Largest uniform scale at which the whole frame still fits.
            scale = min(target_width / video.w, target_height / video.h)
            video_resized = video.resize(scale)

            if video_resized.h < target_height or video_resized.w < target_width:
                # Create blurred background
                background = video.resize((target_width, target_height))
                background = background.fl_image(
                    lambda frame: cv2.GaussianBlur(frame, (21, 21), 0)
                )
                # Overlay the main video in center
                final_video = mp.CompositeVideoClip(
                    [background, video_resized.set_position('center')],
                    size=(target_width, target_height),
                )
            else:
                final_video = video_resized

            # Add subtitles if requested
            if add_subtitles and text:
                text_with_emojis = self.add_emojis_to_text(text)
                txt_clip = mp.TextClip(
                    text_with_emojis,
                    fontsize=60,
                    color='white',
                    stroke_color='black',
                    stroke_width=3,
                    size=(target_width - 100, None),
                    method='caption',
                ).set_position(('center', 0.8), relative=True).set_duration(final_video.duration)
                final_video = mp.CompositeVideoClip([final_video, txt_clip])

            final_video.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                # Keep the scratch audio next to the output so concurrent
                # renders do not overwrite each other's temp file.
                temp_audiofile=os.path.splitext(output_path)[0] + '-temp-audio.m4a',
                remove_temp=True,
                fps=30,
                preset='ultrafast',  # Faster encoding for free tier
            )
        finally:
            # Release file handles even when rendering fails.
            if final_video is not None:
                final_video.close()
            video.close()
        return output_path
def process_video(input_type, video_file, youtube_url, clip_duration, num_clips, add_subtitles):
    """Main function to process a video and create clips.

    Args:
        input_type: ``"Upload Video File"`` or ``"YouTube URL"``.
        video_file: Uploaded file, either a path string or an object with a
            ``.name`` path attribute; used for the upload input type.
        youtube_url: Video URL; used for the YouTube input type.
        clip_duration: Target clip length in seconds.
        num_clips: Maximum number of clips to produce.
        add_subtitles: Whether to burn captions into the clips.

    Returns:
        ``(status_message, clip_paths, clip_info)``. On any failure the two
        lists are empty and the message describes the problem.
    """
    # Validate the inputs before paying for model loading.
    if input_type == "Upload Video File":
        if video_file is None:
            return "Please upload a video file.", [], []
    elif input_type == "YouTube URL":
        if not youtube_url or not youtube_url.strip():
            return "Please enter a YouTube URL.", [], []
    else:
        return "Please select an input method.", [], []

    try:
        clipper = AIVideoClipper()
        # Create temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            if input_type == "Upload Video File":
                # The file component is configured with type="filepath" and so
                # passes a plain string; tolerate file-like objects too.
                video_path = getattr(video_file, "name", video_file)
                video_metadata = {'title': 'Uploaded Video', 'source': 'upload'}
            else:
                try:
                    video_path, video_metadata = clipper.download_youtube_video(
                        youtube_url.strip(), temp_dir
                    )
                    video_metadata['source'] = 'youtube'
                except Exception as e:
                    return f"Error downloading YouTube video: {str(e)}", [], []

            if not video_path or not os.path.exists(video_path):
                return "Video file not found or invalid.", [], []

            # Extract audio features
            print("Extracting audio features...")
            audio_features = clipper.extract_audio_features(video_path)

            # Transcribe video
            segments = clipper.transcribe_video(video_path)
            if not segments:
                return "Could not transcribe video. Please check the audio quality.", [], []

            # Find best moments; the slider value may arrive as a float.
            best_moments = clipper.find_best_moments(segments, audio_features, clip_duration)
            best_moments = best_moments[:int(num_clips)]
            if not best_moments:
                return "No suitable clips found. Try adjusting parameters.", [], []

            # Create clips
            output_videos = []
            clip_info = []
            for i, moment in enumerate(best_moments):
                output_path = os.path.join(temp_dir, f"clip_{i+1}.mp4")
                try:
                    clipper.create_clip(
                        video_path,
                        moment['start'],
                        moment['end'],
                        moment['text'],
                        output_path,
                        add_subtitles,
                    )
                    # Move out of the temp dir before it is deleted. shutil.move
                    # also works when the temp dir is on another filesystem.
                    permanent_path = f"clip_{i+1}_{hash(video_path)}_{i}.mp4"
                    shutil.move(output_path, permanent_path)
                    output_videos.append(permanent_path)

                    preview = moment['text']
                    if len(preview) > 100:
                        preview = preview[:100] + "..."
                    clip_info.append({
                        'clip_number': i + 1,
                        'start_time': f"{moment['start']:.1f}s",
                        'end_time': f"{moment['end']:.1f}s",
                        'duration': f"{moment['duration']:.1f}s",
                        'virality_score': f"{moment['virality_score']:.2f}/10",
                        'text_preview': preview,
                        'source_video': video_metadata.get('title', 'Unknown'),
                    })
                except Exception as e:
                    # Best effort: one failed clip should not lose the others.
                    print(f"Error creating clip {i+1}: {str(e)}")
                    continue

            if not output_videos:
                # Every render failed; do not report that as success.
                return "Error processing video: no clips could be rendered.", [], []

            success_msg = (
                f"✅ Successfully created {len(output_videos)} clips from: "
                f"{video_metadata.get('title', 'video')}"
            )
            return success_msg, output_videos, clip_info
    except Exception as e:
        return f"Error processing video: {str(e)}", [], []
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the clipper.

    The left column holds the inputs (upload or YouTube URL, clip settings),
    the right column the status text and the generated clips, and a JSON panel
    below shows the per-clip analysis. Nothing is launched here.
    """
    with gr.Blocks(title="AI Video Clipper", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🎬 AI Video Clipper
            Transform your long videos into viral short clips automatically!
            Upload a video file or paste a YouTube URL and let AI find the most engaging moments.

            **Features:**
            - 🤖 AI-powered moment detection
            - 📱 Auto 9:16 aspect ratio conversion
            - 📝 Automatic subtitles with emojis
            - 📊 Virality scoring
            - 🎯 Multi-language support
            - 🔗 YouTube video download support
            """
        )
        with gr.Row():
            with gr.Column():
                # Input method selection
                input_type = gr.Radio(
                    choices=["Upload Video File", "YouTube URL"],
                    value="Upload Video File",
                    label="Choose Input Method",
                    interactive=True,
                )
                # Video file upload (conditional)
                video_input = gr.File(
                    label="Upload Video File",
                    file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
                    type="filepath",
                    visible=True,
                )
                # YouTube URL input (conditional)
                youtube_input = gr.Textbox(
                    label="YouTube URL",
                    placeholder="https://www.youtube.com/watch?v=...",
                    visible=False,
                    info="Paste any YouTube video URL (supports various formats)",
                )
                # Show example URLs. Bound to a name so the visibility handler
                # can target this exact component.
                url_examples = gr.Markdown(
                    """
                    **Supported URL formats:**
                    - `https://www.youtube.com/watch?v=VIDEO_ID`
                    - `https://youtu.be/VIDEO_ID`
                    - `https://www.youtube.com/embed/VIDEO_ID`
                    """,
                    visible=False,
                    elem_id="url_examples",
                )
                with gr.Row():
                    clip_duration = gr.Slider(
                        minimum=15,
                        maximum=90,
                        value=30,
                        step=5,
                        label="Target Clip Duration (seconds)",
                    )
                    num_clips = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=1,
                        label="Number of Clips to Generate",
                    )
                add_subtitles = gr.Checkbox(
                    label="Add Subtitles with Emojis",
                    value=True,
                )
                process_btn = gr.Button(
                    "🚀 Create Clips",
                    variant="primary",
                    size="lg",
                )
            with gr.Column():
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=3,
                )
                clips_output = gr.Gallery(
                    label="Generated Clips",
                    show_label=True,
                    elem_id="gallery",
                    columns=1,
                    rows=3,
                    height="auto",
                    allow_preview=True,
                    show_download_button=True,
                )
        with gr.Row():
            info_output = gr.JSON(
                label="Clip Analysis",
                visible=True,
            )

        # Dynamic input visibility
        def update_input_visibility(choice):
            """Show the upload widget or the URL widgets, depending on ``choice``."""
            use_upload = choice == "Upload Video File"
            return (
                gr.update(visible=use_upload),      # video_input
                gr.update(visible=not use_upload),  # youtube_input
                gr.update(visible=not use_upload),  # url_examples
            )

        input_type.change(
            update_input_visibility,
            inputs=[input_type],
            # Reference the existing component; constructing a new Markdown
            # here would leave the real examples block permanently hidden.
            outputs=[video_input, youtube_input, url_examples],
        )

        # Example videos section
        gr.Markdown("### 📺 Tips for Best Results:")
        gr.Markdown(
            """
            **📁 File Upload:**
            - Upload videos with clear speech (podcasts, interviews, tutorials work great!)
            - Supported formats: MP4, AVI, MOV, MKV, WebM
            - Maximum recommended duration: 2 hours

            **🔗 YouTube Videos:**
            - Any public YouTube video (no age restrictions)
            - Automatically downloads in optimal quality (720p max for performance)
            - Works with livestreams, premieres, and regular videos
            - Maximum duration: 1 hour for free tier

            **🎯 Content Tips:**
            - Longer videos (5+ minutes) provide more clip opportunities
            - Videos with engaging content and emotional moments score higher
            - Good audio quality improves transcription accuracy
            - Educational content, podcasts, and interviews work exceptionally well
            """
        )
        process_btn.click(
            process_video,
            inputs=[input_type, video_input, youtube_input, clip_duration, num_clips, add_subtitles],
            outputs=[status_output, clips_output, info_output],
        )
    return demo
# Entry point: build the UI and serve it on all interfaces, port 7860,
# without creating a public share link.
if __name__ == "__main__":
    demo = create_interface()
    launch_options = dict(server_name="0.0.0.0", server_port=7860, share=False)
    demo.launch(**launch_options)