#!/usr/bin/env python3
"""BirdNET Real-Time Audio Classification Script.

This script captures audio from the microphone and uses the BirdNET ONNX model
to predict bird species in real-time with continuous display updates.

Created using Copilot.
"""

from __future__ import annotations

import argparse
import os
import queue
import sys
import threading
import time
from collections import deque
from datetime import datetime
from itertools import islice

import numpy as np

# The inference and audio backends are imported defensively so that ``--help``,
# label parsing and score post-processing keep working on machines without
# them; a clear error is reported at the point the backend is actually needed.
try:
    import onnxruntime as ort
except ImportError:
    ort = None

try:
    import sounddevice as sd
except (ImportError, OSError):  # OSError: PortAudio shared library not found
    sd = None


class RealTimeBirdDetector:
    """Real-time bird detection using microphone input.

    Three threads cooperate while detection is running: the sounddevice
    callback pushes mono chunks onto ``audio_queue``; a processing thread
    folds them into ``audio_buffer`` and runs inference on the newest window;
    a display thread periodically redraws the results.
    """

    # Width, in characters, of the confidence bar drawn next to each species.
    BAR_WIDTH = 20

    def __init__(
        self,
        model_path: str = "model.onnx",
        labels_path: str = "BirdNET_GLOBAL_6K_V2.4_Labels.txt",
        sample_rate: int = 48000,
        window_duration: float = 3.0,
        confidence_threshold: float = 0.1,
        top_k: int = 5,
        update_interval: float = 1.0,
    ):
        """Initialize the real-time bird detector.

        Args:
            model_path: Path to the ONNX model file
            labels_path: Path to the species labels file
            sample_rate: Audio sample rate (48kHz for BirdNET)
            window_duration: Duration of each analysis window in seconds
            confidence_threshold: Minimum confidence for detections
            top_k: Number of top predictions to display
            update_interval: How often the display is refreshed (seconds)

        Raises:
            RuntimeError: If the model or the labels file cannot be loaded.
        """
        self.model_path = model_path
        self.labels_path = labels_path
        self.sample_rate = sample_rate
        self.window_duration = window_duration
        self.window_size = int(sample_rate * window_duration)
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.update_interval = update_interval

        # Rolling sample buffer holding up to two analysis windows.
        self.audio_buffer = deque(maxlen=self.window_size * 2)
        self.audio_queue = queue.Queue()

        # Detection results (shared between processing and display threads).
        self.current_detections = []
        self.detection_history = deque(maxlen=100)  # Keep last 100 detections
        self.running = False
        self._state_lock = threading.Lock()

        # Load model and labels
        self._load_model()
        self._load_labels()

    def _load_model(self) -> None:
        """Load the ONNX model and cache the name of its first input.

        Raises:
            RuntimeError: If onnxruntime is unavailable or the model fails to
                load; the underlying exception is chained as ``__cause__``.
        """
        if ort is None:
            raise RuntimeError(
                "onnxruntime is not installed; it is required to load "
                f"{self.model_path}"
            )
        try:
            print(f"Loading ONNX model: {self.model_path}")
            self.session = ort.InferenceSession(self.model_path)

            # Get model info
            input_info = self.session.get_inputs()[0]
            output_info = self.session.get_outputs()[0]
            # Cached so the inference hot path does not query it every window.
            self._input_name = input_info.name
            print(f"Model input: {input_info.name}, shape: {input_info.shape}")
            print(f"Model output: {output_info.name}, shape: {output_info.shape}")
        except Exception as e:
            raise RuntimeError(
                f"Error loading ONNX model {self.model_path}: {e}"
            ) from e

    @staticmethod
    def _parse_label(line: str) -> str:
        """Return the display name for one non-empty labels-file line.

        Lines look like ``Scientific name_Common Name``; everything after the
        first underscore is returned. Lines without an underscore are returned
        unchanged.
        """
        _, separator, common_name = line.partition("_")
        return common_name if separator else line

    def _load_labels(self) -> None:
        """Load species labels from ``labels_path`` into ``self.labels``.

        Blank lines are skipped, so label indices follow the order of the
        non-empty lines in the file.

        Raises:
            RuntimeError: If the file cannot be read or decoded as UTF-8.
        """
        try:
            print(f"Loading labels from: {self.labels_path}")
            with open(self.labels_path, "r", encoding="utf-8") as f:
                stripped = (line.strip() for line in f)
                self.labels = [self._parse_label(line) for line in stripped if line]
            print(f"Loaded {len(self.labels)} species labels")
        except (OSError, UnicodeError) as e:
            raise RuntimeError(
                f"Error loading labels file {self.labels_path}: {e}"
            ) from e

    def _audio_callback(
        self, indata: np.ndarray, frames: int, time_info, status
    ) -> None:
        """Receive one block from the input stream and queue it as mono.

        Args:
            indata: Samples for this block; 2-D input is averaged over axis 1.
            frames: Number of frames in the block (unused).
            time_info: Timing information from the stream (unused).
            status: Stream status flags; printed when truthy.
        """
        if status:
            print(f"Audio status: {status}")

        # Both branches produce a fresh array, so the queued chunk never
        # aliases the buffer that was handed to the callback.
        mono = indata.mean(axis=1) if indata.ndim > 1 else indata.copy()
        self.audio_queue.put(mono)

    def _latest_window(self) -> np.ndarray:
        """Return the newest ``window_size`` buffered samples as float32.

        If fewer samples are buffered, all of them are returned.
        """
        start = max(0, len(self.audio_buffer) - self.window_size)
        count = len(self.audio_buffer) - start
        # islice avoids materialising the whole buffer as a temporary list.
        return np.fromiter(
            islice(self.audio_buffer, start, None), dtype=np.float32, count=count
        )

    def _process_audio_buffer(self) -> None:
        """Consume queued audio and analyze the newest window while running."""
        while self.running:
            try:
                # Get audio data from queue (with timeout so that a cleared
                # ``running`` flag is noticed promptly).
                chunk = self.audio_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            try:
                self.audio_buffer.extend(chunk)
                # Drain whatever accumulated while the previous inference was
                # running, so a slow model analyzes fresh audio instead of
                # falling further and further behind the microphone.
                while True:
                    try:
                        self.audio_buffer.extend(self.audio_queue.get_nowait())
                    except queue.Empty:
                        break

                if len(self.audio_buffer) >= self.window_size:
                    self._analyze_audio_window(self._latest_window())
            except Exception as e:
                print(f"Error processing audio: {e}")

    def _detections_from_scores(self, scores) -> list[dict]:
        """Turn per-class scores into the ``top_k`` detections above threshold.

        Args:
            scores: One score per class, indexed like ``self.labels``.

        Returns:
            Up to ``top_k`` dicts with keys ``species``, ``confidence`` and
            ``timestamp``, ordered by descending confidence. Classes with no
            matching label are reported as ``Class <index>``.
        """
        scores = np.asarray(scores).ravel()
        candidates = np.flatnonzero(scores > self.confidence_threshold)
        # Stable sort on the negated scores keeps ties in class-index order.
        order = np.argsort(-scores[candidates], kind="stable")
        timestamp = datetime.now()  # one timestamp per analyzed window
        return [
            {
                "species": (
                    self.labels[int(idx)]
                    if idx < len(self.labels)
                    else f"Class {idx}"
                ),
                "confidence": float(scores[idx]),
                "timestamp": timestamp,
            }
            for idx in candidates[order][: self.top_k]
        ]

    def _analyze_audio_window(self, audio_data: np.ndarray) -> None:
        """Run the model on one audio window and publish the detections.

        Errors are reported on stdout rather than raised so that a single bad
        window does not stop the processing thread.
        """
        try:
            # Ensure correct format and add the batch dimension.
            input_data = np.expand_dims(
                np.asarray(audio_data, dtype=np.float32), axis=0
            )

            outputs = self.session.run(None, {self._input_name: input_data})
            predictions = np.asarray(outputs[0])
            scores = predictions[0] if predictions.ndim > 1 else predictions

            detections = self._detections_from_scores(scores)

            # The display thread reads both containers; update them together.
            with self._state_lock:
                self.current_detections = detections
                self.detection_history.extend(detections)
        except Exception as e:
            print(f"Error during inference: {e}")

    @classmethod
    def _confidence_bar(cls, confidence: float, width: int | None = None) -> str:
        """Return a fixed-width bar filled in proportion to *confidence*.

        The filled length is clamped to ``[0, width]`` so that scores outside
        ``[0, 1]`` cannot change the bar's overall width.
        """
        width = cls.BAR_WIDTH if width is None else width
        filled = max(0, min(width, int(confidence * width)))
        return "█" * filled + "░" * (width - filled)

    def _render_screen(self) -> str:
        """Build the text for one display refresh."""
        with self._state_lock:
            current = list(self.current_detections)
            recent = list(self.detection_history)[-10:]

        lines = [
            "🎤 BirdNET Real-Time Detection",
            "=" * 50,
            f"Listening... (Confidence > {self.confidence_threshold:.2f})",
            f"Time: {datetime.now().strftime('%H:%M:%S')}",
            "",
        ]

        # Current detections
        if current:
            lines.append(f"🐦 Current Detections (Top {len(current)}):")
            lines.append("-" * 40)
            for i, detection in enumerate(current, 1):
                confidence = detection["confidence"]
                lines.append(f"{i:2d}. {detection['species']}")
                lines.append(
                    f" {self._confidence_bar(confidence)} {confidence:.4f}"
                )
        else:
            lines.append("🔍 No detections above threshold...")
        lines.append("")

        # Recent activity, newest first
        if recent:
            lines.append("📊 Recent Activity (Last 10):")
            lines.append("-" * 40)
            for detection in reversed(recent):
                stamp = detection["timestamp"].strftime("%H:%M:%S")
                lines.append(
                    f"{stamp} - {detection['species']} "
                    f"({detection['confidence']:.3f})"
                )
        lines.append("")
        lines.append("Press Ctrl+C to stop")
        return "\n".join(lines)

    def _display_results(self) -> None:
        """Redraw the detection screen every ``update_interval`` seconds."""
        while self.running:
            try:
                # Clear screen (works on most terminals)
                os.system("clear" if os.name == "posix" else "cls")
                print(self._render_screen())
            except Exception as e:
                print(f"Display error: {e}")
            # Sleep outside the try so a failing redraw cannot busy-loop.
            time.sleep(self.update_interval)

    def start_detection(self) -> bool:
        """Run real-time detection until interrupted or stopped.

        Blocks the calling thread. Ctrl+C or a call to ``stop_detection`` from
        another thread ends the loop. Errors are reported on stdout rather
        than raised.

        Returns:
            True if detection ran and stopped normally, False if it ended
            because of an error (for example, no usable input device).
        """
        succeeded = False
        workers = []
        try:
            print("Starting real-time bird detection...")
            print(f"Sample rate: {self.sample_rate} Hz")
            print(f"Window size: {self.window_duration} seconds")
            print(f"Confidence threshold: {self.confidence_threshold}")
            print("Press Ctrl+C to stop\n")

            if sd is None:
                raise RuntimeError(
                    "sounddevice (and the PortAudio library) is required "
                    "for microphone input"
                )

            self.running = True
            workers = [
                threading.Thread(target=self._process_audio_buffer, daemon=True),
                threading.Thread(target=self._display_results, daemon=True),
            ]
            for worker in workers:
                worker.start()

            # Start audio input stream
            with sd.InputStream(
                callback=self._audio_callback,
                channels=1,
                samplerate=self.sample_rate,
                blocksize=int(self.sample_rate * 0.1),  # 100ms blocks
                dtype=np.float32,
            ):
                print("🎤 Microphone active - listening for birds...")
                # Keep main thread alive
                try:
                    while self.running:
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pass
            succeeded = True
        except Exception as e:
            print(f"Error during detection: {e}")
        finally:
            self.running = False
            # Give the workers a moment to notice the flag so the display
            # thread does not clear the screen after the final message. They
            # are daemons, so a slow one cannot block interpreter exit.
            for worker in workers:
                worker.join(timeout=1.0)
            print("\n🛑 Detection stopped.")
        return succeeded

    def stop_detection(self) -> None:
        """Ask a running ``start_detection`` loop and its workers to stop."""
        self.running = False


def main() -> int:
    """Parse command-line arguments and run real-time detection.

    Returns:
        Process exit status: 0 on success, 1 on any error.
    """
    parser = argparse.ArgumentParser(
        description="BirdNET Real-Time Audio Classification"
    )
    parser.add_argument(
        "--model", default="model.onnx", help="Path to the ONNX model file"
    )
    parser.add_argument(
        "--labels",
        default="BirdNET_GLOBAL_6K_V2.4_Labels.txt",
        help="Path to the labels file",
    )
    parser.add_argument(
        "--confidence",
        type=float,
        default=0.1,
        help="Minimum confidence threshold for detections (default: 0.1)",
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=5,
        help="Number of top predictions to show (default: 5)",
    )
    parser.add_argument(
        "--update-interval",
        type=float,
        default=1.0,
        help="Display update interval in seconds (default: 1.0)",
    )
    parser.add_argument(
        "--list-devices", action="store_true", help="List available audio input devices"
    )
    args = parser.parse_args()

    # List audio devices if requested
    if args.list_devices:
        if sd is None:
            print(
                "Error: sounddevice (and the PortAudio library) is not available.",
                file=sys.stderr,
            )
            return 1
        print("Available audio input devices:")
        print(sd.query_devices())
        return 0

    # Check if files exist
    if not os.path.exists(args.model):
        print(f"Error: Model file '{args.model}' not found.", file=sys.stderr)
        return 1
    if not os.path.exists(args.labels):
        print(f"Error: Labels file '{args.labels}' not found.", file=sys.stderr)
        return 1

    try:
        detector = RealTimeBirdDetector(
            model_path=args.model,
            labels_path=args.labels,
            confidence_threshold=args.confidence,
            top_k=args.top_k,
            update_interval=args.update_interval,
        )
        return 0 if detector.start_detection() else 1
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    # sys.exit rather than the site-provided ``exit`` helper, which is meant
    # for interactive use and is absent when Python runs without ``site``.
    sys.exit(main())