mirage / voice_processor.py
MacBook pro
feat(voice): add voice processor skeleton and integrate timing into audio metrics
fc4f80f
"""Voice Processor Skeleton.
Phase: B3
Provides a minimal singleton VoiceProcessor with a lazy load() and a
process_pcm_int16 method. For now it only measures timing and returns
pass-through audio.
Future expansion hooks:
- VAD / segmentation
- Feature extraction (MFCCs, log-mel)
- Model inference (ASR, voice conversion, TTS, etc.)
- Streaming state management
The design keeps the API intentionally small so upstream code can remain
stable while internals evolve.
"""
from __future__ import annotations
import threading
import time
from dataclasses import dataclass
from typing import Optional
@dataclass
class VoiceResult:
"""Container for voice processing output.
For now, just echoes the PCM input.
"""
pcm: memoryview # zero-copy view of processed PCM int16 data
sample_rate: int
# Future: add tokens, text, features, etc.
class VoiceProcessor:
_instance: Optional["VoiceProcessor"] = None
_instance_lock = threading.Lock()
def __init__(self) -> None:
self._loaded = False
self._load_lock = threading.Lock()
# Placeholder for model / pipeline objects
self._models_ready = False
# ------------- Singleton Access -------------
@classmethod
def get(cls) -> "VoiceProcessor":
if cls._instance is None:
with cls._instance_lock:
if cls._instance is None: # double-checked
cls._instance = cls()
return cls._instance
# ------------- Lifecycle -------------
def load(self) -> None:
"""Lazy load models / resources.
Keep it extremely fast right now. Simulate a trivial setup only
on first call.
"""
if self._loaded:
return
with self._load_lock:
if self._loaded:
return
# Simulate minimal setup work (no sleep to keep fast)
self._models_ready = True
self._loaded = True
# ------------- Processing -------------
def process_pcm_int16(self, pcm: bytes | bytearray | memoryview, sample_rate: int) -> tuple[memoryview, float]:
"""Process an int16 PCM chunk.
Returns a tuple of (processed_pcm_memoryview, elapsed_ms).
Currently pass-through.
"""
if not self._loaded:
self.load()
start = time.time() * 1000.0
# Pass-through: we could copy but we prefer zero-copy memoryview
mv = memoryview(pcm)
# Placeholder for future signal chain
end = time.time() * 1000.0
return mv, end - start
# Export singleton accessor
voice_processor = VoiceProcessor.get()