Mohammed Zeeshan Parvez
feat: initialize ParlerVoice Hugging Face Space
4089011
from .constants import GENDER_MAP
def build_advanced_description(
    speaker: str,
    pace: str = "moderate speed",
    noise: str = "very clear",
    reverberation: str = "very close-sounding",
    monotony: str = "expressive and animated",
    pitch: str = "moderate pitch",
    emotion: str = "neutral",
    tone: str = "neutral",
    add_context: bool = True,
) -> str:
    """Compose a multi-sentence, natural-language description of a voice.

    The result is built from up to four parts, separated by single spaces:

    1. Who speaks, in what tone, and with what emotion.
    2. Pitch, pace and monotony sentences (each one only if its value is a
       recognised key).
    3. Recording quality (noise) and, if recognised, the reverberation.
    4. An optional closing sentence that mentions ``tone``.

    Args:
        speaker: Speaker name. Looked up in ``GENDER_MAP`` to pick pronouns;
            names that are not in the map are treated as ``"male"``, and any
            mapped value other than ``"male"`` selects "she"/"her".
        pace: Key into the pace table (e.g. ``"slowly"``, ``"fast"``).
        noise: ``"very clear"`` or ``"almost no noise"`` produce a fixed
            "pristine" phrase; any other value is embedded verbatim.
        reverberation: Key into the reverberation table
            (e.g. ``"very close-sounding"``).
        monotony: Key into the monotony table (e.g. ``"monotone"``).
        pitch: Key into the pitch table (e.g. ``"low-pitch"``).
        emotion: Key into the emotion table; unknown values are used verbatim.
        tone: Key into the tone table; unknown values are used verbatim.
        add_context: When true, append the closing "overall vocal
            presentation" sentence.

    Returns:
        The assembled description. Unrecognised ``pitch``, ``pace``,
        ``monotony`` or ``reverberation`` values are skipped without leaving
        doubled spaces or a space before the final period.
    """
    gender = GENDER_MAP.get(speaker, "male")
    is_male = gender == "male"
    # Every use below is sentence-initial, so store the capitalised forms.
    subject = "He" if is_male else "She"
    possessive = "His" if is_male else "Her"

    tone_phrases = {
        "serious": "serious and focused",
        "dramatic": "dramatic and compelling",
        "casual": "casual and relaxed",
        "professional": "professional and articulate",
        "storytelling": "narrative and engaging",
        "narrative": "storytelling and captivating",
        "emotional": "emotional and expressive",
        "energetic": "energetic and lively",
        "loving": "soft, warm, and affectionate",
    }
    emotion_phrases = {
        "neutral": "a neutral, balanced composure",
        "sad": "a sad, melancholic undertone",
        "happy": "a happy, cheerful and uplifting energy",
        "angry": "an angry, intense and forceful emotion",
        "excited": "an excited, enthusiastic and vibrant spirit",
        "confused": "a confused, uncertain and questioning demeanor",
        "loving": "a loving, tender and affectionate emotion",
        "casual": "a relaxed, friendly and easy-going mood",
    }
    # Unknown tone/emotion values fall through verbatim.
    tone_desc = tone_phrases.get(tone, tone)
    emotion_desc = emotion_phrases.get(emotion, emotion)
    # NOTE(review): vowel-initial tone phrases ("emotional", "energetic")
    # render as "a emotional ..." here. Left unchanged because downstream
    # consumers may depend on the exact wording -- confirm before fixing.
    intro = f"{speaker} speaks with a {tone_desc} manner, conveying {emotion_desc}."

    pitch_descriptions = {
        "very low-pitch": f"{subject} possesses a very low pitch, creating deep resonance and gravitas.",
        "low-pitch": f"{subject} has a low pitch that sounds calm, grounded, and authoritative.",
        "slightly low-pitch": f"{subject} speaks with a slightly low pitch, adding subtle depth.",
        "moderate pitch": f"{subject} maintains a moderate pitch with natural vocal balance.",
        "slightly high-pitch": f"{subject} uses a slightly high pitch, enhancing expressiveness.",
        "high-pitch": f"{subject} speaks in a high pitch with bright, energetic quality.",
        "very high-pitch": f"{subject} has a very high pitch, creating animated intensity.",
    }
    pace_descriptions = {
        "very slowly": f"{possessive} delivery is very slow and methodical, emphasizing clarity.",
        "slowly": f"{possessive} pace is slow and deliberate, creating contemplative rhythm.",
        "slightly slowly": f"{possessive} pace is slightly measured, ensuring clear articulation.",
        "moderate speed": f"{possessive} speaking rate is moderate and naturally flowing.",
        "slightly fast": f"{possessive} pace is slightly brisk, maintaining engagement.",
        "fast": f"{possessive} delivery is fast and dynamic with energetic momentum.",
        "very fast": f"{possessive} pace is very rapid, creating urgency and excitement.",
    }
    monotony_descriptions = {
        "very monotone": f"{possessive} speech is very monotone with consistent, steady delivery.",
        "monotone": f"{possessive} voice is monotone, maintaining even emotional range.",
        "slightly expressive and animated": f"{possessive} voice shows subtle variation and life.",
        "expressive and animated": f"{possessive} delivery is expressive with dynamic modulation.",
        "very expressive and animated": f"{possessive} speech is highly animated and captivating.",
    }
    # Keep only the sentences whose key was recognised; joining empty
    # placeholders is what used to produce doubled spaces.
    delivery_sentences = [
        sentence
        for sentence in (
            pitch_descriptions.get(pitch),
            pace_descriptions.get(pace),
            monotony_descriptions.get(monotony),
        )
        if sentence
    ]

    if noise in ("very clear", "almost no noise"):
        noise_desc = "The recording quality is pristine and professional-grade"
    else:
        noise_desc = f"The audio contains {noise}, adding environmental texture"

    reverb_descriptions = {
        "very distant-sounding": "with expansive, hall-like acoustics creating spacious depth",
        "distant-sounding": "with noticeable spatial distance and ambient character",
        "slightly distant-sounding": "with subtle room presence and mild spaciousness",
        "slightly close-sounding": "with intimate proximity and warm presence",
        "very close-sounding": "with immediate, close-mic intimacy and clarity",
    }
    reverb_desc = reverb_descriptions.get(reverberation)
    # Without a recognised reverberation, end the sentence right after the
    # noise phrase instead of emitting "... ." with a stray space.
    if reverb_desc:
        recording_sentence = f"{noise_desc} {reverb_desc}."
    else:
        recording_sentence = f"{noise_desc}."

    full_description = " ".join(
        [intro, *delivery_sentences, recording_sentence]
    ).strip()

    if add_context:
        full_description += (
            f" The overall vocal presentation is coherent and well-suited for {tone} communication."
        )
    return full_description