Spaces:

voicing-ai
/

ParlerVoice

Running on Zero

ParlerVoice / parlervoice_infer /description.py

Mohammed Zeeshan Parvez

feat: initialize ParlerVoice Hugging Face Space

4089011 about 2 months ago

5.06 kB

	from .constants import GENDER_MAP


	def build_advanced_description(
	    speaker: str,
	    pace: str = "moderate speed",
	    noise: str = "very clear",
	    reverberation: str = "very close-sounding",
	    monotony: str = "expressive and animated",
	    pitch: str = "moderate pitch",
	    emotion: str = "neutral",
	    tone: str = "neutral",
	    add_context: bool = True,
	) -> str:
	    """Build a multi-sentence natural-language description of a voice.

	    The result is assembled from up to four parts, separated by single spaces:

	    1. How ``speaker`` sounds in terms of ``tone`` and ``emotion``.
	    2. One sentence each for ``pitch``, ``pace`` and ``monotony``.
	    3. Recording quality (``noise``) and acoustics (``reverberation``).
	    4. An optional closing sentence mentioning ``tone`` (``add_context``).

	    Args:
	        speaker: Speaker name; used verbatim in the text and looked up in
	            ``GENDER_MAP`` to choose pronouns. Speakers missing from the map
	            are treated as "male"; any mapped value other than "male" gets
	            "she"/"her".
	        pace: Speaking-rate keyword (e.g. "slowly", "moderate speed", "fast").
	        noise: Noise keyword. "very clear" and "almost no noise" produce the
	            pristine-quality phrase; any other value is embedded verbatim.
	        reverberation: Reverberation keyword (e.g. "very close-sounding").
	        monotony: Expressiveness keyword (e.g. "monotone").
	        pitch: Pitch keyword (e.g. "low-pitch", "moderate pitch").
	        emotion: Emotion keyword; unrecognised values are used verbatim.
	        tone: Tone keyword; unrecognised values are used verbatim.
	        add_context: When true, append the closing "overall vocal
	            presentation" sentence.

	    Returns:
	        The assembled description. Unrecognised ``pitch``, ``pace``,
	        ``monotony`` or ``reverberation`` keywords are omitted cleanly,
	        without leaving doubled spaces or a space before the final period.
	    """
	    gender = GENDER_MAP.get(speaker, "male")
	    is_male = gender == "male"
	    # Every pronoun use below is sentence-initial, so capitalise once here.
	    subject_pronoun = "He" if is_male else "She"
	    possessive_pronoun = "His" if is_male else "Her"

	    tone_phrases = {
	        "serious": "serious and focused",
	        "dramatic": "dramatic and compelling",
	        "casual": "casual and relaxed",
	        "professional": "professional and articulate",
	        "storytelling": "narrative and engaging",
	        "narrative": "storytelling and captivating",
	        "emotional": "emotional and expressive",
	        "energetic": "energetic and lively",
	        "loving": "soft, warm, and affectionate",
	    }
	    emotion_phrases = {
	        "neutral": "a neutral, balanced composure",
	        "sad": "a sad, melancholic undertone",
	        "happy": "a happy, cheerful and uplifting energy",
	        "angry": "an angry, intense and forceful emotion",
	        "excited": "an excited, enthusiastic and vibrant spirit",
	        "confused": "a confused, uncertain and questioning demeanor",
	        "loving": "a loving, tender and affectionate emotion",
	        "casual": "a relaxed, friendly and easy-going mood",
	    }

	    tone_desc = tone_phrases.get(tone, tone)
	    emotion_desc = emotion_phrases.get(emotion, emotion)
	    # NOTE(review): "a" is ungrammatical before vowel-initial tone phrases
	    # ("a emotional ...", "a energetic ..."). Left unchanged because this
	    # text is a model prompt and its exact wording for recognised keywords
	    # may matter downstream -- confirm before switching to "an".
	    sentence1 = (
	        f"{speaker} speaks with a {tone_desc} manner, conveying {emotion_desc}."
	    )

	    # Each table maps a keyword to the sentence remainder that follows the
	    # pronoun ("He ..." for pitch, "His ..." for pace and monotony).
	    pitch_fragments = {
	        "very low-pitch": "possesses a very low pitch, creating deep resonance and gravitas.",
	        "low-pitch": "has a low pitch that sounds calm, grounded, and authoritative.",
	        "slightly low-pitch": "speaks with a slightly low pitch, adding subtle depth.",
	        "moderate pitch": "maintains a moderate pitch with natural vocal balance.",
	        "slightly high-pitch": "uses a slightly high pitch, enhancing expressiveness.",
	        "high-pitch": "speaks in a high pitch with bright, energetic quality.",
	        "very high-pitch": "has a very high pitch, creating animated intensity.",
	    }
	    pace_fragments = {
	        "very slowly": "delivery is very slow and methodical, emphasizing clarity.",
	        "slowly": "pace is slow and deliberate, creating contemplative rhythm.",
	        "slightly slowly": "pace is slightly measured, ensuring clear articulation.",
	        "moderate speed": "speaking rate is moderate and naturally flowing.",
	        "slightly fast": "pace is slightly brisk, maintaining engagement.",
	        "fast": "delivery is fast and dynamic with energetic momentum.",
	        "very fast": "pace is very rapid, creating urgency and excitement.",
	    }
	    monotony_fragments = {
	        "very monotone": "speech is very monotone with consistent, steady delivery.",
	        "monotone": "voice is monotone, maintaining even emotional range.",
	        "slightly expressive and animated": "voice shows subtle variation and life.",
	        "expressive and animated": "delivery is expressive with dynamic modulation.",
	        "very expressive and animated": "speech is highly animated and captivating.",
	    }

	    # Only recognised keywords contribute a sentence; skipping the rest
	    # (instead of joining empty strings) avoids doubled spaces.
	    voice_sentences = []
	    for pronoun, fragments, keyword in (
	        (subject_pronoun, pitch_fragments, pitch),
	        (possessive_pronoun, pace_fragments, pace),
	        (possessive_pronoun, monotony_fragments, monotony),
	    ):
	        fragment = fragments.get(keyword)
	        if fragment:
	            voice_sentences.append(f"{pronoun} {fragment}")
	    sentence2 = " ".join(voice_sentences)

	    if noise in ("very clear", "almost no noise"):
	        noise_desc = "The recording quality is pristine and professional-grade"
	    else:
	        noise_desc = f"The audio contains {noise}, adding environmental texture"

	    reverb_descriptions = {
	        "very distant-sounding": "with expansive, hall-like acoustics creating spacious depth",
	        "distant-sounding": "with noticeable spatial distance and ambient character",
	        "slightly distant-sounding": "with subtle room presence and mild spaciousness",
	        "slightly close-sounding": "with intimate proximity and warm presence",
	        "very close-sounding": "with immediate, close-mic intimacy and clarity",
	    }
	    reverb_desc = reverb_descriptions.get(reverberation, "")
	    # Without a reverb phrase, end the sentence right after the noise clause
	    # so no space is left dangling before the period.
	    sentence3 = f"{noise_desc} {reverb_desc}." if reverb_desc else f"{noise_desc}."

	    full_description = " ".join(
	        sentence for sentence in (sentence1, sentence2, sentence3) if sentence
	    ).strip()
	    if add_context:
	        full_description += (
	            f" The overall vocal presentation is coherent and well-suited for {tone} communication."
	        )
	    return full_description