# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License
"""Gesture provider processor.
A frame processor that automatically manages facial expressions for the ACE avatar
based on conversation events and speaking states. Helps create more natural interactions
by adding contextual facial gestures during conversations.
For available facial gestures, see the ACE Animgraph documentation:
https://docs.nvidia.com/ace/animation-graph-microservice/latest/default-animation-graph.html
"""
import random
from loguru import logger
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
StartInterruptionFrame,
UserStoppedSpeakingFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from nvidia_pipecat.frames.action import StartFacialGestureBotActionFrame
class FacialGestureProviderProcessor(FrameProcessor):
    """Manages automated facial gestures for the ACE avatar during conversations.

    This processor monitors conversation state changes and triggers appropriate facial
    expressions in response to events like the user finishing speaking or interruptions
    occurring. It includes configurable randomization to make gestures feel natural.

    Input Frames:
        - UserStoppedSpeakingFrame (consumed): Triggered when user finishes speaking
        - StartInterruptionFrame (consumed): Triggered during conversation interruptions
        - BotStartedSpeakingFrame (consumed): Indicates bot began speaking
        - BotStoppedSpeakingFrame (consumed): Indicates bot finished speaking

    Output Frames:
        - StartFacialGestureBotActionFrame: Triggers facial expressions on the avatar

    Args:
        user_stopped_speaking_gesture (str): Facial gesture to trigger when user stops speaking.
            See ACE Animgraph docs for available gestures. Defaults to "Taunt".
        start_interruption_gesture (str): Facial gesture to trigger during interruptions.
            See ACE Animgraph docs for available gestures. Defaults to "Pensive".
        probability (float): Probability (0.0 to 1.0) that a gesture will be triggered
            for any given event. Used to make behavior less predictable. Defaults to 0.5.
        **kwargs: Additional arguments passed to parent FrameProcessor.

    Typical usage example:
        >>> processor = FacialGestureProviderProcessor(
        ...     user_stopped_speaking_gesture="Smile",
        ...     start_interruption_gesture="Concerned",
        ...     probability=0.75
        ... )
    """

    def __init__(
        self,
        user_stopped_speaking_gesture: str = "Taunt",
        start_interruption_gesture: str = "Pensive",
        probability: float = 0.5,
        **kwargs,
    ):
        """Initialize the facial gesture provider.

        Args:
            user_stopped_speaking_gesture (str): Facial gesture to trigger when user stops speaking.
                See ACE Animgraph docs for available gestures. Defaults to "Taunt".
            start_interruption_gesture (str): Facial gesture to trigger during interruptions.
                See ACE Animgraph docs for available gestures. Defaults to "Pensive".
            probability (float): Probability (0.0 to 1.0) that a gesture will be triggered
                for any given event. Used to make behavior less predictable. Defaults to 0.5.
            **kwargs: Additional arguments passed to parent FrameProcessor.
        """
        super().__init__(**kwargs)
        self.user_stopped_speaking_gesture = user_stopped_speaking_gesture
        self.start_interruption_gesture = start_interruption_gesture
        # Tracks whether the bot is currently speaking; interruptions only
        # trigger a gesture while the bot is mid-utterance.
        self._bot_speaking = False
        self.probability = probability

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process an incoming frame and trigger facial gestures if appropriate.

        Monitors conversation state changes and randomly triggers configured facial
        gestures based on the probability setting.

        Args:
            frame (Frame): The incoming frame to process.
            direction (FrameDirection): The direction the frame is traveling.

        Returns:
            None
        """
        await super().process_frame(frame, direction)
        new_frame: Frame | None = None
        frame_direction: FrameDirection | None = None
        if isinstance(frame, UserStoppedSpeakingFrame):
            if random.random() < self.probability:
                logger.info("User stopped speaking gesture provider")
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.user_stopped_speaking_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
        elif isinstance(frame, StartInterruptionFrame):
            logger.info("Start interruption frame gesture provider")
            # Only react to interruptions that actually cut the bot off.
            if self._bot_speaking and random.random() < self.probability:
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.start_interruption_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
            self._bot_speaking = False
        elif isinstance(frame, BotStartedSpeakingFrame):
            self._bot_speaking = True
        elif isinstance(frame, BotStoppedSpeakingFrame):
            self._bot_speaking = False
        # Push the gesture frame after the incoming frame. With this ordering the
        # StartInterruptionFrame will not delete it by resetting the frame queues.
        await self.push_frame(frame, direction)
        # Bug fix: only push when a gesture was actually generated. The original
        # unconditionally pushed (None, None) for every non-matching frame.
        if new_frame is not None:
            await self.push_frame(new_frame, frame_direction)