# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause
| """Gesture provider processor. | |
| A frame processor that automatically manages facial expressions for the ACE avatar | |
| based on conversation events and speaking states. Helps create more natural interactions | |
| by adding contextual facial gestures during conversations. | |
| For available facial gestures, see the ACE Animgraph documentation: | |
| https://docs.nvidia.com/ace/animation-graph-microservice/latest/default-animation-graph.html | |
| """ | |

import random

from loguru import logger
from pipecat.frames.frames import (
    BotStartedSpeakingFrame,
    BotStoppedSpeakingFrame,
    Frame,
    StartInterruptionFrame,
    UserStoppedSpeakingFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

from nvidia_pipecat.frames.action import StartFacialGestureBotActionFrame


class FacialGestureProviderProcessor(FrameProcessor):
    """Manages automated facial gestures for the ACE avatar during conversations.

    This processor monitors conversation state changes and triggers appropriate facial
    expressions in response to events like the user finishing speaking or interruptions
    occurring. It includes configurable randomization to make gestures feel natural.

    Input Frames:
        - UserStoppedSpeakingFrame (consumed): Triggered when the user finishes speaking
        - StartInterruptionFrame (consumed): Triggered during conversation interruptions
        - BotStartedSpeakingFrame (consumed): Indicates the bot began speaking
        - BotStoppedSpeakingFrame (consumed): Indicates the bot finished speaking

    Output Frames:
        - StartFacialGestureBotActionFrame: Triggers facial expressions on the avatar

    Args:
        user_stopped_speaking_gesture (str): Facial gesture to trigger when the user stops speaking.
            See the ACE Animgraph docs for available gestures. Defaults to "Taunt".
        start_interruption_gesture (str): Facial gesture to trigger during interruptions.
            See the ACE Animgraph docs for available gestures. Defaults to "Pensive".
        probability (float): Probability (0.0 to 1.0) that a gesture is triggered for any
            given event. Used to make the behavior less predictable. Defaults to 0.5.
        **kwargs: Additional arguments passed to the parent FrameProcessor.

    Typical usage example:
        >>> processor = FacialGestureProviderProcessor(
        ...     user_stopped_speaking_gesture="Smile",
        ...     start_interruption_gesture="Concerned",
        ...     probability=0.75,
        ... )
    """

    def __init__(
        self, user_stopped_speaking_gesture="Taunt", start_interruption_gesture="Pensive", probability=0.5, **kwargs
    ):
        """Initialize the facial gesture provider.

        Args:
            user_stopped_speaking_gesture (str): Facial gesture to trigger when the user stops speaking.
                See the ACE Animgraph docs for available gestures. Defaults to "Taunt".
            start_interruption_gesture (str): Facial gesture to trigger during interruptions.
                See the ACE Animgraph docs for available gestures. Defaults to "Pensive".
            probability (float): Probability (0.0 to 1.0) that a gesture is triggered for any
                given event. Used to make the behavior less predictable. Defaults to 0.5.
            **kwargs: Additional arguments passed to the parent FrameProcessor.
        """
        super().__init__(**kwargs)
        self.user_stopped_speaking_gesture = user_stopped_speaking_gesture
        self.start_interruption_gesture = start_interruption_gesture
        self._bot_speaking = False
        self.probability = probability

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process an incoming frame and trigger facial gestures if appropriate.

        Monitors conversation state changes and randomly triggers the configured facial
        gestures based on the probability setting.

        Args:
            frame (Frame): The incoming frame to process.
            direction (FrameDirection): The direction the frame is traveling.

        Returns:
            None
        """
        await super().process_frame(frame, direction)

        new_frame: Frame | None = None
        frame_direction: FrameDirection | None = None
        if isinstance(frame, UserStoppedSpeakingFrame):
            if random.random() < self.probability:
                logger.info("User stopped speaking gesture provider")
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.user_stopped_speaking_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
        elif isinstance(frame, StartInterruptionFrame):
            logger.info("Start interruption frame gesture provider")
            # Only react to interruptions that cut off the bot mid-utterance.
            if self._bot_speaking and random.random() < self.probability:
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.start_interruption_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
            self._bot_speaking = False
        elif isinstance(frame, BotStartedSpeakingFrame):
            self._bot_speaking = True
        elif isinstance(frame, BotStoppedSpeakingFrame):
            self._bot_speaking = False

        # Push the gesture frame after the incoming frame, so that a
        # StartInterruptionFrame does not discard it when the frame queues are reset.
        await self.push_frame(frame, direction)
        if new_frame is not None and frame_direction is not None:
            await self.push_frame(new_frame, frame_direction)
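

# Example wiring (a minimal sketch): the processor sits between a transport input
# that emits the speaking-state frames and the output stage that drives the ACE
# avatar. The `transport` object below is a placeholder and depends on the
# deployment; see the pipecat documentation for concrete transports.
#
#     from pipecat.pipeline.pipeline import Pipeline
#
#     gesture_provider = FacialGestureProviderProcessor(
#         user_stopped_speaking_gesture="Smile",
#         probability=0.75,
#     )
#     pipeline = Pipeline(
#         [
#             transport.input(),   # placeholder source of speaking-state frames
#             gesture_provider,    # emits StartFacialGestureBotActionFrame downstream
#             transport.output(),  # placeholder sink driving the ACE avatar
#         ]
#     )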