Spaces:
Runtime error
Runtime error
| from tqdm import tqdm | |
| import logging | |
| from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation | |
| from .fetcher import AutoFetcher | |
| from .voicevox import VoiceVoxClient, SpeakerId, Audio | |
class PodcastStudio:
    """Turn a paper URL into a scripted conversation and then into audio.

    ``create_conversation`` runs the text pipeline (fetch -> blog ->
    dialogue -> structured conversation); ``record_podcast`` sends each
    line of a conversation through a VOICEVOX client and joins the results.
    """

    # Value written to the audio query's ``tempoDynamicsScale`` (when that
    # field is set) or ``speedScale`` (otherwise) before synthesis.
    # NOTE(review): presumably a slight speed-up of the voice -- confirm
    # against the VOICEVOX engine API.
    _RATE_SCALE = 1.1

    def __init__(self, api_key: str, logging_level: int = logging.INFO):
        """Create the three agents, the logger and the fetcher.

        Args:
            api_key: Key passed to the blogger, writer and structure agents.
            logging_level: Level set on the ``logging.getLogger(__name__)``
                logger used by this class.
        """
        self.blogger = BloggerAgent(api_key=api_key)
        self.writer = WriterAgent(api_key=api_key)
        self.structure_agent = StructureAgent(api_key=api_key)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging_level)
        self.fetcher = AutoFetcher()

    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
        """Fetch the paper at *url* and derive a blog, a dialogue and a conversation.

        Each stage feeds the next: the fetched paper goes to the blogger
        agent, paper and blog go to the writer agent, and the resulting
        dialogue goes to the structure agent.

        Args:
            url: Location handed to ``self.fetcher.fetch``.

        Returns:
            ``(blog, dialogue, conversation)`` exactly as produced by the
            blogger, writer and structure agents respectively.
        """
        self.logger.info(f"Fetching paper from {url}...")
        paper = await self.fetcher.fetch(url)
        self.logger.info("Paper fetched successfully.")
        # Debug output is truncated to the first 100 characters throughout.
        self.logger.debug(f"Paper content: {paper[:100]}...")

        self.logger.info("Creating blog from paper...")
        blog = await self.blogger.task(paper)
        self.logger.info("Blog created successfully.")
        self.logger.debug(f"{blog[:100]}...")

        self.logger.info("Creating dialogue from blog...")
        dialogue = await self.writer.task(paper, blog)
        self.logger.info("Dialogue created successfully.")
        self.logger.debug(f"{dialogue[:100]}...")

        self.logger.info("Structuring conversation from dialogue...")
        conversation = await self.structure_agent.task(dialogue)
        self.logger.info("Conversation structured successfully.")
        for _d in conversation.conversation:
            self.logger.debug(f"{_d.role}: {_d.content[:100]}...")

        return blog, dialogue, conversation

    async def record_podcast(
        self,
        conversation: Conversation,
        voicevox_client: VoiceVoxClient,
        speaker_id: SpeakerId,
        supporter_id: SpeakerId,
    ) -> Audio:
        """Synthesize every line of *conversation* and join them into one audio.

        Lines are synthesized one at a time, in conversation order, so the
        collected audio list needs no re-sorting before it is joined.

        Args:
            conversation: Object whose ``conversation`` attribute is the
                sequence of lines; each line exposes ``role`` and ``content``.
            voicevox_client: Client used for the audio query, the synthesis
                and the final wave concatenation.
            speaker_id: Voice used for lines whose role is ``"speaker"``.
            supporter_id: Voice used for lines with any other role.

        Returns:
            The result of ``voicevox_client.post_connect_waves`` for the
            per-line audios.

        Raises:
            Whatever the client raises; the progress bar is closed first.
        """

        async def _synthesize(voice: SpeakerId, text: str) -> Audio:
            # Build the query, adjust its rate field, then render the audio.
            audio_query = await voicevox_client.post_audio_query(
                text=text,
                speaker=voice,
            )
            if audio_query.tempoDynamicsScale is not None:
                audio_query.tempoDynamicsScale = self._RATE_SCALE
            else:
                audio_query.speedScale = self._RATE_SCALE
            return await voicevox_client.post_synthesis(
                speaker=voice,
                audio_query=audio_query,
            )

        lines = conversation.conversation
        audios: list[Audio] = []

        # The context manager closes the bar even when a request fails, so a
        # half-drawn bar is never left behind on error.
        with tqdm(
            total=len(lines),
            desc="Synthesizing audio",
            ncols=100,
        ) as progress_bar:
            for line in lines:
                voice = speaker_id if line.role == "speaker" else supporter_id
                audios.append(await _synthesize(voice, line.content))
                progress_bar.update(1)
                progress_bar.set_postfix({"text": line.content[:20] + "..."})

        # Join the per-line audios into a single recording.
        return await voicevox_client.post_connect_waves(
            audio_list=audios,
        )