Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,6 +71,8 @@ class ConversationBot:
|
|
| 71 |
if tool == "Generate Image From User Input Text" or tool == "Generate Text From The Audio" or tool == "Transcribe speech":
|
| 72 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 73 |
response = re.sub('(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res['output'])
|
|
|
|
|
|
|
| 74 |
state = state + [(text, response)]
|
| 75 |
print("Outputs:", state)
|
| 76 |
return state, state, gr.Audio.update(visible=False), gr.Image.update(visible=False), gr.Button.update(visible=False)
|
|
@@ -160,9 +162,9 @@ class ConversationBot:
|
|
| 160 |
self.t2s = T2S(device="cpu")
|
| 161 |
self.i2a = I2A(device="cuda:0")
|
| 162 |
self.a2t = A2T(device="cpu")
|
| 163 |
-
self.asr = ASR(device="
|
| 164 |
self.inpaint = Inpaint(device="cuda:0")
|
| 165 |
-
self.tts_ood = TTS_OOD(device="cpu")
|
| 166 |
self.tools = [
|
| 167 |
Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
|
| 168 |
description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
|
|
@@ -173,11 +175,11 @@ class ConversationBot:
|
|
| 173 |
Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
|
| 174 |
description="useful for when you want to generate an audio from a user input text and it saved it to a file."
|
| 175 |
"The input to this tool should be a string, representing the text used to generate audio."),
|
| 176 |
-
Tool(
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
|
| 182 |
description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
|
| 183 |
"If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
|
|
|
|
| 71 |
if tool == "Generate Image From User Input Text" or tool == "Generate Text From The Audio" or tool == "Transcribe speech":
|
| 72 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 73 |
response = re.sub('(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res['output'])
|
| 74 |
+
image_filename = res['intermediate_steps'][0][1]
|
| 75 |
+
response = response + f"*{image_filename}*"
|
| 76 |
state = state + [(text, response)]
|
| 77 |
print("Outputs:", state)
|
| 78 |
return state, state, gr.Audio.update(visible=False), gr.Image.update(visible=False), gr.Button.update(visible=False)
|
|
|
|
| 162 |
self.t2s = T2S(device="cpu")
|
| 163 |
self.i2a = I2A(device="cuda:0")
|
| 164 |
self.a2t = A2T(device="cpu")
|
| 165 |
+
self.asr = ASR(device="cuda:0")
|
| 166 |
self.inpaint = Inpaint(device="cuda:0")
|
| 167 |
+
# self.tts_ood = TTS_OOD(device="cpu")
|
| 168 |
self.tools = [
|
| 169 |
Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
|
| 170 |
description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
|
|
|
|
| 175 |
Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
|
| 176 |
description="useful for when you want to generate an audio from a user input text and it saved it to a file."
|
| 177 |
"The input to this tool should be a string, representing the text used to generate audio."),
|
| 178 |
+
# Tool(
|
| 179 |
+
# name="Generate human speech with style derived from a speech reference and user input text and save it to a file", func= self.tts_ood.inference,
|
| 180 |
+
# description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
|
| 181 |
+
# "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
|
| 182 |
+
# "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
|
| 183 |
Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
|
| 184 |
description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
|
| 185 |
"If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
|