kamahori committed on
Commit
87e0f69
·
1 Parent(s): 42a1f72

Add eleven labs

Browse files
Files changed (3) hide show
  1. app.py +10 -5
  2. requirements.txt +1 -0
  3. utils.py +46 -0
app.py CHANGED
@@ -16,7 +16,7 @@ from pydub import AudioSegment
16
  import requests
17
  import json
18
 
19
- from utils import get_google_credentials, get_google_tts, get_openai_tts
20
 
21
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_google_credentials()
22
 
@@ -42,7 +42,7 @@ KOTOBA_API_URL = os.getenv('KOTOBA_API_URL', 'https://api.example.com/tts')
42
  AVAILABLE_MODELS = {
43
  # 'XTTSv2': 'xtts',
44
  # # 'WhisperSpeech': 'whisperspeech',
45
- # 'ElevenLabs': 'eleven',
46
  # # 'OpenVoice': 'openvoice',
47
  # 'OpenVoice V2': 'openvoicev2',
48
  # 'Play.HT 2.0': 'playht',
@@ -54,10 +54,10 @@ AVAILABLE_MODELS = {
54
  # 'VoiceCraft 2.0': 'voicecraft',
55
  # 'Parler TTS': 'parler'
56
  'MOE-VITS': 'moe-vits',
57
- 'BARK': 'bark',
58
  'KOTOBA-TTS': 'kotoba-tts',
59
  #'BLANE-TTS': 'blane-tts',
60
- 'AMITARO-VITS': 'amitaro-vits',
61
  'GOOGLE-TTS': 'google-tts',
62
  'OPENAI-TTS': 'openai-tts'
63
  }
@@ -337,7 +337,8 @@ model_names = {
337
  # 'xtts2': 'Coqui XTTSv2',
338
  # 'xtts': 'Coqui XTTS',
339
  # 'openvoice': 'MyShell OpenVoice',
340
- # 'elevenlabs': 'ElevenLabs',
 
341
  # 'openai': 'OpenAI',
342
  # 'hierspeech': 'HierSpeech++',
343
  # 'pheme': 'PolyAI Pheme',
@@ -743,6 +744,10 @@ def synthandreturn(text, retry=0):
743
  elif model == "openai-tts":
744
  local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
745
  result = get_openai_tts(text, local_filename=local_filename)
 
 
 
 
746
  elif model == "kotoba-tts":
747
  result = get_kotoba_tts(text)
748
  print(f"API TTS audio file: {result}")
 
16
  import requests
17
  import json
18
 
19
+ from utils import get_google_credentials, get_google_tts, get_openai_tts, get_elevenlabs_tts
20
 
21
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_google_credentials()
22
 
 
42
  AVAILABLE_MODELS = {
43
  # 'XTTSv2': 'xtts',
44
  # # 'WhisperSpeech': 'whisperspeech',
45
+ 'ElevenLabs': 'eleven',
46
  # # 'OpenVoice': 'openvoice',
47
  # 'OpenVoice V2': 'openvoicev2',
48
  # 'Play.HT 2.0': 'playht',
 
54
  # 'VoiceCraft 2.0': 'voicecraft',
55
  # 'Parler TTS': 'parler'
56
  'MOE-VITS': 'moe-vits',
57
+ # 'BARK': 'bark',
58
  'KOTOBA-TTS': 'kotoba-tts',
59
  #'BLANE-TTS': 'blane-tts',
60
+ # 'AMITARO-VITS': 'amitaro-vits',
61
  'GOOGLE-TTS': 'google-tts',
62
  'OPENAI-TTS': 'openai-tts'
63
  }
 
337
  # 'xtts2': 'Coqui XTTSv2',
338
  # 'xtts': 'Coqui XTTS',
339
  # 'openvoice': 'MyShell OpenVoice',
340
+ 'eleven': 'ElevenLabs',
341
+ # NOTE: the key must be 'eleven' — the AVAILABLE_MODELS *value* used as the model id at runtime
342
  # 'openai': 'OpenAI',
343
  # 'hierspeech': 'HierSpeech++',
344
  # 'pheme': 'PolyAI Pheme',
 
744
  elif model == "openai-tts":
745
  local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
746
  result = get_openai_tts(text, local_filename=local_filename)
747
+ elif model == "eleven":
748
+ local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
749
+ result = get_elevenlabs_tts(text, local_filename=local_filename)
750
+ print(f"ElevenLabs TTS audio file: {result}")
751
  elif model == "kotoba-tts":
752
  result = get_kotoba_tts(text)
753
  print(f"API TTS audio file: {result}")
requirements.txt CHANGED
@@ -9,3 +9,4 @@ pydub
9
  google-cloud-texttospeech
10
  openai
11
  numpy
 
 
9
  google-cloud-texttospeech
10
  openai
11
  numpy
12
+ requests
utils.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import json
3
  import tempfile
4
  from google.cloud import texttospeech
 
5
 
6
  from pathlib import Path
7
  from openai import OpenAI
@@ -59,4 +60,49 @@ def get_google_tts(text, local_filename):
59
  out.write(response.audio_content)
60
  print(f'Audio content written to file {local_filename}')
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return local_filename
 
2
  import json
3
  import tempfile
4
  from google.cloud import texttospeech
5
+ import requests
6
 
7
  from pathlib import Path
8
  from openai import OpenAI
 
60
  out.write(response.audio_content)
61
  print(f'Audio content written to file {local_filename}')
62
 
63
+ return local_filename
64
+
65
def get_elevenlabs_tts(text, local_filename):
    """
    Call the ElevenLabs API to generate speech from text.

    Args:
        text (str): The text to convert to speech.
        local_filename (str): Path to save the generated audio file.
            NOTE(review): the API returns MP3 data (Accept: audio/mpeg) even
            though callers pass a '.wav' filename — confirm the downstream
            audio loader sniffs the container rather than trusting the
            extension.

    Returns:
        str: Path to the generated audio file (same as local_filename).

    Raises:
        RuntimeError: If the ELEVENLABS_API_KEY environment variable is unset.
        requests.HTTPError: If the API responds with a non-2xx status.
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")
    # Fail fast with a clear message instead of a confusing 401 from the API.
    if not api_key:
        raise RuntimeError("ELEVENLABS_API_KEY environment variable is not set")

    # Text-to-speech endpoint, hard-coded to the default "Rachel" voice.
    url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key
    }

    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    # timeout prevents a hung API call from blocking the app indefinitely.
    response = requests.post(url, json=data, headers=headers, timeout=60)
    response.raise_for_status()

    with open(local_filename, "wb") as f:
        f.write(response.content)

    return local_filename