Update orpheus-tts/kartoffel_decoder.py
Browse files
orpheus-tts/kartoffel_decoder.py
CHANGED
|
@@ -6,9 +6,9 @@ import threading
|
|
| 6 |
import queue
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
# Kartoffel-spezifische Konstanten
|
| 10 |
CODE_TOKEN_OFFSET = 128266
|
| 11 |
-
CODE_START_TOKEN_ID = 128257
|
| 12 |
CODE_REMOVE_TOKEN_ID = 128258
|
| 13 |
|
| 14 |
print("DEBUG KARTOFFEL: Loading SNAC model...")
|
|
@@ -75,8 +75,17 @@ def convert_to_audio_kartoffel(audio_tensor):
|
|
| 75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
| 76 |
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
| 77 |
try:
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Nach Start-Token suchen
|
| 82 |
start_idx = -1
|
|
@@ -86,10 +95,14 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
| 86 |
break
|
| 87 |
|
| 88 |
if start_idx == -1:
|
|
|
|
| 89 |
return []
|
| 90 |
|
|
|
|
|
|
|
| 91 |
# Audio-Tokens extrahieren (nach Start-Token)
|
| 92 |
potential_code_tokens = token_ids[start_idx + 1:]
|
|
|
|
| 93 |
|
| 94 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
| 95 |
valid_raw_codes = [
|
|
@@ -97,6 +110,8 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
| 97 |
if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
|
| 98 |
]
|
| 99 |
|
|
|
|
|
|
|
| 100 |
# Offset abziehen
|
| 101 |
valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
|
| 102 |
|
|
|
|
| 6 |
import queue
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
# Kartoffel-spezifische Konstanten (basierend auf Referenz-Implementierung)
|
| 10 |
CODE_TOKEN_OFFSET = 128266
|
| 11 |
+
CODE_START_TOKEN_ID = 128257 # Token für Audio-Code-Start
|
| 12 |
CODE_REMOVE_TOKEN_ID = 128258
|
| 13 |
|
| 14 |
print("DEBUG KARTOFFEL: Loading SNAC model...")
|
|
|
|
| 75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
| 76 |
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
| 77 |
try:
|
| 78 |
+
print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
|
| 79 |
+
|
| 80 |
+
# Prüfen ob es sich um numerische Token-IDs handelt (neues Format)
|
| 81 |
+
if isinstance(token_text, str) and all(c.isdigit() or c.isspace() for c in token_text):
|
| 82 |
+
# Numerische Token-IDs direkt parsen
|
| 83 |
+
token_ids = [int(x) for x in token_text.split()]
|
| 84 |
+
print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
|
| 85 |
+
else:
|
| 86 |
+
# Fallback: Text zu Token-IDs konvertieren (altes Format)
|
| 87 |
+
token_ids = tokenizer.encode(token_text)
|
| 88 |
+
print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
|
| 89 |
|
| 90 |
# Nach Start-Token suchen
|
| 91 |
start_idx = -1
|
|
|
|
| 95 |
break
|
| 96 |
|
| 97 |
if start_idx == -1:
|
| 98 |
+
print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
|
| 99 |
return []
|
| 100 |
|
| 101 |
+
print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
|
| 102 |
+
|
| 103 |
# Audio-Tokens extrahieren (nach Start-Token)
|
| 104 |
potential_code_tokens = token_ids[start_idx + 1:]
|
| 105 |
+
print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens[:10]}...")
|
| 106 |
|
| 107 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
| 108 |
valid_raw_codes = [
|
|
|
|
| 110 |
if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
|
| 111 |
]
|
| 112 |
|
| 113 |
+
print(f"DEBUG KARTOFFEL: Valid raw codes count: {len(valid_raw_codes)}")
|
| 114 |
+
|
| 115 |
# Offset abziehen
|
| 116 |
valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
|
| 117 |
|