Commit
·
40c4763
1
Parent(s):
cc68f3b
clearer log messages for sentence segmentation
Browse files
- src/whisper_streaming/online_asr.py +9 -3
- whisper_online.py +1 -1
src/whisper_streaming/online_asr.py
CHANGED
|
@@ -194,10 +194,16 @@ class OnlineASRProcessor:
|
|
| 194 |
def chunk_completed_sentence(self):
|
| 195 |
if self.commited == []:
|
| 196 |
return
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
| 198 |
sents = self.words_to_sentences(self.commited)
|
|
|
|
|
|
|
|
|
|
| 199 |
for s in sents:
|
| 200 |
-
logger.debug(f"
|
| 201 |
if len(sents) < 2:
|
| 202 |
return
|
| 203 |
while len(sents) > 2:
|
|
@@ -205,7 +211,7 @@ class OnlineASRProcessor:
|
|
| 205 |
# we will continue with audio processing at this timestamp
|
| 206 |
chunk_at = sents[-2][1]
|
| 207 |
|
| 208 |
-
logger.debug(f"
|
| 209 |
self.chunk_at(chunk_at)
|
| 210 |
|
| 211 |
def chunk_completed_segment(self, res):
|
|
|
|
| 194 |
def chunk_completed_sentence(self):
|
| 195 |
if self.commited == []:
|
| 196 |
return
|
| 197 |
+
|
| 198 |
+
raw_text = self.asr.sep.join([s[2] for s in self.commited])
|
| 199 |
+
logger.debug(f"[Sentence-segmentation] Raw Text: {raw_text}")
|
| 200 |
+
|
| 201 |
sents = self.words_to_sentences(self.commited)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
|
| 205 |
for s in sents:
|
| 206 |
+
logger.debug(f"[Sentence-segmentation] completed sentence: {s}")
|
| 207 |
if len(sents) < 2:
|
| 208 |
return
|
| 209 |
while len(sents) > 2:
|
|
|
|
| 211 |
# we will continue with audio processing at this timestamp
|
| 212 |
chunk_at = sents[-2][1]
|
| 213 |
|
| 214 |
+
logger.debug(f"[Sentence-segmentation]: sentence chunked at {chunk_at:2.2f}")
|
| 215 |
self.chunk_at(chunk_at)
|
| 216 |
|
| 217 |
def chunk_completed_segment(self, res):
|
whisper_online.py
CHANGED
|
@@ -58,7 +58,7 @@ def create_tokenizer(lan):
|
|
| 58 |
lan
|
| 59 |
in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split()
|
| 60 |
):
|
| 61 |
-
logger.
|
| 62 |
f"{lan} code is not supported by wtpsplit. Going to use None lang_code option."
|
| 63 |
)
|
| 64 |
lan = None
|
|
|
|
| 58 |
lan
|
| 59 |
in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split()
|
| 60 |
):
|
| 61 |
+
logger.warning(
|
| 62 |
f"{lan} code is not supported by wtpsplit. Going to use None lang_code option."
|
| 63 |
)
|
| 64 |
lan = None
|