SilasKieser committed on
Commit
40c4763
·
1 Parent(s): cc68f3b

clearer log messages for sentence segmentation

Browse files
src/whisper_streaming/online_asr.py CHANGED
@@ -194,10 +194,16 @@ class OnlineASRProcessor:
194
  def chunk_completed_sentence(self):
195
  if self.commited == []:
196
  return
197
- logger.debug("COMPLETED SENTENCE: ", [s[2] for s in self.commited])
 
 
 
198
  sents = self.words_to_sentences(self.commited)
 
 
 
199
  for s in sents:
200
- logger.debug(f"\t\tSENT: {s}")
201
  if len(sents) < 2:
202
  return
203
  while len(sents) > 2:
@@ -205,7 +211,7 @@ class OnlineASRProcessor:
205
  # we will continue with audio processing at this timestamp
206
  chunk_at = sents[-2][1]
207
 
208
- logger.debug(f"--- sentence chunked at {chunk_at:2.2f}")
209
  self.chunk_at(chunk_at)
210
 
211
  def chunk_completed_segment(self, res):
 
194
  def chunk_completed_sentence(self):
195
  if self.commited == []:
196
  return
197
+
198
+ raw_text = self.asr.sep.join([s[2] for s in self.commited])
199
+ logger.debug(f"[Sentence-segmentation] Raw Text: {raw_text}")
200
+
201
  sents = self.words_to_sentences(self.commited)
202
+
203
+
204
+
205
  for s in sents:
206
+ logger.debug(f"[Sentence-segmentation] completed sentence: {s}")
207
  if len(sents) < 2:
208
  return
209
  while len(sents) > 2:
 
211
  # we will continue with audio processing at this timestamp
212
  chunk_at = sents[-2][1]
213
 
214
+ logger.debug(f"[Sentence-segmentation]: sentence chunked at {chunk_at:2.2f}")
215
  self.chunk_at(chunk_at)
216
 
217
  def chunk_completed_segment(self, res):
whisper_online.py CHANGED
@@ -58,7 +58,7 @@ def create_tokenizer(lan):
58
  lan
59
  in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split()
60
  ):
61
- logger.debug(
62
  f"{lan} code is not supported by wtpsplit. Going to use None lang_code option."
63
  )
64
  lan = None
 
58
  lan
59
  in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split()
60
  ):
61
+ logger.warning(
62
  f"{lan} code is not supported by wtpsplit. Going to use None lang_code option."
63
  )
64
  lan = None