FLS

Sleeping

kambris committed on Dec 13, 2024

Commit

ecd2a8d

verified ·

1 Parent(s): f3a40fd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,25 +76,26 @@ class SpeechAnalyzer:
         self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
         self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
-    def split_text(self, text, max_length=512):
         """Split long text into overlapping segments"""
         words = text.split()
         segments = []
         current_segment = []
         current_length = 0
         for word in words:
             if current_length + len(word.split()) > max_length:
                 segments.append(' '.join(current_segment))
                 current_segment = current_segment[-overlap:] + [word]
                 current_length = len(' '.join(current_segment).split())
             else:
                 current_segment.append(word)
                 current_length = len(' '.join(current_segment).split())
         if current_segment:
             segments.append(' '.join(current_segment))
         return segments
     def analyze_moral_foundations(self, text):

         self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
         self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
+    def split_text(self, text, max_length=512, overlap=50):
         """Split long text into overlapping segments"""
         words = text.split()
         segments = []
         current_segment = []
         current_length = 0
         for word in words:
             if current_length + len(word.split()) > max_length:
                 segments.append(' '.join(current_segment))
+                # Use the overlap parameter from the method arguments
                 current_segment = current_segment[-overlap:] + [word]
                 current_length = len(' '.join(current_segment).split())
             else:
                 current_segment.append(word)
                 current_length = len(' '.join(current_segment).split())
         if current_segment:
             segments.append(' '.join(current_segment))
         return segments
     def analyze_moral_foundations(self, text):