Update app.py
Browse files
app.py
CHANGED
|
@@ -76,25 +76,26 @@ class SpeechAnalyzer:
|
|
| 76 |
self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
|
| 77 |
self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
| 78 |
|
| 79 |
-
def split_text(self, text, max_length=512, overlap=50):
    """Split long text into overlapping, whitespace-delimited segments.

    Words are obtained with ``text.split()`` and packed greedily into
    segments of at most ``max_length`` words. Each segment after the first
    starts with the last ``overlap`` words of the previous one.

    Args:
        text: The text to split.
        max_length: Maximum number of words per segment (must be >= 1).
        overlap: Number of trailing words repeated at the start of the
            next segment (must be >= 0). Values of ``max_length`` or more
            are clamped to ``max_length - 1`` so each segment still
            advances by at least one word.

    Returns:
        A list of segment strings; empty when ``text`` contains no words.

    Raises:
        ValueError: If ``max_length`` is below 1 or ``overlap`` is negative.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if overlap < 0:
        raise ValueError("overlap must be non-negative")
    # Without this clamp the carried-over words alone would already fill
    # (or overflow) the next segment.
    overlap = min(overlap, max_length - 1)

    segments = []
    current_segment = []

    for word in text.split():
        if len(current_segment) >= max_length:
            segments.append(' '.join(current_segment))
            # A plain [-overlap:] slice keeps the WHOLE list when overlap
            # is 0, so that case is handled explicitly.
            current_segment = current_segment[-overlap:] if overlap else []
        current_segment.append(word)

    # Flush the trailing, possibly shorter, segment.
    if current_segment:
        segments.append(' '.join(current_segment))

    return segments
|
| 99 |
|
| 100 |
def analyze_moral_foundations(self, text):
|
|
|
|
| 76 |
self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
|
| 77 |
self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
| 78 |
|
| 79 |
+
def split_text(self, text, max_length=512, overlap=50):
    """Split long text into overlapping segments of whitespace-separated words.

    The text is tokenized with ``text.split()`` and the words are packed
    greedily: a segment is closed once it holds ``max_length`` words, and
    the next segment begins with the final ``overlap`` words of the one
    just closed.

    Args:
        text: The text to split.
        max_length: Maximum number of words in a segment (must be >= 1).
        overlap: How many trailing words of a segment are repeated at the
            start of the following one (must be >= 0). It is clamped to
            ``max_length - 1`` so that segments never exceed
            ``max_length`` words and always make forward progress.

    Returns:
        A list of segment strings, or an empty list if ``text`` has no
        words.

    Raises:
        ValueError: If ``max_length`` is below 1 or ``overlap`` is negative.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if overlap < 0:
        raise ValueError("overlap must be non-negative")
    # An overlap as large as the segment itself would leave no room for
    # new words, so cap it one below the segment size.
    overlap = min(overlap, max_length - 1)

    words = text.split()
    segments = []
    current_segment = []

    for word in words:
        # Every item is a single word, so the list length is the word
        # count; no need to re-join and re-split on each iteration.
        if len(current_segment) >= max_length:
            segments.append(' '.join(current_segment))
            # current_segment[-0:] is the entire list, not an empty one,
            # so a zero overlap must start from a fresh list.
            current_segment = current_segment[-overlap:] if overlap else []
        current_segment.append(word)

    # Emit whatever is left after the last full segment.
    if current_segment:
        segments.append(' '.join(current_segment))

    return segments
|
| 100 |
|
| 101 |
def analyze_moral_foundations(self, text):
|