Spaces:
Runtime error
Runtime error
Commit
·
2919f24
1
Parent(s):
a00f9ba
added code comments in keyword extraction file
Browse files- keyword_extraction.py +13 -6
keyword_extraction.py
CHANGED
|
@@ -90,7 +90,10 @@ class KeywordExtractor:
|
|
| 90 |
|
| 91 |
len_indices = 0
|
| 92 |
while True:
|
|
|
|
| 93 |
merged = self.merge_overlapping_indices(keyword_indices)
|
|
|
|
|
|
|
| 94 |
if len_indices == len(merged):
|
| 95 |
out_indices = sorted(merged, key=itemgetter(0))
|
| 96 |
return out_indices
|
|
@@ -108,18 +111,22 @@ class KeywordExtractor:
|
|
| 108 |
annotation (list): list of tuples for generating html
|
| 109 |
"""
|
| 110 |
|
|
|
|
| 111 |
arr = list(text)
|
|
|
|
|
|
|
| 112 |
for idx in sorted(keyword_indices, reverse=True):
|
| 113 |
arr.insert(idx[0], "<kw>")
|
| 114 |
-
arr.insert(idx[1]+1, "
|
|
|
|
|
|
|
| 115 |
joined_annotation = ''.join(arr)
|
|
|
|
|
|
|
| 116 |
split = joined_annotation.split('<kw>')
|
| 117 |
-
annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
|
| 118 |
|
| 119 |
-
|
| 120 |
-
for
|
| 121 |
-
if type(i) is tuple:
|
| 122 |
-
kws_check.append(i[0])
|
| 123 |
|
| 124 |
return annotation
|
| 125 |
|
|
|
|
| 90 |
|
| 91 |
len_indices = 0
|
| 92 |
while True:
|
| 93 |
+
# Merge overlapping indices
|
| 94 |
merged = self.merge_overlapping_indices(keyword_indices)
|
| 95 |
+
# Check to see if merging reduced number of annotation indices
|
| 96 |
+
# If merging did not reduce the list, return the final indices
|
| 97 |
if len_indices == len(merged):
|
| 98 |
out_indices = sorted(merged, key=itemgetter(0))
|
| 99 |
return out_indices
|
|
|
|
| 111 |
annotation (list): list of tuples for generating html
|
| 112 |
"""
|
| 113 |
|
| 114 |
+
# Turn the text into a list of characters
|
| 115 |
arr = list(text)
|
| 116 |
+
|
| 117 |
+
# Loop through indices in list and insert delimiters
|
| 118 |
for idx in sorted(keyword_indices, reverse=True):
|
| 119 |
arr.insert(idx[0], "<kw>")
|
| 120 |
+
arr.insert(idx[1]+1, "<!kw> <kw>")
|
| 121 |
+
|
| 122 |
+
# join array
|
| 123 |
joined_annotation = ''.join(arr)
|
| 124 |
+
|
| 125 |
+
# split the joined string on the delimiter
|
| 126 |
split = joined_annotation.split('<kw>')
|
|
|
|
| 127 |
|
| 128 |
+
# Create annotation for keywords in text
|
| 129 |
+
annotation = [(x.replace('<!kw> ', ''), "KEY", "#26aaef") if "<!kw>" in x else x for x in split]
|
|
|
|
|
|
|
| 130 |
|
| 131 |
return annotation
|
| 132 |
|