Update app.py
app.py CHANGED
@@ -123,14 +123,14 @@ def main():
         result2 = re.sub(r'[^\w\s]','',result1)
         result.append(result2)

-    st.write("--- %s seconds ---" % (time.time() - start_time))
+    #st.write("--- %s seconds ---" % (time.time() - start_time))
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased

     model_path = "checkpoint-2850"

     model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})

-    st.write('base sequence classification loaded')
+    #st.write('base sequence classification loaded')
     pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
     for sent in result:
         pred = pipe1(sent)
@@ -138,34 +138,31 @@ def main():
         if lab['label'] == 'causal': #causal
             causal_sents.append(sent)

-    st.write('causal sentence classification finished')
-    st.write("--- %s seconds ---" % (time.time() - start_time))
+    #st.write('causal sentence classification finished')
+    #st.write("--- %s seconds ---" % (time.time() - start_time))

     model_name = "distilbert-base-cased"
     tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

-
-
     model_path1 = "DistilBertforTokenclassification"

     model = DistilBertForTokenClassification.from_pretrained(model_path1) #len(unique_tags),, num_labels= 7, , id2label={0:'CT',1:'E',2:'C',3:'O'}
     pipe = pipeline('ner', model=model, tokenizer=tokenizer,aggregation_strategy='simple') #grouped_entities=True
-    st.write('DistilBERT loaded')
+    #st.write('DistilBERT loaded')
     sentence_pred = []
     class_list = []
     entity_list = []
     for k in causal_sents:
         pred= pipe(k)
         #st.write(pred)
-        st.write('preds')
-        for i in pred:
-
+        #st.write('preds')
+        for i in pred:
             sentence_pred.append(k)
             class_list.append(i['word'])
             entity_list.append(i['entity_group'])

-    st.write('causality extraction finished')
-    st.write("--- %s seconds ---" % (time.time() - start_time))
+    # st.write('causality extraction finished')
+    # st.write("--- %s seconds ---" % (time.time() - start_time))

     # filename = 'Checkpoint-classification.sav'
     # loaded_model = pickle.load(open(filename, 'rb'))
@@ -193,8 +190,8 @@ def main():
     predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
     predicted = np.argmax(predictions,axis=1)

-    st.write('stakeholder taxonomy finished')
-    st.write("--- %s seconds ---" % (time.time() - start_time))
+    # st.write('stakeholder taxonomy finished')
+    # st.write("--- %s seconds ---" % (time.time() - start_time))
     pred1 = predicted
     level0 = []
     count =0
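What this commit does: each st.write() progress and timing call in main() is commented out. A minimal sketch of an alternative, assuming a hypothetical DEBUG flag and debug_write() helper (neither name appears in app.py), which keeps the instrumentation in place but toggles it from one spot:

import time

import streamlit as st

DEBUG = False  # set True to restore the progress/timing output

def debug_write(*args):
    # Forward to st.write() only when debugging is enabled.
    if DEBUG:
        st.write(*args)

start_time = time.time()
# ... run a pipeline stage, e.g. the text-classification pass ...
debug_write('base sequence classification loaded')
debug_write("--- %s seconds ---" % (time.time() - start_time))

With this pattern, a cleanup like the one in this commit becomes a one-line change (DEBUG = False) instead of an edit at every call site.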