| import gradio as gr | |
| import nltk | |
| from fincat_utils import extract_context_words | |
| from fincat_utils import bert_embedding_extract | |
| import pickle | |
# Load the pickled classifier that score_fincat() uses for its predictions.
# The file is opened in a context manager so the handle is closed as soon as
# unpickling finishes.
# NOTE(security): unpickling executes arbitrary code embedded in the file;
# only ship/load a model file from a trusted source.
with open("lr_clf_FiNCAT.pickle", "rb") as model_file:
    lr_clf = pickle.load(model_file)

# Fetch the NLTK 'punkt' resource at import time.
# NOTE(review): presumably required by the fincat_utils helpers - confirm.
nltk.download('punkt')
# Trailing characters removed from a numeric token before it is classified.
_TRAILING_PUNCTUATION = ('.', ',', ';', ':', '-', '!', '?', ')', '"', "'")


def _iter_numeric_tokens(text):
    """Yield ``(word, start, end)`` for each token of *text* containing a digit.

    Tokens are the whitespace-delimited pieces produced by ``text.split()``.
    A single trailing punctuation character (see ``_TRAILING_PUNCTUATION``)
    is stripped from the token; ``start``/``end`` are character offsets of
    the stripped word inside *text*.
    """
    cursor = 0
    for token in text.split():
        # Search forward from the end of the previous token so that repeated
        # tokens, and tokens delimited by tabs/newlines, get their own exact
        # position instead of the first textual match (or -1).
        start = text.find(token, cursor)
        cursor = start + len(token)
        if not any(char.isdigit() for char in token):
            continue
        word = token[:-1] if token.endswith(_TRAILING_PUNCTUATION) else token
        yield word, start, start + len(word)


def score_fincat(txt):
    """Label every numeric token in *txt* as in-claim or out-of-claim.

    The text is padded with one space on each side; every whitespace-delimited
    token that contains a digit is passed, together with its character offsets
    in the padded text, to ``extract_context_words`` and
    ``bert_embedding_extract``. The resulting feature vector is reshaped to
    ``(1, 768)`` and classified with the module-level ``lr_clf``.

    Args:
        txt: The input text to analyse.

    Returns:
        A list of ``(text, label)`` tuples. For each classified token the
        label is ``' In-claim'`` when the classifier predicts ``1`` and
        ``'Out-of-Claim'`` otherwise. If no token was classified, the list
        holds the single entry ``(padded_text, 'None')``.
    """
    highlight = []
    txt = " " + txt + " "
    for word, st, ed in _iter_numeric_tokens(txt):
        x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)
        # NOTE(review): 'None' looks like the helper's "no embedding"
        # sentinel - confirm against fincat_utils.
        if features[0] == 'None':
            continue
        prediction = lr_clf.predict(features.reshape(1, 768))
        highlight.append((word, ' In-claim' if prediction == 1 else 'Out-of-Claim'))
    if not highlight:
        # Nothing was classified: return the (padded) text with a 'None' label.
        highlight.append((txt, 'None'))
    return highlight