# import all packages
import csv
import io
import json
import os
import pickle
import re
import time

import nltk
import numpy as np
import pandas as pd
import spacy
import streamlit as st
import torch
from huggingface_hub import HfApi, HfFileSystem
from PyPDF2 import PdfReader
# tokenizers, sequence classification and sequence tagging models
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DistilBertForTokenClassification,
    DistilBertTokenizerFast,
    pipeline,
)

# Only needed if the disabled Keras stakeholder classifier further below is re-enabled
# import tensorflow as tf
# from tensorflow.keras.models import model_from_json
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# from tensorflow.keras.preprocessing.text import Tokenizer

nltk.download("punkt")

# spaCy pipeline (the large model is commented out in favour of the small one)
# import en_core_web_lg
# nlp = en_core_web_lg.load()
nlp = spacy.load('en_core_web_sm')
MAX_SEQUENCE_LENGTH = 500

# Hugging Face dataset repo that stores the files consumed by the visualization Space
DATASET_REPO_URL = "https://huggingface.co/datasets/Seetha/visual_files"
DATA_FILENAME = "detailedResults.json"
DATA_FILENAME1 = "level2.json"
HF_TOKEN = os.environ.get("HF_TOKEN")
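# Pipeline overview:
#   1. Read the uploaded PDF and split it into sentences.
#   2. Classify each sentence as causal / non-causal (fine-tuned BERT).
#   3. Tag cause / effect / connective spans in the causal sentences (DistilBERT token classifier).
#   4. Map each cause/effect span to a stakeholder category (pickled classifier + keyword rules).
#   5. Aggregate cause-to-effect counts per category and push the JSON files that drive
#      the knowledge-graph visualization to the Seetha/visual_files dataset repo.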
def main():
    st.title("Text to Causal Knowledge Graph")
    st.sidebar.title("Please upload your text documents in one file here:")

    text_list = []
    causal_sents = []

    uploaded_file = None
    try:
        uploaded_file = st.sidebar.file_uploader("Choose a file", type="pdf")
    except Exception:
        # Fall back to the bundled sample document (pass the path so PdfReader can open it below)
        uploaded_file = 'sample_anno.pdf'
        st.error("Please upload your own PDF to be analyzed")

    if uploaded_file is not None:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            text = page.extract_text()
            text_list.append(text)
    else:
        st.error("Please upload your own PDF to be analyzed")
        st.stop()
    # Join the extracted pages into one string, normalise line breaks and strip double quotes
    text_list_final = ' '.join(x.replace('\n', ' ') for x in text_list if x)
    text_list_final = re.sub('"', '', text_list_final)

    sentences = nltk.sent_tokenize(text_list_final)
    result = []
    for i in sentences:
        result1 = i.lower()
        result2 = re.sub(r'[^\w\s]', '', result1)  # drop punctuation
        result.append(result2)
    # Step 2: causal sentence classification with a fine-tuned BERT checkpoint
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model_path = "checkpoint-2850"
    model = AutoModelForSequenceClassification.from_pretrained(model_path, id2label={0: 'non-causal', 1: 'causal'})
    pipe1 = pipeline("text-classification", model=model, tokenizer=tokenizer)

    for sent in result:
        pred = pipe1(sent)
        for lab in pred:
            if lab['label'] == 'causal':
                causal_sents.append(sent)
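    # Note: "checkpoint-2850" is assumed to be a directory shipped with this Space that
    # holds the BERT sequence-classification checkpoint fine-tuned for the causal /
    # non-causal task; the app will fail at start-up if it is missing.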
    # Step 3: tag cause / effect / connective spans with a fine-tuned DistilBERT tagger
    model_name = "distilbert-base-cased"
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
    model_path1 = "DistilBertforTokenclassification"
    model = DistilBertForTokenClassification.from_pretrained(model_path1, low_cpu_mem_usage=True)
    pipe = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy='simple')
    st.write('DistilBERT loaded')

    sentence_pred = []
    class_list = []
    entity_list = []
    for sent in causal_sents:
        pred = pipe(sent)
        for i in pred:
            sentence_pred.append(sent)
            class_list.append(i['word'])
            entity_list.append(i['entity_group'])
    # Step 4: stakeholder taxonomy for each extracted cause/effect span
    filename = 'Checkpoint-classification.sav'
    with open(filename, 'rb') as f:
        loaded_model = pickle.load(f)
    with open('vectorizefile_classification.pickle', 'rb') as f:
        loaded_vectorizer = pickle.load(f)
    pipeline_test_output = loaded_vectorizer.transform(class_list)
    predicted = loaded_model.predict(pipeline_test_output)
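    # Assumption: the two pickles are a scikit-learn text vectorizer and classifier
    # trained offline, so predict() returns integer labels matching list_pred below.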
    # Alternative (disabled): Keras model for the stakeholder taxonomy
    # tokenizer = Tokenizer(num_words=100000)
    # tokenizer.fit_on_texts(class_list)
    # word_index = tokenizer.word_index
    # text_embedding = np.zeros((len(word_index) + 1, 300))
    # for word, i in word_index.items():
    #     text_embedding[i] = nlp(word).vector
    # with open('model.json', 'r') as json_file:
    #     loaded_model = model_from_json(json_file.read())
    # loaded_model.load_weights("model.h5")
    # loaded_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
    #                      optimizer=tf.keras.optimizers.Adam(1e-4))
    # predictions = loaded_model.predict(
    #     pad_sequences(tokenizer.texts_to_sequences(class_list), maxlen=MAX_SEQUENCE_LENGTH))
    # predicted = np.argmax(predictions, axis=1)
    # Level-1 label: performance vs. non-performance (class 3 is 'Non-performance')
    level0 = []
    for i in predicted:
        if i == 3:
            level0.append('Non-Performance')
        else:
            level0.append('Performance')

    # Level-2 label: stakeholder category predicted by the classifier ...
    list_pred = {0: 'Customers', 1: 'Employees', 2: 'Investors', 3: 'Non-performance', 4: 'Society', 5: 'Unclassified'}
    pred_val = [list_pred[i] for i in predicted]

    # ... overridden by simple keyword rules where they clearly apply
    for ind, sent in enumerate(class_list):
        if 'customers' in sent or 'client' in sent or 'consumer' in sent or 'user' in sent:
            pred_val[ind] = 'Customers'
        elif 'investor' in sent or 'finance' in sent or 'shareholder' in sent or 'stockholder' in sent or 'owners' in sent:
            pred_val[ind] = 'Investors'
        elif 'employee' in sent or 'worker' in sent or 'staff' in sent:
            pred_val[ind] = 'Employees'
        elif 'society' in sent or 'societal' in sent or 'social responsib' in sent or 'social performance' in sent or 'community' in sent:
            pred_val[ind] = 'Society'
    # Build one row per tagged span; Id groups the spans that belong to the same sentence
    sent_id, unique = pd.factorize(sentence_pred)
    final_list = pd.DataFrame(
        {'Id': sent_id,
         'Fullsentence': sentence_pred,
         'Component': class_list,
         'causeOrEffect': entity_list,
         'Labellevel1': level0,
         'Labellevel2': pred_val
         })

    # Merge WordPiece continuation rows ("##...") into the preceding component, then drop them
    s = final_list['Component'].shift(-1)
    m = s.str.startswith('##', na=False)
    final_list.loc[m, 'Component'] += (' ' + s[m])
    final_list1 = final_list[~final_list['Component'].astype(str).str.startswith('##')]
    # Optional filter (disabled): drop sentences that have no effect ('E') span
    # li = []
    # for i in final_list1['Id'].unique():
    #     df_new = final_list1[final_list1['Id'] == i]
    #     if 'E' not in df_new.values:
    #         li.append(df_new['Id'].unique())
    # out = np.concatenate(li).ravel()
    # li_pan = pd.DataFrame(out, columns=['Id'])
    # df3 = pd.merge(final_list1, li_pan[['Id']], on='Id', how='left', indicator=True) \
    #     .query("_merge == 'left_only'") \
    #     .drop("_merge", axis=1)
    df3 = final_list1
    # Rename entity tags, drop connectives (CT) and clean up the merged components
    df3["causeOrEffect"].replace({"C": "cause", "E": "effect"}, inplace=True)
    df_final = df3[df3['causeOrEffect'] != 'CT'].copy()
    # Re-attach WordPiece sub-tokens ("launch ##ing" -> "launching")
    df_final['Component'] = df_final['Component'].replace(r'\s*##', '', regex=True)
    # Keep only multi-word components
    df_final1 = df_final[df_final['Component'].str.split().str.len().gt(1)]
    # Step 5: aggregate cause -> effect co-occurrence counts per stakeholder category.
    # For each sentence, when a cause of category X and an effect of category Y are both
    # present, the larger of the two span counts is added to cell (X, Y).
    categories = ['Non-performance', 'Investors', 'Customers', 'Employees', 'Society']
    df_tab = pd.DataFrame(0, index=categories, columns=categories)

    for i in df_final['Id'].unique():
        j = df_final.loc[df_final['Id'] == i]
        cause_tab = j.loc[j['causeOrEffect'] == 'cause']
        effect_tab = j.loc[j['causeOrEffect'] == 'effect']
        for cause_cat in categories:
            cause_count = (cause_tab.Labellevel2 == cause_cat).sum()
            if cause_count == 0:
                continue
            for effect_cat in categories:
                effect_count = (effect_tab.Labellevel2 == effect_cat).sum()
                if effect_count > 0:
                    df_tab.loc[cause_cat, effect_cat] += max(cause_count, effect_count)
    # Write both tables to an in-memory Excel workbook for the download button
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        df_tab.to_excel(writer, sheet_name="count_result", index=False)
        df_final1.to_excel(writer, sheet_name="Detailed_results", index=False)
    # Convert the count matrix to a list of {source, target, value} edges
    json_data = []
    for row in df_tab.index:
        for col in df_tab.columns:
            json_data.append({
                'source': row,
                'target': col,
                'value': int(df_tab.loc[row, col])
            })
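    # These edges (and the detailed results written below) are what the separate
    # Seetha/visual-knowledgegraph Space linked further down is expected to read
    # from the Seetha/visual_files dataset repo to render the causal knowledge graph.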
    # Replace level2.json in the dataset repo with the new edge list
    HfApi().delete_file(path_in_repo=DATA_FILENAME1, repo_id='Seetha/visual_files', token=HF_TOKEN, repo_type='dataset')
    fs = HfFileSystem(token=HF_TOKEN)
    with fs.open('datasets/Seetha/visual_files/level2.json', 'w') as f:
        json.dump(json_data, f)
    # Replace detailedResults.json with the row-level predictions
    df_final1.to_csv('predictions.csv')
    csv_file = "predictions.csv"
    with open(csv_file, "r") as f:
        data_list = [dict(row) for row in csv.DictReader(f)]
    json_data = json.dumps(data_list)

    HfApi().delete_file(path_in_repo=DATA_FILENAME, repo_id='Seetha/visual_files', token=HF_TOKEN, repo_type='dataset')
    with fs.open('datasets/Seetha/visual_files/detailedResults.json', 'w') as fi:
        fi.write(json_data)
    def convert_df(df):
        # IMPORTANT: cache the conversion to prevent computation on every rerun
        return df.to_csv().encode('utf-8')

    csv1 = convert_df(df_final1.astype(str))
    csv2 = convert_df(df_tab.astype(str))

    with st.container():
        st.download_button(label="Download the result table", data=buffer,
                           file_name="t2cg_outputs.xlsx", mime="application/vnd.ms-excel")
        st.markdown('<a href="https://huggingface.co/spaces/Seetha/visual-knowledgegraph" target="_blank">Click this link in a separate tab to view knowledge graph</a>', unsafe_allow_html=True)
        # st.download_button(label="Download the detailed result table_csv", data=csv1, file_name='results.csv', mime='text/csv')
        # st.download_button(label="Download the result table_csv", data=csv2, file_name='final_data.csv', mime='text/csv')
if __name__ == '__main__':
    start_time = time.time()
    main()