Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| from huggingface_hub import Repository | |
| import os | |
| from pathlib import Path | |
| import json | |
| import numpy as np | |
# Configuration used to talk to the dataset repo.
# The access token is stored as a secret key-value pair in the environment
# under "space_to_dataset". When it is missing or empty we fall back to True,
# which is handed unchanged to Repository(use_auth_token=...).
# NOTE(review): presumably True makes huggingface_hub use the locally stored
# login token -- confirm against the installed huggingface_hub version.
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data'  # path to dataset repo
DATA_FILENAME = "paralist.json"  # name of the JSON data file inside the repo
DATA_FILE = os.path.join("data", DATA_FILENAME)  # location inside the local clone
# Legacy alias still used by the rest of the script; derived from
# DATA_FILENAME so the file name is declared in exactly one place.
file_name = DATA_FILENAME
def read_dataset():
    """Set up the local dataset checkout and load the annotation JSON.

    A ``Repository`` is created for the local ``data`` directory with
    ``clone_from=DATASET_REPO_URL`` and ``repo_type="dataset"``, using the
    module-level ``auth_token``. The JSON file named by ``file_name`` is then
    read from that directory as UTF-8.

    Returns:
        A ``(repo, paraList)`` tuple: the ``Repository`` handle and the
        object parsed from the JSON file.
    """
    dataset_repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    json_path = 'data/{}'.format(file_name)
    with open(json_path, 'r', encoding="utf8") as handle:
        parsed = json.load(handle)
    return dataset_repo, parsed
# ---------------------------------------------------------------------------
# Annotation UI.
#
# Streamlit re-executes this whole script on every interaction, including the
# click on "Submit". The sample shown to the user is therefore kept in
# st.session_state; otherwise the rerun triggered by the submit would draw a
# new random sample and the rating would be stored against a text/tag pair
# the user never saw.
# ---------------------------------------------------------------------------
st.sidebar.markdown("""
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
""")

_SAMPLE_KEY = "annotation_sample"  # session_state slot holding the sample on screen
_SEGMENTS_PER_TAG = 3  # only the first three segments of a tag are sampled


def _pick_sample(topic_name, topic_data):
    """Draw a random sample to annotate from one topic.

    Args:
        topic_name: Outer-level key of the dataset the sample belongs to.
        topic_data: Mapping of tag -> list of segment dicts for that topic.

    Returns:
        A dict with keys ``topic``, ``tag`` and ``idx``, or ``None`` when the
        topic has no tag with at least one segment.
    """
    tags = [tag for tag, segments in topic_data.items() if segments]
    if not tags:
        return None
    # randint's upper bound is exclusive, so len(tags) makes every tag
    # (including the last one) selectable and works for a single tag.
    tag = tags[np.random.randint(0, len(tags))]
    # Clamp to the number of segments actually present to avoid IndexError.
    upper = min(_SEGMENTS_PER_TAG, len(topic_data[tag]))
    return {"topic": topic_name, "tag": tag, "idx": int(np.random.randint(0, upper))}


def _sample_is_current(sample, topic_name, topic_data):
    """Return True if the stored sample still points into the selected topic."""
    return (
        sample is not None
        and sample["topic"] == topic_name
        and sample["tag"] in topic_data
        and sample["idx"] < len(topic_data[sample["tag"]])
    )


# Sidebar drop-down to select from the outer-level keys of the JSON.
topic = None
repo, paraList = read_dataset()
keys = list(paraList.keys())
if keys:
    topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)

if topic is None:
    st.warning("The dataset contains no topics to annotate.")
else:
    topic_data = paraList[topic]

    # Reuse the sample from the previous run unless the topic changed or the
    # stored position no longer exists in the freshly loaded data.
    sample = st.session_state[_SAMPLE_KEY] if _SAMPLE_KEY in st.session_state else None
    if not _sample_is_current(sample, topic, topic_data):
        sample = _pick_sample(topic, topic_data)
        st.session_state[_SAMPLE_KEY] = sample

    if sample is None:
        st.warning("The selected topic contains no text segments to annotate.")
    else:
        with st.form("annotation_form"):
            # Placeholders are filled after the submit has been handled, so
            # the page shows the *next* sample right after a rating is saved.
            st.markdown("**Text**")
            text_slot = st.empty()
            st.markdown("**Tag**")
            tag_slot = st.empty()
            feedback = st.selectbox('0 If Tag is not a good keyword for text, 5 for prefect match',(0,1,2,3,4,5))
            submitted = st.form_submit_button("Submit")

            if submitted:
                # Store the rating against the sample the user actually saw.
                rated = topic_data[sample["tag"]][sample["idx"]]
                # Create the list on first use instead of raising KeyError.
                rated.setdefault('annotation', []).append(feedback)
                with open("data/{}".format(file_name), "w", encoding="utf8") as outfile:
                    json.dump(paraList, outfile)
                repo.push_to_hub('added new annotation')

                # Move on to a new sample for the next rating.
                sample = _pick_sample(topic, topic_data)
                st.session_state[_SAMPLE_KEY] = sample

            text_slot.write(topic_data[sample["tag"]][sample["idx"]]['textsegment'])
            tag_slot.write(sample["tag"])
| # st.write(type(paraList)) | |
| #c1, c2, c3 = st.columns([3, 1, 1]) | |
| #with c1: | |
| # st.header('Text') | |
| # st.write(paraList[topic][tag][idx]['textsegment']) | |
| #with c2: | |
| # st.header('Tag') | |
| # st.text(tag) | |
| #with c3: | |
| # st.header('Feedback') | |
| # feedback = None | |
| # feedback = st.selectbox('0 If Tag is not a good keyword for text, 5 for prefect match',(0,1,2,3,4,5)) | |
| #if feedback: | |
| # st.write(feedback) | |
| # if st.button('Submit'): | |
| # paraList[topic][choice][idx]['annotation'].append(feedback) | |
| # with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file: | |
| # json.dump(paraList,json_file, ensure_ascii = True) | |
| # repo.push_to_hub('added new annotation') | |
| #st.write(paraList) | |
| #new_row = title | |
| # data = data.append(new_row, ignore_index=True) | |
| # st.write(data) | |
| # st.write(os.getcwd()) | |
| # data.to_csv('test.csv', index= False) | |
| #st.write(df) | |
| # st.write('data/test.csv') | |
| # iterate over files in | |
| # that directory | |
| #directory = os.getcwd() | |
| #files = Path(directory).glob('*') | |
| #for file in files: | |
| # st.write(file) | |
| #with open(DATA_FILE, "a") as csvfile: | |
| # writer = csv.DictWriter(csvfile, fieldnames=["Sentences"]) | |
| # writer.writerow({'Sentences': new_row}) | |
| # repo.push_to_hub('adding new line') | |
| # st.write('Succcess') | |