ruiheesi
Added Application Files
12ac14b
import numpy as np
import ast
import csv
import pickle
import os
import json
def subset_and_unwrapp(
df,
subset_list,
target_column
):
subset_df = df.loc[:, subset_list].copy()
subset_df[target_column] = subset_df[target_column].apply(
lambda x: ast.literal_eval(x) if isinstance(
x,
str
) and x != 'nan' else np.nan)
subset_df = subset_df.explode(target_column)
subset_df = subset_df.reset_index(drop=True)
return subset_df
def parse_overview_table(df_in):
columns = df_in.columns.to_list()
# Find columns that have string values representing lists
columns_with_lists = []
for column in df_in.columns:
if df_in[column].apply(
lambda x: isinstance(
x,
str
) and x.startswith(
'['
) and x.endswith(
']')
).any():
columns_with_lists.append(column)
overview_dict = {}
table_structure = ""
for column in columns[1:]:
if column in columns_with_lists:
temp_df = subset_and_unwrapp(
df_in,
[columns[0], column],
column
)
else:
temp_df = df_in[[columns[0], column]]
temp_key = "ID" + column
overview_dict[temp_key] = temp_df
table_text = "{table:" + temp_key + ","
column_text = "columns:[sampleId," + column + "]};"
entry = table_text + column_text
table_structure += entry
print(overview_dict.keys())
return overview_dict, table_structure
def get_unique_value_list(sample_search_df,
key,
file_path,
run=False):
if run:
com_list = sample_search_df[key].unique()
com_par_list = []
for item in com_list:
if item is not np.nan:
python_list = ast.literal_eval(item)
com_par_list.extend(python_list)
else:
com_par_list.append("NULL")
com_par_list = list(set(com_par_list))
if key in com_par_list:
com_par_list.remove(key)
# Save unique values to CSV
with open(file_path, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerows([[value] for value in com_par_list])
print(f"Unique values for {key} saved to", file_path)
return com_par_list
else:
print(f"Skipeed for {key}")
com_par_list = []
with open(file_path, 'r') as file:
reader = csv.reader(file)
for row in reader:
com_par_list.append(row[0])
return com_par_list
def extract_unique_options(
sample_search_df,
data_path,
run=False):
compo_file_path = data_path + "\\composition_list.csv"
compositions = get_unique_value_list(
sample_search_df,
'composition',
compo_file_path,
run=run)
functions_file_path = data_path + "\\functions_list.csv"
functions = get_unique_value_list(
sample_search_df,
'functions',
functions_file_path,
run=run)
characterizations_file_path = data_path + "\\characterizations_list.csv"
characterizations = get_unique_value_list(
sample_search_df,
'characterizations',
characterizations_file_path,
run=run)
return {
"compositions": compositions,
"functions": functions,
"characterizations": characterizations
}
def load_sample_list(
output_data_dir,
read_sample,
n_to_read,
data_path
):
sample_list_all_path = data_path + "\\sample_list_all.pickle"
Json_list = os.listdir(output_data_dir)
if read_sample:
sample_list = []
for i in range(0, n_to_read):
file_path = os.path.join(
output_data_dir,
Json_list[i])
with open(file_path, 'r') as file:
sample_list.append(json.load(file))
with open(sample_list_all_path, 'wb') as file:
pickle.dump(sample_list, file)
else:
with open(sample_list_all_path, 'rb') as file:
sample_list = pickle.load(file)
return sample_list
def parse_dictionary(source_dictionary,
key,
new_dictionary):
for entry in source_dictionary:
new_dictionary[str(entry)] = source_dictionary[entry][key]
def parse_overview_raw_data(
sample_search_df,
overview_data_path,
re_parse_overview=False
):
overview_dict_path = overview_data_path + "\\overview_dict.pickle"
overview_table_path = overview_data_path + "\\overview_table.pickle"
if re_parse_overview:
overview_dict, table_structure = parse_overview_table(sample_search_df)
with open(overview_dict_path, 'wb') as file:
pickle.dump(overview_dict, file)
with open(overview_table_path, 'wb') as file:
pickle.dump(table_structure, file)
else:
with open(overview_dict_path, 'rb') as file:
overview_dict = pickle.load(file)
with open(overview_table_path, 'rb') as file:
table_structure = pickle.load(file)
return overview_dict, table_structure