Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
10fe79e
1
Parent(s):
77b1fab
results on one page
Browse files
- app.py +71 -74
- run_domain2go_app.py +2 -2
app.py
CHANGED
|
@@ -22,15 +22,10 @@ st.markdown("""
|
|
| 22 |
|
| 23 |
|
| 24 |
|
| 25 |
-
domain_tab, pred_tab = st.tabs(['Domains', 'Function predictions'])
|
| 26 |
-
|
| 27 |
-
with domain_tab:
|
| 28 |
-
st.header('Domains in sequence')
|
| 29 |
-
|
| 30 |
with st.sidebar:
|
| 31 |
|
| 32 |
st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
|
| 33 |
-
st.write("[![
|
| 34 |
|
| 35 |
if 'example_seq_button' not in st.session_state:
|
| 36 |
st.session_state.example_seq_button = False
|
|
@@ -58,79 +53,81 @@ with st.sidebar:
|
|
| 58 |
|
| 59 |
st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
| 64 |
if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
|
| 65 |
-
|
| 66 |
st.session_state.disabled = True
|
| 67 |
else:
|
| 68 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
else:
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
st.download_button(
|
| 98 |
-
label="Download
|
| 99 |
-
data=
|
| 100 |
-
file_name=f"{st.session_state.name}
|
| 101 |
mime="text/csv",
|
| 102 |
)
|
| 103 |
|
| 104 |
-
with pred_tab:
|
| 105 |
-
st.header('Function predictions')
|
| 106 |
-
if 'domain_df' not in st.session_state:
|
| 107 |
-
if no_domains:
|
| 108 |
-
st.warning('No domains found. Please find domains in sequence first.')
|
| 109 |
-
elif error_in_interproscan:
|
| 110 |
-
st.error('Error in InterProScan. Please check InterProScan job id and response.')
|
| 111 |
-
else:
|
| 112 |
-
st.warning('Please find domains in sequence first.')
|
| 113 |
-
else:
|
| 114 |
-
with st.spinner('Generating function predictions...'):
|
| 115 |
-
cwd = os.getcwd()
|
| 116 |
-
# mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
|
| 117 |
-
mapping_path = './data'
|
| 118 |
-
pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
|
| 119 |
-
pred_result_text = pred_results[0]
|
| 120 |
-
if pred_result_text == 'Function predictions found.':
|
| 121 |
-
st.success(pred_result_text)
|
| 122 |
-
st.session_state['pred_df'] = pred_results[1]
|
| 123 |
-
elif pred_result_text == 'No function predictions found.':
|
| 124 |
-
st.warning(pred_result_text)
|
| 125 |
-
|
| 126 |
-
if 'pred_df' in st.session_state:
|
| 127 |
-
with st.expander('Show function predictions'):
|
| 128 |
-
st.write(st.session_state.pred_df)
|
| 129 |
-
pred_csv = convert_df(st.session_state.pred_df)
|
| 130 |
-
st.download_button(
|
| 131 |
-
label="Download function predictions as CSV",
|
| 132 |
-
data=pred_csv,
|
| 133 |
-
file_name=f"{st.session_state.name}_function_predictions.csv",
|
| 134 |
-
mime="text/csv",
|
| 135 |
-
)
|
| 136 |
-
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
with st.sidebar:
|
| 26 |
|
| 27 |
st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
|
| 28 |
+
st.write("[](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [](https://github.com/HUBioDataLab/Domain2GO)")
|
| 29 |
|
| 30 |
if 'example_seq_button' not in st.session_state:
|
| 31 |
st.session_state.example_seq_button = False
|
|
|
|
| 53 |
|
| 54 |
st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
|
| 55 |
|
| 56 |
+
# prevent user from clicking submit button if email or sequence is empty
|
| 57 |
+
submitted = False
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
with st.sidebar:
|
| 61 |
+
if st.button('Predict functions'):
|
| 62 |
if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
|
| 63 |
+
submitted = True
|
| 64 |
st.session_state.disabled = True
|
| 65 |
else:
|
| 66 |
+
with st.sidebar:
|
| 67 |
+
st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
|
| 68 |
+
|
| 69 |
+
if not submitted:
|
| 70 |
+
# on main page, write warning message if user has not submitted email and sequence
|
| 71 |
+
st.markdown("""
|
| 72 |
+
<div style="padding:30px">
|
| 73 |
+
<p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
|
| 74 |
+
</div>
|
| 75 |
+
""", unsafe_allow_html=True)
|
| 76 |
+
|
| 77 |
+
no_domains = False
|
| 78 |
+
error_in_interproscan = False
|
| 79 |
+
if submitted:
|
| 80 |
+
with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
|
| 81 |
+
result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)
|
| 82 |
+
result_text = result[0]
|
| 83 |
+
if result_text == 'Domains found.':
|
| 84 |
+
# st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
|
| 85 |
+
st.session_state['domain_df'] = result[1]
|
| 86 |
+
elif result_text == 'No domains found.':
|
| 87 |
+
st.warning(result_text)
|
| 88 |
+
no_domains = True
|
| 89 |
else:
|
| 90 |
+
st.error(result_text)
|
| 91 |
+
st.write(f'InterProScan job id: {result[1]}')
|
| 92 |
+
st.write(f'InterProScan job response: {result[2]}')
|
| 93 |
+
error_in_interproscan = True
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# if 'domain_df' in st.session_state:
|
| 97 |
+
# with st.expander('Show domains in sequence'):
|
| 98 |
+
# st.write(st.session_state.domain_df)
|
| 99 |
+
# domains_csv = convert_df(st.session_state.domain_df)
|
| 100 |
+
# st.download_button(
|
| 101 |
+
# label="Download domains in sequence as CSV",
|
| 102 |
+
# data=domains_csv,
|
| 103 |
+
# file_name=f"{st.session_state.name}_domains.csv",
|
| 104 |
+
# mime="text/csv",
|
| 105 |
+
# )
|
| 106 |
+
|
| 107 |
+
if 'domain_df' not in st.session_state:
|
| 108 |
+
if error_in_interproscan:
|
| 109 |
+
st.error('Error in InterProScan. Please check InterProScan job id and response.')
|
| 110 |
+
else:
|
| 111 |
+
with st.spinner('Generating function predictions...'):
|
| 112 |
+
cwd = os.getcwd()
|
| 113 |
+
# mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
|
| 114 |
+
mapping_path = './data'
|
| 115 |
+
pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
|
| 116 |
+
pred_result_text = pred_results[0]
|
| 117 |
+
if pred_result_text == 'Function predictions found.':
|
| 118 |
+
st.success('Function predictions generated.')
|
| 119 |
+
st.session_state['pred_df'] = pred_results[1]
|
| 120 |
+
elif pred_result_text == 'No predictions made for domains found in sequence.':
|
| 121 |
+
st.warning(pred_result_text)
|
| 122 |
+
|
| 123 |
+
if 'pred_df' in st.session_state:
|
| 124 |
+
with st.expander('Show function predictions'):
|
| 125 |
+
st.write(st.session_state.pred_df)
|
| 126 |
+
pred_csv = convert_df(st.session_state.pred_df)
|
| 127 |
st.download_button(
|
| 128 |
+
label="Download function predictions as CSV",
|
| 129 |
+
data=pred_csv,
|
| 130 |
+
file_name=f"{st.session_state.name}_function_predictions.csv",
|
| 131 |
mime="text/csv",
|
| 132 |
)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
run_domain2go_app.py
CHANGED
|
@@ -98,7 +98,7 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
| 98 |
domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
|
| 99 |
print('Domain2GO mappings loaded')
|
| 100 |
# merge domain2go mappings with domains found in protein sequence
|
| 101 |
-
merged_df = pd.merge(domains_df, domain2go_df, left_on='
|
| 102 |
|
| 103 |
print('Function predictions generated.')
|
| 104 |
|
|
@@ -109,7 +109,7 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
| 109 |
|
| 110 |
else:
|
| 111 |
merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
|
| 112 |
-
merged_df = merged_df[['protein_name', 'GO', '
|
| 113 |
merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
|
| 114 |
|
| 115 |
# save protein function predictions
|
|
|
|
| 98 |
domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
|
| 99 |
print('Domain2GO mappings loaded')
|
| 100 |
# merge domain2go mappings with domains found in protein sequence
|
| 101 |
+
merged_df = pd.merge(domains_df, domain2go_df, left_on='domain_accession', right_on='Interpro')
|
| 102 |
|
| 103 |
print('Function predictions generated.')
|
| 104 |
|
|
|
|
| 109 |
|
| 110 |
else:
|
| 111 |
merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
|
| 112 |
+
merged_df = merged_df[['protein_name', 'GO', 'domain_locations', 's', 'domain_accession', 'domain_name',]]
|
| 113 |
merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
|
| 114 |
|
| 115 |
# save protein function predictions
|