tiger / app.py
Andrew Stirn
very close
c2e1605
raw
history blame
5.99 kB
import os
import tempfile

import pandas as pd
import streamlit as st

import tiger
# Ways a user can supply transcripts, keyed by a short internal name. The
# values are the labels offered in the entry-method select box and are also
# what the session's `entry_method` value is compared against.
ENTRY_METHODS = {
    'manual': 'Manual entry of single transcript',
    'fasta': "Fasta file upload (supports multiple transcripts if they have unique ID's)",
}
# containers: one per page section, created in the order documentation, mode
# selection, transcript entry, runtime, results
(
    DOCUMENTATION,
    MODE_SELECTION,
    TRANSCRIPT_ENTRY,
    RUNTIME,
    RESULTS,
) = (st.container() for _ in range(5))
@st.cache_data
def convert_df(df):
    """Return ``df`` rendered as CSV text and encoded to UTF-8 bytes.

    The function is wrapped in ``st.cache_data`` so the conversion is cached
    instead of being recomputed on every rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
def mode_change_callback():
    """Keep the off-target checkbox in step with the selected run mode.

    When the 'all' run mode is selected the off-target option is switched off
    and its checkbox is disabled; for any other mode the checkbox is enabled
    and its current value is left alone.
    """
    state = st.session_state
    all_mode_selected = state.mode == tiger.RUN_MODES['all']
    if all_mode_selected:
        # clear the option before locking the checkbox
        state.check_off_targets = False
    state.disable_off_target_checkbox = all_mode_selected
def _load_uploaded_fasta(upload):
    """Parse an uploaded fasta file into transcripts via ``tiger.load_transcripts``.

    ``tiger.load_transcripts`` is given file paths, so the upload has to be
    written to disk first. It is written into a private temporary directory
    rather than the working directory: the file name is chosen by the client,
    so writing it next to the app could overwrite application files (e.g. an
    upload named ``app.py``) and would leave every upload behind on disk.

    Args:
        upload: the object stored under the ``fasta_entry`` session key. Only
            its ``name`` attribute and ``getvalue()`` method are used.

    Returns:
        Whatever ``tiger.load_transcripts`` returns for the written file.
    """
    # Keep only the final path component so a crafted name cannot point outside
    # the temporary directory. The original file name (and extension) is
    # otherwise preserved in case the loader inspects it -- TODO confirm.
    file_name = os.path.basename(upload.name)
    if file_name in ('', '.', '..'):
        file_name = 'upload.fasta'

    with tempfile.TemporaryDirectory() as tmp_dir:
        fasta_path = os.path.join(tmp_dir, file_name)
        # explicit encoding: the platform default may be unable to encode text
        # that was just decoded as UTF-8
        with open(fasta_path, 'w', encoding='utf-8') as f:
            f.write(upload.getvalue().decode('utf-8'))
        # parse while the directory still exists; it is deleted on exit
        return tiger.load_transcripts([fasta_path], enforce_unique_ids=False)


def run():
    """Collect the user's transcripts, validate them, and run the TIGER model.

    Reads ``entry_method``, ``manual_entry``, ``fasta_entry``, ``mode`` and
    ``check_off_targets`` from ``st.session_state``. The ``on_target`` and
    ``off_target`` session values are reset to ``None`` first and are only
    filled in when validation passes and at least one transcript was supplied.
    Validation problems are reported inside the ``TRANSCRIPT_ENTRY`` container.
    """
    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # initialize results
    st.session_state.on_target = st.session_state.off_target = None

    # manual entry
    if st.session_state.entry_method == ENTRY_METHODS['manual']:
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry]
        })

    # fasta file upload (nothing to load until a file has been provided)
    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
        if st.session_state.fasta_entry is not None:
            transcripts = _load_uploaded_fasta(st.session_state.fasta_entry)

    # make sure all transcripts have unique identifiers
    # NOTE(review): this inspects the index, so it presumably relies on
    # tiger.load_transcripts indexing by transcript ID -- confirm.
    if transcripts.index.has_duplicates:
        with TRANSCRIPT_ENTRY:
            st.write("Duplicate transcript ID's detected in fasta file")
        return

    # convert to upper case as used by tokenizer
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(str.upper)
    sequences = transcripts[tiger.SEQ_COL]

    # check that all transcripts only contain nucleotides A, C, G, T, and wildcard N
    if not all(sequences.apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
        with TRANSCRIPT_ENTRY:
            st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')

    # check that all transcripts satisfy length requirements
    elif any(sequences.apply(lambda s: len(s) < tiger.TARGET_LEN)):
        with TRANSCRIPT_ENTRY:
            st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))

    # run model if we have any transcripts
    elif len(transcripts) > 0:
        # the session stores the mode's display label; map it back to its key
        label_to_mode = {v: k for k, v in tiger.RUN_MODES.items()}
        st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
            transcripts=transcripts,
            mode=label_to_mode[st.session_state.mode],
            check_off_targets=st.session_state.check_off_targets
        )
if __name__ == '__main__':

    state = st.session_state

    # app initialization: seed session values that are not present yet
    if 'mode' not in state:
        state.mode = tiger.RUN_MODES['all']
        state.disable_off_target_checkbox = True
    for key, default in (
        ('entry_method', ENTRY_METHODS['manual']),
        ('run', False),
        ('on_target', None),
        ('off_target', None),
    ):
        if key not in state:
            state[key] = default

    # title and documentation
    with DOCUMENTATION:
        st.title('TIGER Cas13 Efficacy Prediction')

    # mode selection: run-mode radio on the left, off-target checkbox on the right
    with MODE_SELECTION:
        mode_col, off_target_col = st.columns([0.65, 0.35])
        with mode_col:
            st.radio(
                label='What do you want to predict?',
                options=tuple(tiger.RUN_MODES.values()),
                key='mode',
                on_change=mode_change_callback
            )
        with off_target_col:
            st.checkbox(
                label='Find off-target effects (slow)',
                key='check_off_targets',
                disabled=state.disable_off_target_checkbox
            )

    # transcript entry: the input widget shown depends on the chosen method
    with TRANSCRIPT_ENTRY:
        st.selectbox(
            label='How would you like to provide transcripts of interest?',
            options=ENTRY_METHODS.values(),
            key='entry_method',
        )
        chosen_method = state.entry_method
        if chosen_method == ENTRY_METHODS['manual']:
            st.text_input(
                label='Enter a target transcript:',
                key='manual_entry',
                placeholder='Upper or lower case',
            )
        elif chosen_method == ENTRY_METHODS['fasta']:
            st.file_uploader(
                label='Upload a fasta file:',
                key='fasta_entry',
            )

    # runtime: the button triggers `run` as a click callback
    with RUNTIME:
        st.button(label='Get predictions!', on_click=run)

    # results
    with RESULTS:

        # on-target results
        on_target = state.on_target
        if on_target is not None:
            st.write('On-target predictions:', on_target)
            st.download_button(
                label='Download on-target predictions',
                data=convert_df(on_target),
                file_name='on_target.csv',
                mime='text/csv'
            )

        # off-target results: nothing is shown while the value is still None
        off_target = state.off_target
        if off_target is not None and len(off_target) > 0:
            st.write('Off-target predictions:', off_target)
            st.download_button(
                label='Download off-target predictions',
                data=convert_df(off_target),
                file_name='off_target.csv',
                mime='text/csv'
            )
        elif off_target is not None:
            st.write('We did not find any off-target effects!')