Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 5,993 Bytes
5e69dea 89ffb34 89be9f9 5e69dea 97f8ff7 5e69dea 89be9f9 d77f54b f606ed7 7c87ab2 5e69dea d3204b1 5e69dea e43faaa 5e69dea e43faaa 5e69dea c2e1605 5e69dea e43faaa 1574649 5e69dea e43faaa 5e69dea 66b2911 e43faaa 66b2911 0178c63 66b2911 e43faaa 0178c63 c2e1605 e9d876c 66b2911 c2e1605 e43faaa c2e1605 1574649 5e69dea 227367c e43faaa 5e69dea e43faaa 5e69dea e43faaa 97f8ff7 e43faaa 5e69dea e43faaa 97f8ff7 e43faaa 97f8ff7 e43faaa 97f8ff7 e43faaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import tiger
import pandas as pd
import streamlit as st
# Human-readable labels for the supported ways of supplying transcripts.
# The keys are used for lookups in code; the values are shown in the
# entry-method select box and stored in ``st.session_state.entry_method``.
ENTRY_METHODS = {
    'manual': 'Manual entry of single transcript',
    'fasta': "Fasta file upload (supports multiple transcripts if they have unique ID's)",
}
# Layout containers, created once at module level in the order
# DOCUMENTATION, MODE_SELECTION, TRANSCRIPT_ENTRY, RUNTIME, RESULTS.
# NOTE(review): presumably this creation order fixes the top-to-bottom order
# of the page sections regardless of when each container is filled later --
# confirm against the Streamlit ``st.container`` documentation.
# Holds the page title.
DOCUMENTATION = st.container()
# Holds the prediction-mode radio buttons and the off-target checkbox.
MODE_SELECTION = st.container()
# Holds the entry-method select box and the transcript input widget; run()
# also writes its validation messages here.
TRANSCRIPT_ENTRY = st.container()
# Holds the button that triggers run().
RUNTIME = st.container()
# Holds the on-/off-target prediction tables and their download buttons.
RESULTS = st.container()
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV and return it as UTF-8 encoded bytes.

    The result is passed as the payload of the download buttons. The
    ``st.cache_data`` decorator caches the conversion so it is not recomputed
    on every script rerun.
    """
    csv_text = df.to_csv()
    csv_bytes = csv_text.encode('utf-8')
    return csv_bytes
def mode_change_callback():
    """React to a change of the prediction-mode radio button.

    When the selected mode is ``tiger.RUN_MODES['all']`` the off-target
    checkbox is unchecked and disabled; for every other mode it is enabled
    and its current value is left alone.
    """
    all_mode_selected = st.session_state.mode == tiger.RUN_MODES['all']

    # only force the checkbox off when it is about to be disabled
    if all_mode_selected:
        st.session_state.check_off_targets = False

    # the checkbox is disabled exactly when the 'all' mode is active
    st.session_state.disable_off_target_checkbox = all_mode_selected
def run():
    """Collect the user's transcripts, validate them, and run TIGER.

    Used as the ``on_click`` callback of the prediction button. Reads the
    entry method, the transcript input, the run mode and the off-target flag
    from ``st.session_state``. Previous results are always cleared first; on
    success the new results are stored in ``st.session_state.on_target`` and
    ``st.session_state.off_target``. Validation failures are reported inside
    the ``TRANSCRIPT_ENTRY`` container and leave both results as ``None``.

    Returns:
        None.
    """
    # function-scope imports: only this function needs them
    import os
    import tempfile

    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # initialize results
    st.session_state.on_target = st.session_state.off_target = None

    # manual entry
    if st.session_state.entry_method == ENTRY_METHODS['manual']:
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry],
        })

    # fasta file upload
    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
        uploaded = st.session_state.fasta_entry
        if uploaded is not None:
            # Write the upload into a private temporary directory instead of
            # the working directory: the client-supplied name can then neither
            # escape the directory nor collide with another session's upload,
            # and the file is removed as soon as it has been loaded.
            with tempfile.TemporaryDirectory() as tmp_dir:
                # keep the original base name so that any file-name based
                # handling inside tiger.load_transcripts sees the same name
                fasta_name = os.path.basename(uploaded.name) or 'transcripts.fasta'
                fasta_path = os.path.join(tmp_dir, fasta_name)
                # the text was decoded as UTF-8, so write it back as UTF-8
                # rather than with the platform default encoding
                with open(fasta_path, 'w', encoding='utf-8') as f:
                    f.write(uploaded.getvalue().decode('utf-8'))
                transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)

    # make sure all transcripts have unique identifiers
    if transcripts.index.has_duplicates:
        with TRANSCRIPT_ENTRY:
            st.write("Duplicate transcript ID's detected in fasta file")
        return

    # convert to upper case as used by tokenizer
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
    sequences = transcripts[tiger.SEQ_COL]
    allowed_tokens = tiger.NUCLEOTIDE_TOKENS.keys()

    # check that all transcripts only contain the tokens known to the tokenizer
    # NOTE(review): the original comment says the wildcard N is also accepted,
    # while the message below only lists A, C, G and T -- confirm which is intended.
    if not all(sequences.apply(lambda s: set(s).issubset(allowed_tokens))):
        with TRANSCRIPT_ENTRY:
            st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')

    # check that all transcripts satisfy length requirements
    elif any(sequences.apply(lambda s: len(s) < tiger.TARGET_LEN)):
        with TRANSCRIPT_ENTRY:
            st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))

    # run model if we have any transcripts
    elif len(transcripts) > 0:
        # session state stores the mode's display label; map it back to its key
        mode_key_by_label = {v: k for k, v in tiger.RUN_MODES.items()}
        st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
            transcripts=transcripts,
            mode=mode_key_by_label[st.session_state.mode],
            check_off_targets=st.session_state.check_off_targets,
        )
if __name__ == '__main__':
    # Script body: seeds the session state and then fills the layout
    # containers (title, mode selection, transcript entry, run button,
    # results) on every run of the script.

    # app initialization
    if 'mode' not in st.session_state:
        st.session_state.mode = tiger.RUN_MODES['all']
    if 'disable_off_target_checkbox' not in st.session_state:
        # derived from the mode so the flag is seeded even if 'mode' already
        # exists; on a fresh session this evaluates to True as before
        st.session_state.disable_off_target_checkbox = (
            st.session_state.mode == tiger.RUN_MODES['all']
        )
    if 'entry_method' not in st.session_state:
        st.session_state.entry_method = ENTRY_METHODS['manual']
    if 'run' not in st.session_state:
        st.session_state.run = False
    if 'on_target' not in st.session_state:
        st.session_state.on_target = None
    if 'off_target' not in st.session_state:
        st.session_state.off_target = None

    # title and documentation
    with DOCUMENTATION:
        st.title('TIGER Cas13 Efficacy Prediction')

    # mode selection
    with MODE_SELECTION:
        col1, col2 = st.columns([0.65, 0.35])
        with col1:
            st.radio(
                label='What do you want to predict?',
                options=tuple(tiger.RUN_MODES.values()),
                key='mode',
                on_change=mode_change_callback,
            )
        with col2:
            st.checkbox(
                label='Find off-target effects (slow)',
                key='check_off_targets',
                disabled=st.session_state.disable_off_target_checkbox,
            )

    # transcript entry
    with TRANSCRIPT_ENTRY:
        st.selectbox(
            label='How would you like to provide transcripts of interest?',
            # a tuple, for consistency with the radio options above
            options=tuple(ENTRY_METHODS.values()),
            key='entry_method',
        )
        # show only the input widget that matches the chosen entry method
        if st.session_state.entry_method == ENTRY_METHODS['manual']:
            st.text_input(
                label='Enter a target transcript:',
                key='manual_entry',
                placeholder='Upper or lower case',
            )
        elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
            st.file_uploader(
                label='Upload a fasta file:',
                key='fasta_entry',
            )

    # runtime
    with RUNTIME:
        st.button(label='Get predictions!', on_click=run)

    # results
    with RESULTS:

        # on-target results
        if st.session_state.on_target is not None:
            st.write('On-target predictions:', st.session_state.on_target)
            st.download_button(
                label='Download on-target predictions',
                data=convert_df(st.session_state.on_target),
                file_name='on_target.csv',
                mime='text/csv',
            )

        # off-target results: None means there is nothing to report, while an
        # empty result gets an explicit message
        if st.session_state.off_target is not None:
            if len(st.session_state.off_target) > 0:
                st.write('Off-target predictions:', st.session_state.off_target)
                st.download_button(
                    label='Download off-target predictions',
                    data=convert_df(st.session_state.off_target),
                    file_name='off_target.csv',
                    mime='text/csv',
                )
            else:
                st.write('We did not find any off-target effects!')