Andrew Stirn committed on
Commit
a2591b6
·
1 Parent(s): 27992d2

ability to run locally

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. tiger.py +14 -8
app.py CHANGED
@@ -78,7 +78,7 @@ else:
78
  # valid input
79
  if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
80
  on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
81
- status_bar, status_text, option)
82
  on_target.rename(columns={"Guide":"23 nt guide sequence"}, inplace=True)
83
  if len(on_target)>0:
84
  if on_target.iloc[0]["On-target ID"] == 0:
 
78
  # valid input
79
  if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
80
  on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
81
+ status_bar, status_text, check_off_targets=option == 'On and Off Target')
82
  on_target.rename(columns={"Guide":"23 nt guide sequence"}, inplace=True)
83
  if len(on_target)>0:
84
  if on_target.iloc[0]["On-target ID"] == 0:
tiger.py CHANGED
@@ -24,6 +24,7 @@ for gpu in tf.config.list_physical_devices('GPU'):
24
  if len(tf.config.list_physical_devices('GPU')) > 0:
25
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
26
 
 
27
  def load_transcripts(fasta_files):
28
 
29
  # load all transcripts from fasta files into a DataFrame
@@ -94,7 +95,6 @@ def process_data(transcript_seq: str):
94
  tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
95
  tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
96
  ], axis=-1)
97
- print(model_inputs)
98
  return target_seq, guide_seq, model_inputs
99
 
100
 
@@ -188,7 +188,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
188
  return off_targets.sort_values('Normalized LFC')
189
 
190
 
191
- def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None, option=''):
192
 
193
  # load model
194
  if os.path.exists('model'):
@@ -214,13 +214,13 @@ def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None,
214
 
215
  # predict off-target effects for top guides
216
  off_target_predictions = pd.DataFrame()
217
- if option=='On and Off Target':
218
  off_targets = find_off_targets(on_target_predictions, status_bar, status_text)
219
  off_target_predictions = predict_off_target(off_targets, model=tiger)
220
 
221
  # reverse guide sequences
222
  on_target_predictions['Guide'] = on_target_predictions['Guide'].apply(lambda s: s[::-1])
223
- if option=='On and Off Target' and len(off_target_predictions) > 0:
224
  off_target_predictions['Guide'] = off_target_predictions['Guide'].apply(lambda s: s[::-1])
225
 
226
  return on_target_predictions.reset_index(drop=True), off_target_predictions.reset_index(drop=True)
@@ -230,6 +230,7 @@ if __name__ == '__main__':
230
 
231
  # common arguments
232
  parser = argparse.ArgumentParser()
 
233
  parser.add_argument('--fasta_path', type=str, default=None)
234
  parser.add_argument('--simple_test', action='store_true', default=False)
235
  args = parser.parse_args()
@@ -239,13 +240,17 @@ if __name__ == '__main__':
239
  # first 50 from EIF3B-003's CDS
240
  simple_test = pd.DataFrame(dict(id=['ManualEntry'], seq=['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']))
241
  simple_test.set_index('id', inplace=True)
242
- df_on_target, df_off_target = tiger_exhibit(simple_test)
243
  df_on_target.to_csv('on_target.csv')
244
  df_off_target.to_csv('off_target.csv')
245
 
246
  # directory of fasta files
247
  elif args.fasta_path is not None and os.path.exists(args.fasta_path):
248
 
 
 
 
 
249
  # load transcripts
250
  df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
251
 
@@ -259,11 +264,12 @@ if __name__ == '__main__':
259
 
260
  # run batch
261
  idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
262
- df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop])
263
 
264
  # save batch results
265
- df_on_target.to_csv('on_target_{:d}.csv'.format(batch), index=False)
266
- df_off_target.to_csv('off_target_{:d}.csv'.format(batch), index=False)
 
267
 
268
  # clear session to prevent memory blow up
269
  tf.keras.backend.clear_session()
 
24
  if len(tf.config.list_physical_devices('GPU')) > 0:
25
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
26
 
27
+
28
  def load_transcripts(fasta_files):
29
 
30
  # load all transcripts from fasta files into a DataFrame
 
95
  tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
96
  tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
97
  ], axis=-1)
 
98
  return target_seq, guide_seq, model_inputs
99
 
100
 
 
188
  return off_targets.sort_values('Normalized LFC')
189
 
190
 
191
+ def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None, check_off_targets=False):
192
 
193
  # load model
194
  if os.path.exists('model'):
 
214
 
215
  # predict off-target effects for top guides
216
  off_target_predictions = pd.DataFrame()
217
+ if check_off_targets:
218
  off_targets = find_off_targets(on_target_predictions, status_bar, status_text)
219
  off_target_predictions = predict_off_target(off_targets, model=tiger)
220
 
221
  # reverse guide sequences
222
  on_target_predictions['Guide'] = on_target_predictions['Guide'].apply(lambda s: s[::-1])
223
+ if check_off_targets and len(off_target_predictions) > 0:
224
  off_target_predictions['Guide'] = off_target_predictions['Guide'].apply(lambda s: s[::-1])
225
 
226
  return on_target_predictions.reset_index(drop=True), off_target_predictions.reset_index(drop=True)
 
230
 
231
  # common arguments
232
  parser = argparse.ArgumentParser()
233
+ parser.add_argument('--check_off_targets', action='store_true', default=False)
234
  parser.add_argument('--fasta_path', type=str, default=None)
235
  parser.add_argument('--simple_test', action='store_true', default=False)
236
  args = parser.parse_args()
 
240
  # first 50 from EIF3B-003's CDS
241
  simple_test = pd.DataFrame(dict(id=['ManualEntry'], seq=['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']))
242
  simple_test.set_index('id', inplace=True)
243
+ df_on_target, df_off_target = tiger_exhibit(simple_test, check_off_targets=args.check_off_targets)
244
  df_on_target.to_csv('on_target.csv')
245
  df_off_target.to_csv('off_target.csv')
246
 
247
  # directory of fasta files
248
  elif args.fasta_path is not None and os.path.exists(args.fasta_path):
249
 
250
+ # check for any existing results
251
+ if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
252
+ raise FileExistsError('please rename or delete existing results')
253
+
254
  # load transcripts
255
  df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
256
 
 
264
 
265
  # run batch
266
  idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
267
+ df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop], check_off_targets=args.check_off_targets)
268
 
269
  # save batch results
270
+ df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
271
+ if args.check_off_targets:
272
+ df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
273
 
274
  # clear session to prevent memory blow up
275
  tf.keras.backend.clear_session()