Andrew Stirn committed on
Commit
ecb653b
·
1 Parent(s): 9ccfeb4

simplified code

Browse files
Files changed (1) hide show
  1. tiger.py +32 -40
tiger.py CHANGED
@@ -307,49 +307,41 @@ if __name__ == '__main__':
307
  parser = argparse.ArgumentParser()
308
  parser.add_argument('--check_off_targets', action='store_true', default=False)
309
  parser.add_argument('--fasta_path', type=str, default=None)
310
- parser.add_argument('--simple_test', action='store_true', default=False)
311
  args = parser.parse_args()
312
 
313
- # simple test case
314
- if args.simple_test:
315
- # first 50 from EIF3B-003's CDS
316
- simple_test = pd.DataFrame({
 
 
 
 
 
 
 
317
  ID_COL: ['ManualEntry'],
318
  SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
319
- simple_test.set_index(ID_COL, inplace=True)
320
- df_on_target, df_off_target = tiger_exhibit(simple_test, check_off_targets=args.off_target)
321
- df_on_target.to_csv('on_target.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  if args.check_off_targets:
323
- df_off_target.to_csv('off_target.csv')
324
-
325
- # directory of fasta files
326
- elif args.fasta_path is not None and os.path.exists(args.fasta_path):
327
-
328
- # check for any existing results
329
- if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
330
- raise FileExistsError('please rename or delete existing results')
331
-
332
- # load transcripts
333
- df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
334
 
335
- # process in batches
336
- batch = 0
337
- num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
338
- num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
339
- for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
340
- batch += 1
341
- print('Batch {:d} of {:d}'.format(batch, num_batches))
342
-
343
- # run batch
344
- idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
345
- df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
346
- run_mode=RUN_MODE_TITRATION,
347
- check_off_targets=args.check_off_targets)
348
-
349
- # save batch results
350
- df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
351
- if args.check_off_targets:
352
- df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
353
-
354
- # clear session to prevent memory blow up
355
- tf.keras.backend.clear_session()
 
307
  parser = argparse.ArgumentParser()
308
  parser.add_argument('--check_off_targets', action='store_true', default=False)
309
  parser.add_argument('--fasta_path', type=str, default=None)
 
310
  args = parser.parse_args()
311
 
312
+ # check for any existing results
313
+ if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
314
+ raise FileExistsError('please rename or delete existing results')
315
+
316
+ # load transcripts from a directory of fasta files
317
+ if args.fasta_path is not None and os.path.exists(args.fasta_path):
318
+ df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
319
+
320
+ # otherwise consider simple test case with first 50 nucleotides from EIF3B-003's CDS
321
+ else:
322
+ df_transcripts = pd.DataFrame({
323
  ID_COL: ['ManualEntry'],
324
  SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
325
+ df_transcripts.set_index(ID_COL, inplace=True)
326
+
327
+ # process in batches
328
+ batch = 0
329
+ num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
330
+ num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
331
+ for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
332
+ batch += 1
333
+ print('Batch {:d} of {:d}'.format(batch, num_batches))
334
+
335
+ # run batch
336
+ idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
337
+ df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
338
+ run_mode=RUN_MODE_TITRATION,
339
+ check_off_targets=args.check_off_targets)
340
+
341
+ # save batch results
342
+ df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
343
  if args.check_off_targets:
344
+ df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
 
 
 
 
 
 
 
 
 
 
345
 
346
+ # clear session to prevent memory blow up
347
+ tf.keras.backend.clear_session()