Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

App Files Community

Andrew Stirn committed on Jul 4, 2023

Commit

ecb653b

1 Parent(s): 9ccfeb4

simplified code

Browse files

Files changed (1) hide show

tiger.py +32 -40

tiger.py CHANGED Viewed

@@ -307,49 +307,41 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--check_off_targets', action='store_true', default=False)
     parser.add_argument('--fasta_path', type=str, default=None)
-    parser.add_argument('--simple_test', action='store_true', default=False)
     args = parser.parse_args()
-    # simple test case
-    if args.simple_test:
-        # first 50 from EIF3B-003's CDS
-        simple_test = pd.DataFrame({
             ID_COL: ['ManualEntry'],
             SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
-        simple_test.set_index(ID_COL, inplace=True)
-        df_on_target, df_off_target = tiger_exhibit(simple_test, check_off_targets=args.off_target)
-        df_on_target.to_csv('on_target.csv')
         if args.check_off_targets:
-            df_off_target.to_csv('off_target.csv')
-    # directory of fasta files
-    elif args.fasta_path is not None and os.path.exists(args.fasta_path):
-        # check for any existing results
-        if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
-            raise FileExistsError('please rename or delete existing results')
-        # load transcripts
-        df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
-        # process in batches
-        batch = 0
-        num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
-        num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
-        for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
-            batch += 1
-            print('Batch {:d} of {:d}'.format(batch, num_batches))
-            # run batch
-            idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
-            df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
-                                                        run_mode=RUN_MODE_TITRATION,
-                                                        check_off_targets=args.check_off_targets)
-            # save batch results
-            df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
-            if args.check_off_targets:
-                df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
-            # clear session to prevent memory blow up
-            tf.keras.backend.clear_session()

     parser = argparse.ArgumentParser()
     parser.add_argument('--check_off_targets', action='store_true', default=False)
     parser.add_argument('--fasta_path', type=str, default=None)
     args = parser.parse_args()
+    # check for any existing results
+    if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
+        raise FileExistsError('please rename or delete existing results')
+    # load transcripts from a directory of fasta files
+    if args.fasta_path is not None and os.path.exists(args.fasta_path):
+        df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
+    # otherwise consider simple test case with first 50 nucleotides from EIF3B-003's CDS
+    else:
+        df_transcripts = pd.DataFrame({
             ID_COL: ['ManualEntry'],
             SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
+        df_transcripts.set_index(ID_COL, inplace=True)
+    # process in batches
+    batch = 0
+    num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
+    num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
+    for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
+        batch += 1
+        print('Batch {:d} of {:d}'.format(batch, num_batches))
+        # run batch
+        idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
+        df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
+                                                    run_mode=RUN_MODE_TITRATION,
+                                                    check_off_targets=args.check_off_targets)
+        # save batch results
+        df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
         if args.check_off_targets:
+            df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
+        # clear session to prevent memory blow up
+        tf.keras.backend.clear_session()