Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

Andrew Stirn committed on Feb 10, 2023

Commit

0450f78

1 Parent(s): 53c486e

memory leak plugged

Files changed (1) hide show

tiger.py CHANGED Viewed

@@ -168,7 +168,6 @@ def find_off_targets(top_guides: pd.DataFrame):
         # progress update
         print('\rPercent complete: {:.2f}%'.format(100 * min(i / len(reference_transcripts), 1)), end='')
     print('')
-    del reference_transcripts
     return off_targets
@@ -239,15 +238,20 @@ if __name__ == '__main__':
         df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
         # process in batches
-        batch = 1
         num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
         num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
-        for t in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
             print('Batch {:d} of {:d}'.format(batch, num_batches))
-            t_stop = min(t + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
-            df_on_target, df_off_target = tiger_exhibit(df_transcripts[t:t_stop])
             df_on_target.to_csv('on_target_{:d}.csv'.format(batch), index=False)
             df_off_target.to_csv('off_target_{:d}.csv'.format(batch), index=False)
-            del df_on_target, df_off_target
-            batch += 1

         # progress update
         print('\rPercent complete: {:.2f}%'.format(100 * min(i / len(reference_transcripts), 1)), end='')
     print('')
     return off_targets
         df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
         # process in batches
+        batch = 0
         num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
         num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
+        for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
+            batch += 1
             print('Batch {:d} of {:d}'.format(batch, num_batches))
+            # run batch
+            idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
+            df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop])
+            # save batch results
             df_on_target.to_csv('on_target_{:d}.csv'.format(batch), index=False)
             df_off_target.to_csv('off_target_{:d}.csv'.format(batch), index=False)
+            # clear session to prevent memory blow up
+            tf.keras.backend.clear_session()