Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Andrew Stirn
committed on
Commit
·
ecb653b
1
Parent(s):
9ccfeb4
simplified code
Browse files
tiger.py
CHANGED
|
@@ -307,49 +307,41 @@ if __name__ == '__main__':
|
|
| 307 |
parser = argparse.ArgumentParser()
|
| 308 |
parser.add_argument('--check_off_targets', action='store_true', default=False)
|
| 309 |
parser.add_argument('--fasta_path', type=str, default=None)
|
| 310 |
-
parser.add_argument('--simple_test', action='store_true', default=False)
|
| 311 |
args = parser.parse_args()
|
| 312 |
|
| 313 |
-
#
|
| 314 |
-
if
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
ID_COL: ['ManualEntry'],
|
| 318 |
SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
if args.check_off_targets:
|
| 323 |
-
df_off_target.to_csv('off_target.csv')
|
| 324 |
-
|
| 325 |
-
# directory of fasta files
|
| 326 |
-
elif args.fasta_path is not None and os.path.exists(args.fasta_path):
|
| 327 |
-
|
| 328 |
-
# check for any existing results
|
| 329 |
-
if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
|
| 330 |
-
raise FileExistsError('please rename or delete existing results')
|
| 331 |
-
|
| 332 |
-
# load transcripts
|
| 333 |
-
df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
|
| 334 |
|
| 335 |
-
#
|
| 336 |
-
|
| 337 |
-
num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
|
| 338 |
-
num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
|
| 339 |
-
for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
|
| 340 |
-
batch += 1
|
| 341 |
-
print('Batch {:d} of {:d}'.format(batch, num_batches))
|
| 342 |
-
|
| 343 |
-
# run batch
|
| 344 |
-
idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
|
| 345 |
-
df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
|
| 346 |
-
run_mode=RUN_MODE_TITRATION,
|
| 347 |
-
check_off_targets=args.check_off_targets)
|
| 348 |
-
|
| 349 |
-
# save batch results
|
| 350 |
-
df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
|
| 351 |
-
if args.check_off_targets:
|
| 352 |
-
df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
|
| 353 |
-
|
| 354 |
-
# clear session to prevent memory blow up
|
| 355 |
-
tf.keras.backend.clear_session()
|
|
|
|
| 307 |
parser = argparse.ArgumentParser()
|
| 308 |
parser.add_argument('--check_off_targets', action='store_true', default=False)
|
| 309 |
parser.add_argument('--fasta_path', type=str, default=None)
|
|
|
|
| 310 |
args = parser.parse_args()
|
| 311 |
|
| 312 |
+
# check for any existing results
|
| 313 |
+
if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
|
| 314 |
+
raise FileExistsError('please rename or delete existing results')
|
| 315 |
+
|
| 316 |
+
# load transcripts from a directory of fasta files
|
| 317 |
+
if args.fasta_path is not None and os.path.exists(args.fasta_path):
|
| 318 |
+
df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
|
| 319 |
+
|
| 320 |
+
# otherwise consider simple test case with first 50 nucleotides from EIF3B-003's CDS
|
| 321 |
+
else:
|
| 322 |
+
df_transcripts = pd.DataFrame({
|
| 323 |
ID_COL: ['ManualEntry'],
|
| 324 |
SEQ_COL: ['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']})
|
| 325 |
+
df_transcripts.set_index(ID_COL, inplace=True)
|
| 326 |
+
|
| 327 |
+
# process in batches
|
| 328 |
+
batch = 0
|
| 329 |
+
num_batches = len(df_transcripts) // BATCH_SIZE_TRANSCRIPTS
|
| 330 |
+
num_batches += (len(df_transcripts) % BATCH_SIZE_TRANSCRIPTS > 0)
|
| 331 |
+
for idx in range(0, len(df_transcripts), BATCH_SIZE_TRANSCRIPTS):
|
| 332 |
+
batch += 1
|
| 333 |
+
print('Batch {:d} of {:d}'.format(batch, num_batches))
|
| 334 |
+
|
| 335 |
+
# run batch
|
| 336 |
+
idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
|
| 337 |
+
df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
|
| 338 |
+
run_mode=RUN_MODE_TITRATION,
|
| 339 |
+
check_off_targets=args.check_off_targets)
|
| 340 |
+
|
| 341 |
+
# save batch results
|
| 342 |
+
df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
|
| 343 |
if args.check_off_targets:
|
| 344 |
+
df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
+
# clear session to prevent memory blow up
|
| 347 |
+
tf.keras.backend.clear_session()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|