Andrew Stirn committed on
Commit
5fc4e72
·
1 Parent(s): 2694c1b

Use gzipped FASTA file

Browse files
Files changed (1) hide show
  1. tiger.py +2 -1
tiger.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import numpy as np
3
  import pandas as pd
4
  import tensorflow as tf
@@ -85,7 +86,7 @@ def tiger_predict(transcript_seq: str):
85
 
86
 
87
  def find_off_targets(guides, batch_size=1000):
88
- with open('gencode.v19.pc_transcripts.fa', 'r') as file:
89
  df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
90
  df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
91
  df_transcripts.set_index('id', inplace=True)
 
1
  import os
2
+ import gzip
3
  import numpy as np
4
  import pandas as pd
5
  import tensorflow as tf
 
86
 
87
 
88
  def find_off_targets(guides, batch_size=1000):
89
+ with gzip.open(os.path.join('transcripts', 'gencode.v19.pc_transcripts.fa.gz'), 'rt') as file:
90
  df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
91
  df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
92
  df_transcripts.set_index('id', inplace=True)