Andrew Stirn committed on
Commit
de06d10
·
1 Parent(s): 2bc5b93

off-target predictions

Browse files
Files changed (1) hide show
  1. tiger.py +26 -11
tiger.py CHANGED
@@ -65,20 +65,13 @@ def process_data(transcript_seq: str):
65
  return target_seq, guide_seq, model_inputs
66
 
67
 
68
- def predict_on_target(transcript_seq: str):
69
-
70
- # load model
71
- if os.path.exists('model'):
72
- tiger = tf.keras.models.load_model('model')
73
- else:
74
- print('no saved model!')
75
- exit()
76
 
77
  # parse transcript sequence
78
  target_seq, guide_seq, model_inputs = process_data(transcript_seq)
79
 
80
  # get predictions
81
- normalized_lfc = tiger.predict_step(model_inputs)
82
  predictions = pd.DataFrame({'Guide': guide_seq, 'Normalized LFC': tf.squeeze(normalized_lfc).numpy()})
83
  predictions = predictions.set_index('Guide').sort_values('Normalized LFC')
84
 
@@ -86,6 +79,8 @@ def predict_on_target(transcript_seq: str):
86
 
87
 
88
  def find_off_targets(guides, batch_size=1000):
 
 
89
  with gzip.open(os.path.join('transcripts', 'gencode.v19.pc_transcripts.fa.gz'), 'rt') as file:
90
  df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
91
  df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
@@ -132,16 +127,36 @@ def find_off_targets(guides, batch_size=1000):
132
  return df_off_targets
133
 
134
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def tiger_exhibit(transcript):
136
 
 
 
 
 
 
 
 
137
  # on-target predictions
138
- on_target_predictions = predict_on_target(transcript)
139
 
140
  # keep only top guides
141
  on_target_predictions = on_target_predictions.iloc[:NUM_TOP_GUIDES]
142
 
143
- # scan for off-targets for top guides
144
  off_targets = find_off_targets(on_target_predictions.index.values.tolist())
 
145
 
146
  return on_target_predictions, off_targets
147
 
 
65
  return target_seq, guide_seq, model_inputs
66
 
67
 
68
+ def predict_on_target(transcript_seq: str, model: tf.keras.Model):
 
 
 
 
 
 
 
69
 
70
  # parse transcript sequence
71
  target_seq, guide_seq, model_inputs = process_data(transcript_seq)
72
 
73
  # get predictions
74
+ normalized_lfc = model.predict_step(model_inputs)
75
  predictions = pd.DataFrame({'Guide': guide_seq, 'Normalized LFC': tf.squeeze(normalized_lfc).numpy()})
76
  predictions = predictions.set_index('Guide').sort_values('Normalized LFC')
77
 
 
79
 
80
 
81
  def find_off_targets(guides, batch_size=1000):
82
+
83
+ # load reference transcripts
84
  with gzip.open(os.path.join('transcripts', 'gencode.v19.pc_transcripts.fa.gz'), 'rt') as file:
85
  df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
86
  df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
 
127
  return df_off_targets
128
 
129
 
130
+ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
131
+
132
+ # append off-target predictions
133
+ model_inputs = tf.concat([
134
+ tf.reshape(one_hot_encode_sequence(off_targets['Target'], add_context_padding=False), [len(off_targets), -1]),
135
+ tf.reshape(one_hot_encode_sequence(off_targets['Guide'], add_context_padding=True), [len(off_targets), -1]),
136
+ ], axis=-1)
137
+ off_targets['Normalized LFC'] = model.predict_step(model_inputs)
138
+
139
+ return off_targets
140
+
141
+
142
  def tiger_exhibit(transcript):
143
 
144
+ # load model
145
+ if os.path.exists('model'):
146
+ tiger = tf.keras.models.load_model('model')
147
+ else:
148
+ print('no saved model!')
149
+ exit()
150
+
151
  # on-target predictions
152
+ on_target_predictions = predict_on_target(transcript, model=tiger)
153
 
154
  # keep only top guides
155
  on_target_predictions = on_target_predictions.iloc[:NUM_TOP_GUIDES]
156
 
157
+ # predict off-target effects for top guides
158
  off_targets = find_off_targets(on_target_predictions.index.values.tolist())
159
+ off_targets = predict_off_target(off_targets, model=tiger)
160
 
161
  return on_target_predictions, off_targets
162