Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Andrew Stirn
committed on
Commit
·
de06d10
1
Parent(s):
2bc5b93
off-target predictions
Browse files
tiger.py
CHANGED
|
@@ -65,20 +65,13 @@ def process_data(transcript_seq: str):
|
|
| 65 |
return target_seq, guide_seq, model_inputs
|
| 66 |
|
| 67 |
|
| 68 |
-
def predict_on_target(transcript_seq: str):
|
| 69 |
-
|
| 70 |
-
# load model
|
| 71 |
-
if os.path.exists('model'):
|
| 72 |
-
tiger = tf.keras.models.load_model('model')
|
| 73 |
-
else:
|
| 74 |
-
print('no saved model!')
|
| 75 |
-
exit()
|
| 76 |
|
| 77 |
# parse transcript sequence
|
| 78 |
target_seq, guide_seq, model_inputs = process_data(transcript_seq)
|
| 79 |
|
| 80 |
# get predictions
|
| 81 |
-
normalized_lfc =
|
| 82 |
predictions = pd.DataFrame({'Guide': guide_seq, 'Normalized LFC': tf.squeeze(normalized_lfc).numpy()})
|
| 83 |
predictions = predictions.set_index('Guide').sort_values('Normalized LFC')
|
| 84 |
|
|
@@ -86,6 +79,8 @@ def predict_on_target(transcript_seq: str):
|
|
| 86 |
|
| 87 |
|
| 88 |
def find_off_targets(guides, batch_size=1000):
|
|
|
|
|
|
|
| 89 |
with gzip.open(os.path.join('transcripts', 'gencode.v19.pc_transcripts.fa.gz'), 'rt') as file:
|
| 90 |
df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
|
| 91 |
df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
|
|
@@ -132,16 +127,36 @@ def find_off_targets(guides, batch_size=1000):
|
|
| 132 |
return df_off_targets
|
| 133 |
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
def tiger_exhibit(transcript):
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
# on-target predictions
|
| 138 |
-
on_target_predictions = predict_on_target(transcript)
|
| 139 |
|
| 140 |
# keep only top guides
|
| 141 |
on_target_predictions = on_target_predictions.iloc[:NUM_TOP_GUIDES]
|
| 142 |
|
| 143 |
-
#
|
| 144 |
off_targets = find_off_targets(on_target_predictions.index.values.tolist())
|
|
|
|
| 145 |
|
| 146 |
return on_target_predictions, off_targets
|
| 147 |
|
|
|
|
| 65 |
return target_seq, guide_seq, model_inputs
|
| 66 |
|
| 67 |
|
| 68 |
+
def predict_on_target(transcript_seq: str, model: tf.keras.Model):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# parse transcript sequence
|
| 71 |
target_seq, guide_seq, model_inputs = process_data(transcript_seq)
|
| 72 |
|
| 73 |
# get predictions
|
| 74 |
+
normalized_lfc = model.predict_step(model_inputs)
|
| 75 |
predictions = pd.DataFrame({'Guide': guide_seq, 'Normalized LFC': tf.squeeze(normalized_lfc).numpy()})
|
| 76 |
predictions = predictions.set_index('Guide').sort_values('Normalized LFC')
|
| 77 |
|
|
|
|
| 79 |
|
| 80 |
|
| 81 |
def find_off_targets(guides, batch_size=1000):
|
| 82 |
+
|
| 83 |
+
# load reference transcripts
|
| 84 |
with gzip.open(os.path.join('transcripts', 'gencode.v19.pc_transcripts.fa.gz'), 'rt') as file:
|
| 85 |
df_transcripts = pd.DataFrame([(t.id, str(t.seq)) for t in SeqIO.parse(file, 'fasta')], columns=['id', 'seq'])
|
| 86 |
df_transcripts['id'] = df_transcripts['id'].apply(lambda s: s.split('|')[4])
|
|
|
|
| 127 |
return df_off_targets
|
| 128 |
|
| 129 |
|
| 130 |
+
def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
    """Append model predictions to a table of candidate off-target sites.

    Args:
        off_targets: table with a 'Target' and a 'Guide' sequence column, one
            row per guide/off-target pair. It is modified in place.
        model: model whose `predict_step` scores the encoded sequence pairs.

    Returns:
        The same `off_targets` DataFrame with an added 'Normalized LFC' column.
    """
    num_pairs = len(off_targets)

    # nothing to score: tf.reshape cannot infer a -1 dimension for an empty tensor
    if num_pairs == 0:
        off_targets['Normalized LFC'] = pd.Series(dtype=float)
        return off_targets

    # encode each target/guide pair as one flat feature row
    # NOTE(review): padding flags mirror the original call; presumably targets
    # already carry sequence context while guides need padding -- confirm
    # against one_hot_encode_sequence
    model_inputs = tf.concat([
        tf.reshape(one_hot_encode_sequence(off_targets['Target'], add_context_padding=False), [num_pairs, -1]),
        tf.reshape(one_hot_encode_sequence(off_targets['Guide'], add_context_padding=True), [num_pairs, -1]),
    ], axis=-1)

    # append off-target predictions; flatten to a 1-D NumPy array (as the
    # on-target path converts to NumPy) so pandas stores one value per row
    normalized_lfc = model.predict_step(model_inputs)
    off_targets['Normalized LFC'] = tf.reshape(normalized_lfc, [-1]).numpy()

    return off_targets
|
| 140 |
+
|
| 141 |
+
|
| 142 |
def tiger_exhibit(transcript: str):
    """Run on-target and off-target predictions for one transcript.

    Loads the saved model from the 'model' path, scores the transcript's
    guides, keeps the first NUM_TOP_GUIDES rows of the on-target table, then
    finds and scores the off-targets of those guides.

    Args:
        transcript: transcript sequence passed to `predict_on_target`.

    Returns:
        A tuple `(on_target_predictions, off_targets)`.

    Raises:
        SystemExit: if no saved model exists at 'model'.
    """
    # load model
    if not os.path.exists('model'):
        # SystemExit with a message writes it to stderr and exits with a
        # non-zero status; the bare exit() helper reported success (status 0)
        # and is only available when the `site` module is loaded
        raise SystemExit('no saved model!')
    tiger = tf.keras.models.load_model('model')

    # on-target predictions
    on_target_predictions = predict_on_target(transcript, model=tiger)

    # keep only top guides
    on_target_predictions = on_target_predictions.iloc[:NUM_TOP_GUIDES]

    # predict off-target effects for top guides
    off_targets = find_off_targets(on_target_predictions.index.values.tolist())
    off_targets = predict_off_target(off_targets, model=tiger)

    return on_target_predictions, off_targets
|
| 162 |
|