Andrew Stirn committed
Commit · 5b03846
1 Parent(s): daa1987

tiger.py changes in support of app.py

tiger.py CHANGED

@@ -35,7 +35,11 @@ BATCH_SIZE_SCAN = 20
 BATCH_SIZE_TRANSCRIPTS = 50
 NUM_TOP_GUIDES = 10
 NUM_MISMATCHES = 3
-RUN_MODES = dict(
+RUN_MODES = dict(
+    all='All on-target guides per transcript',
+    top_guides='Top {:d} guides per transcript'.format(NUM_TOP_GUIDES),
+    titration='Top {:d} guides per transcript & their titration candidates'.format(NUM_TOP_GUIDES)
+)


 # configure GPUs
@@ -45,7 +49,7 @@ if len(tf.config.list_physical_devices('GPU')) > 0:
     tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')


-def load_transcripts(fasta_files):
+def load_transcripts(fasta_files: list, enforce_unique_ids: bool = True):

     # load all transcripts from fasta files into a DataFrame
     transcripts = pd.DataFrame()
@@ -64,7 +68,8 @@ def load_transcripts(fasta_files):
     # set index
     transcripts[ID_COL] = transcripts[ID_COL].apply(lambda s: s.split('|')[0])
     transcripts.set_index(ID_COL, inplace=True)
-
+    if enforce_unique_ids:
+        assert not transcripts.index.has_duplicates, "duplicate transcript ID's detected in fasta file"

     return transcripts

@@ -156,7 +161,7 @@ def prediction_transform(predictions: np.array, **params):
         raise NotImplementedError


-def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model,
+def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model, status=None, progress_bar=None):

     # loop over transcripts
     predictions = pd.DataFrame()
@@ -178,9 +183,10 @@ def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model,
         percent_complete = 100 * min((i + 1) / len(transcripts), 1)
         update_text = 'Evaluating on-target guides for each transcript: {:.2f}%'.format(percent_complete)
         print('\r' + update_text, end='')
-        if
-
-
+        if status is not None:
+            status.text(update_text)
+        if progress_bar is not None:
+            progress_bar.progress(percent_complete / 100)
     print('')

     return predictions
@@ -198,7 +204,7 @@ def top_guides_per_transcript(predictions: pd.DataFrame):
     return top_guides.reset_index(drop=True)


-def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
+def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):

     # load reference transcripts
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
@@ -255,9 +261,10 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
         percent_complete = 100 * min((i + 1) / len(reference_transcripts), 1)
         update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
         print('\r' + update_text, end='')
-        if
-
-
+        if status is not None:
+            status.text(update_text)
+        if progress_bar is not None:
+            progress_bar.progress(percent_complete / 100)
     print('')

     return off_targets
@@ -281,7 +288,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
     return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)


-def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool,
+def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool, status=None, progress_bar=None):

     # load model
     if os.path.exists('model'):
@@ -291,24 +298,25 @@ def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool,
         exit()

     # evaluate all on-target guides per transcript
-    on_target_predictions = get_on_target_predictions(transcripts, tiger,
+    on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)

     # initialize other outputs
     off_target_predictions = pd.DataFrame()

-    if mode == 'all':
+    if mode == 'all' and not check_off_targets:
         pass # nothing to do!

-    elif mode == '
+    elif mode == 'top_guides':
         on_target_predictions = top_guides_per_transcript(on_target_predictions)
-
+
+        # TODO: add titration candidates

     else:
         raise NotImplementedError

     # check off-target effects for top guides
     if check_off_targets:
-        off_targets = find_off_targets(on_target_predictions,
+        off_targets = find_off_targets(on_target_predictions, status, progress_bar)
         off_target_predictions = predict_off_target(off_targets, model=tiger)

     # reverse guide sequences
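The new status/progress_bar hooks only ever call status.text(...) and progress_bar.progress(fraction), which matches Streamlit's st.empty() placeholder and st.progress() widget, so app.py can presumably pass those objects straight through. The sketch below is a minimal, hypothetical wiring and not the actual app.py: the widget labels and the 'example.fa' path are illustrative assumptions, while RUN_MODES, load_transcripts, and tiger_exhibit come from the diff above.

# hypothetical Streamlit wiring sketch (not the repository's app.py)
import streamlit as st

import tiger  # the module patched in this commit

# mode selector driven by the new RUN_MODES dict (key -> human-readable label)
mode = st.selectbox('Run mode', options=list(tiger.RUN_MODES.keys()),
                    format_func=lambda k: tiger.RUN_MODES[k])
check_off_targets = st.checkbox('Scan for off-targets')

# placeholders satisfying the new hooks: tiger.py only calls
# status.text(update_text) and progress_bar.progress(percent_complete / 100)
status = st.empty()
progress_bar = st.progress(0.0)

# load transcripts; enforce_unique_ids=True asserts on duplicate FASTA IDs
# ('example.fa' is an illustrative path, not part of the repository)
transcripts = tiger.load_transcripts(['example.fa'], enforce_unique_ids=True)

# run the pipeline; progress updates stream to the two widgets above
# (the return value of tiger_exhibit is not shown in this diff)
results = tiger.tiger_exhibit(transcripts, mode=mode,
                              check_off_targets=check_off_targets,
                              status=status, progress_bar=progress_bar)

Because both hooks default to None, the existing print('\r' + update_text, end='') console progress still works when tiger.py is run outside the app.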