Andrew Stirn committed on
Commit
5b03846
·
1 Parent(s): daa1987

tiger.py changes in support of app.py

Browse files
Files changed (1) hide show
  1. tiger.py +25 -17
tiger.py CHANGED
@@ -35,7 +35,11 @@ BATCH_SIZE_SCAN = 20
35
  BATCH_SIZE_TRANSCRIPTS = 50
36
  NUM_TOP_GUIDES = 10
37
  NUM_MISMATCHES = 3
38
- RUN_MODES = dict(all='All on-target guides per transcript', titration='Top guides per transcript')
 
 
 
 
39
 
40
 
41
  # configure GPUs
@@ -45,7 +49,7 @@ if len(tf.config.list_physical_devices('GPU')) > 0:
45
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
46
 
47
 
48
- def load_transcripts(fasta_files):
49
 
50
  # load all transcripts from fasta files into a DataFrame
51
  transcripts = pd.DataFrame()
@@ -64,7 +68,8 @@ def load_transcripts(fasta_files):
64
  # set index
65
  transcripts[ID_COL] = transcripts[ID_COL].apply(lambda s: s.split('|')[0])
66
  transcripts.set_index(ID_COL, inplace=True)
67
- assert not transcripts.index.has_duplicates, "duplicate transcript ID's detected in fasta file"
 
68
 
69
  return transcripts
70
 
@@ -156,7 +161,7 @@ def prediction_transform(predictions: np.array, **params):
156
  raise NotImplementedError
157
 
158
 
159
- def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model, status_bar=None, status_text=None):
160
 
161
  # loop over transcripts
162
  predictions = pd.DataFrame()
@@ -178,9 +183,10 @@ def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model,
178
  percent_complete = 100 * min((i + 1) / len(transcripts), 1)
179
  update_text = 'Evaluating on-target guides for each transcript: {:.2f}%'.format(percent_complete)
180
  print('\r' + update_text, end='')
181
- if status_bar:
182
- status_text.text()
183
- status_bar.progress(percent_complete)
 
184
  print('')
185
 
186
  return predictions
@@ -198,7 +204,7 @@ def top_guides_per_transcript(predictions: pd.DataFrame):
198
  return top_guides.reset_index(drop=True)
199
 
200
 
201
- def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
202
 
203
  # load reference transcripts
204
  reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
@@ -255,9 +261,10 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
255
  percent_complete = 100 * min((i + 1) / len(reference_transcripts), 1)
256
  update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
257
  print('\r' + update_text, end='')
258
- if status_bar:
259
- status_text.text()
260
- status_bar.progress(percent_complete)
 
261
  print('')
262
 
263
  return off_targets
@@ -281,7 +288,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
281
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
282
 
283
 
284
- def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool, status_bar=None, status_text=None):
285
 
286
  # load model
287
  if os.path.exists('model'):
@@ -291,24 +298,25 @@ def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool,
291
  exit()
292
 
293
  # evaluate all on-target guides per transcript
294
- on_target_predictions = get_on_target_predictions(transcripts, tiger, status_bar, status_text)
295
 
296
  # initialize other outputs
297
  off_target_predictions = pd.DataFrame()
298
 
299
- if mode == 'all':
300
  pass # nothing to do!
301
 
302
- elif mode == 'titration':
303
  on_target_predictions = top_guides_per_transcript(on_target_predictions)
304
- # TODO: add titration candidates
 
305
 
306
  else:
307
  raise NotImplementedError
308
 
309
  # check off-target effects for top guides
310
  if check_off_targets:
311
- off_targets = find_off_targets(on_target_predictions, status_bar, status_text)
312
  off_target_predictions = predict_off_target(off_targets, model=tiger)
313
 
314
  # reverse guide sequences
 
35
  BATCH_SIZE_TRANSCRIPTS = 50
36
  NUM_TOP_GUIDES = 10
37
  NUM_MISMATCHES = 3
38
+ RUN_MODES = dict(
39
+ all='All on-target guides per transcript',
40
+ top_guides='Top {:d} guides per transcript'.format(NUM_TOP_GUIDES),
41
+ titration='Top {:d} guides per transcript & their titration candidates'.format(NUM_TOP_GUIDES)
42
+ )
43
 
44
 
45
  # configure GPUs
 
49
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
50
 
51
 
52
+ def load_transcripts(fasta_files: list, enforce_unique_ids: bool = True):
53
 
54
  # load all transcripts from fasta files into a DataFrame
55
  transcripts = pd.DataFrame()
 
68
  # set index
69
  transcripts[ID_COL] = transcripts[ID_COL].apply(lambda s: s.split('|')[0])
70
  transcripts.set_index(ID_COL, inplace=True)
71
+ if enforce_unique_ids:
72
+ assert not transcripts.index.has_duplicates, "duplicate transcript ID's detected in fasta file"
73
 
74
  return transcripts
75
 
 
161
  raise NotImplementedError
162
 
163
 
164
+ def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model, status=None, progress_bar=None):
165
 
166
  # loop over transcripts
167
  predictions = pd.DataFrame()
 
183
  percent_complete = 100 * min((i + 1) / len(transcripts), 1)
184
  update_text = 'Evaluating on-target guides for each transcript: {:.2f}%'.format(percent_complete)
185
  print('\r' + update_text, end='')
186
+ if status is not None:
187
+ status.text(update_text)
188
+ if progress_bar is not None:
189
+ progress_bar.progress(percent_complete / 100)
190
  print('')
191
 
192
  return predictions
 
204
  return top_guides.reset_index(drop=True)
205
 
206
 
207
+ def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):
208
 
209
  # load reference transcripts
210
  reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
 
261
  percent_complete = 100 * min((i + 1) / len(reference_transcripts), 1)
262
  update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
263
  print('\r' + update_text, end='')
264
+ if status is not None:
265
+ status.text(update_text)
266
+ if progress_bar is not None:
267
+ progress_bar.progress(percent_complete / 100)
268
  print('')
269
 
270
  return off_targets
 
288
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
289
 
290
 
291
+ def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool, status=None, progress_bar=None):
292
 
293
  # load model
294
  if os.path.exists('model'):
 
298
  exit()
299
 
300
  # evaluate all on-target guides per transcript
301
+ on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)
302
 
303
  # initialize other outputs
304
  off_target_predictions = pd.DataFrame()
305
 
306
+ if mode == 'all' and not check_off_targets:
307
  pass # nothing to do!
308
 
309
+ elif mode == 'top_guides':
310
  on_target_predictions = top_guides_per_transcript(on_target_predictions)
311
+
312
+ # TODO: add titration candidates
313
 
314
  else:
315
  raise NotImplementedError
316
 
317
  # check off-target effects for top guides
318
  if check_off_targets:
319
+ off_targets = find_off_targets(on_target_predictions, status, progress_bar)
320
  off_target_predictions = predict_off_target(off_targets, model=tiger)
321
 
322
  # reverse guide sequences