Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Andrew Stirn
committed on
Commit
·
a2591b6
1
Parent(s):
27992d2
ability to run locally
Browse files
app.py
CHANGED
|
@@ -78,7 +78,7 @@ else:
|
|
| 78 |
# valid input
|
| 79 |
if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
|
| 80 |
on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
|
| 81 |
-
status_bar, status_text, option)
|
| 82 |
on_target.rename(columns={"Guide":"23 nt guide sequence"}, inplace=True)
|
| 83 |
if len(on_target)>0:
|
| 84 |
if on_target.iloc[0]["On-target ID"] == 0:
|
|
|
|
| 78 |
# valid input
|
| 79 |
if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
|
| 80 |
on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
|
| 81 |
+
status_bar, status_text, check_off_targets=option == 'On and Off Target')
|
| 82 |
on_target.rename(columns={"Guide":"23 nt guide sequence"}, inplace=True)
|
| 83 |
if len(on_target)>0:
|
| 84 |
if on_target.iloc[0]["On-target ID"] == 0:
|
tiger.py
CHANGED
|
@@ -24,6 +24,7 @@ for gpu in tf.config.list_physical_devices('GPU'):
|
|
| 24 |
if len(tf.config.list_physical_devices('GPU')) > 0:
|
| 25 |
tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
|
| 26 |
|
|
|
|
| 27 |
def load_transcripts(fasta_files):
|
| 28 |
|
| 29 |
# load all transcripts from fasta files into a DataFrame
|
|
@@ -94,7 +95,6 @@ def process_data(transcript_seq: str):
|
|
| 94 |
tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
|
| 95 |
tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
|
| 96 |
], axis=-1)
|
| 97 |
-
print(model_inputs)
|
| 98 |
return target_seq, guide_seq, model_inputs
|
| 99 |
|
| 100 |
|
|
@@ -188,7 +188,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
|
|
| 188 |
return off_targets.sort_values('Normalized LFC')
|
| 189 |
|
| 190 |
|
| 191 |
-
def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None,
|
| 192 |
|
| 193 |
# load model
|
| 194 |
if os.path.exists('model'):
|
|
@@ -214,13 +214,13 @@ def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None,
|
|
| 214 |
|
| 215 |
# predict off-target effects for top guides
|
| 216 |
off_target_predictions = pd.DataFrame()
|
| 217 |
-
if
|
| 218 |
off_targets = find_off_targets(on_target_predictions, status_bar, status_text)
|
| 219 |
off_target_predictions = predict_off_target(off_targets, model=tiger)
|
| 220 |
|
| 221 |
# reverse guide sequences
|
| 222 |
on_target_predictions['Guide'] = on_target_predictions['Guide'].apply(lambda s: s[::-1])
|
| 223 |
-
if
|
| 224 |
off_target_predictions['Guide'] = off_target_predictions['Guide'].apply(lambda s: s[::-1])
|
| 225 |
|
| 226 |
return on_target_predictions.reset_index(drop=True), off_target_predictions.reset_index(drop=True)
|
|
@@ -230,6 +230,7 @@ if __name__ == '__main__':
|
|
| 230 |
|
| 231 |
# common arguments
|
| 232 |
parser = argparse.ArgumentParser()
|
|
|
|
| 233 |
parser.add_argument('--fasta_path', type=str, default=None)
|
| 234 |
parser.add_argument('--simple_test', action='store_true', default=False)
|
| 235 |
args = parser.parse_args()
|
|
@@ -239,13 +240,17 @@ if __name__ == '__main__':
|
|
| 239 |
# first 50 from EIF3B-003's CDS
|
| 240 |
simple_test = pd.DataFrame(dict(id=['ManualEntry'], seq=['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']))
|
| 241 |
simple_test.set_index('id', inplace=True)
|
| 242 |
-
df_on_target, df_off_target = tiger_exhibit(simple_test)
|
| 243 |
df_on_target.to_csv('on_target.csv')
|
| 244 |
df_off_target.to_csv('off_target.csv')
|
| 245 |
|
| 246 |
# directory of fasta files
|
| 247 |
elif args.fasta_path is not None and os.path.exists(args.fasta_path):
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
# load transcripts
|
| 250 |
df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
|
| 251 |
|
|
@@ -259,11 +264,12 @@ if __name__ == '__main__':
|
|
| 259 |
|
| 260 |
# run batch
|
| 261 |
idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
|
| 262 |
-
df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop])
|
| 263 |
|
| 264 |
# save batch results
|
| 265 |
-
df_on_target.to_csv('
|
| 266 |
-
|
|
|
|
| 267 |
|
| 268 |
# clear session to prevent memory blow up
|
| 269 |
tf.keras.backend.clear_session()
|
|
|
|
| 24 |
if len(tf.config.list_physical_devices('GPU')) > 0:
|
| 25 |
tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
|
| 26 |
|
| 27 |
+
|
| 28 |
def load_transcripts(fasta_files):
|
| 29 |
|
| 30 |
# load all transcripts from fasta files into a DataFrame
|
|
|
|
| 95 |
tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
|
| 96 |
tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
|
| 97 |
], axis=-1)
|
|
|
|
| 98 |
return target_seq, guide_seq, model_inputs
|
| 99 |
|
| 100 |
|
|
|
|
| 188 |
return off_targets.sort_values('Normalized LFC')
|
| 189 |
|
| 190 |
|
| 191 |
+
def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None, check_off_targets=False):
|
| 192 |
|
| 193 |
# load model
|
| 194 |
if os.path.exists('model'):
|
|
|
|
| 214 |
|
| 215 |
# predict off-target effects for top guides
|
| 216 |
off_target_predictions = pd.DataFrame()
|
| 217 |
+
if check_off_targets:
|
| 218 |
off_targets = find_off_targets(on_target_predictions, status_bar, status_text)
|
| 219 |
off_target_predictions = predict_off_target(off_targets, model=tiger)
|
| 220 |
|
| 221 |
# reverse guide sequences
|
| 222 |
on_target_predictions['Guide'] = on_target_predictions['Guide'].apply(lambda s: s[::-1])
|
| 223 |
+
if check_off_targets and len(off_target_predictions) > 0:
|
| 224 |
off_target_predictions['Guide'] = off_target_predictions['Guide'].apply(lambda s: s[::-1])
|
| 225 |
|
| 226 |
return on_target_predictions.reset_index(drop=True), off_target_predictions.reset_index(drop=True)
|
|
|
|
| 230 |
|
| 231 |
# common arguments
|
| 232 |
parser = argparse.ArgumentParser()
|
| 233 |
+
parser.add_argument('--check_off_targets', action='store_true', default=False)
|
| 234 |
parser.add_argument('--fasta_path', type=str, default=None)
|
| 235 |
parser.add_argument('--simple_test', action='store_true', default=False)
|
| 236 |
args = parser.parse_args()
|
|
|
|
| 240 |
# first 50 from EIF3B-003's CDS
|
| 241 |
simple_test = pd.DataFrame(dict(id=['ManualEntry'], seq=['ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC']))
|
| 242 |
simple_test.set_index('id', inplace=True)
|
| 243 |
+
df_on_target, df_off_target = tiger_exhibit(simple_test, check_off_targets=args.check_off_targets)
|
| 244 |
df_on_target.to_csv('on_target.csv')
|
| 245 |
df_off_target.to_csv('off_target.csv')
|
| 246 |
|
| 247 |
# directory of fasta files
|
| 248 |
elif args.fasta_path is not None and os.path.exists(args.fasta_path):
|
| 249 |
|
| 250 |
+
# check for any existing results
|
| 251 |
+
if os.path.exists('on_target.csv') or os.path.exists('off_target.csv'):
|
| 252 |
+
raise FileExistsError('please rename or delete existing results')
|
| 253 |
+
|
| 254 |
# load transcripts
|
| 255 |
df_transcripts = load_transcripts([os.path.join(args.fasta_path, f) for f in os.listdir(args.fasta_path)])
|
| 256 |
|
|
|
|
| 264 |
|
| 265 |
# run batch
|
| 266 |
idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
|
| 267 |
+
df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop], check_off_targets=args.check_off_targets)
|
| 268 |
|
| 269 |
# save batch results
|
| 270 |
+
df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')
|
| 271 |
+
if args.check_off_targets:
|
| 272 |
+
df_off_target.to_csv('off_target.csv', header=batch == 1, index=False, mode='a')
|
| 273 |
|
| 274 |
# clear session to prevent memory blow up
|
| 275 |
tf.keras.backend.clear_session()
|