Andrew Stirn committed on
Commit
89be9f9
·
1 Parent(s): f6276d8

added app and run

Browse files
Files changed (2) hide show
  1. app.py +30 -0
  2. run.py +61 -0
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from time import time, ctime
3
+ import tiger_trainer as stt
4
+ import run as trun
5
+ import pandas as pd
6
+ import os, shutil, glob
7
+
8
+
9
def run_with_input(reset=False):
    """Run the model on the sequence held in the Streamlit session state.

    When ``reset`` is true, only an empty string is written to the page and
    ``0`` is returned.  Otherwise the text stored under
    ``st.session_state["userInput"]`` is passed to ``trun.run``, the returned
    table is written to the page, and its ``to_csv()`` output is returned.
    """
    if not reset:
        prediction = trun.run(st.session_state["userInput"])
        csv_text = prediction.to_csv()
        st.write("model prediction: ", prediction)
        return csv_text

    st.write("")
    return 0
17
+
18
# Page body: title, sequence input box, validation feedback, download button.
st.title("Tiger gen prediction")
st.session_state["userInput"] = st.text_input('type gen sequence')

# Placeholder payload for the download button.  It must be bound under the
# same name the button reads (`csv_data`); otherwise a short or invalid input
# reaches the button with the name undefined and raises NameError.
csv_data = "first run model to generate data"

user_input = st.session_state["userInput"]
if len(user_input) < 23:
    st.write("Sorry your input length must be at least 23 bases. It is %s chars" % len(user_input))
    run_with_input(reset=True)
elif all(item in "ACGTacgt" for item in user_input):
    st.write('This is your sequence', user_input)
    # Only a valid sequence replaces the placeholder with real model output.
    csv_data = run_with_input()
else:
    st.write("only ACTG is allowed")

st.download_button(label="Download as CSV File", data=csv_data)
run.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import tensorflow as tf
4
+ import pandas as pd
5
+
6
# Number of nucleotides in each window sliced from the input sequence.
GUIDE_LEN = 23
# Integer token assigned to each nucleotide letter before one-hot encoding.
NUCLEOTIDE_TOKENS = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
8
+
9
# Load the saved Keras model from the local 'model' path when this module is
# imported; `run` below relies on the resulting `tiger` object.
if os.path.exists('model'):
    tiger = tf.keras.models.load_model('model')
else:
    print('no saved model!')
    # Use sys.exit rather than the bare exit() helper: exit() is only injected
    # by the site module, and a non-zero status reports the failure to callers.
    sys.exit(1)
15
+
16
+
17
def process_data(x):
    """Cut a nucleotide sequence into overlapping windows and one-hot encode them.

    The sequence is upper-cased, then every window of ``GUIDE_LEN``
    consecutive characters (stride 1) is collected.  Each character is mapped
    to its integer token through ``NUCLEOTIDE_TOKENS`` (a character missing
    from that mapping raises ``KeyError``), the tokens of all windows are
    one-hot encoded with depth 4, and the result is reshaped so that each row
    holds ``GUIDE_LEN * 4`` values.

    Returns a tuple ``(input_gens, model_input_x)``: the list of window
    strings and the reshaped one-hot tensor.
    """
    bases = [base.upper() for base in x]
    starts = range(len(bases) - GUIDE_LEN + 1)

    # Human-readable windows, one string per start position.
    input_gens = ["".join(bases[s:s + GUIDE_LEN]) for s in starts]

    # Same windows as integer tokens, flattened into one long list.
    tokens = [NUCLEOTIDE_TOKENS[base] for base in bases]
    flat_tokens = [tok for s in starts for tok in tokens[s:s + GUIDE_LEN]]

    one_hot = tf.one_hot(flat_tokens, depth=4)
    return input_gens, tf.reshape(one_hot, [-1, GUIDE_LEN * 4])
30
+
31
+
32
def gen_report_table(input_gens, res):
    """Build the result table pairing each window with its model output.

    ``res`` must expose ``.numpy()``; that array is flattened to a plain list
    and placed in the ``"res"`` column next to ``input_gens`` in the
    ``"Gene"`` column of a pandas DataFrame, which is returned.
    """
    scores = res.numpy().flatten().tolist()
    columns = {"Gene": input_gens, "res": scores}
    return pd.DataFrame.from_dict(columns)
38
+
39
+
40
def run(x):
    """Run the loaded model on sequence ``x`` and return the report table.

    ``x`` is windowed and encoded by ``process_data``, the encoded input is
    passed to ``tiger.model.predict_step``, and the windows together with the
    model output are turned into a table by ``gen_report_table``.
    """
    guides, features = process_data(x)
    predictions = tiger.model.predict_step(features)
    return gen_report_table(guides, predictions)
47
+
48
+
49
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, a sequence of
    # GUIDE_LEN upper-case characters drawn from ACGT.  Every rejection path
    # leaves through sys.exit(1) so the failure is visible in the exit status
    # (the bare exit() helper is only provided by the site module).
    if len(sys.argv) == 1:
        print("you need to specify 23 character gen information")
        sys.exit(1)

    x = sys.argv[1]
    if len(x) != GUIDE_LEN:
        print("you need to specify 23 character gen information. You typed %s chars" % len(x))
        sys.exit(1)

    if not all(item in "ACGT" for item in x):
        print("your gen sequence need 23 character only from ACGT")
        sys.exit(1)

    print("run succesfully: ", run(x))