Verathagnus commited on
Commit
9deef23
·
verified ·
1 Parent(s): c46c192

Upload 6 files

Browse files
Files changed (6) hide show
  1. .python-version +1 -0
  2. .streamlit/config.toml +17 -0
  3. railway.json +9 -0
  4. requirements.txt +9 -0
  5. streamlit_app.py +329 -0
  6. test.js +1 -0
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
.streamlit/config.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ enableCORS=false
3
+ port = 8501
4
+
5
+ [theme]
6
+ # Primary accent for interactive elements
7
+ primaryColor = '#7792E3'
8
+ # Background color for the main content area
9
+ backgroundColor = '#000319'
10
+ # Background color for sidebar and most interactive widgets
11
+ secondaryBackgroundColor = '#52968e'
12
+ # Color used for almost all text
13
+ textColor = '#FFFFFF'
14
+ # Font family for all text in the app, except code blocks
15
+ # Accepted values (serif | sans serif | monospace)
16
+ # Default: "sans serif"
17
+ font = "sans serif"
railway.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://railway.app/railway.schema.json",
3
+ "build": {
4
+ "builder": "NIXPACKS"
5
+ },
6
+ "deploy": {
7
+ "startCommand": "streamlit run streamlit_app.py --server.headless true --server.address 0.0.0.0 --server.port $PORT --server.fileWatcherType none --browser.gatherUsageStats false --client.toolbarMode minimal"
8
+ }
9
+ }
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.26.4
2
+ streamlit
3
+ transformers
4
+ torch
5
+ fasttext-langdetect
6
+ python-iso639
7
+ scikit-learn
8
+ # (duplicate "numpy==1.26.4" pin removed; numpy is already pinned above)
9
+ tensorflow
streamlit_app.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app: Indic multilingual text summarization and emotion detection."""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re, spacy
from time import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Concatenate, TimeDistributed, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')

import os
import pickle

import streamlit as st
import streamlit.components.v1 as components
from ftlangdetect import detect
import iso639

import torch
from transformers import (
    # GPT2Config,
    # GPT2Tokenizer,
    # GPT2Model,
    BertTokenizer,
    BertModel)

# GPT-2 support is currently disabled; the globals stay defined so that
# tokenize_sample / get_embeddings do not NameError if model_type == "gpt2"
# is ever requested (they would fail with a TypeError on None instead).
gpt2_tokenizer = None
gpt2_model = None
# gpt2_model = GPT2Model.from_pretrained("gpt2")
# gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# gpt2_tokenizer.padding_side = "left"
# gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

# NOTE(review): device is computed but the models below are never moved to it,
# so inference runs on CPU — confirm whether .to(device) was intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
bert_model = BertModel.from_pretrained('bert-base-multilingual-uncased')
# Integer label emitted by the emotion classifiers -> human-readable name.
class_names = {0:'sadness', 1:'joy', 2:'love', 3:'anger', 4:'fear', 5:'surprise'}
42
def tokenize_sample(texts, tokenizer="bert"):
    """Tokenize *texts* for the requested backbone, capped at 128 tokens.

    tokenizer: "gpt2" pads every sequence to exactly 128 tokens; any other
    value uses multilingual BERT, padding only to the longest item in the
    batch. Returns the tokenizer's dict of PyTorch tensors.
    """
    if tokenizer != "gpt2":
        return bert_tokenizer(texts, padding=True, truncation=True,
                              return_tensors='pt', max_length=128)
    return gpt2_tokenizer(texts, padding="max_length", truncation=True,
                          return_tensors='pt', max_length=128)
47
def get_embeddings(text, model_type="bert"):
    """Return the [CLS]-position hidden state for *text* as a numpy array."""
    tokens = tokenize_sample(text, model_type)
    model = gpt2_model if model_type == "gpt2" else bert_model
    outputs = model(**tokens)
    # The first token of the last hidden layer serves as the sentence embedding.
    return outputs.last_hidden_state[:, 0, :].detach().numpy()
55
+
56
# path_to_models = ".."
# Models live on the Railway persistent volume; RAILWAY_VOLUME_MOUNT_PATH must
# be set in the environment, otherwise startup aborts with a KeyError.
path_to_models = os.environ['RAILWAY_VOLUME_MOUNT_PATH']+"/storage"
# Display name (as shown in the UI selectbox) -> pickled classifier path.
emotion_classifier_map={
    "Naive Bayes":f"{path_to_models}/models/naive_bayes_model.sav",
    "Logistic Regression":f"{path_to_models}/models/logistic_regression_model.sav",
    "KNN":f"{path_to_models}/models/knn_model.sav",
    "KMeans":f"{path_to_models}/models/kmeans_model.sav",
    "SVM":f"{path_to_models}/models/svm_model.sav",
    "Decision Tree":f"{path_to_models}/models/decision_tree_model.sav",
    "Random Forest":f"{path_to_models}/models/random_forest_model.sav"
}
# Language name -> pickled summarizer bundle (tokenizer, lengths, seq2seq models).
summarizer_map={
    "Bengali":f"{path_to_models}/models/bengali_summarization_model.sav",
}
# print(os.listdir())
# print(os.environ["RAILWAY_VOLUME_MOUNT_PATH"])
# print(os.listdir(os.environ["RAILWAY_VOLUME_MOUNT_PATH"]+"/storage"))
# Eagerly unpickle every model at import time so requests are served from memory.
# SECURITY NOTE(review): pickle.load executes arbitrary code from the .sav
# files — these must come only from a trusted volume, never from user input.
summarizer_models=dict()
for i in summarizer_map:
    with open(summarizer_map[i], 'rb') as file:
        summarizer_models[i] = pickle.load(file)
# NOTE(review): "classfier" typo is kept as-is — this name is referenced
# throughout the rest of the module.
emotion_classfier_models=dict()
for i in emotion_classifier_map:
    with open(emotion_classifier_map[i], 'rb') as file:
        emotion_classfier_models[i] = pickle.load(file)
81
def get_emotion_prediction(input, model_name):
    """Classify the emotion of *input* with the named pre-loaded model.

    Returns one of the class_names labels. Raises ValueError when
    *model_name* is not among the loaded classifiers.
    """
    if model_name not in emotion_classfier_models:
        raise ValueError("Model type should be of the types: {}".format(", ".join(list(emotion_classfier_models.keys()))))
    classifier = emotion_classfier_models[model_name]
    label = classifier.predict(get_embeddings(input))[0]
    return class_names[label]
86
+
87
def decode_sequence(input_seq, max_summary_len, encoder_model, decoder_model, target_word_index, reverse_target_word_index):
    """Greedy-decode a summary from an already-tokenized input sequence.

    Runs the encoder once, then feeds the decoder one token at a time —
    always picking the highest-probability next token — until the end token
    ('eostok') is produced or max_summary_len - 1 words have been emitted.
    Returns the decoded sentence (each word prefixed by a space).
    """
    # Encode the input into the initial decoder states.
    enc_out, state_h, state_c = encoder_model.predict(input_seq)

    # Seed the decoder with the start-of-sequence token.
    prev_token = np.zeros((1, 1))
    prev_token[0, 0] = target_word_index['sostok']

    decoded_sentence = ''
    while True:
        probs, next_h, next_c = decoder_model.predict(
            [prev_token] + [enc_out, state_h, state_c])

        # Greedy choice: most probable token at the last time step.
        best_index = np.argmax(probs[0, -1, :])
        best_word = reverse_target_word_index[best_index]

        if best_word != 'eostok':
            decoded_sentence += ' ' + best_word

        # Stop on the explicit end token or once the summary is long enough.
        if best_word == 'eostok' or len(decoded_sentence.split()) >= (max_summary_len - 1):
            break

        # Feed the sampled token and the updated states back into the decoder.
        prev_token = np.zeros((1, 1))
        prev_token[0, 0] = best_index
        state_h, state_c = next_h, next_c

    return decoded_sentence
122
+
123
def summarize_text(text, x_tokenizer, max_text_len, max_summary_len, encoder_model, decoder_model, target_word_index, reverse_target_word_index):
    """Tokenize and pad *text*, then greedily decode its summary."""
    sequence = x_tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_text_len, padding='post')[0]
    return decode_sequence(padded.reshape(1, max_text_len), max_summary_len,
                           encoder_model, decoder_model,
                           target_word_index, reverse_target_word_index)
126
+
127
def main():
    """Render the two-tab Streamlit UI: text summarization and emotion detection.

    Relies on the module-level models loaded at import time. Also honors a
    ?tab=N query parameter by clicking the N-th tab via injected JavaScript.
    """
    list_of_tabs = st.tabs(["Indic Multilingual Text Summarizer", "Indic Multilingual Emotion Detection"])

    # --- Tab 0: summarization ---------------------------------------------
    with list_of_tabs[0]:
        st.title('Indic Multilingual Text Summarizer')

        # Input text from the user.
        input_sentence_summary = st.text_input('Enter a sentence', key="summarize")

        result = None
        error = None
        # ISO 639-1 code -> key into summarizer_models.
        langlist = {"bn": "Bengali"}
        if st.button('Summarize'):
            # Detect the input language; only supported languages are summarized.
            lang = detect(text=input_sentence_summary, low_memory=False)['lang']
            if lang in langlist:
                bundle = summarizer_models[langlist[lang]]
                # Strip the seq2seq start/end markers from the decoded output.
                result = summarize_text(
                    input_sentence_summary,
                    bundle["x_tokenizer"],
                    bundle["max_text_len"],
                    bundle['max_summary_len'],
                    bundle['encoder_model'],
                    bundle['decoder_model'],
                    bundle['target_word_index'],
                    bundle['reverse_target_word_index'],
                ).replace("start ", "").replace(" end", "")
            else:
                error = f"{iso639.Language.from_part1(lang).name} is not supported.\n List of supported languages: {', '.join(langlist.values())}"
        st.markdown("Current language support: Bengali")
        # Display the result (or the unsupported-language error).
        if result:
            st.success(f'Summary: {result}')
        if error:
            st.error(f'Error: {error}')

    # --- Tab 1: emotion detection -----------------------------------------
    with list_of_tabs[1]:
        st.title('Indic Multilingual Emotion Detection')

        # Input text from the user.
        input_sentence_emotion = st.text_input('Enter a sentence', key="emotion")

        # Classifier selection (keys come from the pickled-model registry).
        model_option = st.selectbox('Select the model', list(emotion_classfier_models.keys()))

        result = None
        error = None
        # ISO 639-1 code -> display name of supported languages.
        langlist = {"hi": "Hindi"}
        if st.button('Predict Emotion'):
            lang = detect(text=input_sentence_emotion, low_memory=False)['lang']
            if lang in langlist:
                result = get_emotion_prediction(input_sentence_emotion, model_option)
            else:
                error = f"{iso639.Language.from_part1(lang).name} is not supported.\n List of supported languages: {', '.join(langlist.values())}"
        st.markdown("Current language support: Hindi")
        # Display the result (or the unsupported-language error).
        if result:
            st.success(f'Prediction: {result}')
        if error:
            st.error(f'Error: {error}')

    # --- Credits and footer ------------------------------------------------
    st.markdown("---")  # Separator
    st.markdown("""## Contributors
- Bishwaraj Paul
**Role:** Intern
**Email:** bishwaraj.paul98@gmail.com
- Dr. Sahinur Rahman Laskar
**Role:** Mentor
Assistant Professor
School of Computer Science, UPES, Dehradun, India
**Email:** sahinurlaskar.nits@gmail.com / sahinur.laskar@ddn.upes.ac.in""")
    footer = """<style>
.footer-text{
    -webkit-text-size-adjust: 100%;
    -webkit-tap-highlight-color: transparent;
    --blue: #007bff;
    --indigo: #6610f2;
    --purple: #6f42c1;
    --pink: #e83e8c;
    --red: #dc3545;
    --orange: #fd7e14;
    --yellow: #ffc107;
    --green: #28a745;
    --teal: #20c997;
    --cyan: #17a2b8;
    --white: #fff;
    --gray: #6c757d;
    --gray-dark: #343a40;
    --primary: #007bff;
    --secondary: #6c757d;
    --success: #28a745;
    --info: #17a2b8;
    --warning: #ffc107;
    --danger: #dc3545;
    --light: #f8f9fa;
    --dark: #343a40;
    --breakpoint-xs: 0;
    --breakpoint-sm: 576px;
    --breakpoint-md: 768px;
    --breakpoint-lg: 992px;
    --breakpoint-xl: 1200px;
    --font-family-sans-serif: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";
    --font-family-monospace: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;
    font-size: 16px;
    font-weight: 400;
    line-height: 24px;
    letter-spacing: 1px;
    font-family: 'Raleway', sans-serif;
    color: #666;
    box-sizing: border-box;
    text-align: center!important;
}
@media (min-width: 576px) {
    .col-sm-12 {
        -webkit-box-flex: 0;
        -ms-flex: 0 0 100%;
        flex: 0 0 100%;
        max-width: 100%;
    }
}
.row {
    display: -webkit-box;
    display: -ms-flexbox;
    display: flex;
    -ms-flex-wrap: wrap;
    flex-wrap: wrap;
    margin-right: -15px;
    margin-left: -15px;
}
@media (min-width: 1200px) {
    .container {
        max-width: 1140px;
    }
}
@media (min-width: 992px) {
    .container {
        max-width: 960px;
    }
}
@media (min-width: 768px) {
    .container {
        max-width: 720px;
    }
}
@media (min-width: 576px) {
    .container {
        max-width: 540px;
    }
}
.container {
    width: 100%;
    padding-right: 15px;
    padding-left: 15px;
    margin-right: auto;
    margin-left: auto;
}
.footer-bottom-area {
    padding: 30px 0;
    display: block;
    box-sizing: border-box;
}
.footer-bottom-bg {
    background: #222;
}
</style>
<footer class="footer-bottom-area footer-bottom-bg">
    <div class="container">
        <div class="row">
            <div class="col-sm-12">
                <div class="footer-text">
                    <p style="color: white; font-style: sans-serif;"><span>Bahash Private Limited</span> ©2024 - All Right Reserved.</p>
                </div>
            </div>
        </div>
    </div>
</footer>
"""
    components.html(footer)

    # Handling query parameters: ?tab=N selects the N-th tab on page load.
    query = st.experimental_get_query_params()
    try:
        # Look up the tab index from the query string.
        index_tab = query["tab"][0]
        # Click that tab via injected JS (tab ids are generated by Streamlit).
        js = f"""
<script>
    var tab = window.parent.document.getElementById('tabs-bui2-tab-{index_tab}');
    tab.click();
</script>
"""
        components.html(js)
    # BUG FIX: a missing "tab" parameter raises KeyError (and an empty value
    # list would raise IndexError) — the original `except ValueError` caught
    # neither, so any load without ?tab= crashed this section.
    except (KeyError, IndexError, ValueError):
        # Do nothing if the query parameter does not correspond to any tab.
        pass


if __name__ == '__main__':
    main()
test.js ADDED
@@ -0,0 +1 @@
 
 
1
+ <!doctype html><html lang="en"><head><meta charset="UTF-8" /><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no" /><link rel="shortcut icon" href="./favicon.png" /><link rel="preload" href="./static/media/SourceSansPro-Regular.0d69e5ff5e92ac64a0c9.woff2" as="font" type="font/woff2" crossorigin><link rel="preload" href="./static/media/SourceSansPro-SemiBold.abed79cd0df1827e18cf.woff2" as="font" type="font/woff2" crossorigin><link rel="preload" href="./static/media/SourceSansPro-Bold.118dea98980e20a81ced.woff2" as="font" type="font/woff2" crossorigin><title>Streamlit</title><script>window.prerenderReady=!1</script><script defer="defer" src="./static/js/main.d55f6a3c.js"></script><link href="./static/css/main.29bca1b5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>