Spaces:
Sleeping
Sleeping
Update game3.py
Browse files
game3.py
CHANGED
|
@@ -109,6 +109,8 @@ def func3(num_selected, human_predict, num1, num2, user_important):
|
|
| 109 |
|
| 110 |
def interpre3(num_selected):
|
| 111 |
fname = 'data3_convai2_inferred.txt'
|
|
|
|
|
|
|
| 112 |
with open(fname) as f:
|
| 113 |
content = f.readlines()
|
| 114 |
text = eval(content[int(num_selected*2)])
|
|
@@ -116,7 +118,38 @@ def interpre3(num_selected):
|
|
| 116 |
|
| 117 |
print(interpretation)
|
| 118 |
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
# pos = []
|
| 121 |
# neg = []
|
| 122 |
# res = []
|
|
@@ -156,6 +189,7 @@ def func3_written(text_written, human_predict, lang_written):
|
|
| 156 |
|
| 157 |
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 158 |
classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
|
|
|
|
| 159 |
|
| 160 |
output = classifier([text_written])
|
| 161 |
|
|
@@ -181,8 +215,40 @@ def func3_written(text_written, human_predict, lang_written):
|
|
| 181 |
|
| 182 |
shap_values = explainer([text_written])
|
| 183 |
interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
|
|
|
|
| 184 |
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
print(res)
|
| 187 |
|
| 188 |
return res, ai_predict, chatbot
|
|
|
|
| 109 |
|
| 110 |
def interpre3(num_selected):
|
| 111 |
fname = 'data3_convai2_inferred.txt'
|
| 112 |
+
tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
|
| 113 |
+
|
| 114 |
with open(fname) as f:
|
| 115 |
content = f.readlines()
|
| 116 |
text = eval(content[int(num_selected*2)])
|
|
|
|
| 118 |
|
| 119 |
print(interpretation)
|
| 120 |
|
| 121 |
+
encodings = tokenizer(text['text'], return_offsets_mapping=True)
|
| 122 |
+
|
| 123 |
+
print(encodings['offset_mapping'])
|
| 124 |
+
is_subword = [False, False]
|
| 125 |
+
for i in range(2, len(encodings['offset_mapping'])):
|
| 126 |
+
if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
|
| 127 |
+
is_subword.append(True)
|
| 128 |
+
else:
|
| 129 |
+
is_subword.append(False)
|
| 130 |
+
print(is_subword)
|
| 131 |
+
interpretation_combined = []
|
| 132 |
+
|
| 133 |
+
index_tmp = 0
|
| 134 |
+
while index_tmp < (len(interpretation) - 1):
|
| 135 |
+
if not is_subword[index_tmp+1]:
|
| 136 |
+
interpretation_combined.append(interpretation[index_tmp])
|
| 137 |
+
index_tmp += 1
|
| 138 |
+
else:
|
| 139 |
+
text_combined = interpretation[index_tmp][0]
|
| 140 |
+
score_combinded = interpretation[index_tmp][1]
|
| 141 |
+
length = 1
|
| 142 |
+
while is_subword[index_tmp+length]:
|
| 143 |
+
text_combined += interpretation[index_tmp+length][0]
|
| 144 |
+
score_combinded += interpretation[index_tmp+length][1]
|
| 145 |
+
length += 1
|
| 146 |
+
interpretation_combined.append((text_combined, score_combinded/length))
|
| 147 |
+
index_tmp += length
|
| 148 |
+
|
| 149 |
+
interpretation_combined.append(('', 0.0))
|
| 150 |
+
print(interpretation_combined)
|
| 151 |
+
|
| 152 |
+
res = {"original": text['text'], "interpretation": interpretation_combined}
|
| 153 |
# pos = []
|
| 154 |
# neg = []
|
| 155 |
# res = []
|
|
|
|
| 189 |
|
| 190 |
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 191 |
classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
|
| 192 |
+
tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
|
| 193 |
|
| 194 |
output = classifier([text_written])
|
| 195 |
|
|
|
|
| 215 |
|
| 216 |
shap_values = explainer([text_written])
|
| 217 |
interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
|
| 218 |
+
|
| 219 |
|
| 220 |
+
# Tokenize the same string that was passed to the classifier and the SHAP
# explainer (text_written) so the offset mapping lines up token-for-token
# with `interpretation`. `text['text']` belongs to interpre3, not this function.
encodings = tokenizer(text_written, return_offsets_mapping=True)
|
| 221 |
+
|
| 222 |
+
print(encodings['offset_mapping'])
|
| 223 |
+
is_subword = [False, False]
|
| 224 |
+
for i in range(2, len(encodings['offset_mapping'])):
|
| 225 |
+
if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
|
| 226 |
+
is_subword.append(True)
|
| 227 |
+
else:
|
| 228 |
+
is_subword.append(False)
|
| 229 |
+
print(is_subword)
|
| 230 |
+
interpretation_combined = []
|
| 231 |
+
|
| 232 |
+
index_tmp = 0
|
| 233 |
+
while index_tmp < (len(interpretation) - 1):
|
| 234 |
+
if not is_subword[index_tmp+1]:
|
| 235 |
+
interpretation_combined.append(interpretation[index_tmp])
|
| 236 |
+
index_tmp += 1
|
| 237 |
+
else:
|
| 238 |
+
text_combined = interpretation[index_tmp][0]
|
| 239 |
+
score_combinded = interpretation[index_tmp][1]
|
| 240 |
+
length = 1
|
| 241 |
+
while is_subword[index_tmp+length]:
|
| 242 |
+
text_combined += interpretation[index_tmp+length][0]
|
| 243 |
+
score_combinded += interpretation[index_tmp+length][1]
|
| 244 |
+
length += 1
|
| 245 |
+
interpretation_combined.append((text_combined, score_combinded/length))
|
| 246 |
+
index_tmp += length
|
| 247 |
+
|
| 248 |
+
interpretation_combined.append(('', 0.0))
|
| 249 |
+
print(interpretation_combined)
|
| 250 |
+
|
| 251 |
+
res = {"original": text_written, "interpretation": interpretation_combined}
|
| 252 |
print(res)
|
| 253 |
|
| 254 |
return res, ai_predict, chatbot
|