Spaces: Runtime error
Commit: 66e5e13
Parent(s): bdec808
Add application file

Files changed:
- app.py +57 -0
- configuration.py +13 -0
- dataset.py +12 -0
- model.py +46 -0
- models_file/config.pth +3 -0
- models_file/microsoft-deberta-base_0.9449373420387531_8_best.pth +3 -0
- models_file/tokenizer/merges.txt +0 -0
- models_file/tokenizer/special_tokens_map.json +51 -0
- models_file/tokenizer/tokenizer.json +0 -0
- models_file/tokenizer/tokenizer_config.json +66 -0
- models_file/tokenizer/vocab.json +0 -0
- requirements.txt +5 -0
- utils.py +104 -0
app.py
ADDED
@@ -0,0 +1,57 @@
+if __name__ == '__main__':
+    inputs = ['gbjjhbdjhbdgjhdbfjhsdkjrkjf', 'fdjhbjhsbd']
+    from transformers import AutoTokenizer
+    from model import CustomModel
+    import torch
+    from configuration import CFG
+    from dataset import SingleInputDataset
+    from torch.utils.data import DataLoader
+    from utils import inference_fn, get_char_probs, get_results, get_text
+    import numpy as np
+    import gradio as gr
+
+    device = torch.device('cpu')
+    tokenizer = AutoTokenizer.from_pretrained('models_file/tokenizer')
+    model = CustomModel(CFG, config_path='models_file/config.pth', pretrained=False)
+    state = torch.load('models_file/microsoft-deberta-base_0.9449373420387531_8_best.pth',
+                       map_location=torch.device('cpu'))
+    model.load_state_dict(state['model'])
+
+    def get_answer(context, feature):
+
+        # Build the model input from the patient history (context) and the feature text
+        inputs_single = tokenizer(context, feature,
+                                  add_special_tokens=True,
+                                  max_length=CFG.max_len,
+                                  padding="max_length",
+                                  return_offsets_mapping=False)
+
+        for k, v in inputs_single.items():
+            inputs_single[k] = torch.tensor(v, dtype=torch.long)
+
+        # Create a new dataset containing only the input sample
+        single_input_dataset = SingleInputDataset(inputs_single)
+        # Create a DataLoader for the new dataset
+        single_input_loader = DataLoader(single_input_dataset,
+                                         batch_size=1,
+                                         shuffle=False,
+                                         num_workers=2)
+
+        # Perform inference on the single input
+        output = inference_fn(single_input_loader, model, device)
+
+        prediction = output.reshape((1, CFG.max_len))
+        char_probs = get_char_probs([context], prediction, tokenizer)
+        predictions = np.mean([char_probs], axis=0)
+        results = get_results(predictions, th=0.5)
+
+        print(results)
+        return get_text(context, results[0])
+
+    # Smoke test on the sample strings defined above, before `inputs` is
+    # rebound to the Gradio components (launch() blocks, so it must run first)
+    print(get_answer(inputs[0], inputs[1]))
+
+    inputs = [gr.Textbox(label="Context Para", lines=10), gr.Textbox(label="Question", lines=1)]
+    output = gr.Textbox(label="Answer")
+    app = gr.Interface(fn=get_answer, inputs=inputs, outputs=output, allow_flagging='never')
+    app.launch()
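For reference, a minimal sketch of querying the launched interface from Python with gradio_client; the Space id below is a hypothetical placeholder, and the endpoint name assumes Gradio's default /predict route:

    from gradio_client import Client

    client = Client("user/space-name")  # hypothetical Space id
    answer = client.predict(
        "Patient reports intermittent chest pain for three months.",  # Context Para
        "chest pain",                                                 # Question
        api_name="/predict",
    )
    print(answer)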
configuration.py
ADDED
@@ -0,0 +1,13 @@
+# ====================================================
+# CFG
+# ====================================================
+class CFG:
+    print_freq = 100
+    num_workers = 0
+    model = "microsoft/deberta-base"
+    token = "microsoft/deberta-base"
+    fc_dropout = 0.2
+    max_len = 739
+    weight_decay = 0.01
+    project_folder = '/content/drive/MyDrive/Projects/Exigent/POC-V1/'
+    matching_data = 'matching_data.csv'
dataset.py
ADDED
@@ -0,0 +1,12 @@
+from torch.utils.data import Dataset
+
+# A custom dataset class that wraps a single input sample
+class SingleInputDataset(Dataset):
+    def __init__(self, input_single):
+        self.sample = input_single
+
+    def __len__(self):
+        return 1
+
+    def __getitem__(self, index):
+        return self.sample
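A minimal sketch of how SingleInputDataset interacts with the default collate_fn: the dict of 1-D tensors built in app.py comes back from the DataLoader as a dict of [1, max_len] batches (the 739 below mirrors CFG.max_len):

    import torch
    from torch.utils.data import DataLoader
    from dataset import SingleInputDataset

    # A fake tokenized sample, shaped like the tokenizer output in app.py
    sample = {'input_ids': torch.zeros(739, dtype=torch.long),
              'attention_mask': torch.ones(739, dtype=torch.long)}

    loader = DataLoader(SingleInputDataset(sample), batch_size=1, shuffle=False)
    batch = next(iter(loader))
    print(batch['input_ids'].shape)  # torch.Size([1, 739])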
model.py
ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from transformers import AutoConfig, AutoModel
+
+# ====================================================
+# Model
+# ====================================================
+class CustomModel(nn.Module):
+    def __init__(self, cfg, config_path=None, pretrained=False):
+        super().__init__()
+        self.cfg = cfg
+
+        if config_path is None:
+            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
+        else:
+            self.config = torch.load(config_path)
+        if pretrained:
+            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
+        else:
+            self.model = AutoModel.from_config(self.config)
+        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
+        self.fc = nn.Linear(self.config.hidden_size, 1)
+        self._init_weights(self.fc)
+
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+
+    def feature(self, inputs):
+        outputs = self.model(**inputs)
+        last_hidden_states = outputs[0]
+        return last_hidden_states
+
+    def forward(self, inputs):
+        feature = self.feature(inputs)
+        output = self.fc(self.fc_dropout(feature))
+        return output
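A minimal sketch of the model's output contract: one logit per token, i.e. [batch, seq_len, 1]. This instantiates the model with randomly initialized weights (it fetches only the deberta-base config from the Hub), so the values are meaningless; only the shapes matter:

    import torch
    from configuration import CFG
    from model import CustomModel

    model = CustomModel(CFG, config_path=None, pretrained=False)  # random weights
    enc = {'input_ids': torch.zeros(1, 16, dtype=torch.long),
           'attention_mask': torch.ones(1, 16, dtype=torch.long)}
    with torch.no_grad():
        out = model(enc)
    print(out.shape)  # torch.Size([1, 16, 1])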
models_file/config.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44242dd46e256e33385a5be4979c8df941af4ae4d8ad5f2feb5315d114da5f98
+size 2541
models_file/microsoft-deberta-base_0.9449373420387531_8_best.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:994ef334eed041e7b0d62f2ad3f97444adcac4696a8027a5b14bf803bb27265f
+size 555618276
models_file/tokenizer/merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
models_file/tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
+{
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
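These special tokens frame the (context, feature) pair exactly as app.py feeds it to the model. A small sketch of the resulting layout, assuming the tokenizer directory above is present locally:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained('models_file/tokenizer')
    ids = tok('a', 'b')['input_ids']
    print(tok.convert_ids_to_tokens(ids))  # ['[CLS]', 'a', '[SEP]', 'b', '[SEP]']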
models_file/tokenizer/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
models_file/tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,66 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_lower_case": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 512,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "tokenizer_class": "DebertaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "vocab_type": "gpt2"
+}
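Two details here are worth checking against the rest of the code: model_max_length is 512 while app.py pads to CFG.max_len = 739 (the tokenizer is called without truncation, so nothing is cut), and AutoTokenizer loads the fast Rust tokenizer by default, which is what makes return_offsets_mapping=True in utils.get_char_probs work. A quick check, assuming the tokenizer directory is available:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained('models_file/tokenizer')
    print(type(tok).__name__)    # DebertaTokenizerFast
    print(tok.model_max_length)  # 512
    print(tok.is_fast)           # True -- required for return_offsets_mapping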
models_file/tokenizer/vocab.json
ADDED
The diff for this file is too large to render. See raw diff.
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+torch
+transformers
+numpy
+scikit-learn
+gradio
utils.py
ADDED
@@ -0,0 +1,104 @@
+import itertools
+import torch
+import numpy as np
+from tqdm.auto import tqdm
+
+def get_char_probs(texts, predictions, tokenizer):
+    """
+    Maps token-level predictions back onto the text via the offset mapping.
+
+    predictions: one row per text, of sequence length (e.g. 466)
+    texts: raw strings (e.g. 768 characters)
+    offset mapping: [(0, 4), ...], one (start, end) pair per token
+
+    Creates results the same size as the texts:
+    for each text, result[0:4] = pred[0], and likewise for every token.
+    """
+    results = [np.zeros(len(t)) for t in texts]
+    for i, (text, prediction) in enumerate(zip(texts, predictions)):
+        encoded = tokenizer(text,
+                            add_special_tokens=True,
+                            return_offsets_mapping=True)
+        for idx, (offset_mapping, pred) in enumerate(zip(encoded['offset_mapping'], prediction)):
+            start = offset_mapping[0]
+            end = offset_mapping[1]
+            results[i][start:end] = pred
+    return results
+
+
+def get_results(char_probs, th=0.5):
+    """
+    Takes the per-character probabilities (one array per text) and returns
+    the character spans whose probability exceeds th.
+    Example:
+        char_prob = [0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.2, 0.2, 0.2, 0.7, 0.7, 0.7]
+        indices where > 0.5: [2, 3, 4, 5, 9, 10, 11]
+        grouped into consecutive runs: [[2, 3, 4, 5], [9, 10, 11]]
+        the min and max of each run become the output spans.
+    """
+    results = []
+    for char_prob in char_probs:
+        result = np.where(char_prob >= th)[0] + 1
+        result = [list(g) for _, g in itertools.groupby(result, key=lambda n, c=itertools.count(): n - next(c))]
+        result = [f"{min(r)} {max(r)}" for r in result]
+        result = ";".join(result)
+        results.append(result)
+    return results
+
+
+def get_predictions(results):
+    """
+    Parses span strings such as ['2 5', '9 11'] (the location format used
+    in the dataframe) into [start, end] integer pairs.
+    """
+    predictions = []
+    for result in results:
+        prediction = []
+        if result != "":
+            for loc in [s.split() for s in result.split(';')]:
+                start, end = int(loc[0]), int(loc[1])
+                prediction.append([start, end])
+        predictions.append(prediction)
+    return predictions
+
+def inference_fn(test_loader, model, device):
+    preds = []
+    model.eval()
+    model.to(device)
+    tk0 = tqdm(test_loader, total=len(test_loader))
+    for inputs in tk0:
+        for k, v in inputs.items():
+            inputs[k] = v.to(device)
+        with torch.no_grad():
+            y_preds = model(inputs)
+        preds.append(y_preds.sigmoid().cpu().numpy())
+    predictions = np.concatenate(preds)
+    return predictions
+
+def get_text(context, indexes):
+    if indexes:
+        if ';' in indexes:
+            list_indexes = indexes.split(';')
+
+            answer = ''
+            for idx in list_indexes:
+                start_index = int(idx.split(' ')[0])
+                end_index = int(idx.split(' ')[1])
+                answer += ' '
+                answer += context[start_index:end_index]
+            return answer
+        else:
+            start_index = int(indexes.split(' ')[0])
+            end_index = int(indexes.split(' ')[1])
+
+            return context[start_index:end_index]
+    else:
+        return 'Not found in this Context'
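A minimal sketch of the post-processing round trip, using the example from the get_results docstring (note the +1 shift the function applies to the threshold indices):

    import numpy as np
    from utils import get_results, get_text

    char_prob = np.array([0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.2, 0.2, 0.2, 0.7, 0.7, 0.7])
    spans = get_results([char_prob], th=0.5)
    print(spans)                               # ['3 6;10 12']
    print(get_text("abcdefghijkl", spans[0]))  # ' def kl'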