LeTruongVu2k1 committed
Commit 969d94d · 1 Parent(s): 2720879
creating Customized_IDSF folder and putting corresponding files into it; modifying app.py
Changed files:
- .gitattributes +1 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/config.json +0 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/eval_dev_results.txt +0 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/eval_test_results.txt +0 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/events.out.tfevents.1617863943.d86fb58144ae.20305.0 +0 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/pytorch_model.bin +0 -0
- {JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/training_args.bin +0 -0
- load_model.py → Customized_IDSF/load_model.py +0 -0
- Customized_IDSF/model/__init__.py +2 -0
- Customized_IDSF/model/modeling_jointphobert.py +89 -0
- Customized_IDSF/model/modeling_jointxlmr.py +88 -0
- Customized_IDSF/model/module.py +157 -0
- utils.py → Customized_IDSF/utils.py +0 -0
- app.py +3 -3
.gitattributes CHANGED
@@ -36,3 +36,4 @@ jdk-8u361-linux-aarch64.rpm filter=lfs diff=lfs merge=lfs -text
 VnCoreNLP-1.2.jar filter=lfs diff=lfs merge=lfs -text
 models/postagger/vi-tagger filter=lfs diff=lfs merge=lfs -text
 JointBERT-CRF_PhoBERTencoder/ filter=lfs diff=lfs merge=lfs -text
+Customized_IDSF/ filter=lfs diff=lfs merge=lfs -text
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/config.json RENAMED (file without changes)
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/eval_dev_results.txt RENAMED (file without changes)
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/eval_test_results.txt RENAMED (file without changes)
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/events.out.tfevents.1617863943.d86fb58144ae.20305.0 RENAMED (file without changes)
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/pytorch_model.bin RENAMED (file without changes)
{JointBERT-CRF_PhoBERTencoder → Customized_IDSF/JointBERT-CRF_PhoBERTencoder}/training_args.bin RENAMED (file without changes)
load_model.py → Customized_IDSF/load_model.py RENAMED (file without changes)
Customized_IDSF/model/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .modeling_jointphobert import JointPhoBERT
+from .modeling_jointxlmr import JointXLMR
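With these two re-exports, callers can pull either joint model straight from the package. A one-line usage sketch (not part of the commit; it assumes the Space runs with the repository root on sys.path so that Customized_IDSF is importable):

from Customized_IDSF.model import JointPhoBERT, JointXLMR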
Customized_IDSF/model/modeling_jointphobert.py ADDED
@@ -0,0 +1,89 @@
+import torch
+import torch.nn as nn
+from torchcrf import CRF
+from transformers.models.roberta.modeling_roberta import RobertaModel, RobertaPreTrainedModel
+
+from .module import IntentClassifier, SlotClassifier
+
+
+class JointPhoBERT(RobertaPreTrainedModel):
+    def __init__(self, config, args, intent_label_lst, slot_label_lst):
+        super(JointPhoBERT, self).__init__(config)
+        self.args = args
+        self.num_intent_labels = len(intent_label_lst)
+        self.num_slot_labels = len(slot_label_lst)
+        self.roberta = RobertaModel(config)  # Load pretrained phobert
+
+        self.intent_classifier = IntentClassifier(config.hidden_size, self.num_intent_labels, args.dropout_rate)
+
+        self.slot_classifier = SlotClassifier(
+            config.hidden_size,
+            self.num_intent_labels,
+            self.num_slot_labels,
+            self.args.use_intent_context_concat,
+            self.args.use_intent_context_attention,
+            self.args.max_seq_len,
+            self.args.attention_embedding_size,
+            args.dropout_rate,
+        )
+
+        if args.use_crf:
+            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
+
+    def forward(self, input_ids, attention_mask, token_type_ids, intent_label_ids, slot_labels_ids):
+        outputs = self.roberta(
+            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
+        )  # sequence_output, pooled_output, (hidden_states), (attentions)
+        sequence_output = outputs[0]
+        pooled_output = outputs[1]  # [CLS]
+
+        intent_logits = self.intent_classifier(pooled_output)
+        if not self.args.use_attention_mask:
+            tmp_attention_mask = None
+        else:
+            tmp_attention_mask = attention_mask
+
+        if self.args.embedding_type == "hard":
+            hard_intent_logits = torch.zeros(intent_logits.shape)
+            for i, sample in enumerate(intent_logits):
+                max_idx = torch.argmax(sample)
+                hard_intent_logits[i][max_idx] = 1
+            slot_logits = self.slot_classifier(sequence_output, hard_intent_logits, tmp_attention_mask)
+        else:
+            slot_logits = self.slot_classifier(sequence_output, intent_logits, tmp_attention_mask)
+
+        total_loss = 0
+        # 1. Intent Softmax
+        if intent_label_ids is not None:
+            if self.num_intent_labels == 1:
+                intent_loss_fct = nn.MSELoss()
+                intent_loss = intent_loss_fct(intent_logits.view(-1), intent_label_ids.view(-1))
+            else:
+                intent_loss_fct = nn.CrossEntropyLoss()
+                intent_loss = intent_loss_fct(
+                    intent_logits.view(-1, self.num_intent_labels), intent_label_ids.view(-1)
+                )
+            total_loss += self.args.intent_loss_coef * intent_loss
+
+        # 2. Slot Softmax
+        if slot_labels_ids is not None:
+            if self.args.use_crf:
+                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.byte(), reduction="mean")
+                slot_loss = -1 * slot_loss  # negative log-likelihood
+            else:
+                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
+                # Only keep active parts of the loss
+                if attention_mask is not None:
+                    active_loss = attention_mask.view(-1) == 1
+                    active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
+                    active_labels = slot_labels_ids.view(-1)[active_loss]
+                    slot_loss = slot_loss_fct(active_logits, active_labels)
+                else:
+                    slot_loss = slot_loss_fct(slot_logits.view(-1, self.num_slot_labels), slot_labels_ids.view(-1))
+            total_loss += (1 - self.args.intent_loss_coef) * slot_loss
+
+        outputs = ((intent_logits, slot_logits),) + outputs[2:]  # add hidden states and attention if they are here
+
+        outputs = (total_loss,) + outputs
+
+        return outputs  # (loss), logits, (hidden_states), (attentions) # Logits is a tuple of intent and slot logits
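For orientation, here is a minimal, hypothetical sketch of constructing JointPhoBERT and running one forward pass. Every concrete value is an assumption chosen only to satisfy the constructor above: the argparse.Namespace fields, the placeholder label lists, and the vinai/phobert-base config name. In the Space itself, the trained JointBERT-CRF_PhoBERTencoder checkpoint is loaded through Customized_IDSF/load_model.py instead.

import argparse

import torch
from transformers import RobertaConfig

from Customized_IDSF.model import JointPhoBERT

# Hypothetical training arguments; the real values live in the checkpoint's training_args.bin.
args = argparse.Namespace(
    dropout_rate=0.1,
    use_intent_context_concat=False,
    use_intent_context_attention=True,
    use_attention_mask=True,
    embedding_type="soft",
    max_seq_len=50,
    attention_embedding_size=200,
    use_crf=True,
    intent_loss_coef=0.6,
    ignore_index=0,
)
intent_labels = ["UNK", "flight", "airfare"]                 # placeholder label sets
slot_labels = ["PAD", "UNK", "O", "B-fromloc", "I-fromloc"]

config = RobertaConfig.from_pretrained("vinai/phobert-base")
model = JointPhoBERT(config, args, intent_labels, slot_labels)  # randomly initialized encoder

# One dummy batch; PhoBERT (RoBERTa) does not use token_type_ids, so None is passed.
input_ids = torch.randint(0, config.vocab_size, (2, args.max_seq_len))
attention_mask = torch.ones_like(input_ids)
intent_label_ids = torch.tensor([1, 2])
slot_labels_ids = torch.zeros(2, args.max_seq_len, dtype=torch.long)

outputs = model(input_ids, attention_mask, None, intent_label_ids, slot_labels_ids)
total_loss, (intent_logits, slot_logits) = outputs[0], outputs[1]
print(total_loss.item(), intent_logits.shape, slot_logits.shape)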
Customized_IDSF/model/modeling_jointxlmr.py ADDED
@@ -0,0 +1,88 @@
+import torch
+import torch.nn as nn
+from torchcrf import CRF
+from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel
+from transformers.models.xlm_roberta.modeling_xlm_roberta import XLMRobertaModel
+
+from .module import IntentClassifier, SlotClassifier
+
+
+class JointXLMR(RobertaPreTrainedModel):
+    def __init__(self, config, args, intent_label_lst, slot_label_lst):
+        super(JointXLMR, self).__init__(config)
+        self.args = args
+        self.num_intent_labels = len(intent_label_lst)
+        self.num_slot_labels = len(slot_label_lst)
+        self.roberta = XLMRobertaModel(config)  # Load pretrained bert
+        self.intent_classifier = IntentClassifier(config.hidden_size, self.num_intent_labels, args.dropout_rate)
+        self.slot_classifier = SlotClassifier(
+            config.hidden_size,
+            self.num_intent_labels,
+            self.num_slot_labels,
+            self.args.use_intent_context_concat,
+            self.args.use_intent_context_attention,
+            self.args.max_seq_len,
+            self.args.attention_embedding_size,
+            args.dropout_rate,
+        )
+
+        if args.use_crf:
+            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
+
+    def forward(self, input_ids, attention_mask, token_type_ids, intent_label_ids, slot_labels_ids):
+        outputs = self.roberta(
+            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
+        )  # sequence_output, pooled_output, (hidden_states), (attentions)
+        sequence_output = outputs[0]
+        pooled_output = outputs[1]  # [CLS]
+
+        intent_logits = self.intent_classifier(pooled_output)
+        if not self.args.use_attention_mask:
+            tmp_attention_mask = None
+        else:
+            tmp_attention_mask = attention_mask
+
+        if self.args.embedding_type == "hard":
+            hard_intent_logits = torch.zeros(intent_logits.shape)
+            for i, sample in enumerate(intent_logits):
+                max_idx = torch.argmax(sample)
+                hard_intent_logits[i][max_idx] = 1
+            slot_logits = self.slot_classifier(sequence_output, hard_intent_logits, tmp_attention_mask)
+        else:
+            slot_logits = self.slot_classifier(sequence_output, intent_logits, tmp_attention_mask)
+
+        total_loss = 0
+        # 1. Intent Softmax
+        if intent_label_ids is not None:
+            if self.num_intent_labels == 1:
+                intent_loss_fct = nn.MSELoss()
+                intent_loss = intent_loss_fct(intent_logits.view(-1), intent_label_ids.view(-1))
+            else:
+                intent_loss_fct = nn.CrossEntropyLoss()
+                intent_loss = intent_loss_fct(
+                    intent_logits.view(-1, self.num_intent_labels), intent_label_ids.view(-1)
+                )
+            total_loss += self.args.intent_loss_coef * intent_loss
+
+        # 2. Slot Softmax
+        if slot_labels_ids is not None:
+            if self.args.use_crf:
+                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.byte(), reduction="mean")
+                slot_loss = -1 * slot_loss  # negative log-likelihood
+            else:
+                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
+                # Only keep active parts of the loss
+                if attention_mask is not None:
+                    active_loss = attention_mask.view(-1) == 1
+                    active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
+                    active_labels = slot_labels_ids.view(-1)[active_loss]
+                    slot_loss = slot_loss_fct(active_logits, active_labels)
+                else:
+                    slot_loss = slot_loss_fct(slot_logits.view(-1, self.num_slot_labels), slot_labels_ids.view(-1))
+            total_loss += (1 - self.args.intent_loss_coef) * slot_loss
+
+        outputs = ((intent_logits, slot_logits),) + outputs[2:]  # add hidden states and attention if they are here
+
+        outputs = (total_loss,) + outputs
+
+        return outputs  # (loss), logits, (hidden_states), (attentions) # Logits is a tuple of intent and slot logits
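JointXLMR mirrors JointPhoBERT except for the XLM-RoBERTa encoder. One detail both forward passes share is the "hard" embedding type, which snaps the predicted intent distribution to a one-hot vector before handing it to the slot classifier. A standalone illustration of that loop with invented numbers (not part of the commit):

import torch

intent_logits = torch.tensor([[0.2, 1.5, -0.3],
                              [2.0, 0.1, 0.4]])   # two samples, three intents
hard_intent_logits = torch.zeros(intent_logits.shape)
for i, sample in enumerate(intent_logits):
    hard_intent_logits[i][torch.argmax(sample)] = 1  # keep only the top-scoring intent
print(hard_intent_logits)
# tensor([[0., 1., 0.],
#         [1., 0., 0.]])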
Customized_IDSF/model/module.py ADDED
@@ -0,0 +1,157 @@
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+class Attention(nn.Module):
+    """Applies attention mechanism on the `context` using the `query`.
+    Args:
+        dimensions (int): Dimensionality of the query and context.
+        attention_type (str, optional): How to compute the attention score:
+
+            * dot: :math:`score(H_j,q) = H_j^T q`
+            * general: :math:`score(H_j, q) = H_j^T W_a q`
+
+    Example:
+
+         >>> attention = Attention(256)
+         >>> query = torch.randn(32, 50, 256)
+         >>> context = torch.randn(32, 1, 256)
+         >>> output, weights = attention(query, context)
+         >>> output.size()
+         torch.Size([32, 50, 256])
+         >>> weights.size()
+         torch.Size([32, 50, 1])
+    """
+
+    def __init__(self, dimensions):
+        super(Attention, self).__init__()
+
+        self.dimensions = dimensions
+        self.linear_out = nn.Linear(dimensions * 2, dimensions, bias=False)
+        self.softmax = nn.Softmax(dim=1)
+        self.tanh = nn.Tanh()
+
+    def forward(self, query, context, attention_mask):
+        """
+        Args:
+            query (:class:`torch.FloatTensor` [batch size, output length, dimensions]): Sequence of
+                queries to query the context.
+            context (:class:`torch.FloatTensor` [batch size, query length, dimensions]): Data
+                overwhich to apply the attention mechanism.
+            output length: length of utterance
+            query length: length of each token (1)
+        Returns:
+            :class:`tuple` with `output` and `weights`:
+            * **output** (:class:`torch.LongTensor` [batch size, output length, dimensions]):
+              Tensor containing the attended features.
+            * **weights** (:class:`torch.FloatTensor` [batch size, output length, query length]):
+              Tensor containing attention weights.
+        """
+        # query = self.linear_query(query)
+
+        batch_size, output_len, hidden_size = query.size()
+        # query_len = context.size(1)
+
+        # (batch_size, output_len, dimensions) * (batch_size, query_len, dimensions) ->
+        # (batch_size, output_len, query_len)
+        attention_scores = torch.bmm(query, context.transpose(1, 2).contiguous())
+        # Compute weights across every context sequence
+        # attention_scores = attention_scores.view(batch_size * output_len, query_len)
+        if attention_mask is not None:
+            # Create attention mask, apply attention mask before softmax
+            attention_mask = torch.unsqueeze(attention_mask, 2)
+            # attention_mask = attention_mask.view(batch_size * output_len, query_len)
+            attention_scores.masked_fill_(attention_mask == 0, -np.inf)
+        # attention_scores = torch.squeeze(attention_scores,1)
+        attention_weights = self.softmax(attention_scores)
+        # attention_weights = attention_weights.view(batch_size, output_len, query_len)
+
+        # (batch_size, output_len, query_len) * (batch_size, query_len, dimensions) ->
+        # (batch_size, output_len, dimensions)
+        mix = torch.bmm(attention_weights, context)
+        # from IPython import embed; embed()
+        # concat -> (batch_size * output_len, 2*dimensions)
+        combined = torch.cat((mix, query), dim=2)
+        # combined = combined.view(batch_size * output_len, 2 * self.dimensions)
+
+        # Apply linear_out on every 2nd dimension of concat
+        # output -> (batch_size, output_len, dimensions)
+        # output = self.linear_out(combined).view(batch_size, output_len, self.dimensions)
+        output = self.linear_out(combined)
+
+        output = self.tanh(output)
+        # output = combined
+        return output, attention_weights
+
+
+class IntentClassifier(nn.Module):
+    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.0):
+        super(IntentClassifier, self).__init__()
+        self.dropout = nn.Dropout(dropout_rate)
+        self.linear = nn.Linear(input_dim, num_intent_labels)
+
+    def forward(self, x):
+        x = self.dropout(x)
+        return self.linear(x)
+
+
+class SlotClassifier(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        num_intent_labels,
+        num_slot_labels,
+        use_intent_context_concat=False,
+        use_intent_context_attn=False,
+        max_seq_len=50,
+        attention_embedding_size=200,
+        dropout_rate=0.0,
+    ):
+        super(SlotClassifier, self).__init__()
+        self.use_intent_context_attn = use_intent_context_attn
+        self.use_intent_context_concat = use_intent_context_concat
+        self.max_seq_len = max_seq_len
+        self.num_intent_labels = num_intent_labels
+        self.num_slot_labels = num_slot_labels
+        self.attention_embedding_size = attention_embedding_size
+
+        output_dim = self.attention_embedding_size  # base model
+        if self.use_intent_context_concat:
+            output_dim = self.attention_embedding_size
+            self.linear_out = nn.Linear(2 * attention_embedding_size, attention_embedding_size)
+
+        elif self.use_intent_context_attn:
+            output_dim = self.attention_embedding_size
+            self.attention = Attention(attention_embedding_size)
+
+        self.linear_slot = nn.Linear(input_dim, self.attention_embedding_size, bias=False)
+
+        if self.use_intent_context_attn or self.use_intent_context_concat:
+            # project intent vector and slot vector to have the same dimensions
+            self.linear_intent_context = nn.Linear(self.num_intent_labels, self.attention_embedding_size, bias=False)
+            self.softmax = nn.Softmax(dim=-1)  # softmax layer for intent logits
+
+        # self.linear_out = nn.Linear(2 * intent_embedding_size, intent_embedding_size)
+        # output
+        self.dropout = nn.Dropout(dropout_rate)
+        self.linear = nn.Linear(output_dim, num_slot_labels)
+
+    def forward(self, x, intent_context, attention_mask):
+        x = self.linear_slot(x)
+        if self.use_intent_context_concat:
+            intent_context = self.softmax(intent_context)
+            intent_context = self.linear_intent_context(intent_context)
+            intent_context = torch.unsqueeze(intent_context, 1)
+            intent_context = intent_context.expand(-1, self.max_seq_len, -1)
+            x = torch.cat((x, intent_context), dim=2)
+            x = self.linear_out(x)
+
+        elif self.use_intent_context_attn:
+            intent_context = self.softmax(intent_context)
+            intent_context = self.linear_intent_context(intent_context)
+            intent_context = torch.unsqueeze(intent_context, 1)  # 1: query length (each token)
+            output, weights = self.attention(x, intent_context, attention_mask)
+            x = output
+        x = self.dropout(x)
+        return self.linear(x)
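A note on the Attention class above: its docstring example calls attention(query, context), but forward requires a third attention_mask argument, so that doctest would raise a TypeError as written. A working sketch with an explicit all-ones mask (shapes follow the docstring, not this Space's runtime values; not part of the commit):

import torch
from Customized_IDSF.model.module import Attention

attention = Attention(256)
query = torch.randn(32, 50, 256)    # token representations (batch, seq_len, dim)
context = torch.randn(32, 1, 256)   # one intent-context vector per example
mask = torch.ones(32, 50)           # 1 = real token, 0 = padding
output, weights = attention(query, context, mask)
print(output.size())   # torch.Size([32, 50, 256])
print(weights.size())  # torch.Size([32, 50, 1])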
utils.py → Customized_IDSF/utils.py RENAMED (file without changes)
app.py CHANGED
@@ -52,9 +52,9 @@ my_classifier = pipeline(
 
 
 #################### IDSF #######################
-from utils import get_intent_labels, get_slot_labels, load_tokenizer
+from Customized_IDSF.utils import get_intent_labels, get_slot_labels, load_tokenizer
 import argparse
-import load_model as lm
+import Customized_IDSF.load_model as lm
 
 parser = argparse.ArgumentParser()
 
@@ -96,7 +96,7 @@ def remove_disfluency(example, prediction):
 import gradio as gr
 
 def ner(text):
-    text = " ".
+    text = " ".join(rdrsegmenter.word_segment(text))
 
     # Some words in lowercase like "đà nẵng" will get error (due to vncorenlp)
     text = text.replace("đà ", " đà ")
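The repaired line in ner() word-segments the input before anything else runs. A sketch of just that step, under the assumption that app.py builds rdrsegmenter with py_vncorenlp (the VnCoreNLP-1.2.jar tracked in .gitattributes is consistent with that, but the setup is outside this diff; the save_dir path below is hypothetical):

import py_vncorenlp

# Word-segmentation-only VnCoreNLP instance; save_dir must already hold the downloaded jar/models.
rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir="/path/to/vncorenlp")

text = "tôi muốn xem lịch bay từ đà nẵng đến hà nội"
text = " ".join(rdrsegmenter.word_segment(text))  # word_segment returns one segmented string per sentence
text = text.replace("đà ", " đà ")                # same lowercase workaround as in app.py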