Update app.py

app.py CHANGED
@@ -92,22 +92,27 @@ def replace_pp_with_pause(sentence, entity_tags):
     for tag in entity_tags:
         start = tag['start']
         end = tag['end']
-        if end<len(sentence)-1:
+        if end < len(sentence) - 1:
             token = sentence[start:end] # Adjust for 0-based indexing
         else:
-            token = sentence[start:end+1]
-        tag_name = f"[{tag['entity_group']}]"
+            token = sentence[start:end + 1]
 
-        if tag['entity_group'] == 'PP'
-
-
+        tag_name = '[PAUSE]' if tag['entity_group'] == 'PP' else ''
+        tagged_tokens.append(f"{token}{tag_name}")
+        print(tagged_tokens)
+
+    # Return the sentence with [PAUSE] replacement and spaces preserved
+    modified_words = []
+    for i, word in enumerate(tagged_tokens):
+        if word.startswith("'s"):
+            modified_words[-1] = modified_words[-1] + word
         else:
-
+            modified_words.append(word)
 
-
+    output = " ".join(modified_words)
+
+    return output
 
-        # Return the sentence with [PAUSE] replacement
-        return " ".join(tagged_tokens)
 
 
 def get_split_sentences(sentence, entity_tags):
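For context, a minimal runnable sketch of the revised tagging logic above. It assumes entity_tags follows the Hugging Face token-classification output format (dicts with 'start'/'end' character offsets and an 'entity_group' label, where 'PP' marks a predicted pause); the sample sentence and offsets are hypothetical:

def replace_pp_with_pause(sentence, entity_tags):
    tagged_tokens = []
    for tag in entity_tags:
        start, end = tag['start'], tag['end']
        # Same boundary handling as the diff: include the final character
        # only when the tag reaches the end of the sentence.
        token = sentence[start:end] if end < len(sentence) - 1 else sentence[start:end + 1]
        tag_name = '[PAUSE]' if tag['entity_group'] == 'PP' else ''
        tagged_tokens.append(f"{token}{tag_name}")

    # Re-attach clitics like "'s" to the preceding word so the final join
    # does not insert a stray space (an empty-list guard is added here).
    modified_words = []
    for word in tagged_tokens:
        if word.startswith("'s") and modified_words:
            modified_words[-1] += word
        else:
            modified_words.append(word)
    return " ".join(modified_words)

tags = [{'start': 0, 'end': 3, 'entity_group': 'O'},
        {'start': 4, 'end': 7, 'entity_group': 'PP'},
        {'start': 7, 'end': 9, 'entity_group': 'O'}]
print(replace_pp_with_pause("The cat's bowl sat empty", tags))
# -> The cat[PAUSE]'s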
@@ -139,34 +144,9 @@ def get_split_sentences(sentence, entity_tags):
 
     # If the sentence ends without a [PAUSE] token, add the final sentence
     if current_sentence:
-        split_sentences.append(" ".join(current_sentence))
+        split_sentences.append("".join(current_sentence))
 
     return split_sentences
-
-# def get_split_sentences(sentence, entity_tags):
-#     split_sentences = []
-
-#     # Initialize a variable to hold the current sentence
-#     current_sentence = []
-
-#     # Process the entity tags to split the sentence
-#     for tag in entity_tags:
-#         if tag['entity_group'] == 'PP':
-#             if current_sentence:
-#                 print(current_sentence)
-#                 split_sentences.append(" ".join(current_sentence))
-#                 current_sentence = []  # Reset the current sentence
-#         else:
-#             start = tag['start']
-#             end = tag['end']
-#             token = sentence[start - 1:end]  # Adjust for 0-based indexing
-#             current_sentence.append(token)
-
-#     # If the sentence ends without a [PAUSE] token, add the final sentence
-#     if current_sentence:
-#         split_sentences.append(" ".join(current_sentence))
-
-#     return split_sentences
-
 
 
 
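The deleted comment block above preserves the older splitting logic; the live version now joins segments with "" rather than " " to keep the original spacing. Reconstructed as runnable code (indentation inferred, offsets used directly where the comment block had start - 1, same entity_tags format as in the sketch after the first hunk), it reads roughly as follows:

def get_split_sentences(sentence, entity_tags):
    split_sentences = []
    current_sentence = []

    # A 'PP' (pause) tag closes the current segment; any other tag
    # contributes its surface token to the segment being built.
    for tag in entity_tags:
        if tag['entity_group'] == 'PP':
            if current_sentence:
                split_sentences.append(" ".join(current_sentence))
                current_sentence = []  # Reset the current segment
        else:
            token = sentence[tag['start']:tag['end']]
            current_sentence.append(token)

    # If the sentence ends without a pause, flush the final segment
    if current_sentence:
        split_sentences.append(" ".join(current_sentence))

    return split_sentences

tags = [{'start': 0, 'end': 3, 'entity_group': 'O'},
        {'start': 4, 'end': 7, 'entity_group': 'PP'},
        {'start': 8, 'end': 12, 'entity_group': 'O'}]
print(get_split_sentences("The cat ate.", tags))  # -> ['The', 'ate.']

Note that the pause-tagged token itself is dropped from the segments, which is how the commented-out code behaved as well.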
@@ -510,9 +490,9 @@ def analyze_heatmap(df_input):
     )
 
     # Additional styling
-    ax.set_title("Importance Score per Token", size=25)
-    ax.set_xlabel("Token")
-    ax.set_ylabel("Importance Value")
+    # ax.set_title("Importance Score per Token", size=25)
+    # ax.set_xlabel("Token")
+    # ax.set_ylabel("Importance Value")
     ax.set_xticks(range(len(df["token"])))
     ax.set_xticklabels(df["token"], rotation=45)
 
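Only the title and axis labels are commented out here; the tick styling stays. A self-contained sketch of that remaining styling (the sample df is made up):

import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({"token": ["The", "cat", "[PAUSE]", "sat"],
                   "importance_value": [0.1, 0.8, 0.0, 0.3]})

fig, ax = plt.subplots()
ax.bar(range(len(df["token"])), df["importance_value"])
# One tick per token, labels rotated 45 degrees so long tokens stay legible
ax.set_xticks(range(len(df["token"])))
ax.set_xticklabels(df["token"], rotation=45)
plt.show()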
@@ -724,7 +704,8 @@ class SentenceAnalyzer:
         attribution_df1 = process_integrated_gradients(split_sentence, self._gpt2tokenizer, self.model)
         if i < len(self.split_sentences) - 1:
             # Add a row with [PAUSE] and value 0 at the end
-            pause_row = pd.DataFrame({'token': '[PAUSE]', 'importance_value': 0},index=[len(attribution_df1)])
+            # pause_row = pd.DataFrame({'token': '[PAUSE]', 'importance_value': 0},index=[len(attribution_df1)])
+            pause_row = pd.DataFrame({'token': '', 'importance_value': 0}, index=[len(attribution_df1)])  # blank separator row in place of '[PAUSE]'
             attribution_df1 = pd.concat([attribution_df1,pause_row], ignore_index=True)
 
         dataframes_list.append(attribution_df1)
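The committed added line read pd.DataFrame({'', '': 0}, ...), which is a Python syntax error (it mixes set and dict literals); the dict keys above are restored from the commented-out line. The pause-row logic itself boils down to appending a one-row DataFrame between sentence segments; a standalone sketch with a made-up attribution frame:

import pandas as pd

attribution_df1 = pd.DataFrame({'token': ['The', 'cat'],
                                'importance_value': [0.10, 0.82]})

# Blank separator row (the earlier revision used '[PAUSE]' as the token text)
pause_row = pd.DataFrame({'token': [''], 'importance_value': [0.0]},
                         index=[len(attribution_df1)])
attribution_df1 = pd.concat([attribution_df1, pause_row], ignore_index=True)
print(attribution_df1)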