Spaces:

qgyd2021
/

gpt2_chat

Running

qgyd2021 committed on Nov 23, 2023

Commit

da35a3c

1 Parent(s): 323087b

update

Files changed (2) hide show

examples.json CHANGED Viewed

@@ -5,6 +5,6 @@
     ["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
     [
         "电销场景意图识别。如果不能确定，请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
-        128, 0.75, 0.10, 1.2, "qgyd2021/few_shot_intent", true
     ]
 ]

     ["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
     [
         "电销场景意图识别。如果不能确定，请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
+        128, 0.75, 0.35, 1.2, "qgyd2021/few_shot_intent", true
     ]
 ]

main.py CHANGED Viewed

@@ -6,6 +6,7 @@ import json
 import os
 import platform
 import re
 from typing import List
 from project_settings import project_path
@@ -48,6 +49,29 @@ def repl2(match):
     return result
 def main():
     args = get_args()
@@ -123,8 +147,9 @@ def main():
                 output = output[5:]
             output = output.lstrip(" ,.!?")
-            output = re.sub(r"([，。！？\u4e00-\u9fa5]) ([，。！？\u4e00-\u9fa5])", repl1, output)
-            output = re.sub(r"([，。！？\u4e00-\u9fa5]) ", repl2, output)
             output = output.replace("[SEP] ", "\n")
             output = output.replace("[SEP]", "\n")

 import os
 import platform
 import re
+import string
 from typing import List
 from project_settings import project_path
     return result
def remove_space_between_cn_en(text):
    """Drop the spaces in *text* except those separating two ASCII runs.

    The text is split on the space character and the non-empty tokens are
    glued back together. A single space is kept between two neighbouring
    tokens only when the text built so far ends with an ASCII letter, digit
    or ``string.punctuation`` character AND the next token starts with an
    ASCII letter or digit. In every other case (for example around CJK
    characters) the tokens are joined directly.

    Runs of spaces collapse, leading spaces are removed, and a single
    trailing space is kept when *text* itself ends with a space. Text that
    contains no space at all is returned unchanged.

    :param text: str, the text to clean up.
    :return: str, the text with the unwanted spaces removed.
    """
    if " " not in text:
        return text

    # re.escape keeps every punctuation character literal inside the class.
    # Interpolating string.punctuation raw turns its `\]` pair into an
    # escaped bracket, which silently drops the backslash from the set.
    ascii_tail = re.compile(rf"[a-zA-Z0-9{re.escape(string.punctuation)}]$")
    alnum_head = re.compile(r"^[a-zA-Z0-9]")

    pieces = []
    # Only the last two characters written so far can influence the
    # `$`-anchored check (`$` also matches just before a final newline), so
    # track them instead of re-scanning the whole accumulated output.
    tail = ""
    for token in text.split(" "):
        if not token:
            # Empty tokens come from leading, trailing or repeated spaces.
            continue
        if ascii_tail.search(tail) and alnum_head.search(token):
            pieces.append(" ")
        pieces.append(token)
        tail = (tail + token)[-2:]

    result = "".join(pieces)
    if text.endswith(" "):
        result += " "
    return result
 def main():
     args = get_args()
                 output = output[5:]
             output = output.lstrip(" ,.!?")
+            output = remove_space_between_cn_en(output)
+            # output = re.sub(r"([，。！？\u4e00-\u9fa5]) ([，。！？\u4e00-\u9fa5])", repl1, output)
+            # output = re.sub(r"([，。！？\u4e00-\u9fa5]) ", repl2, output)
             output = output.replace("[SEP] ", "\n")
             output = output.replace("[SEP]", "\n")