update
Browse files- examples.json +1 -1
- main.py +27 -2
examples.json
CHANGED
|
@@ -5,6 +5,6 @@
|
|
| 5 |
["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
|
| 6 |
[
|
| 7 |
"电销场景意图识别。如果不能确定,请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
|
| 8 |
-
128, 0.75, 0.
|
| 9 |
]
|
| 10 |
]
|
|
|
|
| 5 |
["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
|
| 6 |
[
|
| 7 |
"电销场景意图识别。如果不能确定,请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
|
| 8 |
+
128, 0.75, 0.35, 1.2, "qgyd2021/few_shot_intent", true
|
| 9 |
]
|
| 10 |
]
|
main.py
CHANGED
|
@@ -6,6 +6,7 @@ import json
|
|
| 6 |
import os
|
| 7 |
import platform
|
| 8 |
import re
|
|
|
|
| 9 |
from typing import List
|
| 10 |
|
| 11 |
from project_settings import project_path
|
|
@@ -48,6 +49,29 @@ def repl2(match):
|
|
| 48 |
return result
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def main():
|
| 52 |
args = get_args()
|
| 53 |
|
|
@@ -123,8 +147,9 @@ def main():
|
|
| 123 |
output = output[5:]
|
| 124 |
|
| 125 |
output = output.lstrip(" ,.!?")
|
| 126 |
-
output =
|
| 127 |
-
output = re.sub(r"([,。!?\u4e00-\u9fa5]) ",
|
|
|
|
| 128 |
|
| 129 |
output = output.replace("[SEP] ", "\n")
|
| 130 |
output = output.replace("[SEP]", "\n")
|
|
|
|
| 6 |
import os
|
| 7 |
import platform
|
| 8 |
import re
|
| 9 |
+
import string
|
| 10 |
from typing import List
|
| 11 |
|
| 12 |
from project_settings import project_path
|
|
|
|
| 49 |
return result
|
| 50 |
|
| 51 |
|
| 52 |
+
def remove_space_between_cn_en(text):
    """Drop the spaces in *text* except those separating ASCII word tokens.

    The text is split on single space characters and empty pieces (produced
    by leading, trailing or repeated spaces) are discarded. The remaining
    pieces are glued back together; one space is re-inserted between two
    neighbouring pieces only when the text built so far ends with an ASCII
    letter, digit or punctuation character AND the next piece starts with
    an ASCII letter or digit. Every other space (for example between two
    CJK characters, or between a CJK character and an ASCII word) is removed.

    If the input ends with a space, a single trailing space is kept.
    A string containing no space at all is returned unchanged.

    :param text: str, the text to clean up.
    :return: str, the text with the unwanted spaces removed.
    """
    tokens = text.split(" ")
    if len(tokens) < 2:
        return text

    # string.punctuation contains regex metacharacters (``\``, ``]``, ``^``,
    # ``-``); it must be escaped before being placed in a character class,
    # otherwise ``\]`` is read as an escaped bracket and the backslash itself
    # is silently left out of the class.
    ascii_tail = re.compile(f"[a-zA-Z0-9{re.escape(string.punctuation)}]$")
    alnum_head = re.compile(r"^[a-zA-Z0-9]")

    pieces = []
    # Only the last two characters built so far can influence a ``...$``
    # match (``$`` also matches just before one trailing newline), so keep
    # that short tail instead of re-scanning the whole growing result.
    tail = ""
    for token in tokens:
        if token == "":
            continue
        if ascii_tail.search(tail) and alnum_head.search(token):
            pieces.append(" ")
        pieces.append(token)
        tail = (tail + token)[-2:]

    result = "".join(pieces)
    if text.endswith(" "):
        result += " "
    return result
|
| 73 |
+
|
| 74 |
+
|
| 75 |
def main():
|
| 76 |
args = get_args()
|
| 77 |
|
|
|
|
| 147 |
output = output[5:]
|
| 148 |
|
| 149 |
output = output.lstrip(" ,.!?")
|
| 150 |
+
output = remove_space_between_cn_en(output)
|
| 151 |
+
# output = re.sub(r"([,。!?\u4e00-\u9fa5]) ([,。!?\u4e00-\u9fa5])", repl1, output)
|
| 152 |
+
# output = re.sub(r"([,。!?\u4e00-\u9fa5]) ", repl2, output)
|
| 153 |
|
| 154 |
output = output.replace("[SEP] ", "\n")
|
| 155 |
output = output.replace("[SEP]", "\n")
|