Spaces:
Running
Running
zRzRzRzRzRzRzR
committed on
Commit
·
9ec8fec
1
Parent(s):
aa0c384
formt
Browse files
app.py
CHANGED
|
@@ -35,30 +35,59 @@ def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
|
|
| 35 |
|
| 36 |
|
| 37 |
class GLM45Model:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def _strip_html(self, text: str) -> str:
|
| 39 |
return re.sub(r"<[^>]+>", "", text).strip()
|
| 40 |
|
| 41 |
def _wrap_text(self, text: str):
|
| 42 |
return [{"type": "text", "text": text}]
|
| 43 |
|
| 44 |
-
def
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
think_html = (
|
| 49 |
"<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
|
| 50 |
"<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
|
| 51 |
-
+
|
| 52 |
-
|
| 53 |
)
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
content_escaped = html.escape(content)
|
| 58 |
content_formatted = content_escaped.replace("\n", "<br>")
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
-
return
|
| 62 |
|
| 63 |
def _build_messages(self, raw_hist, sys_prompt):
|
| 64 |
msgs = []
|
|
@@ -78,32 +107,28 @@ class GLM45Model:
|
|
| 78 |
global stop_generation
|
| 79 |
stop_generation = False
|
| 80 |
msgs = self._build_messages(raw_hist, sys_prompt)
|
| 81 |
-
|
| 82 |
-
|
| 83 |
|
| 84 |
try:
|
| 85 |
for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
|
| 86 |
if stop_generation:
|
| 87 |
break
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
elif hasattr(delta, 'content') and delta.content:
|
| 100 |
-
content_buffer += delta.content
|
| 101 |
-
|
| 102 |
-
yield self._stream_fragment(reasoning_buffer, content_buffer, not thinking_enabled)
|
| 103 |
|
| 104 |
except Exception as e:
|
| 105 |
error_msg = f"Error during streaming: {str(e)}"
|
| 106 |
-
yield self.
|
| 107 |
|
| 108 |
|
| 109 |
glm45 = GLM45Model()
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
class GLM45Model:
|
| 38 |
+
def __init__(self):
    """Create the model wrapper with a freshly cleared streaming state."""
    # Delegate to reset_state() so construction and per-run resets
    # share one definition of "clean state".
    self.reset_state()
|
| 40 |
+
|
| 41 |
+
def reset_state(self):
    """Reset per-generation state before a new streaming run."""
    # accumulated_text holds the full raw model output streamed so far,
    # including any <think>...</think> markup.
    self.accumulated_text = ""
|
| 43 |
+
|
| 44 |
def _strip_html(self, text: str) -> str:
|
| 45 |
return re.sub(r"<[^>]+>", "", text).strip()
|
| 46 |
|
| 47 |
def _wrap_text(self, text: str):
|
| 48 |
return [{"type": "text", "text": text}]
|
| 49 |
|
| 50 |
+
def _parse_thinking_content(self, text: str):
|
| 51 |
+
thinking_content = ""
|
| 52 |
+
regular_content = ""
|
| 53 |
+
|
| 54 |
+
if "<think>" in text:
|
| 55 |
+
think_pattern = r'<think>(.*?)</think>'
|
| 56 |
+
think_match = re.search(think_pattern, text, re.DOTALL)
|
| 57 |
+
|
| 58 |
+
if think_match:
|
| 59 |
+
thinking_content = think_match.group(1).strip()
|
| 60 |
+
regular_content = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
|
| 61 |
+
else:
|
| 62 |
+
think_start = text.find("<think>")
|
| 63 |
+
if think_start != -1:
|
| 64 |
+
thinking_content = text[think_start + 7:]
|
| 65 |
+
regular_content = text[:think_start].strip()
|
| 66 |
+
else:
|
| 67 |
+
regular_content = text
|
| 68 |
+
|
| 69 |
+
return thinking_content, regular_content
|
| 70 |
+
|
| 71 |
+
def _render_response(self, thinking_content: str, regular_content: str, skip_think: bool = False):
|
| 72 |
+
html_parts = []
|
| 73 |
+
|
| 74 |
+
if thinking_content and not skip_think:
|
| 75 |
+
thinking_escaped = html.escape(thinking_content).replace("\n", "<br>")
|
| 76 |
think_html = (
|
| 77 |
"<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
|
| 78 |
"<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
|
| 79 |
+
+ thinking_escaped +
|
| 80 |
+
"</div></details>"
|
| 81 |
)
|
| 82 |
+
html_parts.append(think_html)
|
| 83 |
|
| 84 |
+
if regular_content:
|
| 85 |
+
content_escaped = html.escape(regular_content)
|
|
|
|
| 86 |
content_formatted = content_escaped.replace("\n", "<br>")
|
| 87 |
+
content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
|
| 88 |
+
html_parts.append(content_html)
|
| 89 |
|
| 90 |
+
return "".join(html_parts)
|
| 91 |
|
| 92 |
def _build_messages(self, raw_hist, sys_prompt):
|
| 93 |
msgs = []
|
|
|
|
| 107 |
global stop_generation
|
| 108 |
stop_generation = False
|
| 109 |
msgs = self._build_messages(raw_hist, sys_prompt)
|
| 110 |
+
|
| 111 |
+
self.reset_state()
|
| 112 |
|
| 113 |
try:
|
| 114 |
for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
|
| 115 |
if stop_generation:
|
| 116 |
break
|
| 117 |
|
| 118 |
+
delta_content = ""
|
| 119 |
+
if hasattr(delta, 'content') and delta.content:
|
| 120 |
+
delta_content = delta.content
|
| 121 |
+
elif isinstance(delta, dict) and 'content' in delta and delta['content']:
|
| 122 |
+
delta_content = delta['content']
|
| 123 |
+
|
| 124 |
+
if delta_content:
|
| 125 |
+
self.accumulated_text += delta_content
|
| 126 |
+
thinking_content, regular_content = self._parse_thinking_content(self.accumulated_text)
|
| 127 |
+
yield self._render_response(thinking_content, regular_content, not thinking_enabled)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
except Exception as e:
|
| 130 |
error_msg = f"Error during streaming: {str(e)}"
|
| 131 |
+
yield self._render_response("", error_msg)
|
| 132 |
|
| 133 |
|
| 134 |
glm45 = GLM45Model()
|