KillerKing93 committed on
Commit
9447956
·
verified ·
1 Parent(s): f2475d1

Sync from GitHub aa90805

Browse files
Files changed (2) hide show
  1. main.py +81 -18
  2. web/index.html +1008 -348
main.py CHANGED
@@ -433,15 +433,34 @@ class Engine:
433
  except Exception:
434
  AutoModelForImageTextToText = None # type: ignore
435
 
 
 
436
  model_kwargs: Dict[str, Any] = {
437
  "trust_remote_code": True,
438
  }
439
  if hf_token:
440
  # Only pass 'token' (use_auth_token is deprecated and causes conflicts)
441
  model_kwargs["token"] = hf_token
442
- # Device and dtype
443
- model_kwargs["device_map"] = DEVICE_MAP
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  model_kwargs["torch_dtype"] = TORCH_DTYPE if TORCH_DTYPE != "auto" else "auto"
 
 
445
 
446
  # Processor (handles text + images/videos)
447
  proc_kwargs: Dict[str, Any] = {"trust_remote_code": True}
@@ -473,6 +492,18 @@ class Engine:
473
  # Generic AutoModel as last-resort with trust_remote_code to load custom architectures
474
  model = AutoModel.from_pretrained(model_id, **model_kwargs) # pragma: no cover
475
  self.model = model.eval() # pragma: no cover
 
 
 
 
 
 
 
 
 
 
 
 
476
 
477
  self.model_id = model_id
478
  self.tokenizer = getattr(self.processor, "tokenizer", None)
@@ -665,22 +696,40 @@ class Engine:
665
  proc_kwargs["videos"] = videos
666
 
667
  inputs = self.processor(**proc_kwargs)
668
- # Move tensors to model device if present
669
  try:
670
- device = getattr(self.model, "device", None) or next(self.model.parameters()).device
671
- inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
 
 
 
 
672
  except Exception:
673
  pass
674
 
675
  do_sample = temperature is not None and float(temperature) > 0.0
676
 
677
- gen_ids = self.model.generate(
678
- **inputs,
679
- max_new_tokens=int(max_tokens),
680
- temperature=float(temperature),
681
- do_sample=do_sample,
682
- use_cache=True,
683
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
  # Decode
685
  output = self.processor.batch_decode(
686
  gen_ids,
@@ -722,8 +771,11 @@ class Engine:
722
 
723
  inputs = self.processor(**proc_kwargs)
724
  try:
725
- device = getattr(self.model, "device", None) or next(self.model.parameters()).device
726
- inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
 
 
 
727
  except Exception:
728
  pass
729
 
@@ -755,7 +807,17 @@ class Engine:
755
 
756
  gen_kwargs["stopping_criteria"] = StoppingCriteriaList([_CancelCrit(cancel_event)])
757
 
758
- th = threading.Thread(target=self.model.generate, kwargs=gen_kwargs)
 
 
 
 
 
 
 
 
 
 
759
  th.start()
760
 
761
  for piece in streamer:
@@ -1114,9 +1176,10 @@ def chat_completions(
1114
  pass
1115
  sess.cancel_timer = None
1116
 
1117
- # Replay if Last-Event-ID was provided
1118
- replay_from = last_idx_from_header if sid_from_header == session_id else -1
1119
- if replay_from >= -1:
 
1120
  # First try in-memory buffer
1121
  for idx, block in list(sess.buffer):
1122
  if idx > replay_from:
 
433
  except Exception:
434
  AutoModelForImageTextToText = None # type: ignore
435
 
436
+ # Resolve device map to avoid 'meta' device on CPU Spaces
437
+ # If DEVICE_MAP is "auto" but no CUDA is available, force "cpu" and disable low_cpu_mem_usage
438
  model_kwargs: Dict[str, Any] = {
439
  "trust_remote_code": True,
440
  }
441
  if hf_token:
442
  # Only pass 'token' (use_auth_token is deprecated and causes conflicts)
443
  model_kwargs["token"] = hf_token
444
+
445
+ # Device and dtype resolution
446
+ try:
447
+ import torch # local import to avoid heavy import at module load
448
+ has_cuda = bool(getattr(torch, "cuda", None) and torch.cuda.is_available())
449
+ except Exception:
450
+ has_cuda = False
451
+
452
+ resolved_device_map = DEVICE_MAP
453
+ if str(DEVICE_MAP).lower() == "auto" and not has_cuda:
454
+ resolved_device_map = "cpu"
455
+
456
+ model_kwargs["device_map"] = resolved_device_map
457
+ # Explicitly disable low_cpu_mem_usage on pure CPU to fully materialize weights (avoids meta tensors)
458
+ if resolved_device_map == "cpu":
459
+ model_kwargs["low_cpu_mem_usage"] = False
460
+ # dtype
461
  model_kwargs["torch_dtype"] = TORCH_DTYPE if TORCH_DTYPE != "auto" else "auto"
462
+ # store for later
463
+ self._resolved_device_map = resolved_device_map
464
 
465
  # Processor (handles text + images/videos)
466
  proc_kwargs: Dict[str, Any] = {"trust_remote_code": True}
 
492
  # Generic AutoModel as last-resort with trust_remote_code to load custom architectures
493
  model = AutoModel.from_pretrained(model_id, **model_kwargs) # pragma: no cover
494
  self.model = model.eval() # pragma: no cover
495
+ # Ensure model is fully on CPU when resolved device_map is cpu (prevents meta device mix during inference)
496
+ try:
497
+ if str(getattr(self, "_resolved_device_map", "")).lower() == "cpu":
498
+ _ = self.model.to("cpu")
499
+ except Exception:
500
+ pass
501
+ # Ensure model is on CPU when resolved device_map is cpu (prevents meta device mix during inference)
502
+ try:
503
+ if getattr(self, "_resolved_device_map", None) == "cpu":
504
+ _ = self.model.to("cpu")
505
+ except Exception:
506
+ pass
507
 
508
  self.model_id = model_id
509
  self.tokenizer = getattr(self.processor, "tokenizer", None)
 
696
  proc_kwargs["videos"] = videos
697
 
698
  inputs = self.processor(**proc_kwargs)
699
+ # Move tensors to the correct device
700
  try:
701
+ if str(getattr(self, "_resolved_device_map", "")).lower() == "cpu":
702
+ # Explicit CPU placement avoids 'meta' device errors on Spaces
703
+ inputs = {k: (v.to("cpu") if hasattr(v, "to") else v) for k, v in inputs.items()}
704
+ else:
705
+ device = getattr(self.model, "device", None) or next(self.model.parameters()).device
706
+ inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
707
  except Exception:
708
  pass
709
 
710
  do_sample = temperature is not None and float(temperature) > 0.0
711
 
712
+ # Safer on CPU: run without gradients to reduce memory pressure and avoid autograd hooks
713
+ try:
714
+ import torch
715
+ with torch.no_grad():
716
+ gen_ids = self.model.generate(
717
+ **inputs,
718
+ max_new_tokens=int(max_tokens),
719
+ temperature=float(temperature),
720
+ do_sample=do_sample,
721
+ use_cache=True,
722
+ )
723
+ except Exception:
724
+ # Fallback without no_grad if torch import fails (very unlikely)
725
+ gen_ids = self.model.generate(
726
+ **inputs,
727
+ max_new_tokens=int(max_tokens),
728
+ temperature=float(temperature),
729
+ do_sample=do_sample,
730
+ use_cache=True,
731
+ )
732
+
733
  # Decode
734
  output = self.processor.batch_decode(
735
  gen_ids,
 
771
 
772
  inputs = self.processor(**proc_kwargs)
773
  try:
774
+ if str(getattr(self, "_resolved_device_map", "")).lower() == "cpu":
775
+ inputs = {k: (v.to("cpu") if hasattr(v, "to") else v) for k, v in inputs.items()}
776
+ else:
777
+ device = getattr(self.model, "device", None) or next(self.model.parameters()).device
778
+ inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
779
  except Exception:
780
  pass
781
 
 
807
 
808
  gen_kwargs["stopping_criteria"] = StoppingCriteriaList([_CancelCrit(cancel_event)])
809
 
810
+ # Wrap generation with torch.no_grad() to avoid autograd overhead on CPU and reduce failure surface
811
+ def _runner():
812
+ try:
813
+ import torch
814
+ with torch.no_grad():
815
+ self.model.generate(**gen_kwargs)
816
+ except Exception:
817
+ # Let streamer finish gracefully even if generation throws
818
+ pass
819
+
820
+ th = threading.Thread(target=_runner)
821
  th.start()
822
 
823
  for piece in streamer:
 
1176
  pass
1177
  sess.cancel_timer = None
1178
 
1179
+ # Replay only when a valid Last-Event-ID is provided for this same session
1180
+ do_replay = bool(sid_from_header) and (sid_from_header == session_id)
1181
+ if do_replay:
1182
+ replay_from = last_idx_from_header
1183
  # First try in-memory buffer
1184
  for idx, block in list(sess.buffer):
1185
  if idx > replay_from:
web/index.html CHANGED
@@ -1,380 +1,1040 @@
1
- <!doctype html>
2
  <html lang="en">
3
- <head>
4
- <meta charset="utf-8" />
5
- <title>Qwen3‑VL Chat (HF Space API)</title>
6
- <meta name="viewport" content="width=device-width, initial-scale=1" />
7
- <style>
8
- :root { --bg:#0f172a; --fg:#e2e8f0; --muted:#94a3b8; --accent:#6366f1; --card:#111827; --chip:#1f2937; --border:#334155; }
9
- html, body { height:100%; margin:0; background:var(--bg); color:var(--fg); font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; }
10
- .app { display:flex; flex-direction:column; height:100%; max-width: 1200px; margin: 0 auto; }
11
- header { padding:12px 16px; border-bottom:1px solid var(--border); display:flex; gap:12px; align-items:center; flex-wrap: wrap; }
12
- header .title { font-weight:700; }
13
- header input[type="text"] { flex: 1 1 360px; background:var(--card); border:1px solid var(--border); color:var(--fg); padding:8px 10px; border-radius:6px; }
14
- header .small { color: var(--muted); font-size: 12px; }
15
- main { flex:1; overflow:auto; padding: 16px; display:flex; gap:16px; }
16
- .chat { flex: 1 1 auto; display:flex; flex-direction:column; gap:12px; }
17
- .msg { background:var(--card); border:1px solid var(--border); border-radius:10px; padding:12px; }
18
- .msg.user { border-left: 3px solid #22c55e; }
19
- .msg.assistant { border-left: 3px solid var(--accent); }
20
- .role { font-weight:700; margin-bottom:6px; color: var(--muted); text-transform: uppercase; font-size: 12px; }
21
- .content pre { white-space: pre-wrap; word-break: break-word; }
22
- .media { display:flex; flex-wrap:wrap; gap:8px; margin-top:8px; }
23
- .media img, .media video { max-width: 240px; max-height: 180px; border:1px solid var(--border); border-radius:8px; }
24
- .aside { width: 320px; flex: 0 0 auto; display:flex; flex-direction:column; gap:12px; }
25
- .card { background:var(--card); border:1px solid var(--border); border-radius:10px; padding:12px; }
26
- .label { font-size: 12px; color: var(--muted); margin-bottom:6px; }
27
- .row { display:flex; gap:8px; align-items:center; flex-wrap: wrap; }
28
- .controls textarea { width:100%; min-height: 80px; background:var(--card); border:1px solid var(--border); color:var(--fg); padding:8px; border-radius:8px; resize: vertical; }
29
- button { background:var(--accent); color:white; border:0; padding:8px 12px; border-radius:8px; cursor:pointer; }
30
- button.secondary { background: var(--chip); color: var(--fg); }
31
- input[type="number"], input[type="text"] { background:var(--card); border:1px solid var(--border); color:var(--fg); padding:6px 8px; border-radius:6px; }
32
- .chips { display:flex; gap:8px; flex-wrap: wrap; }
33
- .chip { background:var(--chip); color:var(--fg); border:1px solid var(--border); padding:4px 8px; border-radius: 999px; font-size: 12px; }
34
- footer { padding:10px 16px; border-top:1px solid var(--border); color: var(--muted); font-size:12px; display:flex; justify-content:space-between; gap:10px; flex-wrap: wrap; }
35
- a { color: #93c5fd; text-decoration: none; }
36
- a:hover { text-decoration: underline; }
37
- .hint { font-size: 12px; color: var(--muted); }
38
- input[type="file"] { display:none; }
39
- .file-btn { background: var(--chip); }
40
- .preview { display:flex; gap:8px; flex-wrap: wrap; margin-top:8px; }
41
- .preview-item { position:relative; }
42
- .remove { position:absolute; top:4px; right:4px; background: #ef4444; color:white; border:0; border-radius: 6px; padding:2px 6px; cursor:pointer; font-size:12px;}
43
- </style>
44
- </head>
45
- <body>
46
- <div class="app">
47
- <header>
48
- <div class="title">Qwen3‑VL Chat</div>
49
- <input id="apiBase" type="text" placeholder="HF Space API Base, e.g. https://killerking93-transformers-inferenceserver-openapi.hf.space" />
50
- <button id="saveBase" class="secondary">Save Base</button>
51
- <span id="health" class="small">Health: checking…</span>
52
- </header>
53
-
54
- <main>
55
- <section class="chat" id="chat"></section>
56
-
57
- <aside class="aside">
58
- <div class="card">
59
- <div class="label">Prompt</div>
60
- <div class="controls">
61
- <textarea id="prompt" placeholder="Ask anything… Supports images and videos."></textarea>
62
- <div class="row">
63
- <label for="file" class="file-btn button"><button class="secondary">Attach Image/Video</button></label>
64
- <input id="file" type="file" accept="image/*,video/*" multiple />
65
- <input id="maxTokens" type="number" min="1" max="8192" value="4096" title="Max tokens" />
66
- <input id="temperature" type="number" min="0" max="2" step="0.1" value="0.7" title="Temperature" />
67
- <button id="send">Send (Stream)</button>
68
- </div>
69
- <div id="preview" class="preview"></div>
70
- <div class="row" style="margin-top:8px;">
71
- <button id="clearHistory" class="secondary">Clear History</button>
72
- <span class="hint">Session <code id="sessionIdLabel"></code> — history saved locally</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  </div>
74
  </div>
75
- </div>
76
 
77
- <div class="card">
78
- <div class="label">Hints</div>
79
- <div class="chips">
80
- <div class="chip">Images: embedded as base64</div>
81
- <div class="chip">Videos: base64, frame-sampled by server</div>
82
- <div class="chip">SSE Streaming</div>
 
83
  </div>
 
 
 
 
 
 
 
84
  </div>
85
- </aside>
86
- </main>
87
-
88
- <footer>
89
- <div>Powered by FastAPI + Transformers (Qwen3‑VL). Calls public HF Space API (no internal access).</div>
90
- <div><a href="./docs" target="_blank">Swagger</a> · <a href="./openapi.yaml" target="_blank">OpenAPI YAML</a></div>
91
- </footer>
92
- </div>
93
-
94
- <script>
95
- // Config and state
96
- const DEFAULT_SPACE = "https://killerking93-transformers-inferenceserver-openapi.hf.space";
97
- const qs = new URLSearchParams(location.search);
98
- const apiBaseInput = document.getElementById('apiBase');
99
- const saveBaseBtn = document.getElementById('saveBase');
100
- const healthEl = document.getElementById('health');
101
- const chatEl = document.getElementById('chat');
102
- const promptEl = document.getElementById('prompt');
103
- const fileEl = document.getElementById('file');
104
- const previewEl = document.getElementById('preview');
105
- const sendBtn = document.getElementById('send');
106
- const clearBtn = document.getElementById('clearHistory');
107
- const sessionIdLabel = document.getElementById('sessionIdLabel');
108
- const maxTokensEl = document.getElementById('maxTokens');
109
- const temperatureEl = document.getElementById('temperature');
110
-
111
- const store = {
112
- get apiBase() { return localStorage.getItem('apiBase') || DEFAULT_SPACE; },
113
- set apiBase(v) { localStorage.setItem('apiBase', v); },
114
- get sessionId() {
115
- let sid = localStorage.getItem('sessionId');
116
- if (!sid) { sid = 'sess-' + Math.random().toString(16).slice(2, 10); localStorage.setItem('sessionId', sid); }
117
- return sid;
118
- },
119
- get messages() {
120
- const sid = this.sessionId;
121
- try { return JSON.parse(localStorage.getItem(`chat:${sid}`) || '[]'); } catch { return []; }
122
- },
123
- set messages(arr) {
124
- const sid = this.sessionId;
125
- localStorage.setItem(`chat:${sid}`, JSON.stringify(arr));
126
- },
127
- clear() {
128
- localStorage.removeItem(`chat:${this.sessionId}`);
129
- }
130
- };
131
-
132
- apiBaseInput.value = qs.get('api') || store.apiBase;
133
- sessionIdLabel.textContent = store.sessionId;
134
-
135
- saveBaseBtn.onclick = () => {
136
- const v = apiBaseInput.value.trim();
137
- if (!/^https?:\/\//i.test(v)) { alert('Provide a valid API base (https://...)'); return; }
138
- store.apiBase = v;
139
- checkHealth();
140
- };
141
-
142
- async function checkHealth() {
143
- healthEl.textContent = 'Health: checking…';
144
- try {
145
- const r = await fetch(new URL('/health', store.apiBase), { mode: 'cors' });
146
- const j = await r.json();
147
- healthEl.textContent = `Health: ${j.ok ? 'OK' : 'ERR'} · ModelReady=${j.modelReady ? 'yes' : 'no'} · Model=${j.modelId || 'unknown'}`;
148
- } catch (e) {
149
- healthEl.textContent = `Health: error (${e && e.message ? e.message : 'network'})`;
150
- }
 
 
 
 
 
 
 
 
 
 
151
  }
152
 
153
- // UI helpers
154
- function render() {
155
- chatEl.innerHTML = '';
156
- const messages = store.messages;
157
- // Render messages grouped by role sequence
158
- for (const msg of messages) {
159
- const node = document.createElement('div');
160
- node.className = `msg ${msg.role}`;
161
- const role = document.createElement('div');
162
- role.className = 'role';
163
- role.textContent = msg.role;
164
- node.appendChild(role);
165
-
166
- const content = document.createElement('div');
167
- content.className = 'content';
168
- if (typeof msg.content === 'string') {
169
- const pre = document.createElement('pre');
170
- pre.textContent = msg.content;
171
- content.appendChild(pre);
172
- } else if (Array.isArray(msg.content)) {
173
- const textParts = msg.content.filter(p => p.type === 'text');
174
- for (const t of textParts) {
175
- const pre = document.createElement('pre');
176
- pre.textContent = t.text || '';
177
- content.appendChild(pre);
 
 
 
 
178
  }
179
- const media = document.createElement('div');
180
- media.className = 'media';
181
- for (const p of msg.content) {
182
- if (p.type === 'input_image' || p.type === 'image_url') {
183
- const img = document.createElement('img');
184
- if (p.b64_json) {
185
- img.src = p.b64_json.startsWith('data:') ? p.b64_json : ('data:image/*;base64,' + p.b64_json);
186
- } else if (p.image_url && p.image_url.url) {
187
- img.src = p.image_url.url;
188
- }
189
- media.appendChild(img);
190
- } else if (p.type === 'input_video' || p.type === 'video_url') {
191
- const video = document.createElement('video');
192
- video.controls = true;
193
- if (p.b64_json) {
194
- video.src = p.b64_json.startsWith('data:') ? p.b64_json : ('data:video/mp4;base64,' + p.b64_json);
195
- } else if (p.video_url && p.video_url.url) {
196
- video.src = p.video_url.url;
197
- }
198
- media.appendChild(video);
199
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  }
201
- if (media.childElementCount) content.appendChild(media);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  }
203
- node.appendChild(content);
204
- chatEl.appendChild(node);
205
  }
206
- chatEl.scrollTop = chatEl.scrollHeight;
207
- }
208
 
209
- // File handling
210
- const fileQueue = [];
211
- fileEl.addEventListener('change', async (e) => {
212
- const files = Array.from(e.target.files || []);
213
- for (const f of files) {
214
- const b64 = await fileToDataURL(f);
215
- fileQueue.push({ name: f.name, type: f.type, dataUrl: b64 });
216
- }
217
- renderPreview();
218
- e.target.value = '';
219
- });
220
-
221
- function renderPreview() {
222
- previewEl.innerHTML = '';
223
- for (let i = 0; i < fileQueue.length; i++) {
224
- const f = fileQueue[i];
225
- const wrap = document.createElement('div');
226
- wrap.className = 'preview-item';
227
- const btn = document.createElement('button');
228
- btn.className = 'remove';
229
- btn.textContent = 'x';
230
- btn.onclick = () => { fileQueue.splice(i, 1); renderPreview(); };
231
- wrap.appendChild(btn);
232
- if (f.type.startsWith('image/')) {
233
- const img = document.createElement('img');
234
- img.src = f.dataUrl;
235
- img.style.maxWidth = '160px';
236
- img.style.maxHeight = '120px';
237
- wrap.appendChild(img);
238
- } else if (f.type.startsWith('video/')) {
239
- const video = document.createElement('video');
240
- video.src = f.dataUrl;
241
- video.controls = true;
242
- video.style.maxWidth = '160px';
243
- video.style.maxHeight = '120px';
244
- wrap.appendChild(video);
245
- } else {
246
- const pre = document.createElement('pre');
247
- pre.textContent = f.name;
248
- wrap.appendChild(pre);
249
  }
250
- previewEl.appendChild(wrap);
 
 
 
 
 
 
 
 
 
 
 
251
  }
252
- }
253
 
254
- function fileToDataURL(file) {
255
- return new Promise((resolve, reject) => {
256
- const reader = new FileReader();
257
- reader.onload = () => resolve(reader.result);
258
- reader.onerror = reject;
259
- reader.readAsDataURL(file);
260
- });
261
- }
 
 
 
262
 
263
- function dataUrlToBase64(d) {
264
- return d.includes('base64,') ? d.split('base64,')[1] : d;
265
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- // Build OpenAI-style messages array from stored history (already in that shape)
268
- function getMessages() {
269
- return store.messages;
270
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- function pushUserMessageFromUI() {
273
- const msg = { role: 'user', content: [] };
274
- const text = (promptEl.value || '').trim();
275
- if (text) msg.content.push({ type: 'text', text });
276
- for (const f of fileQueue) {
277
- if (f.type.startsWith('image/')) {
278
- msg.content.push({ type: 'input_image', b64_json: dataUrlToBase64(f.dataUrl) });
279
- } else if (f.type.startsWith('video/')) {
280
- msg.content.push({ type: 'input_video', b64_json: dataUrlToBase64(f.dataUrl) });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  }
282
  }
283
- const messages = getMessages();
284
- messages.push(msg);
285
- store.messages = messages;
286
- // clear UI queue
287
- fileQueue.splice(0, fileQueue.length);
288
- previewEl.innerHTML = '';
289
- promptEl.value = '';
290
- render();
291
- }
292
 
293
- async function sendStream() {
294
- const apiBase = apiBaseInput.value.trim() || DEFAULT_SPACE;
295
- const body = {
296
- messages: getMessages(),
297
- stream: true,
298
- session_id: store.sessionId,
299
- max_tokens: Math.max(1, parseInt(maxTokensEl.value || '4096', 10)),
300
- temperature: parseFloat(temperatureEl.value || '0.7'),
301
- };
302
 
303
- const url = new URL('/v1/chat/completions', apiBase);
304
- const resp = await fetch(url, {
305
- method: 'POST',
306
- headers: { 'Content-Type': 'application/json' },
307
- body: JSON.stringify(body),
308
- mode: 'cors',
309
- });
310
- if (!resp.ok || !resp.body) {
311
- const text = await resp.text().catch(() => '');
312
- throw new Error(`HTTP ${resp.status}: ${text}`);
313
- }
314
- // Prepare assistant message to accumulate streamed content
315
- const messages = getMessages();
316
- const asst = { role: 'assistant', content: '' };
317
- messages.push(asst);
318
- store.messages = messages;
319
- render();
320
-
321
- const reader = resp.body.getReader();
322
- const decoder = new TextDecoder();
323
- let buffer = '';
324
- while (true) {
325
- const { done, value } = await reader.read();
326
- if (done) break;
327
- buffer += decoder.decode(value, { stream: true });
328
- // split SSE blocks
329
- let idx;
330
- while ((idx = buffer.indexOf('\n\n')) !== -1) {
331
- const block = buffer.slice(0, idx); buffer = buffer.slice(idx + 2);
332
- const lines = block.split('\n');
333
- for (const line of lines) {
334
- if (line.startsWith('data:')) {
335
- const data = line.slice(5).trim();
336
- if (data === '[DONE]') continue;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  try {
338
- const j = JSON.parse(data);
339
- const delta = (((j || {}).choices || [])[0] || {}).delta || {};
340
- if (typeof delta.content === 'string' && delta.content.length) {
341
- // append token
342
- const msgs = getMessages();
343
- const last = msgs[msgs.length - 1];
344
- if (last && last.role === 'assistant') {
345
- last.content = (last.content || '') + delta.content;
346
- store.messages = msgs;
347
- render();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  }
349
  }
350
- } catch {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  }
 
352
  }
353
  }
 
 
 
 
 
 
354
  }
355
- }
356
 
357
- sendBtn.onclick = async () => {
358
- try {
359
- pushUserMessageFromUI();
360
- await sendStream();
361
- } catch (e) {
362
- alert('Send failed: ' + (e && e.message ? e.message : e));
363
- }
364
- };
365
-
366
- clearBtn.onclick = () => {
367
- if (confirm('Clear chat history for this session?')) {
368
- store.clear(); render();
369
- }
370
- };
371
-
372
- (async function init() {
373
- render();
374
- await checkHealth();
375
- // Auto-save default base on first load if empty
376
- if (!localStorage.getItem('apiBase')) localStorage.setItem('apiBase', apiBaseInput.value.trim() || DEFAULT_SPACE);
377
- })();
378
- </script>
379
- </body>
380
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
  <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <title>Qwen3‑VL Chat (HF Space API)</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
7
+ <style>
8
+ :root {
9
+ --bg: #0f172a;
10
+ --fg: #e2e8f0;
11
+ --muted: #94a3b8;
12
+ --accent: #6366f1;
13
+ --card: #111827;
14
+ --chip: #1f2937;
15
+ --border: #334155;
16
+ }
17
+ html,
18
+ body {
19
+ height: 100%;
20
+ margin: 0;
21
+ background: var(--bg);
22
+ color: var(--fg);
23
+ font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica,
24
+ Arial, "Apple Color Emoji", "Segoe UI Emoji";
25
+ }
26
+ .app {
27
+ display: flex;
28
+ flex-direction: column;
29
+ height: 100%;
30
+ max-width: 1200px;
31
+ margin: 0 auto;
32
+ }
33
+ header {
34
+ padding: 12px 16px;
35
+ border-bottom: 1px solid var(--border);
36
+ display: flex;
37
+ gap: 12px;
38
+ align-items: center;
39
+ flex-wrap: wrap;
40
+ }
41
+ header .title {
42
+ font-weight: 700;
43
+ }
44
+ header input[type="text"] {
45
+ flex: 1 1 360px;
46
+ background: var(--card);
47
+ border: 1px solid var(--border);
48
+ color: var(--fg);
49
+ padding: 8px 10px;
50
+ border-radius: 6px;
51
+ }
52
+ header .small {
53
+ color: var(--muted);
54
+ font-size: 12px;
55
+ }
56
+ main {
57
+ flex: 1;
58
+ overflow: auto;
59
+ padding: 16px;
60
+ display: flex;
61
+ gap: 16px;
62
+ }
63
+ .chat {
64
+ flex: 1 1 auto;
65
+ display: flex;
66
+ flex-direction: column;
67
+ gap: 12px;
68
+ }
69
+ .msg {
70
+ background: var(--card);
71
+ border: 1px solid var(--border);
72
+ border-radius: 10px;
73
+ padding: 12px;
74
+ }
75
+ .msg.user {
76
+ border-left: 3px solid #22c55e;
77
+ }
78
+ .msg.assistant {
79
+ border-left: 3px solid var(--accent);
80
+ }
81
+ .role {
82
+ font-weight: 700;
83
+ margin-bottom: 6px;
84
+ color: var(--muted);
85
+ text-transform: uppercase;
86
+ font-size: 12px;
87
+ }
88
+ .content pre {
89
+ white-space: pre-wrap;
90
+ word-break: break-word;
91
+ }
92
+ .media {
93
+ display: flex;
94
+ flex-wrap: wrap;
95
+ gap: 8px;
96
+ margin-top: 8px;
97
+ }
98
+ .media img,
99
+ .media video {
100
+ max-width: 240px;
101
+ max-height: 180px;
102
+ border: 1px solid var(--border);
103
+ border-radius: 8px;
104
+ }
105
+ .aside {
106
+ width: 320px;
107
+ flex: 0 0 auto;
108
+ display: flex;
109
+ flex-direction: column;
110
+ gap: 12px;
111
+ }
112
+ .card {
113
+ background: var(--card);
114
+ border: 1px solid var(--border);
115
+ border-radius: 10px;
116
+ padding: 12px;
117
+ }
118
+ .label {
119
+ font-size: 12px;
120
+ color: var(--muted);
121
+ margin-bottom: 6px;
122
+ }
123
+ .row {
124
+ display: flex;
125
+ gap: 8px;
126
+ align-items: center;
127
+ flex-wrap: wrap;
128
+ }
129
+ .controls textarea {
130
+ width: 100%;
131
+ min-height: 80px;
132
+ background: var(--card);
133
+ border: 1px solid var(--border);
134
+ color: var(--fg);
135
+ padding: 8px;
136
+ border-radius: 8px;
137
+ resize: vertical;
138
+ }
139
+ button {
140
+ background: var(--accent);
141
+ color: white;
142
+ border: 0;
143
+ padding: 8px 12px;
144
+ border-radius: 8px;
145
+ cursor: pointer;
146
+ }
147
+ button.secondary {
148
+ background: var(--chip);
149
+ color: var(--fg);
150
+ }
151
+ input[type="number"],
152
+ input[type="text"] {
153
+ background: var(--card);
154
+ border: 1px solid var(--border);
155
+ color: var(--fg);
156
+ padding: 6px 8px;
157
+ border-radius: 6px;
158
+ }
159
+ .chips {
160
+ display: flex;
161
+ gap: 8px;
162
+ flex-wrap: wrap;
163
+ }
164
+ .chip {
165
+ background: var(--chip);
166
+ color: var(--fg);
167
+ border: 1px solid var(--border);
168
+ padding: 4px 8px;
169
+ border-radius: 999px;
170
+ font-size: 12px;
171
+ }
172
+ footer {
173
+ padding: 10px 16px;
174
+ border-top: 1px solid var(--border);
175
+ color: var(--muted);
176
+ font-size: 12px;
177
+ display: flex;
178
+ justify-content: space-between;
179
+ gap: 10px;
180
+ flex-wrap: wrap;
181
+ }
182
+ a {
183
+ color: #93c5fd;
184
+ text-decoration: none;
185
+ }
186
+ a:hover {
187
+ text-decoration: underline;
188
+ }
189
+ .hint {
190
+ font-size: 12px;
191
+ color: var(--muted);
192
+ }
193
+ input[type="file"] {
194
+ display: none;
195
+ }
196
+ .file-btn {
197
+ background: var(--chip);
198
+ }
199
+ .preview {
200
+ display: flex;
201
+ gap: 8px;
202
+ flex-wrap: wrap;
203
+ margin-top: 8px;
204
+ }
205
+ .preview-item {
206
+ position: relative;
207
+ }
208
+ .remove {
209
+ position: absolute;
210
+ top: 4px;
211
+ right: 4px;
212
+ background: #ef4444;
213
+ color: white;
214
+ border: 0;
215
+ border-radius: 6px;
216
+ padding: 2px 6px;
217
+ cursor: pointer;
218
+ font-size: 12px;
219
+ }
220
+ </style>
221
+ </head>
222
+ <body>
223
+ <div class="app">
224
+ <header>
225
+ <div class="title">Qwen3‑VL Chat</div>
226
+ <input
227
+ id="apiBase"
228
+ type="text"
229
+ placeholder="HF Space API Base, e.g. https://killerking93-transformers-inferenceserver-openapi.hf.space"
230
+ />
231
+ <button id="saveBase" class="secondary">Save Base</button>
232
+ <span id="health" class="small">Health: checking…</span>
233
+ </header>
234
+
235
+ <main>
236
+ <section class="chat" id="chat"></section>
237
+
238
+ <aside class="aside">
239
+ <div class="card">
240
+ <div class="label">Prompt</div>
241
+ <div class="controls">
242
+ <textarea
243
+ id="prompt"
244
+ placeholder="Ask anything… Supports images and videos."
245
+ ></textarea>
246
+ <div class="row">
247
+ <button id="attach" type="button" class="secondary">
248
+ Attach Image/Video
249
+ </button>
250
+ <input
251
+ id="file"
252
+ type="file"
253
+ accept="image/*,video/*"
254
+ multiple
255
+ style="display: none"
256
+ />
257
+ <input
258
+ id="maxTokens"
259
+ type="number"
260
+ min="1"
261
+ max="8192"
262
+ value="4096"
263
+ title="Max tokens"
264
+ />
265
+ <input
266
+ id="temperature"
267
+ type="number"
268
+ min="0"
269
+ max="2"
270
+ step="0.1"
271
+ value="0.7"
272
+ title="Temperature"
273
+ />
274
+ <button id="send">Send (Stream)</button>
275
+ </div>
276
+ <div id="preview" class="preview"></div>
277
+ <div class="row" style="margin-top: 8px">
278
+ <button id="clearHistory" class="secondary">
279
+ Clear History
280
+ </button>
281
+ <span class="hint"
282
+ >Session <code id="sessionIdLabel"></code> — history saved
283
+ locally</span
284
+ >
285
+ </div>
286
  </div>
287
  </div>
 
288
 
289
+ <div class="card">
290
+ <div class="label">Hints</div>
291
+ <div class="chips">
292
+ <div class="chip">Images: embedded as base64</div>
293
+ <div class="chip">Videos: base64, frame-sampled by server</div>
294
+ <div class="chip">SSE Streaming</div>
295
+ </div>
296
  </div>
297
+ </aside>
298
+ </main>
299
+
300
+ <footer>
301
+ <div>
302
+ Powered by FastAPI + Transformers (Qwen3‑VL). Calls public HF Space
303
+ API (no internal access).
304
  </div>
305
+ <div>
306
+ <a href="./docs" target="_blank">Swagger</a> ·
307
+ <a href="./openapi.yaml" target="_blank">OpenAPI YAML</a>
308
+ </div>
309
+ </footer>
310
+ </div>
311
+
312
+ <script>
313
// ---- Configuration and cached DOM handles ----

// Backend used when no API base has been saved in localStorage.
const DEFAULT_SPACE =
  "https://killerking93-transformers-inferenceserver-openapi.hf.space";

// Query-string parameters of the current page (read later for the `api` override).
const qs = new URLSearchParams(location.search);

// Header controls.
const apiBaseInput = document.querySelector("#apiBase");
const saveBaseBtn = document.querySelector("#saveBase");
const healthEl = document.querySelector("#health");

// Conversation pane and composer.
const chatEl = document.querySelector("#chat");
const promptEl = document.querySelector("#prompt");
const fileEl = document.querySelector("#file");
const previewEl = document.querySelector("#preview");
const sendBtn = document.querySelector("#send");
const clearBtn = document.querySelector("#clearHistory");
const sessionIdLabel = document.querySelector("#sessionIdLabel");

// Generation parameters and the attach button.
const maxTokensEl = document.querySelector("#maxTokens");
const temperatureEl = document.querySelector("#temperature");
const attachBtn = document.querySelector("#attach");
330
+
331
// Thin persistence layer over localStorage.
//
// Keys used:
//   "apiBase"     - base URL of the backend
//   "sessionId"   - id of the current chat session (created lazily)
//   "chat:<sid>"  - JSON array with the compacted message history of a session
const store = {
  // Saved API base, or the default Space when nothing has been saved yet.
  get apiBase() {
    return localStorage.getItem("apiBase") || DEFAULT_SPACE;
  },
  set apiBase(v) {
    localStorage.setItem("apiBase", v);
  },

  // Current session id; a fresh "sess-xxxxxxxx" id is generated and stored on first access.
  get sessionId() {
    let sid = localStorage.getItem("sessionId");
    if (!sid) {
      sid = "sess-" + Math.random().toString(16).slice(2, 10);
      localStorage.setItem("sessionId", sid);
    }
    return sid;
  },

  // Message history of the current session. Missing, unparsable or
  // non-array stored data yields an empty history instead of breaking callers
  // that push onto / iterate the result.
  get messages() {
    const sid = this.sessionId;
    try {
      const parsed = JSON.parse(localStorage.getItem(`chat:${sid}`) || "[]");
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  },

  // Persist the history after compaction (see compactMessages). When the
  // serialized form is still too large, the oldest messages are dropped one
  // by one until it fits (the newest message is always kept).
  set messages(arr) {
    const sid = this.sessionId;
    try {
      const compact = compactMessages(arr || []);
      let serialized = JSON.stringify(compact);
      // Safety window intended to stay below the localStorage quota.
      // Note: compared against the string length (UTF-16 code units), not bytes.
      const LIMIT = 4.5 * 1024 * 1024;
      while (serialized.length > LIMIT && compact.length > 1) {
        compact.shift(); // drop oldest message
        serialized = JSON.stringify(compact);
      }
      localStorage.setItem(`chat:${sid}`, serialized);
    } catch (e) {
      console.warn(
        "Persist messages failed, clearing oldest/history may be truncated:",
        e
      );
      // Best effort: free the space held by this session's history.
      try {
        localStorage.removeItem(`chat:${sid}`);
      } catch {}
    }
  },

  // Remove the current chat history and rotate to a brand new session.
  clear() {
    const sid = localStorage.getItem("sessionId");
    if (sid) {
      localStorage.removeItem(`chat:${sid}`);
    }
    // Drop the session id so the next `sessionId` access generates a fresh one.
    localStorage.removeItem("sessionId");
  },
};
446
+
447
// Seed the API-base field: an `?api=` query parameter wins over the stored value.
apiBaseInput.value = qs.get("api") || store.apiBase;
// Show which session the locally stored history belongs to.
sessionIdLabel.textContent = store.sessionId;

// "Save Base": persist the typed base (http/https only) and re-probe the backend.
saveBaseBtn.onclick = () => {
  const candidate = apiBaseInput.value.trim();
  const looksLikeHttpUrl = /^https?:\/\//i.test(candidate);
  if (looksLikeHttpUrl) {
    store.apiBase = candidate;
    checkHealth();
    return;
  }
  alert("Provide a valid API base (https://...)");
};
459
+
460
// Probe `<base>/health` and show the result in the header.
//
// The base is taken from the API-base input (falling back to the stored
// value when the field is empty) so the health line always describes the same
// server that sendStream() will talk to, including a `?api=` override that
// has not been saved yet.
// Never throws: any failure (invalid URL, network error, non-JSON body) is
// reported in the health label.
async function checkHealth() {
  healthEl.textContent = "Health: checking…";
  try {
    const base = apiBaseInput.value.trim() || store.apiBase;
    const r = await fetch(new URL("/health", base), {
      mode: "cors",
    });
    const j = await r.json();
    healthEl.textContent = `Health: ${j.ok ? "OK" : "ERR"} · ModelReady=${
      j.modelReady ? "yes" : "no"
    } · Model=${j.modelId || "unknown"}`;
  } catch (e) {
    healthEl.textContent = `Health: error (${
      e && e.message ? e.message : "network"
    })`;
  }
}
 
 
476
 
477
+ // UI helpers
478
// Build a DocumentFragment for a message text.
// Plain text goes into <pre> elements; every complete <think>...</think>
// section becomes a collapsed <details> block titled "Show reasoning".
// Text is always assigned through textContent, never parsed as HTML.
function renderAssistantText(text) {
  const fragment = document.createDocumentFragment();

  // Append a <pre> holding `value` to `parent`.
  const addPre = (parent, value) => {
    const pre = document.createElement("pre");
    pre.textContent = value;
    parent.appendChild(pre);
  };

  // Non-string or empty input: a single <pre> with whatever was given.
  if (typeof text !== "string" || !text.length) {
    addPre(fragment, text || "");
    return fragment;
  }

  const thinkPattern = /<think>([\s\S]*?)<\/think>/gi;
  let cursor = 0;
  for (
    let match = thinkPattern.exec(text);
    match !== null;
    match = thinkPattern.exec(text)
  ) {
    // Visible text preceding this reasoning section.
    const leading = text.slice(cursor, match.index);
    if (leading) {
      addPre(fragment, leading);
    }

    const details = document.createElement("details");
    const summary = document.createElement("summary");
    summary.textContent = "Show reasoning";
    details.appendChild(summary);
    addPre(details, match[1]);
    fragment.appendChild(details);

    cursor = match.index + match[0].length;
  }

  // Visible text after the last reasoning section (or the whole text).
  const trailing = text.slice(cursor);
  if (trailing) {
    addPre(fragment, trailing);
  }

  // Fallback: make sure something is rendered.
  if (!fragment.childNodes.length) {
    addPre(fragment, text);
  }
  return fragment;
}
 
519
 
520
// Re-render the whole conversation from the persisted history and scroll
// the chat pane to the bottom.
function render() {
  chatEl.innerHTML = "";

  // Create an element, optionally with a class name.
  const el = (tag, className) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    return node;
  };

  // Pick the source for an inline media part: base64 payload (raw or already
  // a data: URL) first, then a URL reference. Returns undefined when neither exists.
  const mediaSource = (part, urlHolder, dataPrefix) => {
    if (part.b64_json) {
      return part.b64_json.startsWith("data:")
        ? part.b64_json
        : dataPrefix + part.b64_json;
    }
    if (urlHolder && urlHolder.url) {
      return urlHolder.url;
    }
    return undefined;
  };

  // Build the DOM node for one non-text content part, or null when the part
  // has no visual representation.
  const mediaNode = (part) => {
    switch (part.type) {
      case "input_image":
      case "image_url": {
        const img = el("img");
        const src = mediaSource(part, part.image_url, "data:image/*;base64,");
        if (src !== undefined) img.src = src;
        return img;
      }
      case "input_video":
      case "video_url": {
        const video = el("video");
        video.controls = true;
        const src = mediaSource(part, part.video_url, "data:video/mp4;base64,");
        if (src !== undefined) video.src = src;
        return video;
      }
      case "image_thumb":
      case "video_thumb": {
        // Stored thumbnails (a video thumbnail is a poster-like still image).
        if (!part.dataUrl) return null;
        const img = el("img");
        img.src = part.dataUrl;
        return img;
      }
      case "image_ref": {
        const chip = el("div", "chip");
        chip.textContent = `Image (${part.name || "ref"})`;
        return chip;
      }
      case "video_ref": {
        const chip = el("div", "chip");
        chip.textContent = `Video (${part.name || "ref"})`;
        return chip;
      }
      default:
        return null;
    }
  };

  for (const msg of store.messages) {
    const row = el("div", `msg ${msg.role}`);

    const roleLabel = el("div", "role");
    roleLabel.textContent = msg.role;
    row.appendChild(roleLabel);

    const body = el("div", "content");
    if (typeof msg.content === "string") {
      body.appendChild(renderAssistantText(msg.content));
    } else if (Array.isArray(msg.content)) {
      // All text parts first, each in its own <pre> ...
      for (const part of msg.content) {
        if (part.type === "text") {
          const pre = el("pre");
          pre.textContent = part.text || "";
          body.appendChild(pre);
        }
      }
      // ... then the media strip, in original part order.
      const strip = el("div", "media");
      for (const part of msg.content) {
        const node = mediaNode(part);
        if (node) strip.appendChild(node);
      }
      if (strip.childElementCount) body.appendChild(strip);
    }

    row.appendChild(body);
    chatEl.appendChild(row);
  }

  chatEl.scrollTop = chatEl.scrollHeight;
}
594
 
595
+ // File handling
596
// Attachments waiting to be sent with the next user message.
// Each entry: { name, type, dataUrl }.
const fileQueue = [];

// Read every file of a FileList (or array of Files) as a data URL, append
// them to the queue and refresh the preview strip.
//
// All files are read first and queued afterwards, so the queue keeps the
// order in which the user supplied the files (not the order in which the
// reads happened to finish), and a failed read leaves the queue untouched.
// Returns a promise that resolves once the preview has been refreshed and
// rejects when any file cannot be read.
function handleFileList(files) {
  const picked = Array.from(files || []);
  return Promise.all(picked.map((f) => fileToDataURL(f))).then((dataUrls) => {
    picked.forEach((f, i) => {
      fileQueue.push({ name: f.name, type: f.type, dataUrl: dataUrls[i] });
    });
    renderPreview();
  });
}
608
// The visible "Attach" button forwards to the hidden native file input.
if (attachBtn) {
  attachBtn.addEventListener("click", () => {
    fileEl.click();
  });
}

// Native picker: queue the chosen files, then reset the input so picking the
// same file again fires another "change" event.
fileEl.addEventListener("change", async (e) => {
  const input = e.target;
  await handleFileList(input.files);
  input.value = "";
});

// Drag & drop onto the preview area.
previewEl.addEventListener("dragover", (e) => {
  e.preventDefault();
  e.dataTransfer.dropEffect = "copy";
});
previewEl.addEventListener("drop", async (e) => {
  e.preventDefault();
  await handleFileList(e.dataTransfer.files);
});

// Paste from the clipboard: only acts when the clipboard carries files.
document.addEventListener("paste", async (e) => {
  const clip = e.clipboardData;
  const hasFiles = clip && clip.files && clip.files.length;
  if (!hasFiles) return;
  await handleFileList(clip.files);
});
634
 
635
// Rebuild the preview strip from the attachment queue. Every item gets a
// remove button; images and videos are shown inline (max 160x120), anything
// else is listed by file name.
function renderPreview() {
  previewEl.innerHTML = "";

  // Constrain an inline media element to the preview box.
  const fitToBox = (mediaEl) => {
    mediaEl.style.maxWidth = "160px";
    mediaEl.style.maxHeight = "120px";
  };

  fileQueue.forEach((item, index) => {
    const wrapper = document.createElement("div");
    wrapper.className = "preview-item";

    const removeBtn = document.createElement("button");
    removeBtn.className = "remove";
    removeBtn.textContent = "x";
    removeBtn.onclick = () => {
      // Indices stay valid because the strip is rebuilt after every removal.
      fileQueue.splice(index, 1);
      renderPreview();
    };
    wrapper.appendChild(removeBtn);

    if (item.type.startsWith("image/")) {
      const img = document.createElement("img");
      img.src = item.dataUrl;
      fitToBox(img);
      wrapper.appendChild(img);
    } else if (item.type.startsWith("video/")) {
      const video = document.createElement("video");
      video.src = item.dataUrl;
      video.controls = true;
      fitToBox(video);
      wrapper.appendChild(video);
    } else {
      const label = document.createElement("pre");
      label.textContent = item.name;
      wrapper.appendChild(label);
    }

    previewEl.appendChild(wrapper);
  });
}
 
 
 
 
 
 
 
 
 
670
 
671
// Read a File/Blob and resolve with its contents as a `data:` URL.
// Rejects with the reader's error event when the read fails.
function fileToDataURL(file) {
  const startRead = (resolve, reject) => {
    const fr = new FileReader();
    fr.onerror = reject;
    fr.onload = () => {
      resolve(fr.result);
    };
    fr.readAsDataURL(file);
  };
  return new Promise(startRead);
}
 
679
 
680
// Strip the "data:<mime>;base64," header from a data URL and return the
// payload. A string without the "base64," marker is returned unchanged.
function dataUrlToBase64(d) {
  const MARKER = "base64,";
  if (!d.includes(MARKER)) {
    return d;
  }
  const pieces = d.split(MARKER);
  return pieces[1];
}
683
+
684
// Produce a small JPEG data URL for an image, used for compact storage and
// for display in the chat.
//
// The image is scaled down (never up) to fit inside maxW x maxH while keeping
// its aspect ratio. Resolves with the thumbnail data URL, or with null when
// the image cannot be loaded or drawn; never rejects.
async function createImageThumbnail(
  dataUrl,
  maxW = 320,
  maxH = 240,
  quality = 0.7
) {
  // Draw `source` onto a canvas of the scaled size and encode it as JPEG.
  const encode = (source) => {
    const scale = Math.min(maxW / source.width, maxH / source.height, 1);
    const targetW = Math.max(1, Math.round(source.width * scale));
    const targetH = Math.max(1, Math.round(source.height * scale));
    const canvas = document.createElement("canvas");
    canvas.width = targetW;
    canvas.height = targetH;
    canvas.getContext("2d").drawImage(source, 0, 0, targetW, targetH);
    return canvas.toDataURL("image/jpeg", quality);
  };

  return new Promise((resolve) => {
    const img = new Image();
    img.onerror = () => resolve(null);
    img.onload = () => {
      let thumb;
      try {
        thumb = encode(img);
      } catch {
        thumb = null;
      }
      resolve(thumb);
    };
    img.src = dataUrl;
  });
}
712
+
713
// Produce a poster-like JPEG data URL from the first decodable frame of a
// video (best effort).
//
// The frame is scaled down (never up) to fit inside maxW x maxH while keeping
// its aspect ratio. Resolves with the thumbnail data URL, or with null when
// the video fails to load, cannot be drawn, or does not become ready within
// 3 seconds; never rejects.
//
// Whatever the outcome, the fallback timer is cancelled and the temporary
// <video> element is detached from its source so the (potentially large)
// data URL is not kept decoding in the background.
async function createVideoThumbnail(
  dataUrl,
  maxW = 320,
  maxH = 240,
  quality = 0.6
) {
  return new Promise((resolve) => {
    const video = document.createElement("video");
    let settled = false;
    let timer = null;

    // Resolve exactly once and release the timer and the video resource.
    const settle = (value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      try {
        video.pause();
        // Dropping the src attribute and calling load() resets the element
        // and lets the browser free the media resource.
        video.removeAttribute("src");
        video.load();
      } catch {}
      resolve(value);
    };

    const onReady = () => {
      try {
        const vw = Math.max(1, video.videoWidth || 1);
        const vh = Math.max(1, video.videoHeight || 1);
        const ratio = Math.min(maxW / vw, maxH / vh, 1);
        const w = Math.max(1, Math.round(vw * ratio));
        const h = Math.max(1, Math.round(vh * ratio));
        const canvas = document.createElement("canvas");
        canvas.width = w;
        canvas.height = h;
        const ctx = canvas.getContext("2d");
        ctx.drawImage(video, 0, 0, w, h);
        // The frame is encoded before settle() releases the video.
        settle(canvas.toDataURL("image/jpeg", quality));
      } catch {
        settle(null);
      }
    };

    video.addEventListener("loadeddata", onReady, { once: true });
    video.addEventListener("error", () => settle(null), { once: true });
    // Fallback in case neither event ever fires.
    timer = setTimeout(() => settle(null), 3000);

    video.preload = "auto";
    video.muted = true;
    video.playsInline = true;
    video.src = dataUrl;
  });
}
763
+
764
// Prepare messages for persistence: inline base64 attachments are replaced by
// lightweight placeholders so the history does not overflow the localStorage quota.
//
//   { type: "input_image", b64_json } -> { type: "image_ref", name, mime: "image/*" }
//   { type: "input_video", b64_json } -> { type: "video_ref", name, mime: "video/*" }
//
// Every other part (text, URL references, thumbnails, ...) is kept as-is.
// Only `role` and `content` of each message are carried over. The input is
// not modified; a new array is returned.
function compactMessages(arr) {
  // Placeholder description per inline attachment type.
  const placeholders = new Map([
    ["input_image", { type: "image_ref", name: "image", mime: "image/*" }],
    ["input_video", { type: "video_ref", name: "video", mime: "video/*" }],
  ]);

  // Swap one content part for its placeholder when it carries a base64 payload.
  const shrink = (part) => {
    const carriesPayload = part && typeof part === "object" && part.b64_json;
    const spec = carriesPayload ? placeholders.get(part.type) : undefined;
    if (!spec) {
      return part;
    }
    return {
      type: spec.type,
      name: part.name || spec.name,
      mime: spec.mime,
    };
  };

  const compacted = [];
  for (const message of arr || []) {
    let content = message.content;
    if (Array.isArray(content)) {
      content = content.map((part) => shrink(part));
    }
    compacted.push({ role: message.role, content });
  }
  return compacted;
}
795
+
796
// Convert persisted history into a request-safe message list.
//
// - Only messages with role "user", "assistant" or "system" are kept.
// - String content becomes a single text part.
// - Of array content, only text parts and image_url / video_url references
//   survive; placeholders and thumbnails (image_ref, video_ref, image_thumb,
//   video_thumb) are dropped, because only the current turn carries real base64.
// - At most the 24 most recent messages (~12 turns) are returned, to avoid
//   stale over-conditioning and ghosting.
function transformForSend(arr) {
  const SEND_MAX_MESSAGES = 24; // ~12 turns
  const ALLOWED_ROLES = ["user", "assistant", "system"];

  // Map one stored part to its outgoing form, or null when it must be skipped.
  const toOutgoingPart = (part) => {
    if (!part || typeof part !== "object") {
      return null;
    }
    switch (part.type) {
      case "text":
        return typeof part.text === "string"
          ? { type: "text", text: part.text }
          : null;
      case "image_url":
        return part.image_url && part.image_url.url
          ? { type: "image_url", image_url: { url: part.image_url.url } }
          : null;
      case "video_url":
        return part.video_url && part.video_url.url
          ? { type: "video_url", video_url: { url: part.video_url.url } }
          : null;
      default:
        return null;
    }
  };

  const sendable = [];
  for (const message of arr || []) {
    if (!message || !ALLOWED_ROLES.includes(message.role)) {
      continue;
    }
    let parts = [];
    if (typeof message.content === "string") {
      parts = [{ type: "text", text: message.content }];
    } else if (Array.isArray(message.content)) {
      parts = message.content
        .map((part) => toOutgoingPart(part))
        .filter((part) => part !== null);
    }
    sendable.push({ role: message.role, content: parts });
  }

  return sendable.length > SEND_MAX_MESSAGES
    ? sendable.slice(-SEND_MAX_MESSAGES)
    : sendable;
}
 
844
 
845
// Full-fidelity copy of the user turn that is about to be sent, including the
// real base64 attachments. Lives in memory only; it is never persisted.
let lastOutgoingUser = null;

// Return the persisted history of the current session.
// Entries are { role, content } objects; content may contain storage-only
// part types (thumbnails, *_ref placeholders), so run it through
// transformForSend() before putting it in a request.
function getMessages() {
  const history = store.messages;
  return history;
}
852
+
853
// Turn the composer state (prompt text + queued attachments) into a user
// message, in two flavours:
//   - stored:   safe for localStorage; attachments are thumbnails, or name-only
//               placeholders when no thumbnail could be produced
//   - outgoing: carries the real base64 payloads for the current request
// The stored flavour is appended to the history; the outgoing one is stashed
// in `lastOutgoingUser` for sendStream(). Afterwards the composer is cleared
// and the chat re-rendered. Queued files that are neither image/* nor video/*
// are not included in either message.
async function pushUserMessageFromUI() {
  // Per media kind: part type names, fallbacks and the thumbnail factory.
  const KINDS = {
    image: {
      input: "input_image",
      thumb: "image_thumb",
      ref: "image_ref",
      fallbackName: "image",
      fallbackMime: "image/*",
      makeThumb: createImageThumbnail,
    },
    video: {
      input: "input_video",
      thumb: "video_thumb",
      ref: "video_ref",
      fallbackName: "video",
      fallbackMime: "video/*",
      makeThumb: createVideoThumbnail,
    },
  };

  const storedParts = [];
  const outgoingParts = [];

  const typed = (promptEl.value || "").trim();
  if (typed) {
    storedParts.push({ type: "text", text: typed });
    outgoingParts.push({ type: "text", text: typed });
  }

  // Attachments are processed one after another so both part lists keep queue order.
  for (const queued of fileQueue) {
    let kind = null;
    if (queued.type.startsWith("image/")) {
      kind = KINDS.image;
    } else if (queued.type.startsWith("video/")) {
      kind = KINDS.video;
    }
    if (!kind) continue;

    outgoingParts.push({
      type: kind.input,
      b64_json: dataUrlToBase64(queued.dataUrl),
      name: queued.name || kind.fallbackName,
    });

    const thumbnail = await kind.makeThumb(queued.dataUrl);
    if (thumbnail) {
      storedParts.push({
        type: kind.thumb,
        dataUrl: thumbnail,
        name: queued.name || kind.fallbackName,
      });
    } else {
      storedParts.push({
        type: kind.ref,
        name: queued.name || kind.fallbackName,
        mime: queued.type || kind.fallbackMime,
      });
    }
  }

  // Persist the storage-safe flavour.
  const history = getMessages();
  history.push({ role: "user", content: storedParts });
  store.messages = history;

  // Stash the real payload for the immediate request.
  lastOutgoingUser = { role: "user", content: outgoingParts };

  // Reset the composer.
  fileQueue.length = 0;
  previewEl.innerHTML = "";
  promptEl.value = "";
  render();
}
923
+
924
// Send the conversation to `<apiBase>/v1/chat/completions` with streaming
// enabled and append the streamed tokens to a new assistant message.
//
// Request messages are the request-safe history (see transformForSend) with
// the most recent user turn replaced by `lastOutgoingUser`, which carries the
// real base64 attachments. The stash is cleared afterwards to avoid reuse.
//
// The response is parsed as a server-sent event stream: events are separated
// by a blank line (LF or CRLF line endings), `data:` lines carry JSON chunks
// whose `choices[0].delta.content` is appended, and `[DONE]` is ignored.
// An event still sitting in the buffer when the stream ends is processed too.
//
// Throws an Error("HTTP <status>: <body>") when the response is not OK or has
// no body; network errors from fetch propagate to the caller.
async function sendStream() {
  const apiBase = apiBaseInput.value.trim() || DEFAULT_SPACE;

  // Build request messages: text history + real attachments for the last user turn.
  let msgs = transformForSend(getMessages());
  if (lastOutgoingUser) {
    if (msgs.length && msgs[msgs.length - 1].role === "user") {
      msgs[msgs.length - 1] = lastOutgoingUser;
    } else {
      msgs.push(lastOutgoingUser);
    }
  }
  // Clear the stash to avoid accidental reuse.
  lastOutgoingUser = null;

  // Fall back to the UI defaults when a numeric field does not parse;
  // otherwise NaN would be serialized as null.
  const requestedTokens = parseInt(maxTokensEl.value || "4096", 10);
  const requestedTemp = parseFloat(temperatureEl.value || "0.7");

  const body = {
    messages: msgs,
    stream: true,
    session_id: store.sessionId,
    max_tokens: Math.max(
      1,
      Number.isFinite(requestedTokens) ? requestedTokens : 4096
    ),
    temperature: Number.isFinite(requestedTemp) ? requestedTemp : 0.7,
  };

  const url = new URL("/v1/chat/completions", apiBase);
  const resp = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
    mode: "cors",
  });
  if (!resp.ok || !resp.body) {
    const text = await resp.text().catch(() => "");
    throw new Error(`HTTP ${resp.status}: ${text}`);
  }

  // Prepare the assistant message that accumulates the streamed content.
  const messages = getMessages();
  messages.push({ role: "assistant", content: "" });
  store.messages = messages;
  render();

  // Append one streamed token to the trailing assistant message and re-render.
  const appendDelta = (token) => {
    const current = getMessages();
    const last = current[current.length - 1];
    if (last && last.role === "assistant") {
      last.content = (last.content || "") + token;
      store.messages = current;
      render();
    }
  };

  // Handle one SSE event block (the lines between two blank lines).
  const consumeEvent = (block) => {
    for (const line of block.split("\n")) {
      if (!line.startsWith("data:")) continue;
      const data = line.slice(5).trim();
      if (data === "[DONE]") continue;
      try {
        const j = JSON.parse(data);
        const delta = (((j || {}).choices || [])[0] || {}).delta || {};
        if (typeof delta.content === "string" && delta.content.length) {
          appendDelta(delta.content);
        }
      } catch {
        // Deliberate best effort: a malformed chunk must not abort the stream.
      }
    }
  };

  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  // Add decoded text to the buffer and consume every complete event in it.
  // CRLF is normalized on the whole buffer, so a "\r\n" pair split across two
  // network chunks is still recognised.
  const feed = (text) => {
    buffer = (buffer + text).replace(/\r\n/g, "\n");
    let idx;
    while ((idx = buffer.indexOf("\n\n")) !== -1) {
      const block = buffer.slice(0, idx);
      buffer = buffer.slice(idx + 2);
      consumeEvent(block);
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    feed(decoder.decode(value, { stream: true }));
  }

  // Flush the decoder and process a final event that lacks the trailing blank line.
  feed(decoder.decode());
  if (buffer.trim()) {
    consumeEvent(buffer);
    buffer = "";
  }
}
1005
+
1006
// "Send": store the composed user turn, then stream the assistant reply.
//
// Nothing is sent when the composer holds neither text nor an image/video
// attachment, so no empty user turn is added to the history. The button is
// disabled while a request is in flight, because two overlapping streams
// would both write to the same trailing assistant message.
// Failures are reported with an alert.
sendBtn.onclick = async () => {
  const hasText = Boolean((promptEl.value || "").trim());
  const hasMedia = fileQueue.some(
    (f) => f.type.startsWith("image/") || f.type.startsWith("video/")
  );
  if (!hasText && !hasMedia) {
    return;
  }

  sendBtn.disabled = true;
  try {
    await pushUserMessageFromUI();
    await sendStream();
  } catch (e) {
    alert("Send failed: " + (e && e.message ? e.message : e));
  } finally {
    sendBtn.disabled = false;
  }
};
1014
+
1015
// "Clear History": after confirmation, drop the stored history, start a new
// session and reset the composer.
clearBtn.onclick = () => {
  const confirmed = confirm("Clear chat history and start a new session?");
  if (!confirmed) return;

  store.clear();

  // Reset the composer state.
  fileQueue.length = 0;
  previewEl.innerHTML = "";
  promptEl.value = "";

  // Reading sessionId creates the new session id; show it and redraw the (now empty) chat.
  sessionIdLabel.textContent = store.sessionId;
  render();
};
1027
+
1028
// Page start-up: draw the stored conversation, probe the backend, and on the
// very first visit remember the API base currently shown in the input.
(async function init() {
  render();
  await checkHealth();

  const alreadySaved = Boolean(localStorage.getItem("apiBase"));
  if (alreadySaved) return;

  const initialBase = apiBaseInput.value.trim() || DEFAULT_SPACE;
  localStorage.setItem("apiBase", initialBase);
})();
1038
+ </script>
1039
+ </body>
1040
+ </html>