Spaces:
Running
Running
Commit
·
878e432
1
Parent(s):
08f0bdc
track pending requests, improve ui, add qwen-2-0.5B
Browse files- src/routes/+page.svelte +46 -14
src/routes/+page.svelte
CHANGED
|
@@ -4,6 +4,8 @@
|
|
| 4 |
import * as webllm from "@mlc-ai/web-llm";
|
| 5 |
import { onMount } from 'svelte';
|
| 6 |
|
|
|
|
|
|
|
| 7 |
let engine: webllm.MLCEngineInterface;
|
| 8 |
let isLoading = false;
|
| 9 |
let loadingStatus = '';
|
|
@@ -12,8 +14,8 @@
|
|
| 12 |
let error = '';
|
| 13 |
let completionSpeed: number | null = null;
|
| 14 |
let tokensPerSecond: number | null = null;
|
| 15 |
-
let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
|
| 16 |
let isGenerating = false;
|
|
|
|
| 17 |
|
| 18 |
async function loadWebLLM() {
|
| 19 |
isLoading = true;
|
|
@@ -24,11 +26,18 @@
|
|
| 24 |
|
| 25 |
const appConfig: webllm.AppConfig = {
|
| 26 |
model_list: [{
|
| 27 |
-
model: `https://huggingface.co/mlc-ai
|
| 28 |
-
model_id:
|
| 29 |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
|
| 30 |
overrides: { context_window_size: 2048 },
|
| 31 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
};
|
| 33 |
|
| 34 |
try {
|
|
@@ -44,18 +53,37 @@
|
|
| 44 |
}
|
| 45 |
}
|
| 46 |
|
| 47 |
-
async function generateCompletion() {
|
| 48 |
-
if (!engine ||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
isGenerating = true;
|
| 51 |
const startTime = performance.now();
|
| 52 |
try {
|
|
|
|
| 53 |
const response = await engine.chat.completions.create({
|
| 54 |
-
messages: [
|
|
|
|
|
|
|
|
|
|
| 55 |
max_tokens: 10,
|
| 56 |
});
|
| 57 |
|
| 58 |
outputText = response.choices[0].message.content || "";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
const endTime = performance.now();
|
| 60 |
const elapsedTimeInSeconds = (endTime - startTime) / 1000;
|
| 61 |
completionSpeed = Math.round(endTime - startTime);
|
|
@@ -68,6 +96,13 @@
|
|
| 68 |
error = `Error: ${(err as Error).message}`;
|
| 69 |
} finally {
|
| 70 |
isGenerating = false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
}
|
| 72 |
}
|
| 73 |
|
|
@@ -79,16 +114,11 @@
|
|
| 79 |
<p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
|
| 80 |
<Textarea
|
| 81 |
bind:value={inputText}
|
| 82 |
-
on:input={() =>
|
| 83 |
-
if (!isGenerating) {
|
| 84 |
-
generateCompletion();
|
| 85 |
-
}
|
| 86 |
-
}}
|
| 87 |
disabled={isLoading}
|
| 88 |
class="w-full"
|
| 89 |
placeholder="Say something..."
|
| 90 |
/>
|
| 91 |
-
<pre class="text-lg whitespace-pre-wrap">{outputText}</pre>
|
| 92 |
{#if isLoading}
|
| 93 |
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
|
| 94 |
{:else if error}
|
|
@@ -101,7 +131,9 @@
|
|
| 101 |
{#if tokensPerSecond !== null}
|
| 102 |
<Badge>{tokensPerSecond} tok/s</Badge>
|
| 103 |
{/if}
|
| 104 |
-
<Badge
|
| 105 |
</div>
|
| 106 |
{/if}
|
|
|
|
|
|
|
| 107 |
</div>
|
|
|
|
| 4 |
import * as webllm from "@mlc-ai/web-llm";
|
| 5 |
import { onMount } from 'svelte';
|
| 6 |
|
| 7 |
+
let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
|
| 8 |
+
|
| 9 |
let engine: webllm.MLCEngineInterface;
|
| 10 |
let isLoading = false;
|
| 11 |
let loadingStatus = '';
|
|
|
|
| 14 |
let error = '';
|
| 15 |
let completionSpeed: number | null = null;
|
| 16 |
let tokensPerSecond: number | null = null;
|
|
|
|
| 17 |
let isGenerating = false;
|
| 18 |
+
let pendingRequest: string | null = null;
|
| 19 |
|
| 20 |
async function loadWebLLM() {
|
| 21 |
isLoading = true;
|
|
|
|
| 26 |
|
| 27 |
const appConfig: webllm.AppConfig = {
|
| 28 |
model_list: [{
|
| 29 |
+
model: `https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC`,
|
| 30 |
+
model_id: 'SmolLM-360M-Instruct-q4f16_1-MLC',
|
| 31 |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
|
| 32 |
overrides: { context_window_size: 2048 },
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
|
| 36 |
+
model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
|
| 37 |
+
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
|
| 38 |
+
overrides: { context_window_size: 2048 },
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
};
|
| 42 |
|
| 43 |
try {
|
|
|
|
| 53 |
}
|
| 54 |
}
|
| 55 |
|
| 56 |
+
async function generateCompletion(content: string) {
|
| 57 |
+
if (!engine || isGenerating) {
|
| 58 |
+
/**
|
| 59 |
+
* This is used to store the most recent request from user
|
| 60 |
+
* while the current request is being processed.
|
| 61 |
+
*/
|
| 62 |
+
pendingRequest = content.trim();
|
| 63 |
+
return;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
if (!content.trim()) return;
|
| 67 |
|
| 68 |
isGenerating = true;
|
| 69 |
const startTime = performance.now();
|
| 70 |
try {
|
| 71 |
+
console.log("Generating completion:", content);
|
| 72 |
const response = await engine.chat.completions.create({
|
| 73 |
+
messages: [
|
| 74 |
+
{role:"system", content: "You are a helpful AI agent helping users. Try your best to answer the users request."},
|
| 75 |
+
{role: "user", content: content}
|
| 76 |
+
],
|
| 77 |
max_tokens: 10,
|
| 78 |
});
|
| 79 |
|
| 80 |
outputText = response.choices[0].message.content || "";
|
| 81 |
+
|
| 82 |
+
// indicate that the response was cut short
|
| 83 |
+
if (response.choices[0].finish_reason === "length") {
|
| 84 |
+
outputText += "...";
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
const endTime = performance.now();
|
| 88 |
const elapsedTimeInSeconds = (endTime - startTime) / 1000;
|
| 89 |
completionSpeed = Math.round(endTime - startTime);
|
|
|
|
| 96 |
error = `Error: ${(err as Error).message}`;
|
| 97 |
} finally {
|
| 98 |
isGenerating = false;
|
| 99 |
+
|
| 100 |
+
// process pending request if exists
|
| 101 |
+
if (pendingRequest && pendingRequest !== content) {
|
| 102 |
+
const nextRequest = pendingRequest;
|
| 103 |
+
pendingRequest = null;
|
| 104 |
+
await generateCompletion(nextRequest);
|
| 105 |
+
}
|
| 106 |
}
|
| 107 |
}
|
| 108 |
|
|
|
|
| 114 |
<p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
|
| 115 |
<Textarea
|
| 116 |
bind:value={inputText}
|
| 117 |
+
on:input={() => generateCompletion(inputText)}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
disabled={isLoading}
|
| 119 |
class="w-full"
|
| 120 |
placeholder="Say something..."
|
| 121 |
/>
|
|
|
|
| 122 |
{#if isLoading}
|
| 123 |
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
|
| 124 |
{:else if error}
|
|
|
|
| 131 |
{#if tokensPerSecond !== null}
|
| 132 |
<Badge>{tokensPerSecond} tok/s</Badge>
|
| 133 |
{/if}
|
| 134 |
+
<Badge>{selectedModel}</Badge>
|
| 135 |
</div>
|
| 136 |
{/if}
|
| 137 |
+
<pre class="text-lg font-bold whitespace-pre-wrap">{outputText}</pre>
|
| 138 |
+
|
| 139 |
</div>
|