Spaces: Running
| <script lang="ts"> | |
| import Textarea from "@/lib/components/ui/textarea/textarea.svelte"; | |
| import Badge from "@/lib/components/ui/badge/badge.svelte"; | |
| import * as webllm from "@mlc-ai/web-llm"; | |
| import { onMount } from 'svelte'; | |
| let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC"; | |
| let engine: webllm.MLCEngineInterface; | |
| let isLoading = false; | |
| let loadingStatus = ''; | |
| let inputText = ''; | |
| let outputText = ''; | |
| let error = ''; | |
| let completionSpeed: number | null = null; | |
| let tokensPerSecond: number | null = null; | |
| let isGenerating = false; | |
| let pendingRequest: string | null = null; | |
| let maxTokens = 15; | |
| const promptExamples = [ | |
| "Tell me a story about a cat.", | |
| "What is refraction?", | |
| "Explain thermal conductivity", | |
| "What is Newton's first law of motion?", | |
| "Implement fib(n) in Python", | |
| ] | |
| async function setPrompt(prompt: string) { | |
| inputText = prompt; | |
| generateCompletion(prompt); | |
| } | |
| async function loadWebLLM() { | |
| isLoading = true; | |
| error = ''; | |
| const initProgressCallback = (report: webllm.InitProgressReport) => { | |
| loadingStatus = report.text; | |
| }; | |
| const appConfig: webllm.AppConfig = { | |
| model_list: [{ | |
| model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`, | |
| model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC', | |
| model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`, | |
| overrides: { context_window_size: 2048 }, | |
| }, | |
| { | |
| model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`, | |
| model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', | |
| model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`, | |
| overrides: { context_window_size: 2048 }, | |
| } | |
| ], | |
| }; | |
| try { | |
| engine = await webllm.CreateMLCEngine(selectedModel, { | |
| appConfig, | |
| initProgressCallback, | |
| logLevel: "INFO", | |
| }); | |
| } catch (err) { | |
| error = `Failed to load the model: ${(err as Error).message}`; | |
| } finally { | |
| isLoading = false; | |
| } | |
| } | |
| async function generateCompletion(content: string) { | |
| if (!engine || isGenerating) { | |
| /** | |
| * This is used to store the most recent request from user | |
| * while the current request is being processed. | |
| */ | |
| pendingRequest = content.trim(); | |
| return; | |
| } | |
| if (!content.trim()) return; | |
| isGenerating = true; | |
| const startTime = performance.now(); | |
| try { | |
| const response = await engine.chat.completions.create({ | |
| messages: [ | |
| {role: "user", content: content} | |
| ], | |
| max_tokens: maxTokens, | |
| }); | |
| outputText = response.choices[0].message.content || ""; | |
| const endTime = performance.now(); | |
| const elapsedTimeInSeconds = (endTime - startTime) / 1000; | |
| completionSpeed = Math.round(endTime - startTime); | |
| const generatedTokens = response.usage?.completion_tokens || 0; | |
| tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds); | |
| error = ''; | |
| } catch (err) { | |
| error = `Error: ${(err as Error).message}`; | |
| } finally { | |
| isGenerating = false; | |
| // process pending request if exists | |
| if (pendingRequest && pendingRequest !== content) { | |
| const nextRequest = pendingRequest; | |
| pendingRequest = null; | |
| await generateCompletion(nextRequest); | |
| } | |
| } | |
| } | |
| onMount(loadWebLLM); | |
| </script> | |
<!-- Layout: centered column — logo, title, input, stats, slider, examples, output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
  <img
    src="logo_smollm.png"
    alt="logo"
    class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
  />
  <h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
  <p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
  <p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>

  <!-- Every keystroke kicks off a completion; queuing is handled in the script. -->
  <Textarea
    bind:value={inputText}
    on:input={() => generateCompletion(inputText)}
    disabled={isLoading}
    class="w-full text-lg"
    placeholder="Say something..."
  />

  <!-- Status area: loading text, then error, else latency/throughput badges. -->
  {#if isLoading}
    <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
  {:else if error}
    <p class="text-sm text-red-600">{error}</p>
  {:else}
    <div class="flex gap-2">
      {#if completionSpeed !== null}
        <Badge>{completionSpeed}ms</Badge>
      {/if}
      {#if tokensPerSecond !== null}
        <Badge>{tokensPerSecond} tok/s</Badge>
      {/if}
    </div>
  {/if}

  <!-- Max-token slider bound to the generation request. -->
  <div class="w-full flex flex-col items-center gap-2">
    <input
      type="range"
      id="max-tokens"
      bind:value={maxTokens}
      min="15"
      max="75"
      step="1"
      class="w-full accent-black"
    />
    <label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
  </div>

  <!-- Example prompts, shown only while the input is empty and the model is ready. -->
  <div class="flex flex-col items-center mb-4">
    {#if inputText === '' && !isLoading}
      <p class="text-sm mb-2">Try these examples:</p>
      <div class="flex flex-wrap justify-center gap-2">
        {#each promptExamples as prompt}
          <button on:click={() => setPrompt(prompt)}>
            <Badge
              variant="outline"
              class="cursor-pointer bg-orange-100 hover:bg-orange-200"
            >
              {prompt}
            </Badge>
          </button>
        {/each}
      </div>
    {/if}
  </div>

  <!-- Model output. -->
  <pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>