Use auto for the backend.
Files changed:
- package.json +1 -1
- src/app/handle-prompt.js +14 -1
- src/worker/model-cache.js +10 -7

package.json  CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "localm",
-  "version": "1.1.
+  "version": "1.1.5",
   "description": "",
   "main": "chat-full.js",
   "scripts": {

src/app/handle-prompt.js  CHANGED
@@ -1,6 +1,8 @@
 // @ts-check
 
 import { outputMessage } from './output-message';
+import { chatLogEditor } from './boot-app';
+import { editorViewCtx, serializerCtx } from '@milkdown/core';
 
 /**
  * @param {{
@@ -9,12 +11,23 @@ import { outputMessage } from './output-message';
  * }} _
  */
 export async function handlePrompt({ promptMarkdown, workerConnection }) {
+  // Build history from chat log editor (serialize entire document)
+  let historyText = await chatLogEditor.action(async (ctx) => {
+    const serializer = ctx.get(serializerCtx);
+    const view = ctx.get(editorViewCtx);
+    return serializer(view.state.doc);
+  });
+
   const formatted = `**Question:**\n> ${promptMarkdown.replaceAll('\n', '\n> ')}`;
   outputMessage(formatted);
 
   outputMessage('Processing your request...');
   try {
-    const promptOutput = await workerConnection.runPrompt(promptMarkdown);
+    // Concatenate history and the new prompt into a single prompt string
+    const combinedPrompt = promptMarkdown;
+    // historyText ? (historyText + '\n\n' + promptMarkdown) :
+    // promptMarkdown;
+    const promptOutput = await workerConnection.runPrompt(combinedPrompt);
     outputMessage('**Reply:**\n' + promptOutput);
   } catch (error) {
     outputMessage('**Error:** ' + error.message);
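
For reference, the new history block uses Milkdown's context API: chatLogEditor.action() runs a callback against the editor context, serializerCtx yields a document-to-markdown serializer, and editorViewCtx yields the live view. A minimal standalone sketch of that pattern follows; the helper name is made up here, and it assumes chatLogEditor is the Milkdown Editor instance exported from boot-app, as the new import suggests.

// Sketch of the serialization pattern used above (not part of the commit).
import { editorViewCtx, serializerCtx } from '@milkdown/core';
import { chatLogEditor } from './boot-app';

/** Serialize the whole chat log document back into a markdown string. */
export function getChatLogMarkdown() {
  return chatLogEditor.action((ctx) => {
    const serializer = ctx.get(serializerCtx); // ProseMirror doc -> markdown
    const view = ctx.get(editorViewCtx);       // current editor view and state
    return serializer(view.state.doc);
  });
}

Note that in the committed code combinedPrompt is still just promptMarkdown: historyText is built, but the concatenation that would prepend it to the prompt remains commented out.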

src/worker/model-cache.js  CHANGED
@@ -8,8 +8,10 @@ export class ModelCache {
   backend = undefined;
 
   knownModels = [
+    //'microsoft/phi-1_5', // cannot be loaded
+
+    // 'Xenova/phi-3-mini-4k-instruct',
     'Xenova/llama2.c-stories15M', // nonsense
-    'Xenova/phi-3-mini-4k-instruct',
     'Xenova/all-MiniLM-L6-v2', // unsupported model type: bert
     'Xenova/phi-1.5', // gated
     'Qwen/Qwen2.5-3B', // cannot be loaded
@@ -37,13 +39,13 @@ export class ModelCache {
     if (!this.backend) this.backend = detectTransformersBackend();
     // Create a loader promise that will try multiple backends in order.
     const loader = (async () => {
-      const tried = [];
       // candidate order: detected backend first, then common fallbacks
       let candidates = ['webgpu', 'gpu', 'wasm'];
-      candidates = ['gpu', 'wasm'];
+      // candidates = ['gpu', 'wasm'];
       candidates = candidates.slice(candidates.indexOf(this.backend || 'wasm'));
+      candidates = ['auto'];
 
-      let lastErr;
+      let errs = [];
       console.log('Trying candidates ', candidates);
       for (const device of candidates) {
         try {
@@ -57,14 +59,15 @@
           return model;
         } catch (err) {
           console.log('Failed ', device, ' ', err);
-
-          lastErr = err;
+          errs.push(device + ': ' + err.stack);
           // continue to next candidate
         }
       }
 
       // none succeeded
-      const err = new Error(
+      const err = new Error(
+        'Backends failed: ' + JSON.stringify(candidates) + ', errors:\n\n' +
+        errs.join('\n\n'));
       throw err;
     })();
 
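
The loader change collapses the hand-ordered candidate list to a single 'auto' device, leaving backend selection (WebGPU vs. WASM) to transformers.js, and the new errs array turns silent per-backend failures into one aggregated error. Roughly, the retry loop now amounts to the sketch below; the pipeline() call, the 'text-generation' task, and the option shape are assumptions rather than something this diff shows, since the actual loading call sits in the unchanged part of model-cache.js.

// Sketch only; pipeline() usage here is an assumption, not taken from the diff.
import { pipeline } from '@huggingface/transformers';

async function loadWithFallback(modelId, candidates = ['auto']) {
  const errs = [];
  for (const device of candidates) {
    try {
      // 'auto' lets the library pick the best available backend (e.g. WebGPU,
      // falling back to WASM), so a single candidate usually suffices.
      return await pipeline('text-generation', modelId, { device });
    } catch (err) {
      errs.push(device + ': ' + err.stack); // keep every failure for the final report
    }
  }
  throw new Error(
    'Backends failed: ' + JSON.stringify(candidates) + ', errors:\n\n' +
    errs.join('\n\n'));
}

Note that because candidates = ['auto'] is assigned after the slice over the detected backend, the earlier detection and ordering logic is effectively bypassed.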