mihailik committed on
Commit
df41f54
·
1 Parent(s): 2c016c2

Use auto for the backend.

Browse files
package.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "localm",
3
- "version": "1.1.4",
4
  "description": "",
5
  "main": "chat-full.js",
6
  "scripts": {
 
1
  {
2
  "name": "localm",
3
+ "version": "1.1.5",
4
  "description": "",
5
  "main": "chat-full.js",
6
  "scripts": {
src/app/handle-prompt.js CHANGED
@@ -1,6 +1,8 @@
1
  // @ts-check
2
 
3
  import { outputMessage } from './output-message';
 
 
4
 
5
  /**
6
  * @param {{
@@ -9,12 +11,23 @@ import { outputMessage } from './output-message';
9
  * }} _
10
  */
11
  export async function handlePrompt({ promptMarkdown, workerConnection }) {
 
 
 
 
 
 
 
12
  const formatted = `**Question:**\n> ${promptMarkdown.replaceAll('\n', '\n> ')}`;
13
  outputMessage(formatted);
14
 
15
  outputMessage('Processing your request...');
16
  try {
17
- const promptOutput = await workerConnection.runPrompt(promptMarkdown);
 
 
 
 
18
  outputMessage('**Reply:**\n' + promptOutput);
19
  } catch (error) {
20
  outputMessage('**Error:** ' + error.message);
 
1
  // @ts-check
2
 
3
  import { outputMessage } from './output-message';
4
+ import { chatLogEditor } from './boot-app';
5
+ import { editorViewCtx, serializerCtx } from '@milkdown/core';
6
 
7
  /**
8
  * @param {{
 
11
  * }} _
12
  */
13
  export async function handlePrompt({ promptMarkdown, workerConnection }) {
14
+ // Build history from chat log editor (serialize entire document)
15
+ let historyText = await chatLogEditor.action(async (ctx) => {
16
+ const serializer = ctx.get(serializerCtx);
17
+ const view = ctx.get(editorViewCtx);
18
+ return serializer(view.state.doc);
19
+ });
20
+
21
  const formatted = `**Question:**\n> ${promptMarkdown.replaceAll('\n', '\n> ')}`;
22
  outputMessage(formatted);
23
 
24
  outputMessage('Processing your request...');
25
  try {
26
+ // Concatenate history and the new prompt into a single prompt string
27
+ const combinedPrompt = promptMarkdown;
28
+ // historyText ? (historyText + '\n\n' + promptMarkdown) :
29
+ // promptMarkdown;
30
+ const promptOutput = await workerConnection.runPrompt(combinedPrompt);
31
  outputMessage('**Reply:**\n' + promptOutput);
32
  } catch (error) {
33
  outputMessage('**Error:** ' + error.message);
src/worker/model-cache.js CHANGED
@@ -8,8 +8,10 @@ export class ModelCache {
8
  backend = undefined;
9
 
10
  knownModels = [
 
 
 
11
  'Xenova/llama2.c-stories15M', // nonsense
12
- 'Xenova/phi-3-mini-4k-instruct',
13
  'Xenova/all-MiniLM-L6-v2', // unsupported model type: bert
14
  'Xenova/phi-1.5', // gated
15
  'Qwen/Qwen2.5-3B', // cannot be loaded
@@ -37,13 +39,13 @@ export class ModelCache {
37
  if (!this.backend) this.backend = detectTransformersBackend();
38
  // Create a loader promise that will try multiple backends in order.
39
  const loader = (async () => {
40
- const tried = [];
41
  // candidate order: detected backend first, then common fallbacks
42
  let candidates = ['webgpu', 'gpu', 'wasm'];
43
- candidates = ['gpu', 'wasm'];
44
  candidates = candidates.slice(candidates.indexOf(this.backend || 'wasm'));
 
45
 
46
- let lastErr = null;
47
  console.log('Trying candidates ', candidates);
48
  for (const device of candidates) {
49
  try {
@@ -57,14 +59,15 @@ export class ModelCache {
57
  return model;
58
  } catch (err) {
59
  console.log('Failed ', device, ' ', err);
60
- tried.push({ device, error: err.stack || String(err) });
61
- lastErr = err;
62
  // continue to next candidate
63
  }
64
  }
65
 
66
  // none succeeded
67
- const err = new Error(`no available backend found. attempts=${JSON.stringify(tried)}; last=${String(lastErr)}`);
 
 
68
  throw err;
69
  })();
70
 
 
8
  backend = undefined;
9
 
10
  knownModels = [
11
+ //'microsoft/phi-1_5', // cannot be loaded
12
+
13
+ // 'Xenova/phi-3-mini-4k-instruct',
14
  'Xenova/llama2.c-stories15M', // nonsense
 
15
  'Xenova/all-MiniLM-L6-v2', // unsupported model type: bert
16
  'Xenova/phi-1.5', // gated
17
  'Qwen/Qwen2.5-3B', // cannot be loaded
 
39
  if (!this.backend) this.backend = detectTransformersBackend();
40
  // Create a loader promise that will try multiple backends in order.
41
  const loader = (async () => {
 
42
  // candidate order: detected backend first, then common fallbacks
43
  let candidates = ['webgpu', 'gpu', 'wasm'];
44
+ // candidates = ['gpu', 'wasm'];
45
  candidates = candidates.slice(candidates.indexOf(this.backend || 'wasm'));
46
+ candidates = ['auto'];
47
 
48
+ let errs = [];
49
  console.log('Trying candidates ', candidates);
50
  for (const device of candidates) {
51
  try {
 
59
  return model;
60
  } catch (err) {
61
  console.log('Failed ', device, ' ', err);
62
+ errs.push(device + ': ' + err.stack);
 
63
  // continue to next candidate
64
  }
65
  }
66
 
67
  // none succeeded
68
+ const err = new Error(
69
+ 'Backends failed: ' + JSON.stringify(candidates) + ', errors:\n\n' +
70
+ errs.join('\n\n'));
71
  throw err;
72
  })();
73