mihailik committed
Commit a2e7566 · Parent: 1175d79

Trying to fix errors.

Files changed (1):
1. index.html  +55 -20
index.html CHANGED
@@ -176,7 +176,7 @@
 
   <script type="module">
   import * as webllm from "https://esm.run/@mlc-ai/web-llm";
-  import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
+  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
 
   /*************** WebLLM Logic & RAG Components ***************/
 
@@ -218,8 +218,23 @@ If you can answer the question directly with your existing knowledge or after us
   // 1. transformers:webgpu 2. transformers:webgl 3. webllm:webgpu 4. transformers:wasm
   let chatBackend = null;
   let textGenPipeline = null; // transformers.js pipeline instance
-  const TRANSFORMERS_MODEL = 'Xenova/Qwen2.5-0.5B-Instruct'; // Adjust for size/perf; can swap to smaller model
-  const SMALLER_MODEL_HINT = 'Xenova/SmolLM2-360M-Instruct';
+  // Candidate models (ordered). We rotate until one loads. Prefer fully open, ungated models first.
+  const TRANSFORMERS_MODEL_CANDIDATES = [
+    'Xenova/SmolLM2-360M-Instruct', // small, permissive
+    'Xenova/Qwen2.5-0.5B-Instruct', // may require accepting license or token
+    'Xenova/gpt2' // fallback tiny (non-instruct, but ensures something works)
+  ];
+  const SMALLER_MODEL_HINT = 'Xenova/SmolLM2-360M-Instruct';
+  let chosenTransformersModel = null;
+
+  // Allow user to inject HF token before loading (e.g., window.HF_TOKEN = 'hf_xxx'; before this script)
+  if (window.HF_TOKEN) {
+    env.HF_ACCESS_TOKEN = window.HF_TOKEN;
+  }
+  // Ensure remote huggingface URL (avoid accidental local mirror attempts). Can be customized.
+  env.remoteURL = 'https://huggingface.co';
+  // Disable local model resolution attempts to avoid 404 on /models/* when self-hosting without copies.
+  env.allowLocalModels = false;
 
   let miniTableIndexEmbeddings = []; // Stores { tableId: "users", text: "...", embedding: [...] }
   let detailedSchemaEmbeddings = []; // Stores { tableId: "users", chunkId: "col_details", text: "...", embedding: [...] }
@@ -404,22 +419,32 @@ If you can answer the question directly with your existing knowledge or after us
   console.log('Backend availability:', { hasWebGPU, hasWebGL2 });
 
   // Attempt order: transformers webgpu -> transformers webgl -> webllm -> transformers wasm
+  const modelLoadErrors = [];
   async function tryTransformers(deviceTag) {
-    try {
-      downloadStatus.textContent = `Loading transformers (${deviceTag}) model...`;
-      const opts = {};
-      // device selection hint (API may evolve; ignored if unsupported)
-      opts.device = deviceTag.startsWith('web') ? 'gpu' : 'cpu';
-      // quantization hint (lower memory where supported)
-      opts.quantized = true;
-      textGenPipeline = await pipeline('text-generation', TRANSFORMERS_MODEL, opts);
-      chatBackend = `transformers-${deviceTag}`;
-      console.log(`Loaded transformers model on ${deviceTag}`);
-      return true;
-    } catch (e) {
-      console.warn(`Transformers ${deviceTag} load failed:`, e);
-      return false;
+    for (const modelId of TRANSFORMERS_MODEL_CANDIDATES) {
+      try {
+        downloadStatus.textContent = `Loading ${modelId} (${deviceTag})...`;
+        const opts = { quantized: true };
+        opts.device = deviceTag.startsWith('web') ? 'gpu' : (deviceTag === 'wasm' ? 'cpu' : 'cpu');
+        textGenPipeline = await pipeline('text-generation', modelId, opts);
+        chatBackend = `transformers-${deviceTag}`;
+        chosenTransformersModel = modelId;
+        console.log(`Loaded transformers model '${modelId}' on ${deviceTag}`);
+        return true;
+      } catch (e) {
+        const msg = (e?.message || '').toLowerCase();
+        let short = e.message;
+        if (msg.includes('unauthorized')) {
+          short += ' (Likely gated model; set window.HF_TOKEN before loading or choose an open model)';
+        } else if (msg.includes('404')) {
+          short += ' (Resource not found; if self-hosting assets, ensure files exist)';
+        }
+        modelLoadErrors.push({ device: deviceTag, model: modelId, error: short });
+        console.warn(`Transformers load failed for ${modelId} on ${deviceTag}:`, e);
+        // Try next model candidate
+      }
     }
+    return false; // none loaded
   }
 
   let initialized = false;
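
The attempt-order comment above describes a fallback chain whose driver sits outside this hunk. A rough sketch of how that chain could be walked; `hasWebGPU`, `hasWebGL2` and `tryTransformers` come from the code above, while `initWebLLM` is a hypothetical name standing in for the existing WebLLM setup elsewhere in index.html:

```js
// Hypothetical driver for the documented order:
// transformers:webgpu -> transformers:webgl -> webllm:webgpu -> transformers:wasm
async function pickBackend() {
  if (hasWebGPU && await tryTransformers('webgpu')) return;
  if (hasWebGL2 && await tryTransformers('webgl')) return;
  if (hasWebGPU && await initWebLLM()) return; // existing WebLLM path, not shown in this diff
  await tryTransformers('wasm');               // last-resort CPU/WASM path
}
```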
@@ -485,8 +510,18 @@ If you can answer the question directly with your existing knowledge or after us
   sendButton.disabled = false;
   userInput.disabled = false;
   userInput.setAttribute('placeholder', slow ? 'Type (CPU fallback, slower)...' : 'Type a message...');
-  appendMessage({ role: 'system', content: `AI (${backendLabel}): Ready. Ask about the SQL schema. ${slow ? 'Consider a smaller model ('+SMALLER_MODEL_HINT+') for speed.' : ''}` });
-  downloadStatus.textContent = 'Models loaded (' + backendLabel + ').';
+  if (chosenTransformersModel) {
+    appendMessage({ role: 'system', content: `AI (${backendLabel}/${chosenTransformersModel}): Ready. Ask about the SQL schema. ${slow ? 'Consider a smaller model ('+SMALLER_MODEL_HINT+') for speed.' : ''}` });
+    downloadStatus.textContent = 'Models loaded (' + backendLabel + ').';
+  } else if (backendLabel === 'webllm') {
+    appendMessage({ role: 'system', content: `AI (${backendLabel}): Ready. Ask about the SQL schema.` });
+    downloadStatus.textContent = 'Models loaded (' + backendLabel + ').';
+  } else {
+    // Provide a concise summary of failures
+    const lines = modelLoadErrors.map(e => `- ${e.model} on ${e.device}: ${e.error}`).slice(0,6).join('\n');
+    downloadStatus.textContent = 'Warning: Model loaded without ID label.';
+    appendMessage({ role: 'system', content: `Model resolution issues encountered. Attempts summary:\n${lines}` });
+  }
   }
 
 
@@ -565,7 +600,7 @@ If you can answer the question directly with your existing knowledge or after us
   updateLastAssistantMessage('🧠 Gathering relevant schema context...');
   const ragContext = await performRagLookup(input) || 'No directly relevant schema rows found.';
   const prompt = `${systemMessageContent}\n\nUser question: ${input}\n\nRelevant schema context:\n${ragContext}\n\nAnswer:`;
-  updateLastAssistantMessage(`✍️ Generating answer (${chatBackend})...`);
+  updateLastAssistantMessage(`✍️ Generating answer (${chatBackend}${chosenTransformersModel? '/' + chosenTransformersModel: ''})...`);
   const result = await textGenPipeline(prompt, {
     max_new_tokens: 220,
     temperature: 0.7,
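
The hunk cuts off inside the generation options, so the handling of the pipeline result is not shown. For orientation, a transformers.js text-generation pipeline typically resolves to an array of `{ generated_text }` objects whose text includes the prompt; a sketch of consuming it under that assumption (not a reproduction of the code after the truncation):

```js
const result = await textGenPipeline(prompt, {
  max_new_tokens: 220,
  temperature: 0.7,
});
// result is usually [{ generated_text: '<prompt + continuation>' }];
// strip the prompt prefix so only the newly generated answer is displayed.
const full = result?.[0]?.generated_text ?? '';
const answer = full.startsWith(prompt) ? full.slice(prompt.length).trim() : full.trim();
updateLastAssistantMessage(answer || '(no output)');
```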