mihailik committed
Commit 1bdb11a · Parent: 637b403
Files changed (1)
  1. index.html +167 -45
index.html CHANGED
@@ -208,6 +208,14 @@
  <input type="text" id="hf-token" placeholder="hf_... (optional)" size="18" autocomplete="off" />
  </label>
  <button id="apply-token" title="Store token (localStorage) & reload">Apply Token + Reload</button>
+ <label style="display:flex;align-items:center;gap:4px;">Models:
+ <input type="text" id="model-candidates" placeholder="comma-separated model ids" size="26" />
+ </label>
+ <button id="apply-models" title="Store custom model list & reload">Apply Models</button>
+ <label style="display:flex;align-items:center;gap:4px;">Skip RAG:
+ <input type="checkbox" id="skip-rag" title="If checked, no retrieval augmented context will be gathered." />
+ </label>
+
  <select id="preferred-backend" title="Preferred first backend">
  <option value="transformers-webgpu">TF WebGPU</option>
  <option value="transformers-webgl">TF WebGL</option>
@@ -230,14 +238,35 @@
  </div>

  <script type="module">
- import * as webllm from "https://esm.run/@mlc-ai/web-llm";
+ import * as webllm from "https://esm.run/@mlc-ai/web-llm";
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
+
+ // ---- Console Log Filtering (suppress noisy ONNX optimizer warnings) ----
+ const LOG_FILTER_PATTERNS = [
+ /CleanUnusedInitializersAndNodeArgs/i,
+ /graph\.cc:\d+ CleanUnusedInitializersAndNodeArgs/i,
+ /Removing initializer '\/transformer\//i
+ ];
+ const originalConsole = { log: console.log, warn: console.warn };
+ function shouldSuppress(args) {
+ return args.some(a => typeof a === 'string' && LOG_FILTER_PATTERNS.some(p => p.test(a)));
+ }
+ console.warn = (...args) => {
+ if (shouldSuppress(args)) { return; }
+ originalConsole.warn(...args);
+ };
+ console.log = (...args) => {
+ if (shouldSuppress(args)) { return; }
+ originalConsole.log(...args);
+ };

  /*************** WebLLM Logic & RAG Components ***************/

  // System message for the LLM to understand its role and tool use
  const systemMessageContent = `
- You are an intelligent SQL database schema assistant. Your primary goal is to answer user questions about database tables, their columns, relationships, and provide SQL query suggestions.
+ You are an intelligent person with honesty and broad knowledge.
+
+ Although you also know about SQL queries.

  You have access to a special "lookup" tool. If you need more specific details about tables or concepts to answer a user's question, you MUST respond with a JSON object in this exact format:

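The console patch in this hunk drops any log whose string arguments match one of LOG_FILTER_PATTERNS and forwards everything else to the saved originalConsole methods. A quick sketch of the intended behaviour (the messages below are made-up examples, not output from the patch):

    // Suppressed: matches the graph.cc optimizer pattern above.
    console.warn('graph.cc:123 CleanUnusedInitializersAndNodeArgs ...');
    // Forwarded: no pattern matches, so originalConsole.warn runs.
    console.warn('backend ready');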
@@ -263,13 +292,15 @@ If you can answer the question directly with your existing knowledge or after us
  const sendButton = document.getElementById("send");
  const downloadStatus = document.getElementById("download-status");
  const chatStats = document.getElementById("chat-stats");
- const diagnosticsEl = document.getElementById('diagnostics');
- const tokenInput = document.getElementById('hf-token');
- const applyTokenBtn = document.getElementById('apply-token');
- const forceReloadBtn = document.getElementById('force-reload');
- const toggleDiagBtn = document.getElementById('toggle-diagnostics');
- const backendSelect = document.getElementById('preferred-backend');
- const activeBackendLabel = document.getElementById('active-backend');
+ const diagnosticsEl = document.getElementById('diagnostics');
+ const tokenInput = document.getElementById('hf-token');
+ const applyTokenBtn = document.getElementById('apply-token');
+ const forceReloadBtn = document.getElementById('force-reload');
+ const toggleDiagBtn = document.getElementById('toggle-diagnostics');
+ const backendSelect = document.getElementById('preferred-backend');
+ const activeBackendLabel = document.getElementById('active-backend');
+ const skipRagCheckbox = document.getElementById('skip-rag');
+

  let currentAssistantMessageElement = null; // To update the streaming message
  let embedder = null; // In-browser embedding model
@@ -281,13 +312,34 @@ If you can answer the question directly with your existing knowledge or after us
  let chatBackend = null;
  let textGenPipeline = null; // transformers.js pipeline instance
  // Candidate models (ordered). We rotate until one loads. Prefer fully open, ungated models first.
- const TRANSFORMERS_MODEL_CANDIDATES = [
- 'Xenova/SmolLM2-360M-Instruct', // small, permissive
- 'Xenova/Qwen2.5-0.5B-Instruct', // may require accepting license or token
- 'Xenova/gpt2' // fallback tiny (non-instruct, but ensures something works)
+ let TRANSFORMERS_MODEL_CANDIDATES = [];
+ const DEFAULT_TRANSFORMERS_MODEL_CANDIDATES = [
+ // Prefer small, widely available, ungated first.
+ 'Xenova/distilgpt2', // tiny baseline, almost always available
+ 'Xenova/gpt2', // larger baseline
+ 'Xenova/phi-2', // smallish, popular (may need token if rate-limited)
+ 'Xenova/Qwen2.5-0.5B-Instruct' // instruct style (may gate)
  ];
- const SMALLER_MODEL_HINT = 'Xenova/SmolLM2-360M-Instruct';
+ const SMALLER_MODEL_HINT = 'Xenova/distilgpt2';
+ const modelCandidatesInput = document.getElementById('model-candidates');
+ const applyModelsBtn = document.getElementById('apply-models');
+ const storedModels = localStorage.getItem('MODEL_CANDIDATES');
+ if (storedModels) {
+ TRANSFORMERS_MODEL_CANDIDATES = storedModels.split(',').map(s=>s.trim()).filter(Boolean);
+ modelCandidatesInput.value = TRANSFORMERS_MODEL_CANDIDATES.join(',');
+ } else {
+ TRANSFORMERS_MODEL_CANDIDATES = [...DEFAULT_TRANSFORMERS_MODEL_CANDIDATES];
+ modelCandidatesInput.value = TRANSFORMERS_MODEL_CANDIDATES.join(',');
+ }
  let chosenTransformersModel = null;
+ // Load skip RAG preference
+ const storedSkipRag = localStorage.getItem('SKIP_RAG') === '1';
+ skipRagCheckbox.checked = storedSkipRag;
+ skipRagCheckbox.addEventListener('change', () => {
+ localStorage.setItem('SKIP_RAG', skipRagCheckbox.checked ? '1' : '0');
+ appendDiagnostic('Skip RAG set to ' + skipRagCheckbox.checked);
+ });
+

  // Allow user to inject HF token before loading (e.g., window.HF_TOKEN = 'hf_xxx'; before this script)
  if (window.HF_TOKEN) {
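The custom list is persisted under the localStorage key MODEL_CANDIDATES as one comma-separated string and re-parsed on every load. A hypothetical devtools snippet that performs the same override the Apply Models button does (the model ids are examples only):

    // Same format the patch stores: comma-separated model ids.
    localStorage.setItem('MODEL_CANDIDATES', 'Xenova/distilgpt2,Xenova/gpt2');
    location.reload(); // startup code re-reads and splits MODEL_CANDIDATES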
@@ -358,6 +410,8 @@ If you can answer the question directly with your existing knowledge or after us
  }
  ];

+ appendDiagnostic(messages[0].content);
+
  // --- Helper Functions ---

  // Callback function for initializing WebLLM progress.
@@ -421,15 +475,22 @@ If you can answer the question directly with your existing knowledge or after us

  // --- RAG Lookup Logic ---
  async function performRagLookup(query) {
+ if (skipRagCheckbox.checked) {
+ appendDiagnostic('RAG skipped by user preference.');
+ return null;
+ }
  if (!embedder || miniTableIndexEmbeddings.length === 0 || detailedSchemaEmbeddings.length === 0) {
  console.warn("Embedding model or knowledge base not ready for RAG lookup.");
+ appendDiagnostic("Embedding model or knowledge base not ready for RAG lookup.");
  return null;
  }
+ appendDiagnostic('RAG start for query: ' + query);

  try {
  // Stage 1: Embed user query and identify relevant tables from mini-index
  const queryEmbeddingOutput = await embedder(query, { pooling: 'mean', normalize: true });
  const queryEmbedding = queryEmbeddingOutput.data;
+ appendDiagnostic('RAG: query embedded dim=' + queryEmbedding.length);

  let tableSimilarities = [];
  for (const tableIndex of miniTableIndexEmbeddings) {
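The loop that begins here scores every mini-index entry against the query embedding; the similarity helper itself sits outside this hunk. Because the embedder is invoked with normalize: true, cosine similarity over the resulting unit vectors reduces to a plain dot product. A minimal sketch, assuming a helper named cosineSimilarity like the one the surrounding code presumably calls:

    // For unit-length vectors, cosine similarity is just the dot product.
    function cosineSimilarity(a, b) {
      let dot = 0;
      for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
      return dot;
    }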
@@ -442,8 +503,10 @@ If you can answer the question directly with your existing knowledge or after us

  if (topRelevantTableIds.length === 0) {
  console.log("No highly relevant tables identified for query:", query);
+ appendDiagnostic("RAG: No table above threshold.");
  return null;
  }
+ appendDiagnostic("RAG: tables -> " + topRelevantTableIds.join(','));
  console.log("Identified relevant tables for RAG:", topRelevantTableIds);

  // Stage 2: Filter detailed chunks by relevant tables and re-rank
@@ -465,8 +528,10 @@ If you can answer the question directly with your existing knowledge or after us
  const contextChunks = chunkSimilarities.filter(s => s.score > 0.4).slice(0, maxChunksToInclude).map(s => s.chunk); // Filter by score again

  if (contextChunks.length > 0) {
+ appendDiagnostic('RAG: selected ' + contextChunks.length + ' chunks.');
  return contextChunks.join("\n\n---\n\n");
  } else {
+ appendDiagnostic('RAG: No chunk passed score filter.');
  return null; // No relevant chunks found after filtering
  }

@@ -488,8 +553,36 @@ If you can answer the question directly with your existing knowledge or after us

  // Attempt order: transformers webgpu -> transformers webgl -> webllm -> transformers wasm
  const modelLoadErrors = [];
- async function tryTransformers(deviceTag) {
+ let validatedModelCandidates = null;
+ async function preflightModels() {
+ if (validatedModelCandidates) return validatedModelCandidates;
+ validatedModelCandidates = [];
+ appendDiagnostic('Preflight HEAD validation for models...');
  for (const modelId of TRANSFORMERS_MODEL_CANDIDATES) {
+ const cfgUrl = `${env.remoteURL}/${modelId}/resolve/main/config.json`;
+ try {
+ let resp = await fetch(cfgUrl, { method: 'HEAD' });
+ if (resp.status === 405) { // Method not allowed, try GET minimal
+ resp = await fetch(cfgUrl, { method: 'GET' });
+ }
+ if (resp.ok) {
+ validatedModelCandidates.push(modelId);
+ appendDiagnostic(`OK ${modelId}`);
+ } else {
+ appendDiagnostic(`Skip ${modelId} (${resp.status})`);
+ }
+ } catch (e) {
+ appendDiagnostic(`Skip ${modelId} (error: ${e.message})`);
+ }
+ }
+ if (validatedModelCandidates.length === 0) {
+ appendDiagnostic('No valid models after preflight.');
+ }
+ return validatedModelCandidates;
+ }
+ async function tryTransformers(deviceTag) {
+ const candidates = await preflightModels();
+ for (const modelId of candidates) {
  try {
  downloadStatus.textContent = `Loading ${modelId} (${deviceTag})...`;
  const opts = { quantized: true };
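One caveat on the preflight: the probe URL is built from env.remoteURL, but @xenova/transformers 2.x appears to configure the remote location through env.remoteHost and env.remotePathTemplate instead; if env.remoteURL is undefined in this version, every probe URL would begin with "undefined/" and all candidates would be skipped. A hedged alternative that hardcodes the public hub layout:

    // Assumes the standard Hugging Face hub layout for model files.
    const cfgUrl = `https://huggingface.co/${modelId}/resolve/main/config.json`;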
@@ -651,6 +744,8 @@ If you can answer the question directly with your existing knowledge or after us
  let fullAssistantResponse = "";
  chatStats.classList.add("hidden");

+ console.log('Messages ', messages);
+
  try {
  if (chatBackend === 'webllm') {
  // Original WebLLM two-pass tool invocation logic
@@ -660,39 +755,46 @@ If you can answer the question directly with your existing knowledge or after us
  temperature: 0.7,
  top_p: 0.9,
  });
- const llmFirstResponseContent = initialCompletion.choices?.[0]?.message?.content || "";
- let parsedAction = null;
- try { parsedAction = JSON.parse(llmFirstResponseContent); } catch (_) {}
+ let llmFirstResponseContent = initialCompletion.choices?.[0]?.message?.content || "";
  let finalResponseContent = "";
- if (parsedAction && parsedAction.action === "lookup_schema_info" && parsedAction.query) {
- updateLastAssistantMessage("🔎 Searching schema for: " + parsedAction.query);
- messages.push({ role: "assistant", content: llmFirstResponseContent });
- const retrievedContext = await performRagLookup(parsedAction.query);
- if (retrievedContext) {
- const toolOutputMessage = `Here is the requested schema information:\n\`\`\`\n${retrievedContext}\n\`\`\`\nPlease use this information to answer the user's original question: "${input}"`;
- messages.push({ role: "user", content: toolOutputMessage });
- updateLastAssistantMessage("🧠 Processing with retrieved info...");
- const finalCompletion = await engine.chat.completions.create({
- messages: messages,
- stream: true,
- temperature: 0.7,
- top_p: 0.9,
- });
- for await (const chunk of finalCompletion) {
- const curDelta = chunk.choices?.[0]?.delta.content;
- if (curDelta) {
- fullAssistantResponse += curDelta;
- updateLastAssistantMessage(fullAssistantResponse);
- }
- }
- finalResponseContent = fullAssistantResponse;
- } else {
- finalResponseContent = "I couldn't find specific relevant schema information for your request: \"" + parsedAction.query + "\".";
- updateLastAssistantMessage(finalResponseContent);
- }
- } else {
- finalResponseContent = llmFirstResponseContent;
- updateLastAssistantMessage(finalResponseContent);
- }
+ if (skipRagCheckbox.checked) {
+ appendDiagnostic('Skip RAG mode: using first LLM response directly.');
+ finalResponseContent = llmFirstResponseContent;
+ updateLastAssistantMessage(finalResponseContent);
+ } else {
+ let parsedAction = null;
+ try { parsedAction = JSON.parse(llmFirstResponseContent); } catch (_) {}
+ if (parsedAction && parsedAction.action === "lookup_schema_info" && parsedAction.query) {
+ appendDiagnostic("RAG lookup requested by model: " + parsedAction.query);
+ updateLastAssistantMessage("🔎 Searching schema for: " + parsedAction.query);
+ messages.push({ role: "assistant", content: llmFirstResponseContent });
+ const retrievedContext = await performRagLookup(parsedAction.query);
+ if (retrievedContext) {
+ const toolOutputMessage = `Here is the requested schema information:\n\`\`\`\n${retrievedContext}\n\`\`\`\nPlease use this information to answer the user's original question: "${input}"`;
+ messages.push({ role: "user", content: toolOutputMessage });
+ updateLastAssistantMessage("🧠 Processing with retrieved info...");
+ const finalCompletion = await engine.chat.completions.create({
+ messages: messages,
+ stream: true,
+ temperature: 0.7,
+ top_p: 0.9,
+ });
+ for await (const chunk of finalCompletion) {
+ const curDelta = chunk.choices?.[0]?.delta.content;
+ if (curDelta) {
+ fullAssistantResponse += curDelta;
+ updateLastAssistantMessage(fullAssistantResponse);
+ }
+ }
+ finalResponseContent = fullAssistantResponse;
+ } else {
+ finalResponseContent = "No relevant context.";
+ updateLastAssistantMessage(finalResponseContent);
+ }
+ } else {
+ finalResponseContent = llmFirstResponseContent;
+ updateLastAssistantMessage(finalResponseContent);
+ }
+ }
  messages.push({ content: finalResponseContent, role: 'assistant' });
  const usageText = await engine.runtimeStatsText();
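For reference, the parser in this hunk only enters the lookup branch when the first completion is a JSON object carrying exactly these two fields (the query string here is an invented example):

    { "action": "lookup_schema_info", "query": "columns of the orders table" }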
@@ -701,8 +803,11 @@ If you can answer the question directly with your existing knowledge or after us
  } else if (chatBackend && chatBackend.startsWith('transformers')) {
  // Fallback CPU flow: single pass with RAG context (no tool JSON handshake to save latency)
  updateLastAssistantMessage('🧠 Gathering relevant schema context...');
- const ragContext = await performRagLookup(input) || 'No directly relevant schema rows found.';
- const prompt = `${systemMessageContent}\n\nUser question: ${input}\n\nRelevant schema context:\n${ragContext}\n\nAnswer:`;
+ let ragContext = null;
+ if (!skipRagCheckbox.checked) ragContext = await performRagLookup(input);
+ const prompt = skipRagCheckbox.checked
+ ? `${systemMessageContent}\n\nUser question: ${input}\n\nAnswer:`
+ : `${systemMessageContent}\n\nUser question: ${input}\n\nRelevant schema context:\n${ragContext || 'No relevant context.'}\n\nAnswer:`;
  updateLastAssistantMessage(`✍️ Generating answer (${chatBackend}${chosenTransformersModel? '/' + chosenTransformersModel: ''})...`);
  let streamedAnswer = '';
  try {
@@ -771,6 +876,12 @@ If you can answer the question directly with your existing knowledge or after us
  window.location.reload();
  }
  });
+ applyModelsBtn.addEventListener('click', () => {
+ const raw = modelCandidatesInput.value.trim();
+ if (!raw) return;
+ localStorage.setItem('MODEL_CANDIDATES', raw);
+ window.location.reload();
+ });
  forceReloadBtn.addEventListener('click', () => window.location.reload());
  toggleDiagBtn.addEventListener('click', () => diagnosticsEl.classList.toggle('show'));

@@ -782,8 +893,19 @@ If you can answer the question directly with your existing knowledge or after us
  }
  });

+ // Attempt to reduce ONNX Runtime verbosity (if backend loads onnxruntime-web)
+ async function quietOnnxLogs() {
+ try {
+ const ort = await import('https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js');
+ // ort.env.logLevel values: 'verbose'|'info'|'warning'|'error' (or numeric severity)
+ ort.env.logLevel = 'info';
+ } catch (e) {
+ appendDiagnostic('ORT log level not set: ' + e.message);
+ }
+ }
+
  // Initialize all models (WebLLM and Embedding model) when the page loads
- document.addEventListener("DOMContentLoaded", initializeModels);
+ document.addEventListener("DOMContentLoaded", () => { quietOnnxLogs(); initializeModels(); });
  </script>
  </body>
  </html>
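A note on quietOnnxLogs above: dynamically importing the UMD build of onnxruntime-web may yield a module namespace without an env export (the bundle typically registers a global ort instead), and transformers.js bundles its own onnxruntime, so the setting may never reach the instance doing the logging. A hedged variant that reads the global, under those assumptions:

    // Assumption: the UMD bundle attached itself to globalThis.ort.
    const ort = globalThis.ort;
    if (ort?.env) ort.env.logLevel = 'info';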