Progress.
Browse files- index.html +167 -45
index.html
CHANGED
|
@@ -208,6 +208,14 @@
|
|
| 208 |
<input type="text" id="hf-token" placeholder="hf_... (optional)" size="18" autocomplete="off" />
|
| 209 |
</label>
|
| 210 |
<button id="apply-token" title="Store token (localStorage) & reload">Apply Token + Reload</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
<select id="preferred-backend" title="Preferred first backend">
|
| 212 |
<option value="transformers-webgpu">TF WebGPU</option>
|
| 213 |
<option value="transformers-webgl">TF WebGL</option>
|
|
@@ -230,14 +238,35 @@
|
|
| 230 |
</div>
|
| 231 |
|
| 232 |
<script type="module">
|
| 233 |
-
|
| 234 |
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
/*************** WebLLM Logic & RAG Components ***************/
|
| 237 |
|
| 238 |
// System message for the LLM to understand its role and tool use
|
| 239 |
const systemMessageContent = `
|
| 240 |
-
You are an intelligent
|
|
|
|
|
|
|
| 241 |
|
| 242 |
You have access to a special "lookup" tool. If you need more specific details about tables or concepts to answer a user's question, you MUST respond with a JSON object in this exact format:
|
| 243 |
|
|
@@ -263,13 +292,15 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 263 |
const sendButton = document.getElementById("send");
|
| 264 |
const downloadStatus = document.getElementById("download-status");
|
| 265 |
const chatStats = document.getElementById("chat-stats");
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
| 273 |
|
| 274 |
let currentAssistantMessageElement = null; // To update the streaming message
|
| 275 |
let embedder = null; // In-browser embedding model
|
|
@@ -281,13 +312,34 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 281 |
let chatBackend = null;
|
| 282 |
let textGenPipeline = null; // transformers.js pipeline instance
|
| 283 |
// Candidate models (ordered). We rotate until one loads. Prefer fully open, ungated models first.
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
'Xenova/
|
|
|
|
|
|
|
|
|
|
| 288 |
];
|
| 289 |
-
const SMALLER_MODEL_HINT = 'Xenova/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
let chosenTransformersModel = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
// Allow user to inject HF token before loading (e.g., window.HF_TOKEN = 'hf_xxx'; before this script)
|
| 293 |
if (window.HF_TOKEN) {
|
|
@@ -358,6 +410,8 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 358 |
}
|
| 359 |
];
|
| 360 |
|
|
|
|
|
|
|
| 361 |
// --- Helper Functions ---
|
| 362 |
|
| 363 |
// Callback function for initializing WebLLM progress.
|
|
@@ -421,15 +475,22 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 421 |
|
| 422 |
// --- RAG Lookup Logic ---
|
| 423 |
async function performRagLookup(query) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
if (!embedder || miniTableIndexEmbeddings.length === 0 || detailedSchemaEmbeddings.length === 0) {
|
| 425 |
console.warn("Embedding model or knowledge base not ready for RAG lookup.");
|
|
|
|
| 426 |
return null;
|
| 427 |
}
|
|
|
|
| 428 |
|
| 429 |
try {
|
| 430 |
// Stage 1: Embed user query and identify relevant tables from mini-index
|
| 431 |
const queryEmbeddingOutput = await embedder(query, { pooling: 'mean', normalize: true });
|
| 432 |
const queryEmbedding = queryEmbeddingOutput.data;
|
|
|
|
| 433 |
|
| 434 |
let tableSimilarities = [];
|
| 435 |
for (const tableIndex of miniTableIndexEmbeddings) {
|
|
@@ -442,8 +503,10 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 442 |
|
| 443 |
if (topRelevantTableIds.length === 0) {
|
| 444 |
console.log("No highly relevant tables identified for query:", query);
|
|
|
|
| 445 |
return null;
|
| 446 |
}
|
|
|
|
| 447 |
console.log("Identified relevant tables for RAG:", topRelevantTableIds);
|
| 448 |
|
| 449 |
// Stage 2: Filter detailed chunks by relevant tables and re-rank
|
|
@@ -465,8 +528,10 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 465 |
const contextChunks = chunkSimilarities.filter(s => s.score > 0.4).slice(0, maxChunksToInclude).map(s => s.chunk); // Filter by score again
|
| 466 |
|
| 467 |
if (contextChunks.length > 0) {
|
|
|
|
| 468 |
return contextChunks.join("\n\n---\n\n");
|
| 469 |
} else {
|
|
|
|
| 470 |
return null; // No relevant chunks found after filtering
|
| 471 |
}
|
| 472 |
|
|
@@ -488,8 +553,36 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 488 |
|
| 489 |
// Attempt order: transformers webgpu -> transformers webgl -> webllm -> transformers wasm
|
| 490 |
const modelLoadErrors = [];
|
| 491 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
for (const modelId of TRANSFORMERS_MODEL_CANDIDATES) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
try {
|
| 494 |
downloadStatus.textContent = `Loading ${modelId} (${deviceTag})...`;
|
| 495 |
const opts = { quantized: true };
|
|
@@ -651,6 +744,8 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 651 |
let fullAssistantResponse = "";
|
| 652 |
chatStats.classList.add("hidden");
|
| 653 |
|
|
|
|
|
|
|
| 654 |
try {
|
| 655 |
if (chatBackend === 'webllm') {
|
| 656 |
// Original WebLLM two-pass tool invocation logic
|
|
@@ -660,39 +755,46 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 660 |
temperature: 0.7,
|
| 661 |
top_p: 0.9,
|
| 662 |
});
|
| 663 |
-
|
| 664 |
-
let parsedAction = null;
|
| 665 |
-
try { parsedAction = JSON.parse(llmFirstResponseContent); } catch (_) {}
|
| 666 |
let finalResponseContent = "";
|
| 667 |
-
if (
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
}
|
| 688 |
-
finalResponseContent = fullAssistantResponse;
|
| 689 |
} else {
|
| 690 |
-
finalResponseContent =
|
| 691 |
updateLastAssistantMessage(finalResponseContent);
|
| 692 |
}
|
| 693 |
-
} else {
|
| 694 |
-
finalResponseContent = llmFirstResponseContent;
|
| 695 |
-
updateLastAssistantMessage(finalResponseContent);
|
| 696 |
}
|
| 697 |
messages.push({ content: finalResponseContent, role: 'assistant' });
|
| 698 |
const usageText = await engine.runtimeStatsText();
|
|
@@ -701,8 +803,11 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 701 |
} else if (chatBackend && chatBackend.startsWith('transformers')) {
|
| 702 |
// Fallback CPU flow: single pass with RAG context (no tool JSON handshake to save latency)
|
| 703 |
updateLastAssistantMessage('🧠 Gathering relevant schema context...');
|
| 704 |
-
|
| 705 |
-
|
|
|
|
|
|
|
|
|
|
| 706 |
updateLastAssistantMessage(`✍️ Generating answer (${chatBackend}${chosenTransformersModel? '/' + chosenTransformersModel: ''})...`);
|
| 707 |
let streamedAnswer = '';
|
| 708 |
try {
|
|
@@ -771,6 +876,12 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 771 |
window.location.reload();
|
| 772 |
}
|
| 773 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 774 |
forceReloadBtn.addEventListener('click', () => window.location.reload());
|
| 775 |
toggleDiagBtn.addEventListener('click', () => diagnosticsEl.classList.toggle('show'));
|
| 776 |
|
|
@@ -782,8 +893,19 @@ If you can answer the question directly with your existing knowledge or after us
|
|
| 782 |
}
|
| 783 |
});
|
| 784 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
// Initialize all models (WebLLM and Embedding model) when the page loads
|
| 786 |
-
document.addEventListener("DOMContentLoaded", initializeModels);
|
| 787 |
</script>
|
| 788 |
</body>
|
| 789 |
</html>
|
|
|
|
| 208 |
<input type="text" id="hf-token" placeholder="hf_... (optional)" size="18" autocomplete="off" />
|
| 209 |
</label>
|
| 210 |
<button id="apply-token" title="Store token (localStorage) & reload">Apply Token + Reload</button>
|
| 211 |
+
<label style="display:flex;align-items:center;gap:4px;">Models:
|
| 212 |
+
<input type="text" id="model-candidates" placeholder="comma-separated model ids" size="26" />
|
| 213 |
+
</label>
|
| 214 |
+
<button id="apply-models" title="Store custom model list & reload">Apply Models</button>
|
| 215 |
+
<label style="display:flex;align-items:center;gap:4px;">Skip RAG:
|
| 216 |
+
<input type="checkbox" id="skip-rag" title="If checked, no retrieval augmented context will be gathered." />
|
| 217 |
+
</label>
|
| 218 |
+
|
| 219 |
<select id="preferred-backend" title="Preferred first backend">
|
| 220 |
<option value="transformers-webgpu">TF WebGPU</option>
|
| 221 |
<option value="transformers-webgl">TF WebGL</option>
|
|
|
|
| 238 |
</div>
|
| 239 |
|
| 240 |
<script type="module">
|
| 241 |
+
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
|
| 242 |
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
|
| 243 |
+
|
| 244 |
+
// ---- Console Log Filtering (suppress noisy ONNX optimizer warnings) ----
// A console call is dropped when any of its string arguments matches one of these patterns.
const LOG_FILTER_PATTERNS = [
  /CleanUnusedInitializersAndNodeArgs/i,
  /graph\.cc:\d+ CleanUnusedInitializersAndNodeArgs/i,
  /Removing initializer '\/transformer\//i
];

// The console methods as they were before the filtering wrappers below replaced them.
const originalConsole = { log: console.log, warn: console.warn };

/**
 * Decide whether a console call should be silenced.
 *
 * @param {Array<*>} args - The arguments the caller passed to console.log / console.warn.
 * @returns {boolean} true when at least one string argument matches a filter pattern;
 *   non-string arguments are never inspected.
 */
function shouldSuppress(args) {
  for (const candidate of args) {
    if (typeof candidate !== 'string') {
      continue;
    }
    for (const pattern of LOG_FILTER_PATTERNS) {
      if (pattern.test(candidate)) {
        return true;
      }
    }
  }
  return false;
}

// Replace console.warn and console.log with wrappers that forward every call that is
// not suppressed to the saved original method, arguments unchanged.
for (const level of ['warn', 'log']) {
  console[level] = (...payload) => {
    if (!shouldSuppress(payload)) {
      originalConsole[level](...payload);
    }
  };
}
|
| 262 |
|
| 263 |
/*************** WebLLM Logic & RAG Components ***************/
|
| 264 |
|
| 265 |
// System message for the LLM to understand its role and tool use
|
| 266 |
const systemMessageContent = `
|
| 267 |
+
You are an intelligent person with honesty and broad knowledge.
|
| 268 |
+
|
| 269 |
+
Although you also know about SQL queries.
|
| 270 |
|
| 271 |
You have access to a special "lookup" tool. If you need more specific details about tables or concepts to answer a user's question, you MUST respond with a JSON object in this exact format:
|
| 272 |
|
|
|
|
| 292 |
const sendButton = document.getElementById("send");
|
| 293 |
const downloadStatus = document.getElementById("download-status");
|
| 294 |
const chatStats = document.getElementById("chat-stats");
|
| 295 |
+
const diagnosticsEl = document.getElementById('diagnostics');
|
| 296 |
+
const tokenInput = document.getElementById('hf-token');
|
| 297 |
+
const applyTokenBtn = document.getElementById('apply-token');
|
| 298 |
+
const forceReloadBtn = document.getElementById('force-reload');
|
| 299 |
+
const toggleDiagBtn = document.getElementById('toggle-diagnostics');
|
| 300 |
+
const backendSelect = document.getElementById('preferred-backend');
|
| 301 |
+
const activeBackendLabel = document.getElementById('active-backend');
|
| 302 |
+
const skipRagCheckbox = document.getElementById('skip-rag');
|
| 303 |
+
|
| 304 |
|
| 305 |
let currentAssistantMessageElement = null; // To update the streaming message
|
| 306 |
let embedder = null; // In-browser embedding model
|
|
|
|
| 312 |
let chatBackend = null;
|
| 313 |
let textGenPipeline = null; // transformers.js pipeline instance
|
| 314 |
// Candidate models (ordered). We rotate until one loads. Prefer fully open, ungated models first.
|
| 315 |
+
let TRANSFORMERS_MODEL_CANDIDATES = [];
|
| 316 |
+
const DEFAULT_TRANSFORMERS_MODEL_CANDIDATES = [
|
| 317 |
+
// Prefer small, widely available, ungated first.
|
| 318 |
+
'Xenova/distilgpt2', // tiny baseline, almost always available
|
| 319 |
+
'Xenova/gpt2', // larger baseline
|
| 320 |
+
'Xenova/phi-2', // smallish, popular (may need token if rate-limited)
|
| 321 |
+
'Xenova/Qwen2.5-0.5B-Instruct' // instruct style (may gate)
|
| 322 |
];
|
| 323 |
+
const SMALLER_MODEL_HINT = 'Xenova/distilgpt2';
|
| 324 |
+
const modelCandidatesInput = document.getElementById('model-candidates');
|
| 325 |
+
const applyModelsBtn = document.getElementById('apply-models');
|
| 326 |
+
// Restore a user-supplied model list persisted under 'MODEL_CANDIDATES' (comma-separated ids).
// A missing entry, or one that contains no usable ids (for example "," or only whitespace),
// falls back to the built-in defaults so the loader is never left with an empty candidate list.
const storedModels = localStorage.getItem('MODEL_CANDIDATES');
const storedModelIds = (storedModels ?? '')
  .split(',')
  .map((id) => id.trim())
  .filter(Boolean);
TRANSFORMERS_MODEL_CANDIDATES = storedModelIds.length > 0
  ? storedModelIds
  : [...DEFAULT_TRANSFORMERS_MODEL_CANDIDATES];
// Show the effective list in the text field so the user can see and edit it.
modelCandidatesInput.value = TRANSFORMERS_MODEL_CANDIDATES.join(',');
|
| 334 |
let chosenTransformersModel = null;
|
| 335 |
+
// Restore the persisted "Skip RAG" choice (stored as '1' when enabled) into the checkbox,
// then write every later change of the checkbox back to localStorage.
const storedSkipRag = localStorage.getItem('SKIP_RAG') === '1';
skipRagCheckbox.checked = storedSkipRag;

// Change handler: persist the checkbox state as '1'/'0' and record it in the diagnostics log.
function persistSkipRagPreference() {
  const skipFlag = skipRagCheckbox.checked ? '1' : '0';
  localStorage.setItem('SKIP_RAG', skipFlag);
  appendDiagnostic('Skip RAG set to ' + skipRagCheckbox.checked);
}

skipRagCheckbox.addEventListener('change', persistSkipRagPreference);
|
| 342 |
+
|
| 343 |
|
| 344 |
// Allow user to inject HF token before loading (e.g., window.HF_TOKEN = 'hf_xxx'; before this script)
|
| 345 |
if (window.HF_TOKEN) {
|
|
|
|
| 410 |
}
|
| 411 |
];
|
| 412 |
|
| 413 |
+
appendDiagnostic(messages[0].content);
|
| 414 |
+
|
| 415 |
// --- Helper Functions ---
|
| 416 |
|
| 417 |
// Callback function for initializing WebLLM progress.
|
|
|
|
| 475 |
|
| 476 |
// --- RAG Lookup Logic ---
|
| 477 |
async function performRagLookup(query) {
|
| 478 |
+
if (skipRagCheckbox.checked) {
|
| 479 |
+
appendDiagnostic('RAG skipped by user preference.');
|
| 480 |
+
return null;
|
| 481 |
+
}
|
| 482 |
if (!embedder || miniTableIndexEmbeddings.length === 0 || detailedSchemaEmbeddings.length === 0) {
|
| 483 |
console.warn("Embedding model or knowledge base not ready for RAG lookup.");
|
| 484 |
+
appendDiagnostic("Embedding model or knowledge base not ready for RAG lookup.");
|
| 485 |
return null;
|
| 486 |
}
|
| 487 |
+
appendDiagnostic('RAG start for query: ' + query);
|
| 488 |
|
| 489 |
try {
|
| 490 |
// Stage 1: Embed user query and identify relevant tables from mini-index
|
| 491 |
const queryEmbeddingOutput = await embedder(query, { pooling: 'mean', normalize: true });
|
| 492 |
const queryEmbedding = queryEmbeddingOutput.data;
|
| 493 |
+
appendDiagnostic('RAG: query embedded dim=' + queryEmbedding.length);
|
| 494 |
|
| 495 |
let tableSimilarities = [];
|
| 496 |
for (const tableIndex of miniTableIndexEmbeddings) {
|
|
|
|
| 503 |
|
| 504 |
if (topRelevantTableIds.length === 0) {
|
| 505 |
console.log("No highly relevant tables identified for query:", query);
|
| 506 |
+
appendDiagnostic("RAG: No table above threshold.");
|
| 507 |
return null;
|
| 508 |
}
|
| 509 |
+
appendDiagnostic("RAG: tables -> " + topRelevantTableIds.join(','));
|
| 510 |
console.log("Identified relevant tables for RAG:", topRelevantTableIds);
|
| 511 |
|
| 512 |
// Stage 2: Filter detailed chunks by relevant tables and re-rank
|
|
|
|
| 528 |
const contextChunks = chunkSimilarities.filter(s => s.score > 0.4).slice(0, maxChunksToInclude).map(s => s.chunk); // Filter by score again
|
| 529 |
|
| 530 |
if (contextChunks.length > 0) {
|
| 531 |
+
appendDiagnostic('RAG: selected ' + contextChunks.length + ' chunks.');
|
| 532 |
return contextChunks.join("\n\n---\n\n");
|
| 533 |
} else {
|
| 534 |
+
appendDiagnostic('RAG: No chunk passed score filter.');
|
| 535 |
return null; // No relevant chunks found after filtering
|
| 536 |
}
|
| 537 |
|
|
|
|
| 553 |
|
| 554 |
// Attempt order: transformers webgpu -> transformers webgl -> webllm -> transformers wasm
|
| 555 |
const modelLoadErrors = [];
|
| 556 |
+
// Model ids that passed the preflight check; null until preflightModels() first runs.
let validatedModelCandidates = null;

/**
 * Probe each entry of TRANSFORMERS_MODEL_CANDIDATES by requesting its config.json from the
 * model host, and keep only the ids whose request succeeds. The result is cached in
 * validatedModelCandidates, so later calls return the same array without new requests.
 *
 * Each probe is a HEAD request; if the server answers 405 the probe is retried with GET.
 * A non-OK status or a network error skips that model and is noted via appendDiagnostic.
 *
 * @returns {Promise<string[]>} The candidate ids that responded OK, in their original order
 *   (possibly empty).
 */
async function preflightModels() {
  if (validatedModelCandidates) return validatedModelCandidates;
  validatedModelCandidates = [];
  appendDiagnostic('Preflight HEAD validation for models...');

  // transformers.js 2.x publishes the hub origin as env.remoteHost (with a trailing slash)
  // and has no env.remoteURL; reading only remoteURL produced "undefined/<model>/..." URLs
  // that failed every probe. env.remoteURL is still honoured first in case it is set
  // elsewhere in this file. Trailing slashes are stripped so the join below yields one "/".
  const remoteBase = String(env.remoteURL ?? env.remoteHost ?? 'https://huggingface.co/')
    .replace(/\/+$/, '');

  for (const modelId of TRANSFORMERS_MODEL_CANDIDATES) {
    const cfgUrl = `${remoteBase}/${modelId}/resolve/main/config.json`;
    try {
      let resp = await fetch(cfgUrl, { method: 'HEAD' });
      if (resp.status === 405) { // Method not allowed, retry with GET
        resp = await fetch(cfgUrl, { method: 'GET' });
      }
      if (resp.ok) {
        validatedModelCandidates.push(modelId);
        appendDiagnostic(`OK ${modelId}`);
      } else {
        appendDiagnostic(`Skip ${modelId} (${resp.status})`);
      }
    } catch (e) {
      // Network/CORS failure: treat the model as unavailable rather than aborting preflight.
      appendDiagnostic(`Skip ${modelId} (error: ${e.message})`);
    }
  }

  if (validatedModelCandidates.length === 0) {
    appendDiagnostic('No valid models after preflight.');
  }
  return validatedModelCandidates;
}
|
| 583 |
+
async function tryTransformers(deviceTag) {
|
| 584 |
+
const candidates = await preflightModels();
|
| 585 |
+
for (const modelId of candidates) {
|
| 586 |
try {
|
| 587 |
downloadStatus.textContent = `Loading ${modelId} (${deviceTag})...`;
|
| 588 |
const opts = { quantized: true };
|
|
|
|
| 744 |
let fullAssistantResponse = "";
|
| 745 |
chatStats.classList.add("hidden");
|
| 746 |
|
| 747 |
+
console.log('Messages ', messages);
|
| 748 |
+
|
| 749 |
try {
|
| 750 |
if (chatBackend === 'webllm') {
|
| 751 |
// Original WebLLM two-pass tool invocation logic
|
|
|
|
| 755 |
temperature: 0.7,
|
| 756 |
top_p: 0.9,
|
| 757 |
});
|
| 758 |
+
let llmFirstResponseContent = initialCompletion.choices?.[0]?.message?.content || "";
|
|
|
|
|
|
|
| 759 |
let finalResponseContent = "";
|
| 760 |
+
if (skipRagCheckbox.checked) {
|
| 761 |
+
appendDiagnostic('Skip RAG mode: using first LLM response directly.');
|
| 762 |
+
finalResponseContent = llmFirstResponseContent;
|
| 763 |
+
updateLastAssistantMessage(finalResponseContent);
|
| 764 |
+
} else {
|
| 765 |
+
let parsedAction = null;
|
| 766 |
+
try { parsedAction = JSON.parse(llmFirstResponseContent); } catch (_) {}
|
| 767 |
+
if (parsedAction && parsedAction.action === "lookup_schema_info" && parsedAction.query) {
|
| 768 |
+
appendDiagnostic("RAG lookup requested by model: " + parsedAction.query);
|
| 769 |
+
updateLastAssistantMessage("🔎 Searching schema for: " + parsedAction.query);
|
| 770 |
+
messages.push({ role: "assistant", content: llmFirstResponseContent });
|
| 771 |
+
const retrievedContext = await performRagLookup(parsedAction.query);
|
| 772 |
+
if (retrievedContext) {
|
| 773 |
+
const toolOutputMessage = `Here is the requested schema information:\n\`\`\`\n${retrievedContext}\n\`\`\`\nPlease use this information to answer the user's original question: "${input}"`;
|
| 774 |
+
messages.push({ role: "user", content: toolOutputMessage });
|
| 775 |
+
updateLastAssistantMessage("🧠 Processing with retrieved info...");
|
| 776 |
+
const finalCompletion = await engine.chat.completions.create({
|
| 777 |
+
messages: messages,
|
| 778 |
+
stream: true,
|
| 779 |
+
temperature: 0.7,
|
| 780 |
+
top_p: 0.9,
|
| 781 |
+
});
|
| 782 |
+
for await (const chunk of finalCompletion) {
|
| 783 |
+
const curDelta = chunk.choices?.[0]?.delta.content;
|
| 784 |
+
if (curDelta) {
|
| 785 |
+
fullAssistantResponse += curDelta;
|
| 786 |
+
updateLastAssistantMessage(fullAssistantResponse);
|
| 787 |
+
}
|
| 788 |
}
|
| 789 |
+
finalResponseContent = fullAssistantResponse;
|
| 790 |
+
} else {
|
| 791 |
+
finalResponseContent = "No relevant context.";
|
| 792 |
+
updateLastAssistantMessage(finalResponseContent);
|
| 793 |
}
|
|
|
|
| 794 |
} else {
|
| 795 |
+
finalResponseContent = llmFirstResponseContent;
|
| 796 |
updateLastAssistantMessage(finalResponseContent);
|
| 797 |
}
|
|
|
|
|
|
|
|
|
|
| 798 |
}
|
| 799 |
messages.push({ content: finalResponseContent, role: 'assistant' });
|
| 800 |
const usageText = await engine.runtimeStatsText();
|
|
|
|
| 803 |
} else if (chatBackend && chatBackend.startsWith('transformers')) {
|
| 804 |
// Fallback CPU flow: single pass with RAG context (no tool JSON handshake to save latency)
|
| 805 |
updateLastAssistantMessage('🧠 Gathering relevant schema context...');
|
| 806 |
+
let ragContext = null;
|
| 807 |
+
if (!skipRagCheckbox.checked) ragContext = await performRagLookup(input);
|
| 808 |
+
const prompt = skipRagCheckbox.checked
|
| 809 |
+
? `${systemMessageContent}\n\nUser question: ${input}\n\nAnswer:`
|
| 810 |
+
: `${systemMessageContent}\n\nUser question: ${input}\n\nRelevant schema context:\n${ragContext || 'No relevant context.'}\n\nAnswer:`;
|
| 811 |
updateLastAssistantMessage(`✍️ Generating answer (${chatBackend}${chosenTransformersModel? '/' + chosenTransformersModel: ''})...`);
|
| 812 |
let streamedAnswer = '';
|
| 813 |
try {
|
|
|
|
| 876 |
window.location.reload();
|
| 877 |
}
|
| 878 |
});
|
| 879 |
+
// Persist the edited model list and reload the page so it takes effect.
// The list is normalised (ids trimmed, empty entries dropped) before it is stored. An input
// with no usable ids (empty, whitespace, or only commas) removes the stored override, so the
// next load finds no 'MODEL_CANDIDATES' entry and uses the built-in default list again.
applyModelsBtn.addEventListener('click', () => {
  const modelIds = modelCandidatesInput.value
    .split(',')
    .map((id) => id.trim())
    .filter(Boolean);
  if (modelIds.length === 0) {
    localStorage.removeItem('MODEL_CANDIDATES');
  } else {
    localStorage.setItem('MODEL_CANDIDATES', modelIds.join(','));
  }
  window.location.reload();
});
|
| 885 |
forceReloadBtn.addEventListener('click', () => window.location.reload());
|
| 886 |
toggleDiagBtn.addEventListener('click', () => diagnosticsEl.classList.toggle('show'));
|
| 887 |
|
|
|
|
| 893 |
}
|
| 894 |
});
|
| 895 |
|
| 896 |
+
// Attempt to reduce ONNX Runtime verbosity (best effort).
/**
 * Raise the ONNX Runtime log threshold to 'error' so warning-level optimizer messages
 * are no longer emitted.
 *
 * The setting is applied to the ORT environment that transformers.js itself exposes as
 * env.backends.onnx. Importing a second copy of onnxruntime-web from a CDN would configure
 * a different module instance than the one the pipelines run on. The previous value 'info'
 * was more verbose than ORT's default 'warning', i.e. the opposite of what is intended.
 *
 * Never throws: any failure is reported through appendDiagnostic.
 *
 * @returns {Promise<void>}
 */
async function quietOnnxLogs() {
  try {
    const ortEnv = env.backends?.onnx;
    if (!ortEnv) {
      appendDiagnostic('ORT log level not set: ONNX backend env unavailable');
      return;
    }
    // ort.env.logLevel values: 'verbose'|'info'|'warning'|'error'|'fatal'
    ortEnv.logLevel = 'error';
  } catch (e) {
    appendDiagnostic('ORT log level not set: ' + e.message);
  }
}
|
| 906 |
+
|
| 907 |
// Initialize all models (WebLLM and Embedding model) when the page loads
|
| 908 |
+
document.addEventListener("DOMContentLoaded", () => { quietOnnxLogs(); initializeModels(); });
|
| 909 |
</script>
|
| 910 |
</body>
|
| 911 |
</html>
|