mihailik committed · Commit 802753d · Parent: b9da2d8

With RAG lookup.

Files changed (1):
  1. index.html +596 -0
index.html ADDED
@@ -0,0 +1,596 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>TinyLLM In-Browser Chat with RAG</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 0;
      background-color: #f0f2f5;
      display: flex;
      justify-content: center;
      align-items: center;
      min-height: 100vh;
      color: #333;
    }

    #chat-container {
      background-color: #fff;
      border-radius: 10px;
      box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
      width: 90%;
      max-width: 600px;
      display: flex;
      flex-direction: column;
      overflow: hidden;
      min-height: 80vh;
      max-height: 95vh;
      position: relative;
    }

    h1 {
      text-align: center;
      color: #4a4a4a;
      padding: 15px;
      margin: 0;
      border-bottom: 1px solid #eee;
      font-size: 1.5em;
    }

    .status-message {
      text-align: center;
      padding: 10px;
      background-color: #e0f7fa;
      color: #00796b;
      font-weight: bold;
      border-bottom: 1px solid #b2ebf2;
    }

    #chat-box {
      flex-grow: 1;
      padding: 20px;
      overflow-y: auto;
      display: flex;
      flex-direction: column;
      gap: 10px;
    }

    .message-container {
      display: flex;
      flex-direction: column;
      max-width: 80%;
    }

    .message-container.user {
      align-self: flex-end;
      align-items: flex-end;
    }

    .message-container.assistant {
      align-self: flex-start;
      align-items: flex-start;
    }

    .message-container.system {
      align-self: center; /* Center system messages */
      align-items: center;
      font-size: 0.85em;
      color: #666;
      text-align: center;
      padding: 5px 10px;
      border-radius: 10px;
      background-color: #f0f0f0;
      margin: 5px 0;
    }

    .message-bubble {
      padding: 10px 15px;
      border-radius: 20px;
      line-height: 1.4;
      word-wrap: break-word;
      white-space: pre-wrap; /* Preserve whitespace and line breaks */
    }

    .message-container.user .message-bubble {
      background-color: #007bff;
      color: white;
      border-bottom-right-radius: 5px;
    }

    .message-container.assistant .message-bubble {
      background-color: #e9e9eb;
      color: #333;
      border-bottom-left-radius: 5px;
    }

    .chat-input-container {
      display: flex;
      padding: 15px;
      border-top: 1px solid #eee;
      background-color: #fff;
      gap: 10px;
    }

    #user-input {
      flex-grow: 1;
      padding: 10px 15px;
      border: 1px solid #ddd;
      border-radius: 20px;
      font-size: 1em;
      outline: none;
    }

    #user-input:focus {
      border-color: #007bff;
    }

    #send {
      padding: 10px 20px;
      background-color: #007bff;
      color: white;
      border: none;
      border-radius: 20px;
      cursor: pointer;
      font-size: 1em;
      transition: background-color 0.2s;
    }

    #send:hover:not(:disabled) {
      background-color: #0056b3;
    }

    #send:disabled {
      background-color: #a0c9ff;
      cursor: not-allowed;
    }

    .chat-stats {
      font-size: 0.8em;
      color: #666;
      text-align: right;
      padding: 5px 20px;
      background-color: #f9f9f9;
      border-top: 1px solid #eee;
    }

    .hidden {
      display: none !important;
    }
  </style>
</head>
<body>
  <div id="chat-container">
    <h1>In-Browser LLM Chat with RAG</h1>
    <div id="download-status" class="status-message">Loading model...</div>
    <div id="chat-box"></div>
    <div id="chat-stats" class="chat-stats hidden"></div>
    <div class="chat-input-container">
      <input type="text" id="user-input" placeholder="Loading models for RAG..." disabled>
      <button id="send" disabled>Send</button>
    </div>
  </div>

  <script type="module">
    import * as webllm from "https://esm.run/@mlc-ai/web-llm";
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';

    /*************** WebLLM Logic & RAG Components ***************/

    // System message for the LLM to understand its role and tool use
    const systemMessageContent = `
You are an intelligent SQL database schema assistant. Your primary goal is to answer user questions about database tables, their columns, and their relationships, and to provide SQL query suggestions.

You have access to a special "lookup" tool. If you need more specific details about tables or concepts to answer a user's question, you MUST respond with a JSON object in this exact format:

\`\`\`json
{
  "action": "lookup_schema_info",
  "query": "concise natural language phrase describing what schema information you need"
}
\`\`\`

Examples of "query" for the lookup_schema_info action:
- "details about the Users and Products tables"
- "columns in the Orders table and its related tables"
- "how Categories table relates to Products"

If you can answer the question directly with your existing knowledge or after using the tool, provide the natural language answer or SQL query. Do NOT use the lookup tool if you already have enough information.
`.trim(); // Trim to remove leading/trailing whitespace

    const messages = [{ role: "system", content: systemMessageContent }];
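    // Note: `messages` grows without bound, and small models such as TinyLlama
    // have only a ~2K-token context window, so a very long chat can eventually
    // overflow it. A minimal sliding-window sketch (illustrative only, not
    // called anywhere below) that keeps the system prompt and drops the
    // oldest turns:
    function trimHistory(maxMessages = 12) {
      while (messages.length > maxMessages) {
        messages.splice(1, 1); // Index 0 is the system prompt; drop the next-oldest turn.
      }
    }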

    const chatBox = document.getElementById("chat-box");
    const userInput = document.getElementById("user-input");
    const sendButton = document.getElementById("send");
    const downloadStatus = document.getElementById("download-status");
    const chatStats = document.getElementById("chat-stats");

    let currentAssistantMessageElement = null; // To update the streaming message
    let embedder = null; // In-browser embedding model
    let miniTableIndexEmbeddings = []; // Stores { tableId: "Users", text: "...", embedding: [...] }
    let detailedSchemaEmbeddings = []; // Stores { tableId: "Users", chunkId: "Users_chunk_0", text: "...", embedding: [...] }

    // --- Your SQL Table Data ---
    // This static data represents your knowledge base. In a real app, this might come from a file.
    const rawSqlSchema = [
      {
        name: "Users",
        summary: "Stores user account details including authentication and profile information.",
        details: [
          "Table `Users` has columns: UserID (PRIMARY KEY, INTEGER), Username (TEXT UNIQUE), Email (TEXT UNIQUE), PasswordHash (TEXT), RegistrationDate (DATETIME).",
          "Purpose of `Users` table: Manages user login, identifies individuals, and stores core contact info.",
          "Relationships of `Users`: One-to-many with `Orders` (UserID in `Orders` references UserID in `Users`)."
        ]
      },
      {
        name: "Products",
        summary: "Lists all available products with descriptions, pricing, and stock.",
        details: [
          "Table `Products` has columns: ProductID (PRIMARY KEY, INTEGER), ProductName (TEXT), Description (TEXT), CategoryID (FOREIGN KEY to Categories.CategoryID, INTEGER).",
          "Table `Products` also has columns: Price (DECIMAL), StockQuantity (INTEGER), CreatedDate (DATETIME), LastUpdatedDate (DATETIME).",
          "Table `Products` also has columns: ImageURL (TEXT), Weight (DECIMAL), Dimensions (TEXT), ProductStatus (TEXT).",
          "Relationships of `Products`: One-to-many with `OrderItems` (ProductID in `OrderItems` references ProductID in `Products`)."
        ]
      },
      {
        name: "Orders",
        summary: "Records customer purchase transactions.",
        details: [
          "Table `Orders` has columns: OrderID (PRIMARY KEY, INTEGER), UserID (FOREIGN KEY to Users.UserID, INTEGER), OrderDate (DATETIME), TotalAmount (DECIMAL).",
          "Purpose of `Orders` table: Tracks individual customer purchases and their aggregated cost.",
          "Relationships of `Orders`: One-to-many with `OrderItems` (OrderID in `OrderItems` references OrderID in `Orders`)."
        ]
      },
      {
        name: "OrderItems",
        summary: "Details each item within a specific customer order.",
        details: [
          "Table `OrderItems` has columns: OrderItemID (PRIMARY KEY, INTEGER), OrderID (FOREIGN KEY to Orders.OrderID, INTEGER), ProductID (FOREIGN KEY to Products.ProductID, INTEGER).",
          "Table `OrderItems` also has columns: Quantity (INTEGER), UnitPriceAtPurchase (DECIMAL), SubtotalItemAmount (DECIMAL).",
          "Purpose of `OrderItems` table: Breaks down an order into its constituent products and quantities."
        ]
      },
      {
        name: "Categories",
        summary: "Classifies products into various categories.",
        details: [
          "Table `Categories` has columns: CategoryID (PRIMARY KEY, INTEGER), CategoryName (TEXT UNIQUE), CategoryDescription (TEXT).",
          "Purpose of `Categories` table: Helps organize products for easier browsing and filtering.",
          "Relationships of `Categories`: One-to-many with `Products` (CategoryID in `Products` references CategoryID in `Categories`)."
        ]
      }
    ];
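    // The comment above notes this data might come from a file; a minimal
    // sketch, assuming a sibling "schema.json" (hypothetical path) with the
    // same { name, summary, details } shape, would be:
    //   const rawSqlSchema = await (await fetch("./schema.json")).json();
    // (Top-level await is available here because this is a module script.)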

    // --- Helper Functions ---

    // Callback for reporting WebLLM initialization progress.
    function updateEngineInitProgressCallback(report) {
      console.log("WebLLM Init:", report.progress, report.text);
      downloadStatus.textContent = report.text;
    }

    // Create the shared WebLLM engine; initializeModels() loads the actual
    // model weights into it via engine.reload() below.
    const engine = new webllm.MLCEngine({
      initProgressCallback: updateEngineInitProgressCallback,
    });

    // Helper function to append messages to the chat box
    function appendMessage(message, isStreaming = false) {
      const messageContainer = document.createElement("div");
      messageContainer.classList.add("message-container", message.role);

      // Only create a message bubble for user and assistant messages
      if (message.role === "user" || message.role === "assistant") {
        const messageBubble = document.createElement("div");
        messageBubble.classList.add("message-bubble");
        messageBubble.textContent = message.content;
        messageContainer.appendChild(messageBubble);
      } else {
        // For system messages, just set the text content directly on the container
        messageContainer.textContent = message.content;
      }

      chatBox.appendChild(messageContainer);
      chatBox.scrollTop = chatBox.scrollHeight; // Scroll to bottom

      if (isStreaming && message.role === "assistant") {
        currentAssistantMessageElement = messageContainer.querySelector(".message-bubble");
      }
    }

    // Helper function to update the content of the last assistant message (for streaming)
    function updateLastAssistantMessage(newContent) {
      if (currentAssistantMessageElement) {
        currentAssistantMessageElement.textContent = newContent;
        chatBox.scrollTop = chatBox.scrollHeight; // Scroll to bottom
      }
    }

    // Cosine similarity function for the RAG lookup
    function cosineSimilarity(vec1, vec2) {
      if (vec1.length !== vec2.length) {
        return 0;
      }
      let dotProduct = 0;
      let magnitude1 = 0;
      let magnitude2 = 0;
      for (let i = 0; i < vec1.length; i++) {
        dotProduct += vec1[i] * vec2[i];
        magnitude1 += vec1[i] * vec1[i];
        magnitude2 += vec2[i] * vec2[i];
      }
      magnitude1 = Math.sqrt(magnitude1);
      magnitude2 = Math.sqrt(magnitude2);
      if (magnitude1 === 0 || magnitude2 === 0) {
        return 0;
      }
      return dotProduct / (magnitude1 * magnitude2);
    }
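    // Since the embedder is always called with { normalize: true }, both
    // vectors are unit-length, so cosine(a, b) = (a . b) / (|a| * |b|)
    // reduces to a plain dot product; the full formula above is kept so the
    // helper also stays correct for non-normalized inputs.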

    // --- RAG Lookup Logic ---
    async function performRagLookup(query) {
      if (!embedder || miniTableIndexEmbeddings.length === 0 || detailedSchemaEmbeddings.length === 0) {
        console.warn("Embedding model or knowledge base not ready for RAG lookup.");
        return null;
      }

      try {
        // Stage 1: Embed the user query and identify relevant tables from the mini-index
        const queryEmbeddingOutput = await embedder(query, { pooling: 'mean', normalize: true });
        const queryEmbedding = queryEmbeddingOutput.data;

        let tableSimilarities = [];
        for (const tableIndex of miniTableIndexEmbeddings) {
          const score = cosineSimilarity(queryEmbedding, tableIndex.embedding);
          tableSimilarities.push({ tableId: tableIndex.tableId, score: score });
        }

        tableSimilarities.sort((a, b) => b.score - a.score);
        const topRelevantTableIds = tableSimilarities.filter(s => s.score > 0.5).slice(0, 3).map(s => s.tableId); // Top 3 tables above a minimum score

        if (topRelevantTableIds.length === 0) {
          console.log("No highly relevant tables identified for query:", query);
          return null;
        }
        console.log("Identified relevant tables for RAG:", topRelevantTableIds);

        // Stage 2: Filter detailed chunks down to the relevant tables and re-rank
        const filteredDetailedChunks = detailedSchemaEmbeddings.filter(chunk =>
          topRelevantTableIds.includes(chunk.tableId)
        );

        let chunkSimilarities = [];
        for (const chunk of filteredDetailedChunks) {
          const score = cosineSimilarity(queryEmbedding, chunk.embedding);
          chunkSimilarities.push({ chunk: chunk.text, score: score });
        }

        chunkSimilarities.sort((a, b) => b.score - a.score);

        // Consolidate context: take the top N most relevant detailed chunks
        const maxChunksToInclude = 5; // Limit the number of chunks to manage the context window
        const contextChunks = chunkSimilarities.filter(s => s.score > 0.4).slice(0, maxChunksToInclude).map(s => s.chunk); // Filter by score again

        if (contextChunks.length > 0) {
          return contextChunks.join("\n\n---\n\n");
        } else {
          return null; // No relevant chunks found after filtering
        }

      } catch (error) {
        console.error("Error during RAG lookup:", error);
        return null;
      }
    }
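    // Illustrative call (assuming both models are loaded; not executed here):
    //   const ctx = await performRagLookup("columns in the Orders table");
    //   // -> "Table `Orders` has columns: OrderID (PRIMARY KEY, INTEGER), ..."
    // The 0.5/0.4 similarity thresholds and the top-3 table / top-5 chunk
    // cut-offs above are heuristic starting points, not tuned values.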

    // --- Model Initialization ---

    async function initializeModels() {
      downloadStatus.classList.remove("hidden");
      downloadStatus.textContent = "Loading WebLLM and Embedding Models...";

      // 1. Load WebLLM (main chat model)
      let selectedModel = null;
      const preferredModelPattern = "TinyLlama"; // Try TinyLlama first
      const availableModels = webllm.prebuiltAppConfig.model_list;

      // TinyLlama's prebuilt model IDs use "Chat" rather than "Instruct",
      // so accept either suffix; requiring "Instruct" alone would never match.
      const suitableModels = availableModels.filter(m =>
        m.model_id.toLowerCase().includes(preferredModelPattern.toLowerCase()) &&
        (m.model_id.includes("q4f16_1-MLC") || m.model_id.includes("q4f32_1-MLC")) &&
        (m.model_id.includes("Instruct") || m.model_id.includes("Chat"))
      );

      if (suitableModels.length > 0) {
        selectedModel = suitableModels[0].model_id;
        console.log(`Found preferred chat model: ${selectedModel}`);
      } else {
        const fallbackModels = [
          "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
          "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
          "gemma-2b-it-q4f16_1-MLC",
          "Phi-3.5-mini-instruct-q4f16_1-MLC",
        ];
        for (const fbModelId of fallbackModels) {
          const foundFbModel = availableModels.find(m => m.model_id === fbModelId);
          if (foundFbModel) {
            selectedModel = foundFbModel.model_id;
            console.log(`Falling back to chat model: ${selectedModel}`);
            break;
          }
        }
      }

      if (!selectedModel) {
        downloadStatus.textContent = "Error: No suitable chat model found.";
        console.error("No suitable chat model found in available models.");
        return;
      }

      try {
        const config = { temperature: 0.7, top_p: 0.9 };
        await engine.reload(selectedModel, config);
        downloadStatus.textContent = `Chat Model '${selectedModel}' loaded.`;
        console.log("WebLLM engine initialized successfully.");

        // 2. Load the embedding model and embed the knowledge base
        downloadStatus.textContent = "Loading Embedding Model and indexing schema...";
        embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
        console.log("Embedding model loaded.");

        for (const table of rawSqlSchema) {
          // Embed the mini-index summary
          const summaryOutput = await embedder(table.summary, { pooling: 'mean', normalize: true });
          miniTableIndexEmbeddings.push({
            tableId: table.name,
            text: table.summary,
            embedding: summaryOutput.data
          });

          // Embed the detailed chunks
          for (let i = 0; i < table.details.length; i++) {
            const chunkText = table.details[i];
            const chunkOutput = await embedder(chunkText, { pooling: 'mean', normalize: true });
            detailedSchemaEmbeddings.push({
              tableId: table.name,
              chunkId: `${table.name}_chunk_${i}`,
              text: chunkText,
              embedding: chunkOutput.data
            });
          }
        }
        downloadStatus.textContent = "All models loaded and schema indexed. Ready to chat!";
        sendButton.disabled = false;
        userInput.disabled = false;
        userInput.setAttribute("placeholder", "Type a message...");
        console.log("Knowledge bases (mini-index and detailed) embedded.");
        appendMessage({ role: "system", content: "AI: I'm ready! Ask me anything about the SQL database schema (Users, Products, Orders, OrderItems, Categories)." });

      } catch (error) {
        downloadStatus.textContent = `Error loading models: ${error.message}`;
        console.error("Error initializing models or RAG:", error);
      }
    }
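    // The index here is only ~20 short chunks, so embedding takes seconds; a
    // larger schema could persist the vectors instead of re-embedding on every
    // page load. Illustrative sketch (hypothetical storage key, not wired in):
    //   localStorage.setItem("schemaEmbeddings", JSON.stringify(
    //     detailedSchemaEmbeddings.map(c => ({ ...c, embedding: Array.from(c.embedding) }))));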

    // Function to handle sending a message - MODIFIED FOR LLM-DRIVEN RAG
    async function onMessageSend() {
      const input = userInput.value.trim();
      if (input.length === 0) {
        return;
      }

      // Add the user message to the UI
      const userMessage = { content: input, role: "user" };
      messages.push(userMessage); // Add to conversation history
      appendMessage(userMessage);

      userInput.value = "";
      sendButton.disabled = true;
      userInput.setAttribute("placeholder", "Thinking and possibly looking up schema...");

      // Temporarily append a placeholder for the AI response
      const aiMessagePlaceholder = { content: "typing...", role: "assistant" };
      appendMessage(aiMessagePlaceholder, true); // Mark as a streaming message for later updates

      let fullAssistantResponse = "";
      chatStats.classList.add("hidden");

      try {
        // --- FIRST LLM CALL: Decide whether to look up schema info ---
        // Send the current conversation history (including the initial system message and the user's new query)
        const initialCompletion = await engine.chat.completions.create({
          messages: messages, // Send the full current history
          stream: false, // We need the full response to parse JSON
          temperature: 0.7, // Ensure these are set for the model's behavior
          top_p: 0.9,
        });

        const llmFirstResponseContent = initialCompletion.choices?.[0]?.message?.content || "";
        console.log("LLM's first response (raw):", llmFirstResponseContent);

        let parsedAction = null;
        try {
          // The system prompt asks for the action wrapped in a ```json fence,
          // so strip any fences before parsing; a raw JSON.parse of a fenced
          // reply would always fail.
          const jsonText = llmFirstResponseContent
            .replace(/^\s*```(?:json)?\s*/i, "")
            .replace(/\s*```\s*$/, "")
            .trim();
          parsedAction = JSON.parse(jsonText);
        } catch (e) {
          // Not valid JSON, so it's a direct answer from the LLM or an error
          console.log("LLM's first response was not a JSON action. Treating as direct answer.");
        }

        let finalResponseContent = "";

        if (parsedAction && parsedAction.action === "lookup_schema_info" && parsedAction.query) {
          // The LLM requested a lookup
          updateLastAssistantMessage("🔎 Searching schema for: " + parsedAction.query); // Show the lookup intent in the UI

          // Append the LLM's lookup request to the history (important context for the next turn)
          messages.push({ role: "assistant", content: llmFirstResponseContent });

          // Stages 1 & 2: Perform the RAG lookup based on the LLM's query
          const retrievedContext = await performRagLookup(parsedAction.query);

          if (retrievedContext) {
            // Prepare a "tool output" message for the LLM.
            // It is added as a 'user' message to simulate handing the tool's output back to the LLM.
            const toolOutputMessage = `Here is the requested schema information:\n\`\`\`\n${retrievedContext}\n\`\`\`\nPlease use this information to answer the user's original question: "${input}"`;
            messages.push({ role: "user", content: toolOutputMessage });

            // --- SECOND LLM CALL: Answer with the augmented context ---
            updateLastAssistantMessage("🧠 Processing with retrieved info..."); // Update the UI for the second LLM call
            const finalCompletion = await engine.chat.completions.create({
              messages: messages, // Send *all* history, including the lookup request and tool output
              stream: true, // Stream this final response
              temperature: 0.7,
              top_p: 0.9,
            });

            for await (const chunk of finalCompletion) {
              const curDelta = chunk.choices?.[0]?.delta?.content;
              if (curDelta) {
                fullAssistantResponse += curDelta;
                updateLastAssistantMessage(fullAssistantResponse);
              }
            }
            finalResponseContent = fullAssistantResponse; // Store the final streamed response
          } else {
            // If no relevant context was found, inform the user
            finalResponseContent = "I couldn't find specific relevant schema information for your request: \"" + parsedAction.query + "\". Please try rephrasing.";
            updateLastAssistantMessage(finalResponseContent);
          }
        } else {
          // The LLM gave a direct answer (or something unparseable), so use it as the final response
          finalResponseContent = llmFirstResponseContent;
          updateLastAssistantMessage(finalResponseContent); // Display the direct answer
        }

        // Add the final assistant response to the chat history
        messages.push({ content: finalResponseContent, role: "assistant" });

        // Display performance stats
        const usageText = await engine.runtimeStatsText();
        chatStats.classList.remove("hidden");
        chatStats.textContent = usageText;

      } catch (error) {
        updateLastAssistantMessage(`Error: ${error.message}`);
        console.error("Error during LLM inference or RAG:", error);
      } finally {
        sendButton.disabled = false;
        userInput.disabled = false;
        userInput.setAttribute("placeholder", "Type a message...");
        currentAssistantMessageElement = null; // Clear the reference
      }
    }
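    // Note: this flow performs at most one lookup round per user turn. If the
    // model responded to the second call with another JSON action, that JSON
    // would be shown verbatim; looping the parse/lookup steps above would lift
    // that limit at the cost of extra inference passes.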

    // Event Listeners
    sendButton.addEventListener("click", onMessageSend);
    userInput.addEventListener("keydown", (event) => {
      if (event.key === "Enter" && !sendButton.disabled) {
        onMessageSend();
      }
    });

    // Initialize all models (WebLLM and the embedding model) when the page loads
    document.addEventListener("DOMContentLoaded", initializeModels);
  </script>
</body>
</html>