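<!-- Residue from the page this file was copied from (apparently a hosting status banner), kept verbatim: Spaces: Running Running -->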
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Train Model from PDFs</title>
<!--
  Library versions are pinned. An unpinned pdfjs-dist URL resolves to the
  latest release, and recent major versions no longer ship the classic
  build/pdf.min.js script that exposes the pdfjsLib global used below.
-->
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4"></script>
<script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.min.js"></script>
<script>
// pdf.js parses documents in a worker; point it at the worker build that
// matches the pinned library version.
pdfjsLib.GlobalWorkerOptions.workerSrc =
  'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
</script>
</head>
<body>
<!-- Navigation links belong in the body; flow content is not allowed inside <head>. -->
<a href="entrenament-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Entrenament PDF</a>
<a href="preguntar-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Preguntar PDF</a>
<br><br>
<h1>Train Model from PDFs</h1>
<input type="file" id="fileInput" accept="application/pdf,.pdf" multiple>
<button id="trainModel">Train Model</button>
<pre id="status"></pre>
<script>
/**
 * Extracts the text of every page of a PDF.
 *
 * @param {Blob} file - PDF to read; only its `arrayBuffer()` method is used.
 * @returns {Promise<string>} Text of all pages in page order. Within a page the
 *   text items are joined with single spaces, and every page is followed by
 *   one trailing space.
 * @throws Rejects with whatever error pdf.js raises when the document cannot
 *   be loaded or a page cannot be read.
 */
async function extractTextFromPDF(file) {
  const loadingTask = pdfjsLib.getDocument({ data: await file.arrayBuffer() });
  try {
    const pdf = await loadingTask.promise;
    const pageTexts = [];
    // pdf.js page numbers are 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      pageTexts.push(content.items.map((item) => item.str).join(' '));
    }
    return pageTexts.map((pageText) => `${pageText} `).join('');
  } finally {
    // Release the document and its worker resources even when loading or
    // reading fails; otherwise each processed file stays alive in memory.
    await loadingTask.destroy();
  }
}
/**
 * Builds a small dense binary classifier, trains it on the given samples for
 * 10 epochs and saves it to IndexedDB under `pdf-trained-model`. Progress and
 * the save outcome are written to the `#status` element.
 *
 * @param {Array<{input: number[], label: number}>} data - Training samples.
 *   Every `input` must have the same length as the first sample's `input`.
 * @returns {Promise<void>} Resolves once training and the save attempt finish.
 *   A failed save is reported in `#status` and the console, not thrown.
 * @throws {Error} If `data` is empty or the samples have differing input lengths.
 */
async function trainModel(data) {
  if (!Array.isArray(data) || data.length === 0) {
    throw new Error('trainModel requires at least one training sample.');
  }
  // The feature count comes from the sample's `input` array; `data[0]` itself
  // is an object and has no `length`.
  const featureCount = data[0].input.length;
  if (data.some((sample) => sample.input.length !== featureCount)) {
    throw new Error(`Every training sample must have exactly ${featureCount} input features.`);
  }

  const statusElement = document.getElementById('status');

  const model = tf.sequential();
  model.add(tf.layers.dense({
    units: 128,
    activation: 'relu',
    inputShape: [featureCount]
  }));
  model.add(tf.layers.dense({ units: 64, activation: 'relu' }));
  model.add(tf.layers.dense({ units: 1, activation: 'sigmoid' }));
  model.compile({
    optimizer: 'adam',
    loss: 'binaryCrossentropy',
    metrics: ['accuracy']
  });

  let inputs;
  let labels;
  try {
    inputs = tf.tensor2d(data.map((sample) => sample.input), [data.length, featureCount]);
    labels = tf.tensor1d(data.map((sample) => sample.label));

    statusElement.textContent = 'Training the model...';
    await model.fit(inputs, labels, {
      epochs: 10,
      callbacks: {
        onEpochEnd: (epoch, logs) => {
          // `epoch` is 0-based; report it 1-based in both places.
          console.log(`Epoch ${epoch + 1}: loss = ${logs.loss}`);
          statusElement.textContent = `Epoch ${epoch + 1}: Loss = ${logs.loss}`;
        }
      }
    });

    statusElement.textContent = 'Saving the model to IndexedDB...';
    try {
      await model.save('indexeddb://pdf-trained-model');
      statusElement.textContent = 'Model saved successfully in IndexedDB!';
    } catch (err) {
      statusElement.textContent = 'Error saving the model: ' + err.message;
      console.error('Error saving the model:', err);
    }
  } finally {
    // Tensors and model weights are not garbage-collected by TensorFlow.js;
    // free them explicitly so repeated runs do not accumulate memory.
    if (inputs) {
      inputs.dispose();
    }
    if (labels) {
      labels.dispose();
    }
    model.dispose();
  }
}
// Click handler for the "Train Model" button: turns every selected PDF into a
// fixed-length feature vector and trains the model on the result.
document.getElementById('trainModel').addEventListener('click', async (event) => {
  // Number of word-length features per PDF. Every sample must have exactly
  // this many values because the samples are stacked into one 2-D tensor.
  const FEATURE_COUNT = 10;

  const trainButton = event.currentTarget;
  const statusElement = document.getElementById('status');
  const files = document.getElementById('fileInput').files;
  if (!files.length) {
    statusElement.textContent = 'Please select PDF files to train the model.';
    return;
  }

  // Prevent a second run from starting while this one is still in progress.
  trainButton.disabled = true;
  try {
    const data = [];
    statusElement.textContent = 'Extracting text from PDFs...';
    for (const file of files) {
      const text = await extractTextFromPDF(file);
      // Example features: the lengths of the first FEATURE_COUNT words.
      // Empty strings produced by leading/trailing whitespace are dropped so
      // they do not show up as zero-length "words".
      const wordLengths = text
        .split(/\s+/)
        .filter((word) => word.length > 0)
        .map((word) => word.length)
        .slice(0, FEATURE_COUNT);
      // Zero-pad short documents so all samples have the same length.
      while (wordLengths.length < FEATURE_COUNT) {
        wordLengths.push(0);
      }
      data.push({
        input: wordLengths,
        label: 1 // Example label (adjust as needed for your use case)
      });
    }
    statusElement.textContent = 'Training the model...';
    await trainModel(data);
  } catch (err) {
    // Surface extraction/training failures instead of leaving a stale status
    // message and an unhandled promise rejection.
    statusElement.textContent = `Error: ${err.message}`;
    console.error('Error while training from PDFs:', err);
  } finally {
    trainButton.disabled = false;
  }
});
</script>
</body>
</html>