Spaces:
Runtime error
Runtime error
matt HOFFNER
committed on
Commit
·
73a1dae
1
Parent(s):
d072c37
add docHandle
Browse files- package-lock.json +38 -4
- package.json +2 -0
- src/pages/api/docHandle.ts +57 -0
- src/utils/file-handler.ts +75 -0
- src/utils/index.ts +1 -0
package-lock.json
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"eslint": "8.40.0",
|
| 19 |
"eslint-config-next": "13.4.2",
|
|
|
|
| 20 |
"langchain": "^0.0.90",
|
| 21 |
"next": "13.4.2",
|
| 22 |
"react": "18.2.0",
|
|
@@ -25,6 +26,7 @@
|
|
| 25 |
"uuid": "^9.0.0"
|
| 26 |
},
|
| 27 |
"devDependencies": {
|
|
|
|
| 28 |
"@types/uuid": "^9.0.1"
|
| 29 |
}
|
| 30 |
},
|
|
@@ -1354,11 +1356,30 @@
|
|
| 1354 |
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",
|
| 1355 |
"integrity": "sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA=="
|
| 1356 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1357 |
"node_modules/@types/json5": {
|
| 1358 |
"version": "0.0.29",
|
| 1359 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
| 1360 |
"integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
|
| 1361 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1362 |
"node_modules/@types/long": {
|
| 1363 |
"version": "4.0.2",
|
| 1364 |
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
|
@@ -3144,16 +3165,16 @@
|
|
| 3144 |
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
| 3145 |
},
|
| 3146 |
"node_modules/fs-extra": {
|
| 3147 |
-
"version": "
|
| 3148 |
-
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-
|
| 3149 |
-
"integrity": "sha512-
|
| 3150 |
"dependencies": {
|
| 3151 |
"graceful-fs": "^4.2.0",
|
| 3152 |
"jsonfile": "^6.0.1",
|
| 3153 |
"universalify": "^2.0.0"
|
| 3154 |
},
|
| 3155 |
"engines": {
|
| 3156 |
-
"node": ">=
|
| 3157 |
}
|
| 3158 |
},
|
| 3159 |
"node_modules/fs.realpath": {
|
|
@@ -5422,6 +5443,19 @@
|
|
| 5422 |
"node": ">= 8.0.0"
|
| 5423 |
}
|
| 5424 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5425 |
"node_modules/run-applescript": {
|
| 5426 |
"version": "5.0.0",
|
| 5427 |
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-5.0.0.tgz",
|
|
|
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"eslint": "8.40.0",
|
| 19 |
"eslint-config-next": "13.4.2",
|
| 20 |
+
"fs-extra": "^11.1.1",
|
| 21 |
"langchain": "^0.0.90",
|
| 22 |
"next": "13.4.2",
|
| 23 |
"react": "18.2.0",
|
|
|
|
| 26 |
"uuid": "^9.0.0"
|
| 27 |
},
|
| 28 |
"devDependencies": {
|
| 29 |
+
"@types/fs-extra": "^11.0.1",
|
| 30 |
"@types/uuid": "^9.0.1"
|
| 31 |
}
|
| 32 |
},
|
|
|
|
| 1356 |
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",
|
| 1357 |
"integrity": "sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA=="
|
| 1358 |
},
|
| 1359 |
+
"node_modules/@types/fs-extra": {
|
| 1360 |
+
"version": "11.0.1",
|
| 1361 |
+
"resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.1.tgz",
|
| 1362 |
+
"integrity": "sha512-MxObHvNl4A69ofaTRU8DFqvgzzv8s9yRtaPPm5gud9HDNvpB3GPQFvNuTWAI59B9huVGV5jXYJwbCsmBsOGYWA==",
|
| 1363 |
+
"dev": true,
|
| 1364 |
+
"dependencies": {
|
| 1365 |
+
"@types/jsonfile": "*",
|
| 1366 |
+
"@types/node": "*"
|
| 1367 |
+
}
|
| 1368 |
+
},
|
| 1369 |
"node_modules/@types/json5": {
|
| 1370 |
"version": "0.0.29",
|
| 1371 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
| 1372 |
"integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
|
| 1373 |
},
|
| 1374 |
+
"node_modules/@types/jsonfile": {
|
| 1375 |
+
"version": "6.1.1",
|
| 1376 |
+
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
|
| 1377 |
+
"integrity": "sha512-GSgiRCVeapDN+3pqA35IkQwasaCh/0YFH5dEF6S88iDvEn901DjOeH3/QPY+XYP1DFzDZPvIvfeEgk+7br5png==",
|
| 1378 |
+
"dev": true,
|
| 1379 |
+
"dependencies": {
|
| 1380 |
+
"@types/node": "*"
|
| 1381 |
+
}
|
| 1382 |
+
},
|
| 1383 |
"node_modules/@types/long": {
|
| 1384 |
"version": "4.0.2",
|
| 1385 |
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
|
|
|
| 3165 |
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
| 3166 |
},
|
| 3167 |
"node_modules/fs-extra": {
|
| 3168 |
+
"version": "11.1.1",
|
| 3169 |
+
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
|
| 3170 |
+
"integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
|
| 3171 |
"dependencies": {
|
| 3172 |
"graceful-fs": "^4.2.0",
|
| 3173 |
"jsonfile": "^6.0.1",
|
| 3174 |
"universalify": "^2.0.0"
|
| 3175 |
},
|
| 3176 |
"engines": {
|
| 3177 |
+
"node": ">=14.14"
|
| 3178 |
}
|
| 3179 |
},
|
| 3180 |
"node_modules/fs.realpath": {
|
|
|
|
| 5443 |
"node": ">= 8.0.0"
|
| 5444 |
}
|
| 5445 |
},
|
| 5446 |
+
"node_modules/rollup-plugin-typescript2/node_modules/fs-extra": {
|
| 5447 |
+
"version": "10.1.0",
|
| 5448 |
+
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
|
| 5449 |
+
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
|
| 5450 |
+
"dependencies": {
|
| 5451 |
+
"graceful-fs": "^4.2.0",
|
| 5452 |
+
"jsonfile": "^6.0.1",
|
| 5453 |
+
"universalify": "^2.0.0"
|
| 5454 |
+
},
|
| 5455 |
+
"engines": {
|
| 5456 |
+
"node": ">=12"
|
| 5457 |
+
}
|
| 5458 |
+
},
|
| 5459 |
"node_modules/run-applescript": {
|
| 5460 |
"version": "5.0.0",
|
| 5461 |
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-5.0.0.tgz",
|
package.json
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"eslint": "8.40.0",
|
| 19 |
"eslint-config-next": "13.4.2",
|
|
|
|
| 20 |
"langchain": "^0.0.90",
|
| 21 |
"next": "13.4.2",
|
| 22 |
"react": "18.2.0",
|
|
@@ -25,6 +26,7 @@
|
|
| 25 |
"uuid": "^9.0.0"
|
| 26 |
},
|
| 27 |
"devDependencies": {
|
|
|
|
| 28 |
"@types/uuid": "^9.0.1"
|
| 29 |
}
|
| 30 |
}
|
|
|
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"eslint": "8.40.0",
|
| 19 |
"eslint-config-next": "13.4.2",
|
| 20 |
+
"fs-extra": "^11.1.1",
|
| 21 |
"langchain": "^0.0.90",
|
| 22 |
"next": "13.4.2",
|
| 23 |
"react": "18.2.0",
|
|
|
|
| 26 |
"uuid": "^9.0.0"
|
| 27 |
},
|
| 28 |
"devDependencies": {
|
| 29 |
+
"@types/fs-extra": "^11.0.1",
|
| 30 |
"@types/uuid": "^9.0.1"
|
| 31 |
}
|
| 32 |
}
|
src/pages/api/docHandle.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
+
|
| 3 |
+
import {
|
| 4 |
+
readHNSWLibModelFromLocal,
|
| 5 |
+
storesDir,
|
| 6 |
+
vectorStoreToHNSWLibModel,
|
| 7 |
+
} from '@/utils/file-handler';
|
| 8 |
+
import fs from 'fs-extra';
|
| 9 |
+
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 10 |
+
import { HNSWLib } from 'langchain/vectorstores/hnswlib';
|
| 11 |
+
import { XenovaTransformersEmbeddings } from '../../embed/hf'
|
| 12 |
+
|
| 13 |
+
/**
 * Builds an HNSWLib vector store from a single piece of raw text.
 *
 * The text is split with a RecursiveCharacterTextSplitter (chunkSize 1000),
 * and the resulting documents are embedded with XenovaTransformersEmbeddings.
 * Both the documents and the finished store are logged to the console.
 *
 * @param text - Raw text to split and index.
 * @returns The vector store produced by `HNSWLib.fromDocuments`.
 */
async function handleDocs(text: string) {
  const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
  const documents = await splitter.createDocuments([text]);
  console.log(documents);

  const embeddings = new XenovaTransformersEmbeddings();
  const store = await HNSWLib.fromDocuments(documents, embeddings);
  console.log(store);

  return store;
}
|
| 23 |
+
|
| 24 |
+
/**
 * Extracts a non-empty `text` string from a request body.
 *
 * The body may arrive either as a raw JSON string or as an object that the
 * Next.js body parser has already decoded (it does so for JSON content types),
 * so both shapes are accepted.
 *
 * @param body - The value of `req.body`.
 * @returns The `text` field, or `undefined` when the body is malformed JSON,
 *          is not an object, or carries no non-empty string `text`.
 */
function extractText(body: unknown): string | undefined {
  let payload: unknown = body;

  if (typeof payload === 'string') {
    try {
      payload = JSON.parse(payload);
    } catch {
      // Malformed JSON is a client error, not a server crash.
      return undefined;
    }
  }

  if (typeof payload !== 'object' || payload === null) {
    return undefined;
  }

  const text = (payload as { text?: unknown }).text;
  return typeof text === 'string' && text.length > 0 ? text : undefined;
}

/**
 * API route that returns a serialized HNSWLib model for the posted `text`.
 *
 * Responds with 400 when the body has no usable `text` field. Otherwise, if
 * `storesDir` already exists, the model stored there is read and returned;
 * if not, a new vector store is built from `text`, saved, and returned.
 *
 * NOTE(review): when `storesDir` exists the stored model is returned no matter
 * what `text` was posted — confirm this caching behaviour is intended.
 *
 * @param req - Incoming request; the body must provide `{ text: string }`.
 * @param res - Response; receives `{ args, docstore, hnswlibIndex }` on success.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse,
) {
  const text = extractText(req.body);

  if (!text) {
    return res.status(400).json({ message: 'No text in the request' });
  }

  const exists = await fs.exists(storesDir);
  console.log(exists);

  if (exists) {
    console.log('read from ' + storesDir);
    const model = await readHNSWLibModelFromLocal();
    return res.status(200).send({
      ...model,
    });
  }

  const vectorStore = await handleDocs(text);
  const model = await vectorStoreToHNSWLibModel(vectorStore);
  res.status(200).send({
    ...model,
  });
}
|
| 52 |
+
|
| 53 |
+
// Next.js API route configuration for this endpoint.
export const config = {
|
| 54 |
+
api: {
|
| 55 |
+
bodyParser: true, // Keep the built-in body parser enabled (false would leave the body as a raw stream)
|
| 56 |
+
},
|
| 57 |
+
};
|
src/utils/file-handler.ts
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fs from 'fs-extra';
|
| 2 |
+
import type { OpenAIEmbeddings } from 'langchain/embeddings/openai';
|
| 3 |
+
import {
|
| 4 |
+
HNSWLib,
|
| 5 |
+
type HNSWLib as StoreTypeHNSWLib,
|
| 6 |
+
} from 'langchain/vectorstores/hnswlib';
|
| 7 |
+
import path from 'path';
|
| 8 |
+
|
| 9 |
+
const ifDev = process.env.NODE_ENV === 'development';

// In prod mode we are only allowed to write to /tmp/:
// https://vercel.com/guides/how-can-i-use-files-in-serverless-functions
/**
 * Directory that holds the persisted HNSWLib store files: the relative
 * `tmp/hnswlib-stores` in development, the absolute `/tmp/hnswlib-stores`
 * otherwise.
 */
export const storesDir = ifDev ? 'tmp/hnswlib-stores' : '/tmp/hnswlib-stores';

/**
 * Serialized form of an HNSWLib store: the contents of its three files.
 * `args` and `docstore` are read as UTF-8 text and `hnswlibIndex` as a hex
 * string (see readHNSWLibModelFromLocal).
 */
type HNSWLibModel = {
  args: string;
  docstore: string;
  hnswlibIndex: string;
};

/**
 * On-disk file name for each field of `HNSWLibModel`. Typing the map by
 * `keyof HNSWLibModel` makes the compiler reject a missing or misspelled key,
 * so the model type and the file list cannot drift apart.
 */
const HNSWLibModelFilesName: Readonly<Record<keyof HNSWLibModel, string>> = {
  args: 'args.json',
  docstore: 'docstore.json',
  hnswlibIndex: 'hnswlib.index',
};
|
| 25 |
+
|
| 26 |
+
// Looking forward to a better way to transform hnswlibStore <=> indexes.
/**
 * Rebuilds a vector store from a serialized model.
 *
 * The model is first written to `storesDir`, then loaded back from that
 * directory with `HNSWLib.load`.
 *
 * @param model - Serialized store contents to persist and load.
 * @param embeddings - Embeddings instance handed to `HNSWLib.load`.
 * @returns The loaded vector store.
 */
export async function HNSWLibModelToVectorStore(
  model: HNSWLibModel,
  embeddings: OpenAIEmbeddings,
) {
  await saveHNSWLibModelToLocal(model);
  return HNSWLib.load(storesDir, embeddings);
}
|
| 36 |
+
|
| 37 |
+
/**
 * Writes a serialized HNSWLib model into `storesDir`, one file per field.
 *
 * @param model - Serialized store; `hnswlibIndex` must be the hex string
 *                produced by readHNSWLibModelFromLocal.
 * @returns A promise that resolves once all three files are written and
 *          rejects if the directory cannot be created or any write fails.
 */
export async function saveHNSWLibModelToLocal(model: HNSWLibModel) {
  // Each field is written with the encoding readHNSWLibModelFromLocal reads it
  // with. Writing the hex string as plain text would corrupt the binary index.
  const encodings: Record<keyof HNSWLibModel, BufferEncoding> = {
    args: 'utf-8',
    docstore: 'utf-8',
    hnswlibIndex: 'hex',
  };

  // writeFile does not create missing parent directories.
  await fs.ensureDir(storesDir);

  const keys = Object.keys(HNSWLibModelFilesName) as (keyof HNSWLibModel)[];
  await Promise.all(
    keys.map((key) => {
      const fullPath = path.join(storesDir, HNSWLibModelFilesName[key]);
      // Log only the destination; the payload can be very large.
      console.log(fullPath);

      return fs.writeFile(fullPath, model[key], encodings[key]);
    }),
  );
}
|
| 53 |
+
|
| 54 |
+
/**
 * Serializes a vector store by saving it to `storesDir` and reading the
 * resulting files back.
 *
 * @param store - The HNSWLib vector store to persist.
 * @returns The serialized model read from `storesDir` after the save.
 */
export async function vectorStoreToHNSWLibModel(
  store: StoreTypeHNSWLib,
): Promise<HNSWLibModel> {
  await store.save(storesDir);
  const serialized = await readHNSWLibModelFromLocal();
  return serialized;
}
|
| 60 |
+
|
| 61 |
+
/**
 * Reads the three store files from `storesDir` concurrently.
 *
 * `args.json` and `docstore.json` are read as UTF-8 text; `hnswlib.index` is
 * read as a hex string.
 *
 * @returns The file contents as an `HNSWLibModel`.
 */
export async function readHNSWLibModelFromLocal(): Promise<HNSWLibModel> {
  const locate = (fileName: string) => path.join(storesDir, fileName);

  const pendingArgs = fs.readFile(locate(HNSWLibModelFilesName.args), 'utf-8');
  const pendingDocstore = fs.readFile(
    locate(HNSWLibModelFilesName.docstore),
    'utf-8',
  );
  const pendingIndex = fs.readFile(
    locate(HNSWLibModelFilesName.hnswlibIndex),
    'hex',
  );

  const [args, docstore, hnswlibIndex] = await Promise.all([
    pendingArgs,
    pendingDocstore,
    pendingIndex,
  ]);

  return { args, docstore, hnswlibIndex };
}
|
src/utils/index.ts
CHANGED
|
@@ -89,3 +89,4 @@ export function throttle<T extends (...args: any[]) => any>(
|
|
| 89 |
|
| 90 |
// Default temperature, parsed as a float from NEXT_PUBLIC_DEFAULT_TEMPERATURE;
// "1" is used when the variable is unset or empty.
// NOTE(review): a non-numeric value yields NaN — confirm callers tolerate that.
export const DEFAULT_TEMPERATURE =
|
| 91 |
parseFloat(process.env.NEXT_PUBLIC_DEFAULT_TEMPERATURE || "1");
|
|
|
|
|
|
| 89 |
|
| 90 |
// Default temperature, parsed as a float from NEXT_PUBLIC_DEFAULT_TEMPERATURE;
// "1" is used when the variable is unset or empty.
// NOTE(review): a non-numeric value yields NaN — confirm callers tolerate that.
export const DEFAULT_TEMPERATURE =
|
| 91 |
parseFloat(process.env.NEXT_PUBLIC_DEFAULT_TEMPERATURE || "1");
|
| 92 |
+
|