added embedding onnx

Browse files

Files changed (10) hide show

README.md +46 -3
config.json +25 -0
onnx/model.onnx +3 -0
onnx/model_fp16.onnx +3 -0
onnx/model_int8.onnx +3 -0
onnx/model_quantized.onnx +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +15 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,46 @@
----
-license: apache-2.0
----

+---
+base_model: sentence-transformers/all-MiniLM-L6-v2
+library_name: transformers.js
+license: apache-2.0
+---
+This repository contains https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 with ONNX weights, so that the model is compatible with Transformers.js.
+## Usage (Transformers.js)
+If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
+```bash
+npm i @huggingface/transformers
+```
+You can then use the model to compute embeddings like this:
+```js
+import { pipeline } from '@huggingface/transformers';
+// Create a feature-extraction pipeline
+const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+// Compute sentence embeddings
+const sentences = ['This is an example sentence', 'Each sentence is converted'];
+const output = await extractor(sentences, { pooling: 'mean', normalize: true });
+console.log(output);
+// Tensor {
+//   dims: [ 2, 384 ],
+//   type: 'float32',
+//   data: Float32Array(768) [ 0.04592696577310562, 0.07328180968761444, ... ],
+//   size: 768
+// }
+```
+You can convert this Tensor to a nested JavaScript array using `.tolist()`:
+```js
+console.log(output.tolist());
+// [
+//   [ 0.04592696577310562, 0.07328180968761444, 0.05400655046105385, ... ],
+//   [ 0.08188057690858841, 0.10760223120450974, -0.013241755776107311, ... ]
+// ]
+```
+Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.29.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

onnx/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:759c3cd2b7fe7e93933ad23c4c9181b7396442a2ed746ec7c1d46192c469c46e
+size 90387606

onnx/model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cdb5e58291813b6d6e248ed69010100246821a367fa17b1b81ae9483744533d
+size 45297825

onnx/model_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afdb6f1a0e45b715d0bb9b11772f032c399babd23bfc31fed1c170afc848bdb1
+size 22972370

onnx/model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afdb6f1a0e45b715d0bb9b11772f032c399babd23bfc31fed1c170afc848bdb1
+size 22972370

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff