ybabakhin committed
Commit ff84320 · verified · 1 parent: 8850046

Integrate with Sentence Transformers (#7)


- Add Sentence Transformers configuration files (7c8af9e29d638aa324e20a96b7258267f9ee1c1c)
- Update the snippet slightly (d42246521432af1e55af2f7a4785d79755b32ba7)
- Add the expected output for the correct transformers version (aca0f8a967302f7f6989444130bef6ef27cca0e9)

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 4096,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
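
This pooling config selects masked mean pooling over the token embeddings (`"pooling_mode_mean_tokens": true`), with the instruction prompt tokens included in the average (`"include_prompt": true`). As a minimal sketch of what the Pooling module computes, assuming `(batch, seq_len, 4096)` last hidden states; the helper name is illustrative, not part of the library:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Masked mean pooling: average real-token embeddings, ignoring padding.

    token_embeddings: (batch, seq_len, 4096) last hidden states
    attention_mask:   (batch, seq_len), 1 for real tokens, 0 for padding
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)  # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)                   # (batch, 4096)
    counts = mask.sum(dim=1).clamp(min=1e-9)                        # avoid division by zero
    return summed / counts
```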
README.md CHANGED
@@ -3,13 +3,15 @@ license: other
  license_name: customized-nscl-v1
  license_link: LICENSE
  tags:
+ - transformers
  - text
  - sentence-similarity
  - feature-extraction
  - mteb
+ - mmteb
  language:
  - multilingual
- library_name: transformers
+ library_name: sentence-transformers
  ---

  # llama-embed-nemotron-8b
@@ -128,6 +130,43 @@ pip install transformers==4.51.0
  pip install flash-attn==2.6.3
  ```

+ You can use either Sentence Transformers, as shown here:
+
+ ```bash
+ pip install sentence-transformers
+ ```
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ attn_implementation = "eager"  # Or "flash_attention_2"
+ model = SentenceTransformer(
+     "nvidia/llama-embed-nemotron-8b",
+     trust_remote_code=True,
+     model_kwargs={"attn_implementation": attn_implementation, "torch_dtype": "float16"},
+     tokenizer_kwargs={"padding_side": "left"},
+ )
+
+ queries = [
+     "How do neural networks learn patterns from examples?"
+ ]
+ documents = [
+     "Deep learning models adjust their weights through backpropagation, using gradient descent to minimize error on training data and improve predictions over time.",
+     "Market prices are determined by the relationship between how much people want to buy a product and how much is available for sale, with scarcity driving prices up and abundance driving them down.",
+ ]
+
+ # NOTE: encode_query uses the "query" prompt automatically
+ query_embeddings = model.encode_query(queries)
+ document_embeddings = model.encode_document(documents)
+
+ scores = query_embeddings @ document_embeddings.T
+
+ print(scores.tolist())
+ # [[0.37646484375, 0.057891845703125]]
+ ```
+
+ Or Hugging Face Transformers, as shown here:
+
  ```python
  import torch
  import torch.nn.functional as F
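
If your sentence-transformers version predates `encode_query`/`encode_document`, the same behavior can be approximated with the generic `encode` API and the prompt names stored in `config_sentence_transformers.json` below. A sketch, reusing `model`, `queries`, and `documents` from the snippet above:

```python
# "query" prepends the stored instruction prefix; "document" is the empty prompt.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents, prompt_name="document")

# similarity() applies the configured similarity_fn_name ("cosine"); because the
# pipeline ends in a Normalize module, this matches the dot product used above.
scores = model.similarity(query_embeddings, document_embeddings)
print(scores.tolist())
```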
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "model_type": "SentenceTransformer",
+   "__version__": {
+     "sentence_transformers": "5.1.2",
+     "transformers": "4.57.1",
+     "pytorch": "2.8.0+cu128"
+   },
+   "prompts": {
+     "query": "Instruct: Given a question, retrieve passages that answer the question\nQuery: ",
+     "document": ""
+   },
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
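
The prompts above are plain string prefixes: `encode_query` prepends the `"query"` instruction to each input before tokenization, and documents are encoded as raw text (empty `"document"` prompt). The `prompts` constructor argument can override them at load time. A sketch; the alternative instruction text is illustrative, not from the model card:

```python
from sentence_transformers import SentenceTransformer

# Illustrative override: swap in a task-specific instruction at load time.
model = SentenceTransformer(
    "nvidia/llama-embed-nemotron-8b",
    trust_remote_code=True,
    prompts={
        "query": "Instruct: Given a web search query, retrieve relevant passages\nQuery: ",
        "document": "",
    },
)

# encode() with prompt_name="query" prepends the instruction before tokenization.
embeddings = model.encode(
    ["How do neural networks learn patterns from examples?"],
    prompt_name="query",
)
```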
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
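
modules.json declares the three-stage pipeline that `SentenceTransformer(...)` assembles from this repo: the Transformer backbone at the repo root, the mean-pooling module in `1_Pooling`, and a final L2 normalization (so the dot product equals cosine similarity). For illustration, a hand-built equivalent using the `sentence_transformers.models` API; loading the repo directly, as in the README snippet, is the intended path:

```python
from sentence_transformers import SentenceTransformer, models

# Stage 0: Transformer backbone, producing per-token embeddings.
transformer = models.Transformer(
    "nvidia/llama-embed-nemotron-8b",
    model_args={"trust_remote_code": True},
    tokenizer_args={"padding_side": "left"},
)
# Stage 1: masked mean pooling down to one 4096-dim vector per input.
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),
    pooling_mode="mean",
)
# Stage 2: L2-normalize the pooled vector.
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```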
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 131072,
+   "do_lower_case": false
+ }
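
The advertised `max_seq_length` exposes the backbone's full 131072-token context. Encoding near that limit is memory-hungry, and the limit can be lowered after loading. A small sketch; the 4096 cap is an arbitrary example, not a recommendation from the model card:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("nvidia/llama-embed-nemotron-8b", trust_remote_code=True)
print(model.max_seq_length)  # 131072, read from sentence_bert_config.json

model.max_seq_length = 4096  # inputs longer than this are truncated at tokenization
```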