support sentence-transformers
#4
by bwang0911 — opened
No description provided.
testing script:
import numpy as np
import numpy.testing as npt
from sentence_transformers import SentenceTransformer
from transformers import AutoModel

# Path A: encode via the SentenceTransformer wrapper.
# NOTE(review): requires network access (HF Hub download) and a CUDA GPU;
# trust_remote_code=True executes custom model code from the repo.
model = SentenceTransformer('bwang0911/jina-v3-test', trust_remote_code=True)
model = model.to('cuda').half()  # fp16 — limits comparison precision below
e = model.encode(['Hello world'])

# AutoModel is re-imported here (already imported above) and F appears
# unused in this script — candidates for cleanup, per the review comment.
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
def mean_pooling(model_output, attention_mask):
    """Mean-pool token embeddings, ignoring padding positions.

    model_output: sequence whose first element is the token-embedding
        tensor of shape (batch, seq_len, hidden).
    attention_mask: (batch, seq_len) tensor with 1 for real tokens,
        0 for padding.
    Returns a (batch, hidden) tensor: the mask-weighted mean of the
    token embeddings.
    """
    embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    summed = (embeddings * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts
# Path B: raw HF AutoModel + manual mean pooling, to cross-check Path A.
# NOTE(review): requires network access and a CUDA GPU.
tokenizer = AutoTokenizer.from_pretrained('bwang0911/jina-v3-test')
m2 = AutoModel.from_pretrained('bwang0911/jina-v3-test', trust_remote_code=True)
m2 = m2.to('cuda').half()
encoded_input = tokenizer(
    'Hello world', padding=True, truncation=True, return_tensors='pt'
).to('cuda')
with torch.no_grad():
    model_output = m2(**encoded_input)
    e2 = mean_pooling(model_output, encoded_input['attention_mask'])
e2 = e2.detach().cpu().numpy()
# fp16 carries only ~3 decimal digits of precision, so decimal=5 is too
# strict: the run captured below fails with a max abs diff of ~9.8e-4.
# Compare at 3 decimals instead, matching half-precision accuracy.
npt.assert_almost_equal(e, e2, 3)
Max absolute difference: 0.00097656
Max relative difference: 0.00048379
 x: array([[ 1.865  , -1.623  ,  2.912  , ...,  0.388  , -0.1466 , -0.02988]],
      dtype=float16)
 y: array([[ 1.86548, -1.62329,  2.91211, ...,  0.388  , -0.14662, -0.02988]],
      dtype=float32)
LGTM! I'd recommend comparing the embeddings with the HF `encode()` function as well, and also in a setup with num_examples > 1.
I will follow up with somewhat more intensive testing on more adapters, and remove the unneeded imports.
bwang0911 changed the pull request status to merged.