from FlagEmbedding import FlagModel

model = FlagModel("openbmb/MiniCPM-Embedding-Light",
                  query_instruction_for_retrieval="Query: ",
                  pooling_method="mean",
                  trust_remote_code=True,
                  normalize_embeddings=True,
                  use_fp16=True)
# You can hack the __init__() method of the FlagEmbedding BaseEmbedder class to use flash_attention_2 for faster inference:
# self.model = AutoModel.from_pretrained(
#     model_name_or_path,
#     trust_remote_code=trust_remote_code,
#     cache_dir=cache_dir,
#     # torch_dtype=torch.float16,               # we need to add this line to use fp16
#     # attn_implementation="flash_attention_2",  # we need to add this line to use flash_attention_2
# )
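# Alternative (a hedged sketch, not from the model card): if you prefer not to edit
# FlagEmbedding, the same options can be passed to transformers directly. This loads
# only the raw backbone, so mean pooling and L2 normalization would then have to be
# applied manually.
# import torch
# from transformers import AutoModel
# hf_model = AutoModel.from_pretrained(
#     "openbmb/MiniCPM-Embedding-Light",
#     trust_remote_code=True,
#     torch_dtype=torch.float16,
#     attn_implementation="flash_attention_2",  # requires the flash-attn package
# )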
queries = ["中国的首都是哪里?"]  # "What is the capital of China?"
passages = ["beijing", "shanghai"]  # "北京", "上海"

embeddings_query = model.encode_queries(queries)
embeddings_doc = model.encode_corpus(passages)

scores = (embeddings_query @ embeddings_doc.T)
print(scores.tolist())  # [[0.40356746315956116, 0.36183440685272217]]
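# Because normalize_embeddings=True, the dot products above are cosine similarities.
# A small follow-up sketch (not from the model card): pick the best-matching passage
# for each query from the score matrix.
for qi, query in enumerate(queries):
    best = int(scores[qi].argmax())        # index of the highest-scoring passage
    print(f"{query} -> {passages[best]}")  # expected: 中国的首都是哪里? -> beijing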