Spaces:
Paused
Paused
| import pytest | |
| from typing import Any | |
| from huggingface_hub import snapshot_download | |
| from langchain.embeddings import HuggingFaceInstructEmbeddings | |
| from langchain.vectorstores import FAISS | |
# Runs at import time: downloads the `*.faiss` and `*.pkl` files of the
# 'KonradSzafer/index' dataset repo into the local `index/` directory so the
# index is on disk before any test in this module executes.
snapshot_download(
    repo_id="KonradSzafer/index",
    repo_type="dataset",
    allow_patterns=["*.faiss", "*.pkl"],
    local_dir="index/",
)
@pytest.fixture(scope="module")
def embedding_model() -> HuggingFaceInstructEmbeddings:
    """Provide the instructor embedding model used when loading/querying the index.

    Registered as a pytest fixture so that tests (and other fixtures) can
    request it simply by naming ``embedding_model`` as a parameter. Without
    the decorator pytest reports "fixture 'embedding_model' not found".

    Module scope makes pytest construct the model once per test module
    instead of once per requesting test.

    Returns:
        A ``HuggingFaceInstructEmbeddings`` configured for the
        ``hkunlp/instructor-large`` model with the documentation-specific
        embed and query instructions below.
    """
    return HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-large",
        embed_instruction="Represent the Hugging Face library documentation",
        query_instruction="Query the most relevant piece of information from the Hugging Face documentation",
    )
@pytest.fixture(scope="module")
def index_path() -> str:
    """Provide the directory from which the FAISS index is loaded.

    Registered as a pytest fixture so tests can request it by naming
    ``index_path`` as a parameter. It is module-scoped because pytest only
    lets a fixture depend on fixtures of the same or a broader scope, so a
    function-scoped path could not be used by module-scoped fixtures.

    Returns:
        The relative directory ``"index/"``.
    """
    return "index/"
@pytest.fixture(scope="module")
def index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str) -> FAISS:
    """Provide the FAISS vector store loaded from ``index_path``.

    Registered as a pytest fixture so tests can request the loaded index by
    naming ``index`` as a parameter. Module scope means the index is read
    from disk once per test module rather than once per test.

    NOTE: pytest requires every fixture requested here to have module scope
    or broader; a function-scoped dependency raises a ScopeMismatch error.

    Args:
        embedding_model: Embeddings object handed to ``FAISS.load_local``.
        index_path: Directory containing the saved index files.

    Returns:
        The object returned by ``FAISS.load_local``.
    """
    return FAISS.load_local(index_path, embedding_model)
@pytest.fixture
def query() -> str:
    """Provide the sample question used for similarity searches in the tests.

    Registered as a pytest fixture so tests can request it by naming
    ``query`` as a parameter. Without the decorator pytest reports
    "fixture 'query' not found".

    Returns:
        The fixed query string ``"How to use the tokenizer?"``.
    """
    return "How to use the tokenizer?"
def test_load_index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str):
    """Check that ``FAISS.load_local`` yields an object for the downloaded index."""
    # Named `loaded` rather than `index` so the local does not share a name
    # with the `index` fixture function defined in this module.
    loaded = FAISS.load_local(index_path, embedding_model)
    assert loaded is not None, "Failed to load index"
def test_index_page_content(index, query: str):
    """Check that the first search hit exposes its ``page_content`` as a string."""
    # Three results are requested, but only the first one is inspected.
    top_hit = index.similarity_search(query=query, k=3)[0]
    assert isinstance(top_hit.page_content, str)
def test_index_metadata(index, query):
    """Check that the first search hit carries a string ``'source'`` metadata entry."""
    # Three results are requested, but only the first one is inspected.
    hits = index.similarity_search(query=query, k=3)
    source = hits[0].metadata['source']
    assert isinstance(source, str)