Spaces:
Runtime error
Runtime error
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| from typing import List | |
| import faiss | |
| import logging | |
| LOG = logging.getLogger(__name__) | |
def _read_shard_invlists(shard_fnames: List[str]) -> list:
    """Return the inverted lists of every shard index file, memory-mapped.

    Each shard's IVF index is told not to own its inverted lists, so the
    lists stay valid after the shard index objects created here go away.
    """
    shard_invlists = []
    for fname in shard_fnames:
        # IO_FLAG_MMAP avoids actually loading the data, thus the total
        # size of the inverted lists can exceed the available RAM.
        LOG.info("read %s", fname)
        shard_index = faiss.read_index(fname, faiss.IO_FLAG_MMAP)
        shard_ivf = faiss.extract_index_ivf(shard_index)
        shard_invlists.append(shard_ivf.invlists)
        # Avoid that the invlists get deallocated with the shard index.
        shard_ivf.own_invlists = False
    return shard_invlists


def merge_ondisk(
    trained_index: faiss.Index, shard_fnames: List[str], ivfdata_fname: str
) -> None:
    """Merge the inverted lists of several index shards into an on-disk index.

    The inverted lists of every index file in ``shard_fnames`` are merged
    into a new ``faiss.OnDiskInvertedLists`` stored in ``ivfdata_fname``,
    which then replaces the inverted lists of ``trained_index``.
    ``trained_index`` is modified in place; nothing is returned.

    Args:
        trained_index: Empty index (``ntotal == 0``) from which an IVF index
            can be extracted with ``faiss.extract_index_ivf``.
        shard_fnames: Paths of the index files whose inverted lists are
            merged. They are opened with ``faiss.IO_FLAG_MMAP``.
        ivfdata_fname: Path of the file holding the merged on-disk data.

    Raises:
        AssertionError: If ``trained_index`` is a ``faiss.IndexIVFPQR`` or
            is not empty.
    """
    # Explicit raises instead of `assert` so the checks still run under
    # `python -O`; exception type and messages are the same as before.
    if isinstance(trained_index, faiss.IndexIVFPQR):
        raise AssertionError(
            "IndexIVFPQR is not supported as an on disk index."
        )

    # Validate the output index before touching any shard: a rejected call
    # then neither maps the shard files nor clears their ownership flags.
    output_ivf = faiss.extract_index_ivf(trained_index)
    if trained_index.ntotal != 0:
        raise AssertionError("works only on empty index")

    # First load the inverted lists of all shards.
    shard_invlists = _read_shard_invlists(shard_fnames)

    # Prepare the output inverted lists. They are written to ivfdata_fname.
    merged_invlists = faiss.OnDiskInvertedLists(
        output_ivf.nlist, output_ivf.code_size, ivfdata_fname
    )

    # merge_from takes a raw pointer array plus a count, so collect the
    # shard lists in a SWIG vector first.
    ivf_vector = faiss.InvertedListsPtrVector()
    for shard_lists in shard_invlists:
        ivf_vector.push_back(shard_lists)

    LOG.info("merge %d inverted lists ", ivf_vector.size())
    ntotal = merged_invlists.merge_from(ivf_vector.data(), ivf_vector.size())

    # Now replace the inverted lists in the output index.
    trained_index.ntotal = output_ivf.ntotal = ntotal
    # The True flag hands ownership of the lists to the index, so the Python
    # proxy must give up its own ownership to avoid a double free.
    output_ivf.replace_invlists(merged_invlists, True)
    merged_invlists.this.disown()