当前位置: 首页>>代码示例>>Python>>正文


Python nmslib.init方法代码示例

本文整理汇总了Python中nmslib.init方法的典型用法代码示例。如果您正苦于以下问题:Python nmslib.init方法的具体用法?Python nmslib.init怎么用?Python nmslib.init使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nmslib的用法示例。


在下文中一共展示了nmslib.init方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: nearest_neighbors

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def nearest_neighbors(X, neighbors=16, threads=1):
    """Build a directed nearest-neighbour graph over the rows of X.

    An HNSW index in the 'cosinesimil' space is built from X, every row is
    queried against it for ``neighbors + 1`` hits, and the first hit of each
    query is discarded. The discarded hit is assumed to be the query point
    itself; the search is approximate, so that is not guaranteed.

    Args:
        X: feature matrix with one row per vertex; must expose ``shape[0]``.
        neighbors: number of neighbours kept per row after dropping the
            first hit.
        threads: thread count forwarded to the batch query.

    Returns:
        A directed ``igraph.Graph`` with ``X.shape[0]`` vertices and one edge
        ``i -> j`` for every retained hit ``j`` of row ``i``.
    """
    # initialize HNSW index on Cosine Similarity
    nn_index = nmslib.init(method='hnsw', space='cosinesimil')
    nn_index.addDataPointBatch(X)
    nn_index.createIndex({'post': 2}, print_progress=True)

    # get nearest neighbours (one extra, because the first hit is dropped)
    Xn = nn_index.knnQueryBatch(X, k=(neighbors+1), num_threads=threads)

    # extract graph edges
    sources = []
    targets = []
    for i, neigh in enumerate(Xn):
        hits = list(neigh[0][1:])
        # Emit exactly one source per target. Using a fixed count here would
        # make the two flat lists drift apart and attach targets to the
        # wrong source vertex once they are zipped together.
        sources += [i] * len(hits)
        targets += hits

    # construct igraph
    nn_graph = igraph.Graph(directed=True)
    nn_graph.add_vertices(X.shape[0])
    nn_graph.add_edges(list(zip(sources, targets)))

    return nn_graph
开发者ID:calico,项目名称:basenji,代码行数:24,代码来源:basenji_motifs_denovo.py

示例2: fit

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def fit(self, X):
        """Create the nmslib index for X, loading it from disk when available.

        The data points are always added to a fresh index. If the file named
        by ``self._index_name`` exists the index structure is loaded from it;
        otherwise it is built with ``self._index_param`` and, when
        ``self._save_index`` is set, written to that file. Query-time
        parameters are applied afterwards if any are configured.
        """
        if self._method_name == 'vptree':
            # vptree aborts the process with std::runtime_error ("The data
            # size is too small or the bucket size is too big") unless
            # <total # of records> is NOT less than <bucket size> * 1000,
            # so derive the bucket size from the number of records.
            bucket_size = min(int(X.shape[0] * 0.0005), 1000)
            self._index_param.append('bucketSize=%d' % bucket_size)

        self._index = index = nmslib.init(
            method=self._method_name, space=self._nmslib_metric)
        index.addDataPointBatch(X)

        if not os.path.exists(self._index_name):
            index.createIndex(self._index_param)
            if self._save_index:
                index.saveIndex(self._index_name)
        else:
            print('Loading index from file')
            index.loadIndex(self._index_name)

        if self._query_param is not None:
            index.setQueryTimeParams(self._query_param)
开发者ID:erikbern,项目名称:ann-benchmarks,代码行数:26,代码来源:nmslib.py

示例3: fit

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def fit(self, Ciu, show_progress=True):
        """Train the base model, then build the approximate nmslib indexes.

        After the parent class has been fitted on ``Ciu``, an index over the
        item factors is built for similar-item lookups (when
        ``self.approximate_similar_items`` is set) and a second index over
        the augmented item factors is built for recommendations (when
        ``self.approximate_recommend`` is set).
        """
        # Raise the nmslib logger threshold before importing it: the library
        # can be chatty on first import.
        logging.getLogger('nmslib').setLevel(logging.WARNING)
        import nmslib

        # train the model
        super(NMSLibAlternatingLeastSquares, self).fit(Ciu, show_progress)

        # index used by similar_items
        if self.approximate_similar_items:
            log.debug("Building nmslib similar items index")
            self.similar_items_index = similar_index = nmslib.init(
                method=self.method, space='cosinesimil')

            # Building a cosine index over zero-norm vectors is numerically
            # unstable, so those rows are simply left out of the index.
            norms = numpy.linalg.norm(self.item_factors, axis=1)
            all_ids = numpy.arange(self.item_factors.shape[0])
            zero_rows = all_ids[norms == 0]
            kept_ids = all_ids[norms != 0]
            nonzero_factors = numpy.delete(self.item_factors, zero_rows, axis=0)

            similar_index.addDataPointBatch(nonzero_factors, ids=kept_ids)
            similar_index.createIndex(self.index_params,
                                      print_progress=show_progress)
            similar_index.setQueryTimeParams(self.query_params)

        # separate index for the inner product, used by the recommend methods
        if self.approximate_recommend:
            log.debug("Building nmslib recommendation index")
            self.max_norm, augmented = augment_inner_product_matrix(
                self.item_factors)
            self.recommend_index = recommend_index = nmslib.init(
                method='hnsw', space='cosinesimil')
            recommend_index.addDataPointBatch(augmented)
            recommend_index.createIndex(self.index_params,
                                        print_progress=show_progress)
            recommend_index.setQueryTimeParams(self.query_params)
开发者ID:benfred,项目名称:implicit,代码行数:43,代码来源:approximate_als.py

示例4: __init__

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def __init__(self, M, efC, efS, num_neighbours, num_threads,
                 space='cosine'):
        """Create an empty HNSW index and remember the tuning parameters.

        ``space`` is a friendly name translated to the nmslib space name;
        only 'cosine' is known, anything else raises ``KeyError``. The
        remaining arguments are stored unchanged on the instance.
        """
        nmslib_space = {'cosine': 'cosinesimil'}[space]
        self.index = nmslib.init(space=nmslib_space, method='hnsw')
        self.M, self.efC, self.efS = M, efC, efS
        self.num_threads = num_threads
        self.num_neighbours = num_neighbours
开发者ID:kunaldahiya,项目名称:pyxclib,代码行数:12,代码来源:ann.py

示例5: knn_nmslib

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def knn_nmslib(feats, k, space='cosinesimil'):
    """Return the k nearest neighbours of every row of ``feats``.

    An HNSW index is built over ``feats`` in the given nmslib ``space`` and
    then queried with the same rows, using one thread per reported CPU.
    The raw result of ``knnQueryBatch`` is returned.
    """
    hnsw = nmslib.init(space=space, method='hnsw')
    hnsw.addDataPointBatch(feats)
    build_params = {'post': 2}
    hnsw.createIndex(build_params, print_progress=True)
    worker_count = multiprocessing.cpu_count()
    return hnsw.knnQueryBatch(feats, k=k, num_threads=worker_count)
开发者ID:XiaohangZhan,项目名称:cdp,代码行数:8,代码来源:knn.py

示例6: build_index

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def build_index(self,
                    data: np.ndarray):
        """Build a dense-vector nmslib index over ``data`` and store it.

        The index uses ``self.method`` and ``self.space``, is created with
        ``self._index_time_params`` and configured with
        ``self._query_time_params``. The query counter is reset to zero.
        """
        dense_index = nmslib.init(data_type=nmslib.DataType.DENSE_VECTOR,
                                  space=self.space, method=self.method)
        dense_index.addDataPointBatch(data)
        dense_index.createIndex(self._index_time_params, print_progress=False)
        dense_index.setQueryTimeParams(self._query_time_params)
        self.index, self.times_queried = dense_index, 0
开发者ID:uclnlp,项目名称:gntp,代码行数:11,代码来源:nms.py

示例7: load_approximate_nearest_neighbours_index

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def load_approximate_nearest_neighbours_index(
    linker_paths: LinkerPaths, ef_search: int = 200,
) -> FloatIndex:
    """
    Restore the approximate nearest neighbours index used by the entity linker.

    The sparse tf-idf vectors are loaded (as float32) and added to a fresh
    sparse cosine HNSW index, after which the saved index is loaded from disk.

    Parameters
    ----------
    linker_paths: LinkerPaths, required.
        Holds the paths to the data the entity linker needs; the
        ``tfidf_vectors`` and ``ann_index`` entries are read here.
    ef_search: int, optional (default = 200)
        Query-time speed/quality knob. The maximum is 2000; lowering it to
        roughly 100 makes queries about an order of magnitude faster for a
        small loss in quality.
    """
    tfidf_file = cached_path(linker_paths.tfidf_vectors)
    concept_alias_tfidfs = scipy.sparse.load_npz(tfidf_file).astype(numpy.float32)

    ann_index = nmslib.init(
        data_type=nmslib.DataType.SPARSE_VECTOR,
        space="cosinesimil_sparse",
        method="hnsw",
    )
    ann_index.addDataPointBatch(concept_alias_tfidfs)

    index_file = cached_path(linker_paths.ann_index)
    ann_index.loadIndex(index_file)
    ann_index.setQueryTimeParams({"efSearch": ef_search})

    return ann_index
开发者ID:allenai,项目名称:scispacy,代码行数:31,代码来源:candidate_generation.py

示例8: __init__

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def __init__(self,
                 rank,
                 fdim,
                 sample_num,
                 num_output,
                 bias=False,
                 interval=100,
                 start_iter=0,
                 midw='0',
                 midb='1'):
        """Store the sampler configuration and register the parameter matrices.

        A ``ParameterClient`` is created for ``rank`` and a
        ``[num_output, fdim]`` matrix is registered under ``midw``; when
        ``bias`` is true a ``[num_output, 1]`` matrix is registered under
        ``midb`` as well. The HNSW space settings and the iteration counters
        are initialised but no index is built here.
        """
        super(HNSWSampler, self).__init__()

        # problem dimensions
        self.rank, self.fdim = rank, fdim
        self.sample_num, self.num_output = sample_num, num_output
        self.full_cls = np.arange(self.num_output)

        # init param client
        self.client = ParameterClient(rank)
        self.midw, self.midb, self.is_bias = midw, midb, bias
        weight_shape = [self.num_output, self.fdim]
        self.client.add_matrix(self.midw, weight_shape)
        if self.is_bias:
            bias_shape = [self.num_output, 1]
            self.client.add_matrix(self.midb, bias_shape)

        # init hnsw
        self.space = 'cosinesimil'
        # higher ef leads to better accuracy, but slower search;
        # higher M leads to higher accuracy/run_time at fixed ef, but
        # consumes more memory
        self.space_params = {'ef': 100, 'M': 16}

        # rebuild schedule; both counters start at start_iter
        self.interval = interval
        self.start_iter = self.iter = self.test_iter = start_iter
开发者ID:yl-1993,项目名称:hfsoftmax,代码行数:36,代码来源:hnsw_sampler.py

示例9: _update_hf

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def _update_hf(self):
        """Rebuild ``self.hnsw`` from the current weight rows when due.

        Nothing happens unless ``self.iter`` is a multiple of
        ``self.interval`` or equals ``self.start_iter``. Otherwise the rows
        listed in ``self.full_cls`` are fetched from the parameter client
        and indexed in a fresh HNSW index.
        """
        on_schedule = self.iter % self.interval == 0
        if not (on_schedule or self.iter == self.start_iter):
            return

        weights = self.client.get_value_by_rows(self.midw, self.full_cls)
        self.hnsw = fresh_index = nmslib.init(method='hnsw',
                                              space=self.space,
                                              space_params=self.space_params)
        fresh_index.addDataPointBatch(weights)
        # `post` selects the postprocessing applied to the constructed graph:
        # 0 (the default) means none, 1 and 2 are the additional options,
        # with 2 meaning more postprocessing.
        build_params = {'post': 2}
        fresh_index.createIndex(build_params, print_progress=True)
开发者ID:yl-1993,项目名称:hfsoftmax,代码行数:16,代码来源:hnsw_sampler.py

示例10: tsne

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def tsne(x, n_components=2, perplexity=30.0, early_exaggeration=12.0,
         learning_rate=200.0, n_iter=1000, n_iter_without_progress=300,
         min_grad_norm=1e-07, metric="euclidean", init="random", verbose=0,
         random_state=None, method="barnes_hut", angle=0.5):
    """Embed ``x`` with ``sklearn.manifold.TSNE`` and return the embedding.

    Every keyword argument is forwarded unchanged to the ``TSNE``
    constructor; the return value is whatever ``fit_transform(x)`` yields.
    """
    tsne_options = dict(
        n_components=n_components,
        perplexity=perplexity,
        early_exaggeration=early_exaggeration,
        learning_rate=learning_rate,
        n_iter=n_iter,
        n_iter_without_progress=n_iter_without_progress,
        min_grad_norm=min_grad_norm,
        metric=metric,
        init=init,
        verbose=verbose,
        random_state=random_state,
        method=method,
        angle=angle,
    )
    embedder = sklearn.manifold.TSNE(**tsne_options)
    return embedder.fit_transform(x)
开发者ID:TaylorResearchLab,项目名称:scedar,代码行数:16,代码来源:sdm.py

示例11: _load_index

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def _load_index(self):
        """Create the three HNSW (l2) indexes and populate them from disk.

        Fresh dense-vector indexes are created for 'primary', 'secondary'
        and 'bitmap'. If the index file exists, each table is read from it
        with ``pd.read_hdf`` and handed to ``self._add_data``, whose result
        replaces the index and sets the matching ``*_c`` attribute.
        Otherwise the ``*_df`` attributes are set to None and the ``*_c``
        attributes to 0.
        """
        index_file = get_index_path(self.index_name)
        kinds = ('primary', 'secondary', 'bitmap')

        for kind in kinds:
            setattr(self, kind, nmslib.init(
                method='hnsw', space='l2',
                data_type=nmslib.DataType.DENSE_VECTOR))

        if not os.path.exists(index_file):
            # Nothing stored yet: no frames and zero counts.
            for kind in kinds:
                setattr(self, kind + '_df', None)
            for kind in kinds:
                setattr(self, kind + '_c', 0)
            return

        for kind in kinds:
            frame = pd.read_hdf(index_file, kind)
            setattr(self, kind + '_df', frame)
            filled_index, count = self._add_data(getattr(self, kind), frame)
            setattr(self, kind, filled_index)
            setattr(self, kind + '_c', count)
开发者ID:rikenmehta03,项目名称:imsearch,代码行数:30,代码来源:nmslib.py

示例12: search_hnsw_jaccard_topk

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def search_hnsw_jaccard_topk(index_data, query_data, index_params, k):
    """Run top-k Jaccard searches against an HNSW index and time them.

    Args:
        index_data: pair ``(index_sets, index_keys)`` of the sets to index
            and their keys.
        query_data: pair ``(query_sets, query_keys)`` of the sets to look up
            and their keys.
        index_params: parameters passed to ``createIndex``; its
            "efConstruction" entry is also used as the query-time "efSearch".
        k: number of neighbours requested per query.

    Returns:
        ``(results, times)`` where ``results`` holds one
        ``(query_key, [[index_key, jaccard], ...])`` entry per query, sorted
        by descending Jaccard value, and ``times`` the per-query durations
        in seconds.
    """
    index_sets, index_keys = index_data
    query_sets, query_keys = query_data

    print("Building HNSW Index.")
    build_started = time.perf_counter()
    index = nmslib.init(method="hnsw", space="jaccard_sparse",
                        data_type=nmslib.DataType.OBJECT_AS_STRING)
    # Each set is handed to nmslib as a space-separated string of its members.
    encoded_sets = [" ".join(str(v) for v in s) for s in index_sets]
    index.addDataPointBatch(encoded_sets, range(len(index_keys)))
    index.createIndex(index_params)
    build_elapsed = time.perf_counter() - build_started
    print("Indexing time: {:.3f}.".format(build_elapsed))

    print("Querying.")
    times = []
    results = []
    index.setQueryTimeParams({"efSearch": index_params["efConstruction"]})
    for query_set, query_key in zip(query_sets, query_keys):
        query_started = time.perf_counter()
        neighbour_ids, _ = index.knnQuery(" ".join(map(str, query_set)), k)
        # Re-score the candidates with the exact Jaccard value and rank them.
        ranked = sorted(
            ([index_keys[i], compute_jaccard(query_set, index_sets[i])]
             for i in neighbour_ids),
            key=lambda pair: pair[1], reverse=True)
        times.append(time.perf_counter() - query_started)
        results.append((query_key, ranked))
        sys.stdout.write(f"\rQueried {len(results)} sets")
    sys.stdout.write("\n")
    return (results, times)
开发者ID:ekzhu,项目名称:datasketch,代码行数:31,代码来源:hnsw.py

示例13: __init__

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def __init__(self, feats, k, index_path='', verbose=True, **kwargs):
        """Build or load a cosine HNSW index and compute the top-k neighbours.

        When ``index_path`` names an existing file the index is loaded from
        it; otherwise it is built from ``feats`` and, if ``index_path`` is
        non-empty, saved there. The neighbours are read from
        ``index_path + '.npz'`` when that file exists and are otherwise
        obtained by querying the index with ``feats``. The result is stored
        in ``self.knns``.
        """
        import nmslib
        self.verbose = verbose

        with Timer('[hnsw] build index', verbose):
            # higher ef leads to better accuracy, but slower search;
            # higher M leads to higher accuracy/run_time at fixed ef, but
            # consumes more memory. No explicit ef/M values are passed here.
            index = nmslib.init(method='hnsw', space='cosinesimil')
            has_saved_index = index_path != '' and os.path.isfile(index_path)
            if has_saved_index:
                index.loadIndex(index_path)
            else:
                index.addDataPointBatch(feats)
                build_params = {'post': 2, 'indexThreadQty': 1}
                index.createIndex(build_params, print_progress=verbose)
                if index_path:
                    print('[hnsw] save index to {}'.format(index_path))
                    mkdir_if_no_exists(index_path)
                    index.saveIndex(index_path)

        with Timer('[hnsw] query topk {}'.format(k), verbose):
            knn_ofn = index_path + '.npz'
            if not os.path.exists(knn_ofn):
                self.knns = index.knnQueryBatch(feats, k=k)
            else:
                print('[hnsw] read knns from {}'.format(knn_ofn))
                self.knns = np.load(knn_ofn)['data']
开发者ID:yl-1993,项目名称:learn-to-cluster,代码行数:38,代码来源:knn.py

示例14: fit

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def fit(self, X, y=None) -> HNSW:
        """ Build the HNSW index from the training data.

        The configured metric is normalised first: the Euclidean family is
        stored as 'euclidean' or 'sqeuclidean' and indexed in the 'l2'
        space, the cosine family is indexed in the 'cosinesimil' space.

        Parameters
        ----------
        X: np.array
            Data to be indexed
        y: any
            Ignored

        Returns
        -------
        self: HNSW
            This instance, with the built index in ``index_``

        Raises
        ------
        ValueError
            If ``self.metric`` is not one of the supported metric names.
        """
        X = check_array(X)

        index_method = self.method
        post = self.post_processing

        squared_names = ('squared_euclidean', 'sqeuclidean')
        euclidean_names = ('euclidean', 'l2', 'minkowski')
        cosine_names = ('cosine', 'cosinesimil')

        if self.metric in squared_names:
            self.metric = 'sqeuclidean'
            self.space = 'l2'
        elif self.metric in euclidean_names:
            self.metric = 'euclidean'
            self.space = 'l2'
        elif self.metric in cosine_names:
            self.space = 'cosinesimil'
        else:
            raise ValueError(f'Invalid metric "{self.metric}". Please try "euclidean" or "cosine".')

        index = nmslib.init(space=self.space, method=index_method)
        index.addDataPointBatch(X)
        build_params = {'post': post, 'indexThreadQty': self.n_jobs}
        index.createIndex(build_params, print_progress=(self.verbose >= 2))
        self.index_ = index
        self.n_samples_fit_ = len(self.index_)

        # Internal sanity check on the space chosen above.
        assert self.space in ['l2', 'cosinesimil'], f'Internal: self.space={self.space} not allowed'

        return self
开发者ID:VarIr,项目名称:scikit-hubness,代码行数:46,代码来源:hnsw.py

示例15: nmslib_knn_with_zero_vectors

# 需要导入模块: import nmslib [as 别名]
# 或者: from nmslib import init [as 别名]
def nmslib_knn_with_zero_vectors(
        self, vectors: numpy.ndarray, k: int
    ) -> Tuple[numpy.ndarray, numpy.ndarray]:
        """
        Query `self.ann_index` for the k nearest neighbours of every row of `vectors`,
        skipping rows that would make `ann_index.knnQueryBatch` crash.

        `ann_index.knnQueryBatch` crashes if any of the vectors is all zeros, so this
        wrapper works as follows:
        - treat every row whose elements sum to zero as empty and leave it out of the query.
        - call `ann_index.knnQueryBatch` with the non-empty rows only.
        - write each result back at the position of the row it belongs to, leaving
          `None` at the positions of the empty rows.

        Parameters
        ----------
        vectors:
            The query vectors, one per row. Must support `sum(axis=1)`, `shape`
            and boolean-mask row selection.
        k: int
            Number of neighbours requested for every non-empty row.

        Returns
        -------
        A pair `(neighbors, distances)` of 1-D object arrays with one entry per input
        row. Each entry is a list (ids or distances respectively) for a non-empty row
        and `None` for an empty one.
        """
        # True for the rows that can be queried, i.e. whose elements do not sum to zero.
        non_empty_mask = numpy.array(vectors.sum(axis=1) != 0).reshape(-1)
        empty_vectors_count = vectors.shape[0] - int(non_empty_mask.sum())
        if self.verbose:
            print(f"Number of empty vectors: {empty_vectors_count}")

        # object arrays created with numpy.empty start out filled with None
        extended_neighbors = numpy.empty((len(non_empty_mask),), dtype=object)
        extended_distances = numpy.empty((len(non_empty_mask),), dtype=object)

        if vectors.shape[0] - empty_vectors_count == 0:
            return extended_neighbors, extended_distances

        # remove empty vectors before calling `ann_index.knnQueryBatch`
        vectors = vectors[non_empty_mask]

        # call `knnQueryBatch` to get neighbors
        original_neighbours = self.ann_index.knnQueryBatch(vectors, k=k)

        # Store every result list element-wise at its original row position. Going
        # through `numpy.array(list_of_lists)` is avoided on purpose: equal-length
        # rows would collapse into a 2-D array, and ragged rows raise ValueError on
        # recent NumPy releases unless dtype=object is given.
        for row, hit in zip(numpy.flatnonzero(non_empty_mask), original_neighbours):
            extended_neighbors[row] = hit[0].tolist()
            extended_distances[row] = hit[1].tolist()

        return extended_neighbors, extended_distances
开发者ID:allenai,项目名称:scispacy,代码行数:52,代码来源:candidate_generation.py


注:本文中的nmslib.init方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。