

Python matutils.argsort Method Code Examples

This article collects typical usage examples of the Python method gensim.matutils.argsort. If you are wondering exactly how matutils.argsort works, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also explore further usage examples from the gensim.matutils module where the method lives.


The following presents 10 code examples of the matutils.argsort method, sorted by popularity by default.
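
As a quick reference before the examples, here is a minimal, self-contained sketch of what matutils.argsort does (a toy illustration, not taken from any of the projects below): it returns the indices of the topn smallest elements of an array, or of the largest elements when reverse=True is passed.

import numpy as np
from gensim import matutils

scores = np.array([0.2, 0.9, 0.1, 0.7])

# Indices of the 2 largest values, best first: array([1, 3])
top2 = matutils.argsort(scores, topn=2, reverse=True)

# With topn=None (the default), all indices are returned, sorted by ascending value: array([2, 0, 3, 1])
ascending = matutils.argsort(scores)

print(top2, ascending)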

Example 1: calculate_text_similar

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def calculate_text_similar(vec_ques, matrix_org_norm, matrix_org_index, top_vec):
    """
      最相似的句子,句向量与矩阵点乘
    :param vec: 
    :param matrix: 
    :param keys: 
    :param topn: 
    :return: 
    """
    # 问句向量标准化, Scale a vector to unit length. The only exception is the zero vector, which is returned back unchanged.
    vec_ques_mean = matutils.unitvec(np.array([vec_ques]).mean(axis=0)).astype(numpy_type)
    # 矩阵点乘, 即问句与标准问句库里边的问句点乘,
    matrix_vec_dot = np.dot(matrix_org_norm, vec_ques_mean)
    # 相似度排序
    most_similar_sentence_vec_sort = matutils.argsort(matrix_vec_dot, topn=top_vec, reverse=True)
    # 获取最相似标准问句的index和得分score
    index_score = []
    for t in most_similar_sentence_vec_sort[:top_vec]:
        index_score.append([matrix_org_index[t], float(matrix_vec_dot[t])])
    return index_score 
Developer: yongzhuo, Project: nlp_xiaojiang, Lines of code: 22, Source file: chatbot_sentence_vec_by_word.py
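
Example 1 follows a common retrieval pattern: row-normalize the sentence matrix once, unit-normalize the query vector, take a dot product so every entry becomes a cosine similarity, and let matutils.argsort pick the top rows. The following is a small standalone sketch of that pattern with made-up toy data (the variable names mirror the example above, but the data and shapes are hypothetical):

import numpy as np
from gensim import matutils

# Hypothetical toy data: 4 "sentence" vectors of dimension 3.
matrix_org = np.random.rand(4, 3).astype(np.float32)
# Row-normalize once up front so a plain dot product equals cosine similarity.
matrix_org_norm = np.vstack([matutils.unitvec(row) for row in matrix_org])

vec_ques = np.random.rand(3).astype(np.float32)          # query sentence vector
vec_ques_norm = matutils.unitvec(vec_ques)                # unit-length query

matrix_vec_dot = np.dot(matrix_org_norm, vec_ques_norm)   # one cosine similarity per row
top_indices = matutils.argsort(matrix_vec_dot, topn=2, reverse=True)
print([(int(i), float(matrix_vec_dot[i])) for i in top_indices])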

Example 2: compute_dt_dist

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def compute_dt_dist(docs, labels, tags, model, max_len, batch_size, pad_id, idxvocab, output_file):
    #generate batches
    num_batches = int(math.ceil(float(len(docs)) / batch_size))
    dt_dist = []
    t = []
    combined = []
    docid = 0
    for i in xrange(num_batches):
        x, _, _, t, s = get_batch_doc(docs, labels, tags, i, max_len, cf.tag_len, batch_size, pad_id)
        attention, mean_topic = sess.run([model.attention, model.mean_topic], {model.doc: x, model.tag: t})
        dt_dist.extend(attention[:s])

        if debug:
            for si in xrange(s):
                d = x[si]
                print "\n\nDoc", docid, "=", " ".join([idxvocab[item] for item in d if (item != pad_id)])
                sorted_dist = matutils.argsort(attention[si], reverse=True)
                for ti in sorted_dist:
                    print "Topic", ti, "=", attention[si][ti]
                docid += 1

    np.save(open(output_file, "w"), dt_dist) 
Developer: jhlau, Project: topically-driven-language-model, Lines of code: 24, Source file: tdlm_test.py

Example 3: get_topics

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def get_topics(self, sess, topn):
        topics = []
        entropy = []
        tw_dist = sess.run(tf.nn.softmax(tf.matmul(self.topic_output_embedding, self.tm_softmax_w) + self.tm_softmax_b))
        for ti in xrange(self.config.topic_number):
            best = matutils.argsort(tw_dist[ti], topn=topn, reverse=True)
            topics.append(best)
            entropy.append(scipy.stats.entropy(tw_dist[ti]))

        return topics, entropy

    #get top topics and words given a doc 
Developer: jhlau, Project: topically-driven-language-model, Lines of code: 14, Source file: tdlm_model.py

Example 4: get_topics_on_doc

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def get_topics_on_doc(self, sess, doc, tag, topn):
        tw_dist, logits = sess.run([self.attention, self.tm_logits], {self.doc: doc, self.tag: tag})
        probs = sess.run(tf.nn.softmax(logits))[0]
        best_words = matutils.argsort(probs, topn=topn, reverse=True)
        best_words = [ (item, probs[item]) for item in best_words ] #attach word probability
        best_topics = matutils.argsort(tw_dist[0], topn=topn, reverse=True)
        best_topics = [ (item, tw_dist[0][item]) for item in best_topics ] #attach topic probability

        return best_topics, best_words

#convolutional topic model + lstm language model 
Developer: jhlau, Project: topically-driven-language-model, Lines of code: 13, Source file: tdlm_model.py

Example 5: most_similar

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def most_similar(self, sWord, iTopN=10, fMinDist=-1.0):
    npaWord_unit = self.getUnitVector(sWord)

    if npaWord_unit is None:
      return None

    npaCosineSimilarities = np.dot(self.npaWordEmbeddings_units, npaWord_unit)

    npaBestIndices = \
        matutils.argsort(npaCosineSimilarities, topn=iTopN +1, reverse=True)

    # npaBestIndices[1:] - Ignore the first one (which is sWord itself)
    return [(self.oVocab.index2word(x), npaCosineSimilarities[x]) for x in npaBestIndices[1:] if npaCosineSimilarities[x] > fMinDist] 
Developer: UKPLab, Project: semeval2017-scienceie, Lines of code: 15, Source file: wordEmbeddings.py

Example 6: sortByNorm

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def sortByNorm(self, iMin, iMax):
    if not hasattr(self, 'npaIndicesByNorm'):
      self.npaNorms = np.sqrt(np.square(self.npaWordEmbeddings).sum(axis=1))
      self.npaIndicesByNorm = matutils.argsort(self.npaNorms)

    return [(self.oVocab.index2word(x), self.npaNorms[x]) for x in self.npaIndicesByNorm[iMin:iMax]] 
Developer: UKPLab, Project: semeval2017-scienceie, Lines of code: 8, Source file: wordEmbeddings.py

Example 7: most_similar_simple

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def most_similar_simple(self, sWord, iTopN=10):
    npaWordEmbedding = self[sWord]

    if npaWordEmbedding is None:
      return None

    npaSimilarities = np.dot(self.npaWordEmbeddings, npaWordEmbedding)

    npaBestIndices = \
        matutils.argsort(npaSimilarities, topn=iTopN +1, reverse=True)

    # npaBestIndices[1:] - Ignore the first one (which is sWord itself)
    return [(self.oVocab.index2word(x), npaSimilarities[x]) for x in npaBestIndices[1:]] 
Developer: UKPLab, Project: semeval2017-scienceie, Lines of code: 15, Source file: wordEmbeddings.py

Example 8: topn_similarity_label

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def topn_similarity_label(self, words, topn=10, normalization=True):
        if self.model is None:
            raise Exception('no model.')
        if isinstance(words, string_types):
            words=[words]
        
            """ we can discard this version.
            vectors=np.transpose(self.model.wv.__getitem__(words))
            if normalization:
                unit_vector=np.zeros((len(vectors),len(words)))
                for i in range(len(words)):
                    unit_vector[:,i]=matutils.unitvec(vectors[:,i])
                dists=np.dot(self.Label_vec_u, unit_vector)
            else:
                dists=np.dot(self.Label_vec, vectors)
            # Exclude the word itself (since the word may already be in label_dict)
            # best = matutils.argsort(dists, topn = topn+1, reverse=True)
            # result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words]
            best = matutils.argsort(dists[:,0], topn = topn, reverse=True)
            result = [(self.Label_index[sim], float(dists[sim])) for sim in best]
            return result
        else:
            """
        vectors=np.transpose(self.model.wv.__getitem__(words))
        if normalization:
            unit_vector=unitvec(vectors,ax=0)
            dists=np.dot(self.Label_vec_u, unit_vector)
        else:
            dists=np.dot(self.Label_vec, vectors)
            #topwords=np.empty((topn,len(words)), np.string_)
        topwords=[]
        topsims=np.empty((topn,len(words)))
        best = np.argsort(dists, axis=0)
        for i in range(topn):
            topword=[]
            for j in range(len(words)):
                topword.append(self.Label_index[best[-i-1][j]])
                topsims[i][j]=dists[best[-i-1][j]][j]
            topwords.append(topword)
        result=[(topwords[i], topsims[i]) for i in range(topn)]
        return result
        """ print this result by:

            | for iword,isim in result:  |
            |     print(iword, isim)     |
            or
            | for iword, isim in b:                               |
            |     for i in range(len(b[0])):                      |
            |         print("%s:%f\t" %(iword[i],isim[i]),end="") |
            |     print("")                                       |
                
        """ 
Developer: Coldog2333, Project: Financial-NLP, Lines of code: 54, Source file: NLP.py

Example 9: topn_synonym_label

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def topn_synonym_label(self, word, topn=10, calc='all', calc_k=5):
        ww=list()
        for w in self.findWordNet(word):
            ww.append(self.id2ss(w))
        if (len(ww)==0):
            return 0
        else:
            similarities=[0]*len(self.Label_index)
            if calc=='all': # default: average over all senses
                for i in range(len(self.Label_index)):
                    count=0
                    for w in ww:
                        for l in self.Label_wn[self.Label_index[i]]:
                            sim=w.path_similarity(l)
                            if(sim!=None):
                                similarities[i]+=sim
                            else:
                                count+=1
                    try:
                        similarities[i]/=(len(ww)*len(self.Label_wn[self.Label_index[i]])-count) # average similarity
                    except:
                        similarities[i]=0
                        
            elif calc=='calc_k': # only use the top calc_k senses
                for i in range(len(self.Label_index)):
                    count=0
                    simlist=[]
                    for w in ww:
                        for l in self.Label_wn[self.Label_index[i]]:
                            sim=w.path_similarity(l)
                            if(sim!=None):
                                simlist.append(sim)
                                count+=1
                    if count<=calc_k:
                        similarities[i]=np.mean(simlist)
                    else:
                        simlist=sorted(simlist,reverse=True)
                        similarities[i]=sum(simlist[:calc_k])/calc_k # average over the calc_k largest similarities
                        
        best=matutils.argsort(similarities, topn = topn, reverse=True)
        result = [(self.Label_index[sim], float(similarities[sim])) for sim in best]
        return result 
Developer: Coldog2333, Project: Financial-NLP, Lines of code: 44, Source file: NLP.py

Example 10: most_similar

# Required import: from gensim import matutils [as alias]
# Or: from gensim.matutils import argsort [as alias]
def most_similar(self, node_or_vector, topn=10, restrict_vocab=None):
        """
        Find the top-N most similar nodes to the given node or vector, sorted in increasing order of distance.

        Parameters
        ----------

        node_or_vector : str/int or numpy.array
            node key or vector for which similar nodes are to be found.
        topn : int or None, optional
            number of similar nodes to return, if `None`, returns all.
        restrict_vocab : int or None, optional
            Optional integer which limits the range of vectors which are searched for most-similar values.
            For example, restrict_vocab=10000 would only check the first 10000 node vectors in the vocabulary order.
            This may be meaningful if vocabulary is sorted by descending frequency.

        Returns
        --------
        list of tuples (str, float)
            List of tuples containing (node, distance) pairs in increasing order of distance.

        Examples
        --------
        >>> vectors.most_similar('lion.n.01')
        [('lion_cub.n.01', 0.4484), ('lionet.n.01', 0.6552), ...]

        """
        if not restrict_vocab:
            all_distances = self.distances(node_or_vector)
        else:
            nodes_to_use = self.index2word[:restrict_vocab]
            all_distances = self.distances(node_or_vector, nodes_to_use)

        if isinstance(node_or_vector, string_types + (int,)):
            node_index = self.vocab[node_or_vector].index
        else:
            node_index = None
        if not topn:
            closest_indices = matutils.argsort(all_distances)
        else:
            closest_indices = matutils.argsort(all_distances, topn=1 + topn)
        result = [
            (self.index2word[index], float(all_distances[index]))
            for index in closest_indices if (not node_index or index != node_index)  # ignore the input node
        ]
        if topn:
            result = result[:topn]
        return result 
Developer: dalab, Project: hyperbolic_cones, Lines of code: 50, Source file: dag_emb_model.py


Note: The gensim.matutils.argsort examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright of the source code remains with the original authors, and distribution or use should follow the License of the corresponding project. Do not reproduce without permission.