本文整理汇总了 Python 中 gensim.matutils.argsort 函数的典型用法代码示例。如果您正苦于以下问题:Python matutils.argsort 的具体用法是什么?Python matutils.argsort 怎么用?有哪些 Python matutils.argsort 的使用示例?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块 gensim.matutils 的用法示例。
下文共展示了 matutils.argsort 函数的 10 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: calculate_text_similar
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def calculate_text_similar(vec_ques, matrix_org_norm, matrix_org_index, top_vec):
    """
    Rank the reference sentences by dot-product similarity to a question vector.

    :param vec_ques: sentence vector of the incoming question.
    :param matrix_org_norm: matrix of reference-sentence vectors, one per row
        (presumably already unit-normalised, going by the name -- verify
        against the caller).
    :param matrix_org_index: maps a row number of ``matrix_org_norm`` to the
        key that is reported for that row.
    :param top_vec: how many best matches to return.
    :return: list of ``[key, score]`` pairs, best score first.
    """
    # Scale the question vector to unit length (a zero vector is returned
    # unchanged by unitvec).
    stacked = np.array([vec_ques])
    unit_question = matutils.unitvec(stacked.mean(axis=0)).astype(numpy_type)

    # One dot product per reference sentence.
    scores = np.dot(matrix_org_norm, unit_question)

    # Row numbers of the best-scoring reference sentences, best first.
    ranked_rows = matutils.argsort(scores, topn=top_vec, reverse=True)

    # Pair each selected row's key with its score.
    return [
        [matrix_org_index[row], float(scores[row])]
        for row in ranked_rows[:top_vec]
    ]
示例2: compute_dt_dist
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def compute_dt_dist(docs, labels, tags, model, max_len, batch_size, pad_id, idxvocab, output_file):
    """Collect the attention vector of every document and save them to disk.

    The documents are processed batch by batch; for each batch the model's
    ``attention`` output is evaluated and the rows belonging to real documents
    (the first ``s`` rows reported by ``get_batch_doc``) are accumulated. The
    accumulated list is written to ``output_file`` with ``np.save``.

    Relies on names defined outside this block: ``sess``, ``cf``, ``debug``,
    ``get_batch_doc`` and ``matutils``.

    :param docs: documents to process.
    :param labels: passed through to ``get_batch_doc``.
    :param tags: passed through to ``get_batch_doc``.
    :param model: object exposing ``attention``, ``mean_topic``, ``doc`` and ``tag``.
    :param max_len: passed through to ``get_batch_doc``.
    :param batch_size: number of documents per batch.
    :param pad_id: id of the padding token; skipped when printing a document.
    :param idxvocab: maps a token id to its string form (used for debug output).
    :param output_file: path of the file the result is saved to.
    """
    # generate batches
    num_batches = int(math.ceil(float(len(docs)) / batch_size))
    dt_dist = []
    docid = 0
    for i in range(num_batches):
        x, _, _, t, s = get_batch_doc(docs, labels, tags, i, max_len, cf.tag_len, batch_size, pad_id)
        # mean_topic is still fetched so the evaluated graph is unchanged; its value is not used.
        attention, _mean_topic = sess.run([model.attention, model.mean_topic], {model.doc: x, model.tag: t})
        dt_dist.extend(attention[:s])

        if debug:
            for si in range(s):
                words = " ".join([idxvocab[item] for item in x[si] if item != pad_id])
                print("\n\nDoc %s = %s" % (docid, words))
                # Topics of this document, highest attention first.
                for ti in matutils.argsort(attention[si], reverse=True):
                    print("Topic %s = %s" % (ti, attention[si][ti]))
                docid += 1

    # np.save writes binary data: open in binary mode and make sure the
    # handle is closed (the original leaked a text-mode handle).
    with open(output_file, "wb") as out:
        np.save(out, dt_dist)
示例3: get_topics
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def get_topics(self, sess, topn):
    """Return the top entries and the entropy of every topic distribution.

    A softmax over ``topic_output_embedding * tm_softmax_w + tm_softmax_b`` is
    evaluated in ``sess``; row ``k`` of the result is treated as the
    distribution of topic ``k`` (presumably over vocabulary ids -- verify).

    :param sess: session used to evaluate the softmax.
    :param topn: number of highest-probability indices to keep per topic.
    :return: ``(topics, entropy)`` -- two lists with one item per topic: the
        ``topn`` best indices (best first) and the entropy of the row.
    """
    projected = tf.matmul(self.topic_output_embedding, self.tm_softmax_w) + self.tm_softmax_b
    tw_dist = sess.run(tf.nn.softmax(projected))

    per_topic = [
        (
            matutils.argsort(tw_dist[k], topn=topn, reverse=True),
            scipy.stats.entropy(tw_dist[k]),
        )
        for k in xrange(self.config.topic_number)
    ]
    topics = [pair[0] for pair in per_topic]
    entropy = [pair[1] for pair in per_topic]
    return topics, entropy
#get top topics and words given a doc
示例4: get_topics_on_doc
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def get_topics_on_doc(self, sess, doc, tag, topn):
    """Return the strongest topics and words for a single document.

    Evaluates ``self.attention`` and ``self.tm_logits`` for the given
    ``doc``/``tag`` feed, turns the logits into probabilities with a softmax,
    and keeps the ``topn`` highest entries of the first row of each.

    :param sess: session used for evaluation.
    :param doc: value fed to ``self.doc``.
    :param tag: value fed to ``self.tag``.
    :param topn: number of entries to keep in each list.
    :return: ``(best_topics, best_words)`` -- lists of ``(index, probability)``
        tuples, highest probability first.
    """
    feed = {self.doc: doc, self.tag: tag}
    tw_dist, logits = sess.run([self.attention, self.tm_logits], feed)
    word_probs = sess.run(tf.nn.softmax(logits))[0]

    def ranked_pairs(dist):
        # Pair every selected index with its probability.
        chosen = matutils.argsort(dist, topn=topn, reverse=True)
        return [(idx, dist[idx]) for idx in chosen]

    best_words = ranked_pairs(word_probs)
    best_topics = ranked_pairs(tw_dist[0])
    return best_topics, best_words
#convolutional topic model + lstm language model
示例5: most_similar
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def most_similar(self, sWord, iTopN=10, fMinDist=-1.0):
    """Return the words most similar to ``sWord``.

    The similarity is the dot product of the word's unit vector with every
    row of ``self.npaWordEmbeddings_units``.

    :param sWord: query word.
    :param iTopN: maximum number of neighbours to return.
    :param fMinDist: neighbours whose similarity is not strictly greater than
        this value are dropped.
    :return: list of ``(word, similarity)`` tuples, most similar first, or
        ``None`` when ``getUnitVector`` has no vector for ``sWord``.
    """
    unit_vec = self.getUnitVector(sWord)
    if unit_vec is None:
        return None

    sims = np.dot(self.npaWordEmbeddings_units, unit_vec)
    ranked = matutils.argsort(sims, topn=iTopN + 1, reverse=True)

    neighbours = []
    # Skip the first hit: it is taken to be sWord itself.
    for idx in ranked[1:]:
        if sims[idx] > fMinDist:
            neighbours.append((self.oVocab.index2word(idx), sims[idx]))
    return neighbours
示例6: sortByNorm
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def sortByNorm(self, iMin, iMax):
    """Return a slice of the vocabulary ordered by embedding norm.

    On first use the Euclidean norm of every row of ``self.npaWordEmbeddings``
    is computed and cached in ``self.npaNorms``, together with the argsort of
    those norms in ``self.npaIndicesByNorm``.

    :param iMin: start position (inclusive) in the norm-sorted order.
    :param iMax: end position (exclusive) in the norm-sorted order.
    :return: list of ``(word, norm)`` tuples for positions ``iMin:iMax``.
    """
    already_sorted = hasattr(self, 'npaIndicesByNorm')
    if not already_sorted:
        squared = np.square(self.npaWordEmbeddings)
        self.npaNorms = np.sqrt(squared.sum(axis=1))
        self.npaIndicesByNorm = matutils.argsort(self.npaNorms)

    selected = []
    for idx in self.npaIndicesByNorm[iMin:iMax]:
        selected.append((self.oVocab.index2word(idx), self.npaNorms[idx]))
    return selected
示例7: most_similar_simple
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def most_similar_simple(self, sWord, iTopN=10):
    """Return the words whose embeddings have the largest dot product with ``sWord``.

    Unlike a cosine ranking, the raw rows of ``self.npaWordEmbeddings`` are
    used as they are.

    :param sWord: query word, looked up with ``self[sWord]``.
    :param iTopN: number of neighbours to return.
    :return: list of ``(word, dot_product)`` tuples, largest first, or
        ``None`` when the lookup yields ``None``.
    """
    query_vec = self[sWord]
    if query_vec is None:
        return None

    dots = np.dot(self.npaWordEmbeddings, query_vec)
    ranked = matutils.argsort(dots, topn=iTopN + 1, reverse=True)

    # Skip the first hit: it is taken to be sWord itself.
    # NOTE(review): with unnormalised vectors the query word is not
    # guaranteed to rank first -- confirm this assumption holds.
    neighbours = ranked[1:]
    words = [self.oVocab.index2word(idx) for idx in neighbours]
    values = [dots[idx] for idx in neighbours]
    return list(zip(words, values))
示例8: topn_similarity_label
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def topn_similarity_label(self, words, topn=10, normalization=True):
    r"""Return, for each query word, the ``topn`` most similar labels.

    The word vectors are looked up in ``self.model.wv`` and multiplied with
    the label matrix (``self.Label_vec_u`` when ``normalization`` is true,
    otherwise ``self.Label_vec``); the labels are then ranked per word.

    :param words: a single word or a list of words.
    :param topn: number of labels to return per word.
    :param normalization: if true, the word vectors are passed through
        ``unitvec`` and compared against ``self.Label_vec_u``.
    :return: list of ``topn`` tuples ``(labels, sims)``; entry ``i`` holds the
        i-th best label of every word and the matching similarity values
        (one column per word).
    :raises Exception: if ``self.model`` is ``None``.

    The result can be printed with::

        for iword, isim in result:
            print(iword, isim)

    or::

        for iword, isim in result:
            for i in range(len(iword)):
                print("%s:%f\t" % (iword[i], isim[i]), end="")
            print("")
    """
    if self.model is None:
        raise Exception('no model.')
    if isinstance(words, string_types):
        words = [words]

    # One column per query word.
    vectors = np.transpose(self.model.wv.__getitem__(words))
    if normalization:
        unit_vector = unitvec(vectors, ax=0)
        dists = np.dot(self.Label_vec_u, unit_vector)
    else:
        dists = np.dot(self.Label_vec, vectors)

    n_words = len(words)
    # Ascending sort down each column: the best label of a word is in the last row.
    best = np.argsort(dists, axis=0)
    topwords = []
    topsims = np.empty((topn, n_words))
    for i in range(topn):
        row = best[-i - 1]  # label row of the i-th best match, per word
        topwords.append([self.Label_index[row[j]] for j in range(n_words)])
        for j in range(n_words):
            topsims[i][j] = dists[row[j]][j]
    return [(topwords[i], topsims[i]) for i in range(topn)]
示例9: topn_synonym_label
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def topn_synonym_label(self, word, topn=10, calc='all', calc_k=5):
    """Rank the labels by WordNet path similarity to ``word``.

    Every synset found for ``word`` is compared (``path_similarity``) with
    every synset stored for a label in ``self.Label_wn``; the per-label score
    is an average of those similarities.

    :param word: query word.
    :param topn: number of labels to return.
    :param calc: ``'all'`` averages every defined similarity; ``'calc_k'``
        averages only the ``calc_k`` largest ones. Any other value leaves
        every score at 0.
    :param calc_k: number of largest similarities used when ``calc='calc_k'``.
    :return: list of ``(label, similarity)`` tuples, best first, or ``0`` when
        no synset is found for ``word`` (kept for backward compatibility).
    """
    ww = [self.id2ss(w) for w in self.findWordNet(word)]
    if not ww:
        return 0

    similarities = [0] * len(self.Label_index)
    if calc == 'all':  # default: average over all pairs
        for i in range(len(self.Label_index)):
            label_synsets = self.Label_wn[self.Label_index[i]]
            total = 0
            undefined = 0  # pairs for which no path similarity exists
            for w in ww:
                for l in label_synsets:
                    sim = w.path_similarity(l)
                    if sim is not None:
                        total += sim
                    else:
                        undefined += 1
            defined = len(ww) * len(label_synsets) - undefined
            # Average similarity; 0 when no pair has a defined similarity.
            similarities[i] = total / defined if defined else 0
    elif calc == 'calc_k':  # only use the calc_k best similarities
        for i in range(len(self.Label_index)):
            simlist = []
            for w in ww:
                for l in self.Label_wn[self.Label_index[i]]:
                    sim = w.path_similarity(l)
                    if sim is not None:
                        simlist.append(sim)
            # Mean of the (at most) calc_k largest values. The original
            # divided a list slice by an int and sliced one element short.
            top = sorted(simlist, reverse=True)[:calc_k]
            similarities[i] = sum(top) / float(len(top)) if top else 0

    best = matutils.argsort(similarities, topn=topn, reverse=True)
    return [(self.Label_index[sim], float(similarities[sim])) for sim in best]
示例10: most_similar
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import argsort [as 别名]
def most_similar(self, node_or_vector, topn=10, restrict_vocab=None):
    """
    Find the top-N most similar nodes to the given node or vector, sorted in increasing order of distance.

    Parameters
    ----------
    node_or_vector : str/int or numpy.array
        node key or vector for which similar nodes are to be found.
    topn : int or None, optional
        number of similar nodes to return, if `None`, returns all.
    restrict_vocab : int or None, optional
        Optional integer which limits the range of vectors which are searched for most-similar values.
        For example, restrict_vocab=10000 would only check the first 10000 node vectors in the vocabulary order.
        This may be meaningful if vocabulary is sorted by descending frequency.

    Returns
    --------
    list of tuples (str, float)
        List of tuples containing (node, distance) pairs in increasing order of distance.
        When the query is given as a node key, that node itself is left out of the result.

    Examples
    --------
    >>> vectors.most_similar('lion.n.01')
    [('lion_cub.n.01', 0.4484), ('lionet.n.01', 0.6552), ...]

    """
    if not restrict_vocab:
        all_distances = self.distances(node_or_vector)
    else:
        nodes_to_use = self.index2word[:restrict_vocab]
        all_distances = self.distances(node_or_vector, nodes_to_use)

    if isinstance(node_or_vector, string_types + (int,)):
        node_index = self.vocab[node_or_vector].index
    else:
        node_index = None

    if not topn:
        closest_indices = matutils.argsort(all_distances)
    else:
        # Ask for one extra hit because the query node itself may be among them.
        closest_indices = matutils.argsort(all_distances, topn=1 + topn)

    # Ignore the input node. Compare against None explicitly: index 0 is a
    # valid node index and must be filtered out as well.
    result = [
        (self.index2word[index], float(all_distances[index]))
        for index in closest_indices if (node_index is None or index != node_index)
    ]
    if topn:
        result = result[:topn]
    return result