This article collects and organizes typical usage examples of the networkx.pagerank method in Python. If you have been wondering how exactly to use networkx.pagerank, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the networkx module that this method belongs to.
The following presents 15 code examples of the networkx.pagerank method, sorted by popularity by default.
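Before diving into the examples, here is a minimal, self-contained sketch of what a basic call to networkx.pagerank looks like (the graph, node names, and damping factor below are purely illustrative and not taken from any of the examples):

import networkx as nx

# a tiny illustrative directed graph
G = nx.DiGraph()
G.add_edges_from([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])

# pagerank returns a dict mapping each node to its score; the scores sum to 1
scores = nx.pagerank(G, alpha=0.85)
print(sorted(scores.items(), key=lambda kv: kv[1], reverse=True))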
Example 1: score
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def score(self, sg, *args, **xargs): # get_pagerank_probability
"""
:param sg: egocentric subgraph around topic in networkx format
:param distance_degradation: A factor for degrading as distance from the topic increases
:return: Dictionary of probabilities keyed by node
"""
# convert to digraph if needed
if sg.is_multigraph():
sg = self.multigraph_to_digraph(sg)
personalized = {}
for node in sg.nodes():
# personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation)
# INSERT WEIGHTING FUNCTION BELOW
        personalized[node] = self.exponential_weight(sg.nodes[node]['topic_distance'])  # sg.nodes[...] replaces the older sg.node[...] accessor
# return the pagerank scores
return nx.pagerank(sg, personalization=personalized, weight='confidence')
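As a standalone follow-up, a minimal sketch of the personalization keyword used above (the toy graph, edge confidences, and weights are made up for illustration; only the keyword arguments mirror the example):

import networkx as nx

G = nx.DiGraph()
# edges carry a 'confidence' attribute, matching weight='confidence' in the call
G.add_weighted_edges_from([("topic", "a", 0.9), ("a", "b", 0.5), ("b", "topic", 0.7)],
                          weight="confidence")

# bias the random jump towards nodes close to the topic
personalized = {"topic": 1.0, "a": 0.5, "b": 0.25}
scores = nx.pagerank(G, personalization=personalized, weight="confidence")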
Example 2: textrank_tfidf
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def textrank_tfidf(sentences, topk=6):
"""
    Use TF-IDF similarity between sentences and networkx.pagerank to pick the central sentences as the summary.
    :param sentences: str, raw document text
    :param topk: int, number of sentences to keep
    :return: list
"""
    # split the text into sentences
    sentences = list(cut_sentence(sentences))
    # TF-IDF similarity between sentences
    matrix_norm = tdidf_sim(sentences)
    # build the sentence-similarity graph
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # nx.pagerank
    sens_scores = nx.pagerank(tfidf_sim)
    # sort sentences by score
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # keep at most topk sentences to avoid going out of range
    topk = min(len(sentences), topk)
    # return (score, sentence) pairs for the original sentences
return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk]
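cut_sentence and tdidf_sim above are helpers from the surrounding project. As a rough, self-contained approximation of the same idea (an assumption, not that project's actual implementation), the similarity graph could also be built with scikit-learn:

import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer

def textrank_tfidf_sketch(sentences, topk=6):
    # TF-IDF vectors are L2-normalized, so dot products act as cosine similarities
    tfidf = TfidfVectorizer().fit_transform(sentences)
    sim = (tfidf @ tfidf.T).toarray()
    # weighted undirected graph over sentence indices
    graph = nx.Graph()
    for i in range(len(sentences)):
        for j in range(i + 1, len(sentences)):
            if sim[i][j] > 0:
                graph.add_edge(i, j, weight=sim[i][j])
    scores = nx.pagerank(graph, weight="weight")
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    return [(score, sentences[idx]) for idx, score in ranked[:min(topk, len(sentences))]]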
Example 3: summarize
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def summarize(self, doc: Document, size: int = 3) -> List[int]:
"""Summarize a given document using LexRank algorithm.
Args:
doc (Document): The document to summarize.
size (int): Maximum number of sentences that the summary should have.
Returns:
list: The indices of the extracted sentences that form the summary, sorted
ascending.
"""
size = min(size, len(doc.sentences))
positions = [self._get_position(k, len(doc.sentences))
for k in range(len(doc.sentences))]
G = self._build_graph(doc.sentences)
ranks = nx.pagerank(G, alpha=self.damping_factor, tol=self.tol, max_iter=self.max_iter)
candidates = sorted(
ranks.keys(), key=lambda k: self._combine_features(positions[k], ranks[k]),
reverse=True)
return self._csis(doc.sentences, candidates, size)
Example 4: textrank
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def textrank(text, hdr):
# finding out the most possible language of the text
lang_code = lang_identifier.classify(' '.join([hdr, text]))[0]
    # splitting into sentences, then tokenizing each one into stemmed words
sentences = [sentence for sentence in split_multi(text)]
stemmer = snowballstemmer.stemmer(LANG_CODES.get(lang_code, 'english'))
words = [set(stemmer.stemWord(word) for word in word_tokenizer(sentence.lower()) if word.isalpha())
for sentence in sentences]
pairs = combinations(range(len(sentences)), 2)
scores = [(i, j, similarity(words[i], words[j])) for i, j in pairs]
scores = filter(lambda x: x[2], scores)
g = nx.Graph()
g.add_weighted_edges_from(scores)
pr = nx.pagerank(g)
return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
key=lambda x: pr[x[0]], reverse=True), lang_code
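The similarity helper used above comes from the surrounding project. A plausible stand-in (an assumption about what it computes, not the project's actual code) is a simple Jaccard-style overlap between the two stemmed word sets:

def similarity_sketch(words1, words2):
    """Jaccard overlap of two sets of stemmed words; 0.0 when either set is empty."""
    if not words1 or not words2:
        return 0.0
    return len(words1 & words2) / float(len(words1 | words2))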
Example 5: add_sentences
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def add_sentences(self, sentences):
"""
@type sentences: list[Sentence]
:param sentences:
:return:
"""
counter = self.counter
G = self.G
for sentence in sentences:
G.add_nodes_from(sentence.concepts)
counter.update(ngrams(sentence.concepts, self.N))
for (keys, value) in counter.items():
for i in range(0, len(keys) - 1):
for j in range(1, len(keys)):
G.add_edge(keys[i], keys[j], weight=value)
# counter.update((keys[i], keys[j]))
# for (key, value) in counter.items():
# G.add_edge(key[0], key[1], attr={"weight": value})
print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))
self.pr = nx.pagerank(G)
Example 6: incorporate_feedback
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def incorporate_feedback(self, flightrecorder):
"""
:param flightrecorder:
:return:
@type flightrecorder: FlightRecorder
"""
G = self.G
print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))
# use the pagerank personalization feature to incorporate flightrecorder feedback
union = flightrecorder.union()
for rejected in union.reject:
        if G.has_node(rejected):
G.remove_node(rejected)
print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))
self.pr = nx.pagerank(G)
Example 7: initModel
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def initModel(self):
super(LOCABAL, self).initModel()
self.H = np.random.rand(self.embed_size,self.embed_size)
G = nx.DiGraph()
for re in self.social.relation:
G.add_edge(re[0], re[1])
pr = nx.pagerank(G, alpha=0.85)
    pr = sorted(pr.items(), key=lambda d: d[1], reverse=True)
pr = [(u[0],ind+1) for ind,u in enumerate(pr)]
self.W = {}
for user in pr:
self.W[user[0]] = 1/(1+math.log(user[1]))
self.S = {}
for line in self.social.relation:
u1,u2,weight = line
if self.data.containsUser(u1) and self.data.containsUser(u2):
uvec1=self.data.trainSet_u[u1]
uvec2=self.data.trainSet_u[u2]
#add relations to dict
            if u1 not in self.S:
self.S[u1] = {}
self.S[u1][u2] = qmath.cosine_sp(uvec1,uvec2)
Example 8: textrank_text_summarizer
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def textrank_text_summarizer(documents, num_sentences=2,
feature_type='frequency'):
    # assumes `documents` is already a list of normalized sentences
    vec, dt_matrix = build_feature_matrix(documents,
                                          feature_type='tfidf')
similarity_matrix = (dt_matrix * dt_matrix.T)
similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
scores = networkx.pagerank(similarity_graph)
ranked_sentences = sorted(((score, index)
for index, score
in scores.items()),
reverse=True)
top_sentence_indices = [ranked_sentences[index][1]
for index in range(num_sentences)]
top_sentence_indices.sort()
for index in top_sentence_indices:
        print(documents[index])
Example 9: setUp
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def setUp(self):
G = networkx.DiGraph()
edges = [(1, 2), (1, 3),
# 2 is a dangling node
(3, 1), (3, 2), (3, 5),
(4, 5), (4, 6),
(5, 4), (5, 6),
(6, 4)]
G.add_edges_from(edges)
self.G = G
self.G.pagerank = dict(zip(G,
[0.03721197, 0.05395735, 0.04150565,
0.37508082, 0.20599833, 0.28624589]))
self.dangling_node_index = 1
self.dangling_edges = {1: 2, 2: 3,
3: 0, 4: 0, 5: 0, 6: 0}
self.G.dangling_pagerank = dict(zip(G,
[0.10844518, 0.18618601, 0.0710892,
0.2683668, 0.15919783, 0.20671497]))
Example 10: setUp
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def setUp(self):
G = networkx.DiGraph()
edges = [(1, 2), (1, 3),
# 2 is a dangling node
(3, 1), (3, 2), (3, 5),
(4, 5), (4, 6),
(5, 4), (5, 6),
(6, 4)]
G.add_edges_from(edges)
self.G = G
self.G.pagerank = dict(zip(sorted(G),
[0.03721197, 0.05395735, 0.04150565,
0.37508082, 0.20599833, 0.28624589]))
self.dangling_node_index = 1
self.dangling_edges = {1: 2, 2: 3,
3: 0, 4: 0, 5: 0, 6: 0}
self.G.dangling_pagerank = dict(zip(sorted(G),
[0.10844518, 0.18618601, 0.0710892,
0.2683668, 0.15919783, 0.20671497]))
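Both fixtures above prepare a dangling_edges dict and an expected dangling_pagerank result. A minimal sketch of how such a dict is typically passed to networkx.pagerank via the dangling keyword (the alpha value is illustrative):

import networkx as nx

G = nx.DiGraph([(1, 2), (1, 3), (3, 1), (3, 2), (3, 5),
                (4, 5), (4, 6), (5, 4), (5, 6), (6, 4)])

# node 2 has no outgoing edges; redistribute its rank according to these weights
dangling = {1: 2, 2: 3, 3: 0, 4: 0, 5: 0, 6: 0}
scores = nx.pagerank(G, alpha=0.85, dangling=dangling)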
Example 11: summarize
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def summarize(self, text):
self.sentences = self.factory.text2sentences(text)
self.num_sentences = len(self.sentences)
self.corpus = SentenceCorpus(self.sentences, self.no_below_word_count, self.no_above_word_portion, self.max_dictionary_size)
self.model = TfidfModel(self.corpus.bows, id2word=self.corpus.dictionary, normalize=True)
self.tfidfs = self.model[self.corpus.bows]
self._inject_tfidfs()
self._build_matrix()
self._clustering()
if self.compactify:
self._compactify()
self.graphs = []
for i in range(self.num_clusters):
graph = self.sentences2graph(self.clusters[i])
pagerank = networkx.pagerank(graph, weight='weight')
self.clusters[i] = sorted(pagerank, key=pagerank.get, reverse=True)
self.graphs.append(graph)
Example 12: sort_words
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def sort_words(vertex_source, window = 2, pagerank_config = {'alpha': 0.85,}):
"""将单词按关键程度从大到小排序
Keyword arguments:
vertex_source -- 二维列表,子列表代表句子,子列表的元素是单词,这些单词用来构造pagerank中的节点
edge_source -- 二维列表,子列表代表句子,子列表的元素是单词,根据单词位置关系构造pagerank中的边
window -- 一个句子中相邻的window个单词,两两之间认为有边
pagerank_config -- pagerank的设置
"""
sorted_words = []
word_index = {}
index_word = {}
_vertex_source = vertex_source
words_number = 0
for word_list in _vertex_source:
for word in word_list:
if not word in word_index:
word_index[word] = words_number
index_word[words_number] = word
words_number += 1
graph = np.zeros((words_number, words_number))
for word_list in _vertex_source:
for w1, w2 in combine(word_list, window):
if w1 in word_index and w2 in word_index:
index1 = word_index[w1]
index2 = word_index[w2]
graph[index1][index2] = 1.0
graph[index2][index1] = 1.0
debug('graph:\n', graph)
nx_graph = nx.from_numpy_matrix(graph)
scores = nx.pagerank(nx_graph, **pagerank_config) # this is a dict
sorted_scores = sorted(scores.items(), key = lambda item: item[1], reverse=True)
for index, score in sorted_scores:
item = AttrDict(word=index_word[index], weight=score)
sorted_words.append(item)
return sorted_words
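combine above is one of the project's own utilities. A plausible stand-in (an assumption about its behavior, not the original implementation) that yields every pair of words lying within the same window of a sentence:

def combine_sketch(word_list, window=2):
    """Yield (w1, w2) for every pair of words at most `window - 1` positions apart."""
    window = max(window, 2)
    for i, w1 in enumerate(word_list):
        for w2 in word_list[i + 1:i + window]:
            yield w1, w2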
Example 13: sort_sentences
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def sort_sentences(sentences, words, sim_func = get_similarity, pagerank_config = {'alpha': 0.85,}):
"""将句子按照关键程度从大到小排序
Keyword arguments:
sentences -- 列表,元素是句子
words -- 二维列表,子列表和sentences中的句子对应,子列表由单词组成
sim_func -- 计算两个句子的相似性,参数是两个由单词组成的列表
pagerank_config -- pagerank的设置
"""
sorted_sentences = []
_source = words
sentences_num = len(_source)
graph = np.zeros((sentences_num, sentences_num))
    for x in range(sentences_num):
        for y in range(x, sentences_num):
similarity = sim_func( _source[x], _source[y] )
graph[x, y] = similarity
graph[y, x] = similarity
nx_graph = nx.from_numpy_matrix(graph)
scores = nx.pagerank(nx_graph, **pagerank_config) # this is a dict
sorted_scores = sorted(scores.items(), key = lambda item: item[1], reverse=True)
for index, score in sorted_scores:
item = AttrDict(index=index, sentence=sentences[index], weight=score)
sorted_sentences.append(item)
return sorted_sentences
Example 14: score
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def score(self, sg, topic, personalization=None): # get_pagerank_probability_2
"""
:param sg: egocentric subgraph around topic in networkx format
    :param topic: the topic subgraph (a networkx graph); its nodes receive the restart weight
    :param personalization: Dictionary with key of a node and value of a node weight. If none is specified, defaults to the linear weight of each node's 'topic_distance' feature, i.e. its distance from the topic for which the subgraph was generated.
:return: Dictionary of probabilities keyed by node
"""
if sg.is_multigraph():
sg = self.multigraph_to_digraph(sg)
    if personalization is None:
        personalization = {}
        for node in sg.nodes():
            # personalized[node] = linear_weight(sg.nodes[node]['topic_distance'], distance_degradation)
            # INSERT WEIGHTING FUNCTION BELOW
            personalization[node] = self.linear_weight(sg.nodes[node]['topic_distance'])
    # Build topic weights so the topic nodes start with all the weight and every jump returns to the topic
    topic_weight = 1 / float(len(topic.nodes()))
    topic_weighted = {k: topic_weight if k in topic.nodes() else 0 for k in sg.nodes()}
# return the pagerank scores
return nx.pagerank(sg,
personalization=personalization,
weight='confidence',
nstart=topic_weighted,
dangling=topic_weighted)
Example 15: _textrank
# Required imports: import networkx [as alias]
# Or: from networkx import pagerank [as alias]
def _textrank(matrix):
'''returns principal eigenvector
of the adjacency matrix'''
graph = nx.from_numpy_matrix(matrix)
return nx.pagerank(graph)
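A quick usage sketch for the helper above, with a made-up adjacency matrix (recent NetworkX releases expose from_numpy_array, while older ones use the from_numpy_matrix call shown in the example):

import numpy as np
import networkx as nx

# toy symmetric similarity matrix for three sentences
matrix = np.array([[0.0, 0.4, 0.1],
                   [0.4, 0.0, 0.7],
                   [0.1, 0.7, 0.0]])

graph = nx.from_numpy_array(matrix)  # from_numpy_matrix on older NetworkX versions
scores = nx.pagerank(graph)          # dict mapping sentence index to its score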