This page collects typical usage examples of the Python method wsd.database.MySQLDatabase._create_connection. If you have been wondering what MySQLDatabase._create_connection does, how to use it, and what calling code looks like in practice, the curated examples below should help. You can also explore the enclosing class wsd.database.MySQLDatabase for further usage examples.
Below, 15 code examples of MySQLDatabase._create_connection are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
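Before the individual examples, here is a minimal sketch of the pattern they all share: construct a MySQLDatabase from the connection constants and obtain a raw connection via _create_connection. The conf import mirrors Example 11; the SELECT 1 query and the try/finally close are illustrative assumptions, not part of the original code.
from wsd.database import MySQLDatabase
from conf import DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME

db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
# _create_connection is private by convention, but the examples below use it
# to obtain a MySQLdb-style connection object.
conn = db._create_connection()
try:
    cursor = conn.cursor()
    cursor.execute('SELECT 1;')
    print cursor.fetchall()
finally:
    conn.close()  # not every example below closes the connection explicitly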
Example 1: pickle_vis_data_pandas
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def pickle_vis_data_pandas():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
df = pd.read_sql('select source_article_id, target_article_id, target_y_coord_1920_1080, target_x_coord_1920_1080, visual_region from link_features', conn)
print len(df)
no_dup = df.sort(['source_article_id','target_y_coord_1920_1080','target_x_coord_1920_1080']).groupby(["source_article_id", "target_article_id"]).first()
print len(no_dup)
feature = no_dup.loc[no_dup['visual_region']=='lead']
print len(feature)
feature.reset_index(inplace=True)
# presumably the lead region is written out like the others (Example 13 reads lead.tsv)
feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/lead.tsv', sep='\t', index=False)
feature = no_dup.loc[no_dup['visual_region']=='infobox']
print len(feature)
feature.reset_index(inplace=True)
feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/infobox.tsv', sep='\t', index=False)
feature = no_dup.loc[no_dup['visual_region']=='navbox']
print len(feature)
feature.reset_index(inplace=True)
feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/navbox.tsv', sep='\t', index=False)
feature = no_dup.loc[no_dup['visual_region']=='left-body']
print len(feature)
feature.reset_index(inplace=True)
feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/left-body.tsv', sep='\t',index=False)
feature = no_dup.loc[no_dup['visual_region']=='body']
print len(feature)
feature.reset_index(inplace=True)
feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/body.tsv', sep='\t',index=False)
Example 2: build_links_position_table
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def build_links_position_table():
"""creates up the basic database structure
"""
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
connection = db._create_connection()
cursor = connection.cursor()
cursor.execute('CREATE TABLE `redirects_candidates` ('
'`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'`source_article_id` BIGINT UNSIGNED NOT NULL,'
'`target_article_id` BIGINT UNSIGNED NULL,'
'`target_article_name` VARCHAR(1000) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
' target_position_in_text INT UNSIGNED NOT NULL,'
' target_position_in_text_only INT UNSIGNED,'
' target_position_in_section INT UNSIGNED,'
' target_position_in_section_in_text_only INT UNSIGNED,'
' section_name VARCHAR(1000) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
' section_number INT UNSIGNED,'
' target_position_in_table INT UNSIGNED,'
' table_number INT UNSIGNED,'
' table_css_class VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin,'
' table_css_style VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin,'
' target_x_coord_1920_1080 INT UNSIGNED DEFAULT NULL,'
' target_y_coord_1920_1080 INT UNSIGNED DEFAULT NULL ,'
'INDEX(`target_article_id`),'
'INDEX(`source_article_id`)'
') ENGINE=InnoDB;')
connection.close()
Example 3: pickle_correlations_zeros
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def pickle_correlations_zeros():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
print 'read'
df = pd.read_sql('select source_article_id, target_article_id, IFNULL(counts, 0) as counts from link_features group by source_article_id, target_article_id', conn)
print 'group'
article_counts = df.groupby(by=["target_article_id"])['counts'].sum().reset_index()
print 'write to file'
article_counts[["target_article_id","counts"]].to_csv(TMP+'article_counts.tsv', sep='\t', index=False)
Example 4: build_page_length_table
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def build_page_length_table():
"""creates up the basic database structure
"""
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
connection = db._create_connection()
cursor = connection.cursor()
cursor.execute('CREATE TABLE `redirects_candidates_page_length` ('
'`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY,'
' page_length_1920_1080 INT UNSIGNED DEFAULT NULL'
') ENGINE=InnoDB;')
connection.close()
Example 5: build_table
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def build_table():
"""creates up the basic database structure
"""
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
connection = db._create_connection()
cursor = connection.cursor()
cursor.execute('CREATE TABLE `table_css_class` ('
'`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'`source_article_id` BIGINT UNSIGNED NOT NULL,'
' css_class VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
'INDEX(`source_article_id`)'
') ENGINE=InnoDB;')
connection.close()
Example 6: correlations
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def correlations(network_name):
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
# wikipedia graph structural statistics
results = None
try:
results = cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
results = cursor.fetchall()
except MySQLdb.Error, e:
print ('error retrieving clickstream counts %s (%d)' % (e.args[1], e.args[0]))
Example 7: rbo
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def rbo():
print 'loading'
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
sm = []
try:
cursor.execute('select curr_id, sum(counts) as counts_sum, curr_title from clickstream_derived where link_type_derived=%s group by curr_id order by counts_sum desc limit 10000;', ("entry-sm",))
result = cursor.fetchall()
for row in result:
record = {}
record['curr_id']= row[0]
record['counts_sum'] = row[1]
record['curr_title'] = row[2]
sm.append(row[0])
except MySQLdb.Error, e:
print e
Example 8: pickle_correlations_zeros_january
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def pickle_correlations_zeros_january():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
print 'read'
df = pd.read_sql('select source_article_id, target_article_id from link_features', conn)
print 'loaded links'
df2 = pd.read_sql('select prev_id, curr_id, counts from clickstream_derived_en_201501 where link_type_derived= "internal-link";', conn)
print 'loaded counts'
result = pd.merge(df, df2, how='left', left_on = ['source_article_id', 'target_article_id'], right_on = ['prev_id', 'curr_id'])
print 'merged counts'
print result
article_counts = result.groupby(by=["target_article_id"])['counts'].sum().reset_index()
article_counts['counts'].fillna(0.0, inplace=True)
print article_counts
print 'write to file'
article_counts[["target_article_id","counts"]].to_csv(TMP+'january_article_counts.tsv', sep='\t', index=False)
Example 9: plot_degree_filtered_sql
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def plot_degree_filtered_sql():
print 'before select'
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
cursor.execute('SELECT source_article_id, target_article_id FROM link_occurences where source_article_id in '
' (select distinct prev_id from clickstream_derived_internal_links);')
result = cursor.fetchall()
network = Graph()
print 'after select'
print 'result len'
print len(result)
for i, link in enumerate(result):
if i % 1000000==0:
print i, len(result)
network.add_edge(link[0], link[1])
# filter all nodes that have no edges
print 'filter nodes with degree zero graph tool specific code'
network = GraphView(network, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print 'before save'
network.save("output/wikipedianetworkfilteredwithtransitions_prev_id.xml.gz")
print 'done'
cursor.execute('SELECT source_article_id, target_article_id FROM link_occurences where target_article_id in '
' (select distinct curr_id from clickstream_derived_internal_links);')
result = cursor.fetchall()
network = Graph()
print 'after select'
print 'result len'
print len(result)
for i, link in enumerate(result):
if i % 1000000==0:
print i, len(result)
network.add_edge(link[0], link[1])
# filter all nodes that have no edges
print 'filter nodes with degree zero graph tool specific code'
network = GraphView(network, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print 'before save'
network.save("output/wikipedianetworkfilteredwithtransitions_curr_id.xml.gz")
print 'done'
Example 10: print_table
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def print_table():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
df = pd.read_sql('select source_article_id, target_article_id, rel_degree, rel_in_degree, rel_out_degree, '
'rel_page_rank, rel_kcore, target_x_coord_1920_1080, target_y_coord_1920_1080, visual_region, '
'IFNULL(counts, 0) as counts from link_features order by source_article_id, target_y_coord_1920_1080, target_x_coord_1920_1080', conn)
print "dup"
#no_dup = df.sort(['source_article_id','target_y_coord_1920_1080','target_x_coord_1920_1080']).groupby(["source_article_id", "target_article_id"]).first()
no_dup = df.groupby(["source_article_id", "target_article_id"]).first()
no_dup = no_dup.reset_index()
print "no dup"
del df
#print no_dup
df_top = pd.read_sql("select source_article_id, target_article_id, sim as topic_similarity from topic_similarity", conn)
print "loaded topic similarity"
topDF = df_top.groupby("source_article_id", as_index=False)["topic_similarity"].median()
#print topDF
print "computed per-article topic similarity medians"
topDF.columns = ["source_article_id", "topic_similarity_article_median"]
#print topDF
print "renamed median column"
df_top = df_top.merge(topDF, on="source_article_id")
#print df_top[(df_top['topic_similarity_article_median'] >0)]
print "merged topic similarity medians"
df_sem = pd.read_sql("select source_article_id, target_article_id, sim as sem_similarity from semantic_similarity", conn)
print "loaded semantic similarity"
semDF = df_sem.groupby("source_article_id", as_index=False)["sem_similarity"].median()
#rename
print "computed per-article semantic similarity medians"
semDF.columns = ["source_article_id", "sem_similarity_article_median"]
print "renamed median column"
#print df_top
df_sem = df_sem.merge(semDF, on="source_article_id")
#print len(df_sem)
print "merged semantic similarity medians"
df1 = no_dup.merge(df_sem[['source_article_id', 'sem_similarity', 'sem_similarity_article_median']], on="source_article_id")
#print no_dup
del df_sem, semDF
df = no_dup.merge(df_top[['source_article_id', 'topic_similarity', 'topic_similarity_article_median']], on="source_article_id")
print "merged structural and similarity features"
del no_dup
del df_top, topDF
table = ""
table += resultTableLine (df, "src_degr > target_degr", "df.rel_degree > 0")
table += resultTableLine (df, "src_degr <= target_degr", "df.rel_degree <= 0")
table += resultTableLine (df, "src_in_degr > target_in_degr", "df.rel_in_degree > 0")
table += resultTableLine (df, "src_in_degr <= target_in_degr", "df.rel_in_degree <= 0")
table += resultTableLine (df, "src_out_degr > target_out_degr", "df.rel_out_degree > 0")
table += resultTableLine (df, "src_out_degr <= target_out_degr", "df.rel_out_degree <= 0")
table += resultTableLine (df, "src_kcore > target_kcore", "df.rel_kcore > 0")
table += resultTableLine (df, "src_kcore <= target_kcore", "df.rel_kcore <= 0")
table += resultTableLine (df, "src_page_rank > target_page_rank", "df.rel_page_rank > 0")
table += resultTableLine (df, "src_page_rank <= target_page_rank", "df.rel_page_rank <= 0")
table += resultTableLine (df1, "text_sim > median(text_sim) of page", "df.sem_similarity > df.sem_similarity_article_median")
table += resultTableLine (df1, "text_sim <= median(text_sim) of page", "df.sem_similarity <= df.sem_similarity_article_median")
table += resultTableLine (df, "topic_sim > median(topic_sim) of page", "df.topic_similarity > df.topic_similarity_article_median")
table += resultTableLine (df, "topic_sim <= median(topic_sim) of page", "df.topic_similarity <= df.topic_similarity_article_median")
table += resultTableLine (df, "left third of screen", "df.target_x_coord_1920_1080 <= 360")
table += resultTableLine (df, "middle third of screen", "(df.target_x_coord_1920_1080 > 360) & (df.target_x_coord_1920_1080 <= 720)")
table += resultTableLine (df, "right third of screen", "df.target_x_coord_1920_1080 > 720")
table += resultTableLine (df, "position = lead", "df.visual_region == 'lead'")
table += resultTableLine (df, "position = body", "(df.visual_region == 'body') | (df.visual_region == 'left-body')")
table += resultTableLine (df, "position = navbox", "df.visual_region == 'navbox'")
#table += resultTableLine (df, "position = left-body", "df.visual_region == 'left-body'")
table += resultTableLine (df, "position = infobox", "df.visual_region == 'infobox'")
print table
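Example 10 relies on a resultTableLine helper that the excerpt does not show. The following is a hedged sketch of what such a helper could look like, assuming the condition strings (which reference a frame named df) are evaluated with eval and that each line reports what share of the matching links received at least one click; the LaTeX-style row format is likewise an assumption.
def resultTableLine(df, label, condition):
    # Hypothetical reconstruction: the condition string references `df`,
    # so it can be eval'd in a scope where the parameter is named df.
    subset = df[eval(condition)]
    clicked = (subset['counts'] > 0).sum()
    total = len(subset)
    share = float(clicked) / total if total else 0.0
    return "%s & %d & %d & %.4f \\\\\n" % (label, clicked, total, share)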
Example 11: MySQLDatabase
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
from wsd.database import MySQLDatabase
from graph_tool.all import *
from conf import *
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
cursor.execute('SELECT source_article_id, target_article_id FROM link_occurences;')
result = cursor.fetchall()
wikipedia = Graph()
for link in result:
wikipedia.add_edge(link[0], link[1])
# filter all nodes that have no edges
wikipedia = GraphView(wikipedia, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print "clust"
wikipedia.vertex_properties["local_clust"] = local_clustering(wikipedia)
print "page_rank"
wikipedia.vertex_properties["page_rank"] = pagerank(wikipedia)
print "eigenvector_centr"
eigenvalue, eigenvectorcentr = eigenvector(wikipedia)
wikipedia.vertex_properties["eigenvector_centr"] = eigenvectorcentr
print "kcore"
wikipedia.vertex_properties["kcore"] = kcore_decomposition(wikipedia)
Example 12: correlations_zeros
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def correlations_zeros(labels, consider_zeros=True, clickstream_data='', struct=False):
#load network
print struct
name = '_'.join(labels)
wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering_"+name+".xml.gz")
#read counts with zeros
if consider_zeros:
article_counts = pd.read_csv(TMP+clickstream_data+'article_counts.tsv', sep='\t')
print TMP+clickstream_data+'article_counts.tsv'
correlations_weighted_pagerank = {}
for label in labels:
if struct:
label = label[7:]
for damping in [0.8,0.85,0.9]:
key = label+"_page_rank_weighted_"+str(damping)
pagerank = wikipedia.vertex_properties[key]
page_rank_values = list()
counts = list()
correlations_values = {}
for index, row in article_counts.iterrows():
counts.append(float(row['counts']))
page_rank_values.append(pagerank[wikipedia.vertex(int(row['target_article_id']))])
print 'pearson'
p = pearsonr(page_rank_values, counts)
print p
correlations_values['pearson']=p
print 'spearmanr'
s = spearmanr(page_rank_values, counts)
print s
correlations_values['spearmanr']=s
print 'kendalltau'
k = kendalltau(page_rank_values, counts)
print k
correlations_values['kendalltau']=k
correlations_weighted_pagerank[key]=correlations_values
write_pickle(HOME+'output/correlations/'+clickstream_data+'correlations_pagerank_'+name+'.obj', correlations_weighted_pagerank)
else:
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
# wikipedia graph structural statistics
results = None
try:
if clickstream_data != '':
results = cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
results = cursor.fetchall()
else:
results = cursor.execute('select c.curr_id, sum(c.counts) as counts from clickstream_derived_en_201501 c where c.link_type_derived= %s group by c.curr_id;', ("internal-link",))
results = cursor.fetchall()
except MySQLdb.Error, e:
print ('error retrieving clickstream counts %s (%d)' % (e.args[1], e.args[0]))
print 'after sql load'
correlations_weighted_pagerank = {}
for label in labels:
if struct:
label = label[7:]
for damping in [0.8,0.85,0.9]:
key = label+"_page_rank_weighted_"+str(damping)
pagerank = wikipedia.vertex_properties[key]
correlations={}
counts=[]
page_rank_values=[]
for row in results:
counts.append(float(row[1]))
page_rank_values.append(pagerank[wikipedia.vertex(int(row[0]))])
print 'pearson'
p = pearsonr(page_rank_values, counts)
print p
correlations['pearson']=p
print 'spearmanr'
s= spearmanr(page_rank_values, counts)
print s
correlations['spearmanr']=s
print 'kendalltau'
k= kendalltau(page_rank_values, counts)
print k
correlations['kendalltau']=k
correlations_weighted_pagerank[key]=correlations
write_pickle(HOME+'output/correlations/'+clickstream_data+'correlations_pagerank_without_zeros'+name+'.obj', correlations_weighted_pagerank)
Example 13: weighted_pagerank_hyp_engineering
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def weighted_pagerank_hyp_engineering(labels):
#read vocab, graph
graph = read_pickle(SSD_HOME+"pickle/graph")
print "loaded graph"
values = read_pickle(SSD_HOME+"pickle/values")
values_kcore = read_pickle(SSD_HOME+"pickle/values_kcore")
# transform kcore values to model moving out of the kcore
values_kcore = [1./np.sqrt(float(x)) for x in values_kcore]
print 'kcore values transformation'
#sem_sim_hyp = read_pickle(SSD_HOME+"pickle/sem_sim_hyp")
#print "sem_sim_hyp values"
#lead_hyp = read_pickle(SSD_HOME+"pickle/lead_hyp")
#infobox_hyp = read_pickle(SSD_HOME+"pickle/infobox_hyp")
#left_body_hyp = read_pickle(SSD_HOME+"pickle/left-body_hyp")
#print "gamma values"
vocab = read_pickle(SSD_HOME+"pickle/vocab")
print "loaded vocab"
state_count = len(vocab)
states = vocab.keys()
shape = (state_count, state_count)
hyp_structural = csr_matrix((values, (graph[0], graph[1])),
shape=shape, dtype=np.float)
hyp_kcore = csr_matrix((values_kcore, (graph[0], graph[1])),
shape=shape, dtype=np.float)
print "hyp_kcore"
del graph
del values_kcore
print "after delete"
#read sem sim from db and create hyp
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
print 'read'
df = pd.read_sql('select source_article_id, target_article_id, sim from semantic_similarity', conn)
print 'map sem sim'
sem_sim_hyp_i = map_to_hyp_indicies(vocab, df['source_article_id'])
sem_sim_hyp_j = map_to_hyp_indicies(vocab, df['target_article_id'])
hyp_sem_sim = csr_matrix((df['sim'].values, (sem_sim_hyp_i, sem_sim_hyp_j)),
shape=shape, dtype=np.float)
print 'done map sem sim'
print hyp_sem_sim.shape
del sem_sim_hyp_i
del sem_sim_hyp_j
del df
#read vis from csv and create hyp
lead = pd.read_csv(TMP+'lead.tsv',sep='\t')
lead_i = map_to_hyp_indicies(vocab, lead['source_article_id'])
lead_j = map_to_hyp_indicies(vocab, lead['target_article_id'])
lead_v = np.ones(len(lead_i), dtype=np.float)
hyp_lead = csr_matrix((lead_v, (lead_i, lead_j)),
shape=shape, dtype=np.float)
print 'done map lead'
print hyp_lead.shape
del lead
del lead_i
del lead_j
del lead_v
infobox = pd.read_csv(TMP+'infobox.tsv',sep='\t')
infobox_i = map_to_hyp_indicies(vocab, infobox['source_article_id'])
infobox_j = map_to_hyp_indicies(vocab, infobox['target_article_id'])
infobox_v = np.ones(len(infobox_i), dtype=np.float)
hyp_infobox = csr_matrix((infobox_v, (infobox_i, infobox_j)),
shape=shape, dtype=np.float)
print 'done map infobox'
print hyp_infobox.shape
del infobox
del infobox_i
del infobox_j
del infobox_v
left_body = pd.read_csv(TMP+'left-body.tsv',sep='\t')
left_body_i = map_to_hyp_indicies(vocab, left_body['source_article_id'])
left_body_j = map_to_hyp_indicies(vocab, left_body['target_article_id'])
left_body_v = np.ones(len(left_body_i), dtype=np.float)
hyp_left_body = csr_matrix((left_body_v, (left_body_i, left_body_j)),
shape=shape, dtype=np.float)
print 'done map left-body'
print hyp_left_body.shape
del left_body
del left_body_i
#.........the rest of the code is omitted here.........
Example 14: weighted_pagerank
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def weighted_pagerank():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
cursor = conn.cursor()
cursor.execute('SELECT source_article_id, target_article_id, occ FROM link_occurences;')
result = cursor.fetchall()
wikipedia = Graph()
eprop = wikipedia.new_edge_property("int")
for link in result:
e = wikipedia.add_edge(link[0], link[1])
eprop[e] = link[2]
# filter all nodes that have no edges
wikipedia = GraphView(wikipedia, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print "page_rank_weighted"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank_weighted"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, weight=eprop,damping=damping)
print "page_rank"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, damping=damping)
wikipedia.save("output/weightedpagerank/wikipedianetwork_link_occ.xml.gz")
print 'link_occ done'
cursor.execute('SELECT source_article_id, target_article_id, sim FROM semantic_similarity group by '
'source_article_id, target_article_id;')
result = cursor.fetchall()
wikipedia = Graph()
eprop = wikipedia.new_edge_property("double")
for link in result:
e = wikipedia.add_edge(link[0], link[1])
eprop[e] = link[2]
# filter all nodes that have no edges
print 'filter nodes graph tool specific code'
wikipedia = GraphView(wikipedia, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print "page_rank_weighted"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank_weighted"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, weight=eprop,damping=damping)
print "page_rank"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, damping=damping)
wikipedia.save("output/weightedpagerank/wikipedianetwork_sem_sim_distinct_links.xml.gz")
print 'sem sim distinct links done'
cursor.execute('SELECT source_article_id, target_article_id, sim FROM semantic_similarity;')
result = cursor.fetchall()
wikipedia = Graph()
eprop = wikipedia.new_edge_property("double")
for link in result:
e = wikipedia.add_edge(link[0], link[1])
eprop[e] = link[2]
# filter all nodes that have no edges
wikipedia = GraphView(wikipedia, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )
print "page_rank_weighted"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank_weighted"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, weight=eprop,damping=damping)
print "page_rank"
for damping in [0.8, 0.85, 0.9 ,0.95]:
print damping
key = "page_rank"+str(damping)
wikipedia.vertex_properties[key] = pagerank(wikipedia, damping=damping)
wikipedia.save("output/weightedpagerank/wikipedianetwork_sem_sim.xml.gz")
print 'sem_sim done'
Example 15: get_redirecsfromXML
# Required import: from wsd.database import MySQLDatabase [as alias]
# Alternatively: from wsd.database.MySQLDatabase import _create_connection [as alias]
def get_redirecsfromXML(self, dump_date):
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
df = pd.read_sql('select * from redirects', conn)
return df.set_index('source_article_name')['target_article_name'].to_dict()
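The returned dictionary maps source_article_name to target_article_name, so a redirect is resolved with a plain lookup. A hypothetical usage, assuming resolver is an instance of the enclosing class (note that the dump_date argument is unused in the body shown):
redirects = resolver.get_redirecsfromXML('20150205')
print redirects.get('Obama')  # the redirect target, e.g. 'Barack Obama', if such an entry exists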