当前位置: 首页>>代码示例>>Python>>正文


Python MySQLDatabase.get_work_view方法代码示例

本文整理汇总了Python中wsd.database.MySQLDatabase.get_work_view方法的典型用法代码示例。如果您正苦于以下问题:Python MySQLDatabase.get_work_view方法的具体用法?Python MySQLDatabase.get_work_view怎么用?Python MySQLDatabase.get_work_view使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在wsd.database.MySQLDatabase的用法示例。


在下文中一共展示了MySQLDatabase.get_work_view方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: req

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def req():
    # Get URLs from a text file, remove white space.
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    articles = db_worker_view.retrieve_all_articles()
    #articles = db_worker_view.retrieve_all_articles_questionmark()
    # measure time
    start = time.clock()
    start_time_iteration = start
    iteration_number = 483
    for i, article in enumerate(articles):
        # print some progress
        if i % 10000 == 0:
            #print time for the iteration
            seconds = time.clock() - start_time_iteration
            m, s = divmod(seconds, 60)
            h, m = divmod(m, 60)
            print "Number of crawled articles: %d. Total time for last iteration of 10000 articles: %d:%02d:%02d" % (i, h, m, s)
            start_time_iteration = time.clock()
            iteration_number += 1

        # Thread pool.
        # Blocks other threads (more than the set limit).
        pool.acquire(blocking=True)
        # Create a new thread.
        # Pass each URL (i.e. u parameter) to the worker function.
        t = threading.Thread(target=worker, args=(MEDIAWIKI_API_ENDPOINT+urllib.quote(article['title'])+'/'+str(article['rev_id']), article, iteration_number))

        # Start the newly create thread.
        t.start()
    seconds = time.clock() - start
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    print "Total time: %d:%02d:%02d" % (h, m, s)
开发者ID:linksuccess,项目名称:linksuccess,代码行数:36,代码来源:crawler.py

示例2: _evaluate_disambiguations

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
    def _evaluate_disambiguations(self):
        INPUT_FILE = self.read_path('Please enter the path of the samples file [.xml]', default='./tmp/samples.xml')
        LOGGING_PATH = self.read_path('Please enter the path of the logging file [.log]', default='./tmp/evaluation3.log', must_exist=False)
        
        CONTINUE = self.read_yes_no('This process might take from several minutes to several hours.\nDo you want to continue?')

        if not CONTINUE:
            print '# Aborting...'
            return

        print '# Starting evaluation...'
        # setup logging
        LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
        logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')

        # connecting to db
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        work_view = db.get_work_view()

        # measure time
        start = time.clock()

        evaluator = Evaluator(INPUT_FILE, work_view)
        result = evaluator.evaluate_disambiguations()

        seconds = round (time.clock() - start)
        print 'Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60)
        print 'Evaluation done! - precision: %d%%, recall: %d%%' % (round(result['precision']*100), round(result['recall']*100))
开发者ID:plaufer,项目名称:wikiwsd,代码行数:30,代码来源:evaluator.py

示例3: run

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
    def run(self):
        self.print_title('This is the interactive runner program')
        self.create_tmp_if_not_exists()

        INPUT_FILE = self.read_path('Please enter the path of the input file [.txt]', default='./tmp/input.txt')
        OUTPUT_FILE = self.read_path('Please enter the path of the output file [.html]', default='./tmp/output.html', must_exist=False)
        LOGGING_PATH = self.read_path('Please enter the path of the logging file [.log]', default='./tmp/runner.log', must_exist=False)


        print '# Starting runner...'
        # setup logging
        LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
        logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')

        # measure time
        start = time.clock()

        # connect to db
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        work_view = db.get_work_view()

        # read input
        f = open(INPUT_FILE, 'r')
        text = f.read()
        text = text.replace(' ', ' ')
        f.close()

        # create dummy article
        article = {}
        article['type'] = 'article'
        article['id'] = None
        article['title'] = None
        article['text'] = text
        article['links'] = []

        # identify links
        link_detector = LinkDetector(work_view)
        link_detector.detect_links(article)
        # identify terms
        #term_identifier = TermIdentifier()
        #article = term_identifier.identify_terms(text)

        # find possible meanings
        meaning_finder = MeaningFinder(work_view)
        meaning_finder.find_meanings(article)

        # calculate relatedness
        relatedness_calculator = RelatednessCalculator(work_view)

        # decide for meaning
        decider = Decider(relatedness_calculator)
        decider.decide(article)

        # output results
        html_outputter = HTMLOutputter()
        html_outputter.output(article, OUTPUT_FILE)

        seconds = round (time.clock() - start)
        print 'Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60)
开发者ID:plaufer,项目名称:wikiwsd,代码行数:61,代码来源:runner.py

示例4: pickle_aggregated_counts_distribution

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def pickle_aggregated_counts_distribution():

    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    cursor = db_worker_view._cursor
    results = {}
    try:
        cursor.execute('select sum(counts) from clickstream_derived_internal_links group by prev_id;')
        result = cursor.fetchall()
        results['source_article']=result
    except MySQLdb.Error, e:
        print e
开发者ID:linksuccess,项目名称:linksuccess,代码行数:14,代码来源:click_distributions.py

示例5: pickle_category_counts_distribution

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def pickle_category_counts_distribution():
    results =  {}
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    cursor = db_worker_view._cursor
    for category in ['lead', 'infobox', 'body', 'left-body', 'navbox']:
        try:
            cursor.execute('select counts from link_features where counts is not null and visual_region=%s;', (category,))
            result = cursor.fetchall()
            results[category] = result
        except MySQLdb.Error, e:
            print e
开发者ID:linksuccess,项目名称:linksuccess,代码行数:14,代码来源:click_distributions.py

示例6: links_heatmap

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def links_heatmap():
    #http://stackoverflow.com/questions/2369492/generate-a-heatmap-in-matplotlib-using-a-scatter-data-set
    # Get URLs from a text file, remove white space.
    print 'loading'
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    coords = db_worker_view.retrieve_all_links_coords()
    print 'coord loaded'
    x=[]
    y=[]

    page_lenghts = db_worker_view.retrieve_all_page_lengths()
    print 'lenghts loaded'
    for coord in coords:
        x_normed = float(coord['x'])/float(1920)
        y_normed = float(coord['y'])/float(page_lenghts[coord['source_article_id']])
        if  x_normed <=1.0 and y_normed <=1.0:
            x.append(x_normed)
            y.append(y_normed)



    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
    extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]

    fig_size = (2.4, 2)
    #fig_size = (3.5, 3)
    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap, extent=extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Log Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_lognormed_self_loop.pdf')

    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap , extent=extent, origin='upper', norm=Normalize(),cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_normed_self_loop.pdf')

    print "done"
开发者ID:linksuccess,项目名称:linksuccess,代码行数:52,代码来源:heatmaps.py

示例7: pickle_redirects_ids

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def pickle_redirects_ids():
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_work_view = db.get_work_view()

    redirects_list_id = []
    with open(HOME+"data/candidate_articles.tsv") as f:
        next(f)
        for line in f:
            line = line.strip().split('\t')
            #look up id
            tmp = db_work_view.resolve_title(line[0].replace('_',' '))
            #print tmp
            if tmp is not None:
               redirects_list_id.append(tmp['id'])
    pickle.dump(redirects_list_id, open(SSD_HOME+"pickle/redirects_ids.obj", "wb"), protocol=pickle.HIGHEST_PROTOCOL)
开发者ID:trovdimi,项目名称:wikilinks,代码行数:17,代码来源:redirects_candidates.py

示例8: clicks_heatmap_total

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def clicks_heatmap_total():
    print 'loading'
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    coords = db_worker_view.retrieve_all_links_coords_clicks()
    print 'coord loaded'
    links = {}
    x = []
    y = []
    values = []
    for coord in coords:
        x_normed = float(coord['x'])/float(1920)
        y_normed = float(coord['y'])/float(coord['page_length'])
        if x_normed <=1.0 and y_normed <=1.0:
            x.append(x_normed)
            y.append(y_normed)
            values.append(float(coord['counts']))

    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100, weights=values)
    extent = [xedges[0], xedges[-1], yedges[-1], yedges[0] ]


    fig_size = (2.4, 2)

    plt.clf()
    plt.figure(figsize=fig_size)

    plt.grid(True)
    plt.imshow(heatmap , extent=extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Clicks Heatmap Log Normalized")

    plt.show()
    plt.savefig('output/clicks_heatmap_lognormed_self_loop_total.pdf')

    plt.clf()
    plt.figure(figsize=fig_size)

    plt.grid(True)
    plt.imshow(heatmap , extent=extent, origin='upper', norm=Normalize(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Clicks Heatmap Normalized")

    plt.show()
    plt.savefig('output/clicks_heatmap_normed_self_loop_total.pdf')
    print "done"
开发者ID:linksuccess,项目名称:linksuccess,代码行数:48,代码来源:heatmaps.py

示例9: export_data_unresolved

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def export_data_unresolved():

    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_work_view = db.get_work_view()
    connection = db_work_view._db_connection


    df_clickstream = pn.read_csv('/home/ddimitrov/data/enwiki201608_unresolved_redirects/2016_08_clickstream_unresolved.tsv', sep='\t', error_bad_lines=False)

    df_clickstream['prev']=df_clickstream['prev'].str.replace('_', ' ')
    df_clickstream['curr']=df_clickstream['curr'].str.replace('_', ' ')
    df_clickstream['curr_unresolved']=df_clickstream['curr_unresolved'].str.replace('_', ' ')


    df_redirects_candidates = pn.read_sql('select * from redirects_candidates_sample', connection)


    sample_unresoleved = pn.merge(df_redirects_candidates, df_clickstream, how='left', left_on= ['source_article_name','target_article_name'], right_on=['prev', 'curr_unresolved'])

    sample_unresoleved['n'].fillna(0, inplace=True)
    sample_unresoleved.to_csv('/home/ddimitrov/data/enwiki201608_unresolved_redirects/data_unresolved.tsv', sep='\t',encoding="utf-8")
开发者ID:trovdimi,项目名称:wikilinks,代码行数:23,代码来源:redirects_candidates.py

示例10: links_heatmap_rel_prob

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def links_heatmap_rel_prob():
    #http://stackoverflow.com/questions/2369492/generate-a-heatmap-in-matplotlib-using-a-scatter-data-set
    # Get URLs from a text file, remove white space.
    print 'loading'
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    coords = db_worker_view.retrieve_all_links_coords()

    x=[]
    y=[]


    page_lenghts = db_worker_view.retrieve_all_page_lengths()

    for coord in coords:
        x_normed = float(coord['x'])/float(1920)
        y_normed = float(coord['y'])/float(page_lenghts[coord['source_article_id']])
        if  x_normed <=1.0 and y_normed <=1.0:
            x.append(x_normed)
            y.append(y_normed)



    links_heatmap_hist, xedges, yedges = np.histogram2d(x, y, normed=True,  bins=100)
    links_extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]




    coords = db_worker_view.retrieve_all_links_coords_clicks()
    print 'coord loaded'
    links = {}
    x = []
    y = []
    values = []
    for coord in coords:
       try:
           v = links[coord['key']]
           links[coord['key']]+=1
       except:
           links[coord['key']]=0
    for coord in coords:
       x_normed = float(coord['x'])/float(1920)
       y_normed = float(coord['y'])/float(coord['page_length'])
       if  x_normed <=1.0 and y_normed <=1.0:
           x.append(x_normed)
           y.append(y_normed)
           if links[coord['key']]==0:
	       #x.append(x_normed)
               #y.append(y_normed)
               values.append(float(coord['counts']))
           else:
               values.append(float(coord['counts'])/float(links[coord['key']]))

    clicks_heatmap_hist, xedges, yedges = np.histogram2d(x, y, bins=100, normed=True, weights=values)
    clicks_extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]

    substraction_hist = np.subtract(clicks_heatmap_hist,links_heatmap_hist)
    #rel_prob_hist = np.divide(clicks_heatmap_hist, links_heatmap_hist)
    with np.errstate(divide='ignore', invalid='ignore'):
        rel_prob_hist = np.divide(clicks_heatmap_hist, links_heatmap_hist)
        rel_prob_hist[rel_prob_hist == np.inf] = 0
        rel_prob_hist = np.nan_to_num(rel_prob_hist)

    fig_size = (2.4, 2)

    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(substraction_hist, extent=clicks_extent, origin='upper',norm=Normalize(), cmap=plt.get_cmap('jet'))
    plt.colorbar()


    plt.show()
    plt.savefig('output/clicks-links_heatmap_normed_self_loop.pdf')


    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(rel_prob_hist , extent=clicks_extent, origin='upper', norm=Normalize(),cmap=plt.get_cmap('jet'))
    plt.colorbar()


    plt.show()
    plt.savefig('output/clicks_over_links_heatmap_normed_self_loop.pdf')


    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(substraction_hist, extent=clicks_extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()


    plt.show()
    plt.savefig('output/clicks-links_heatmap_lognormed_self_loop.pdf')
#.........这里部分代码省略.........
开发者ID:linksuccess,项目名称:linksuccess,代码行数:103,代码来源:heatmaps.py

示例11: multiple_links_heatmap

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
def multiple_links_heatmap():
    print 'loading'
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    coords = db_worker_view.retrieve_all_links_multpile_occ()
    print 'coord loaded'
    page_lenghts = db_worker_view.retrieve_all_page_lengths()
    print 'lenghts loaded'
    links = {}
    x = []
    y = []
    x_conf = []
    y_conf = []
    x_not_conf = []
    y_not_conf = []
    number_of_not_confident_clicks=0
    number_of_confident_clicks = 0
    number_of_valid_normed_links=0
    for coord in coords:
        try:
            v = links[coord['key']]
            links[coord['key']]+=1
        except:
            links[coord['key']]=0
    for coord in coords:
        x_normed = float(coord['x'])/float(1920)
        y_normed = float(coord['y'])/float(page_lenghts[coord['key'][0]])
        if  x_normed <=1.0 and y_normed <=1.0:
            x.append(x_normed)
            y.append(y_normed)
            number_of_valid_normed_links+=1
            if links[coord['key']]==0:
                x_conf.append(x_normed)
                y_conf.append(y_normed)
                number_of_confident_clicks+=1
            else:
                x_not_conf.append(x_normed)
                y_not_conf.append(y_normed)
                number_of_not_confident_clicks+=1
    print '###########'
    print number_of_confident_clicks
    print number_of_not_confident_clicks
    print number_of_valid_normed_links
    print len(coords)
    print '###########'



    heatmap, xedges, yedges = np.histogram2d(x_conf, y_conf, bins=100)
    extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]

    fig_size = (2.4, 2)
    #fig_size = (3.5, 3)
    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap, extent=extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Log Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_lognormed_self_loop_unique.pdf')

    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap , extent=extent, origin='upper', norm=Normalize(),cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_normed_self_loop_unique.pdf')

    print "unique done"

    heatmap, xedges, yedges = np.histogram2d(x_not_conf, y_not_conf, bins=100)
    extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]

    fig_size = (2.4, 2)
    #fig_size = (3.5, 3)
    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap, extent=extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Log Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_lognormed_self_loop_multiple.pdf')

    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap , extent=extent, origin='upper', norm=Normalize(),cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Normalized")
#.........这里部分代码省略.........
开发者ID:linksuccess,项目名称:linksuccess,代码行数:103,代码来源:heatmaps.py

示例12: MySQLDatabase

# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_work_view [as 别名]
from wsd.database import MySQLDatabase
from graph_tool.all import *
from conf import *
__author__ = 'dimitrovdr'


db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
db_work_view = db.get_work_view()

wikipedia = Graph()

for link in db_work_view.retrieve_all_internal_transitions():
    wikipedia.add_edge(link['from'], link['to'])
    #print 'from %s, to %s', link['from'], link['to']



#wikipedia.save("output/transitionsnetwork.xml.gz")

# filter all nodes that have no edges
transitions_network = GraphView(wikipedia, vfilt=lambda v : v.out_degree()+v.in_degree()>0 )

print "clust"
transitions_network.vertex_properties["local_clust"] = local_clustering(transitions_network)

print "page_rank"
transitions_network.vertex_properties["page_rank"] = pagerank(transitions_network)

print "eigenvector_centr"
eigenvalue, eigenvectorcentr = eigenvector(transitions_network)
transitions_network.vertex_properties["eigenvector_centr"] = eigenvectorcentr
开发者ID:trovdimi,项目名称:wikilinks,代码行数:33,代码来源:createtransitionsnetwork.py


注:本文中的wsd.database.MySQLDatabase.get_work_view方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。