当前位置: 首页>>代码示例>>Python>>正文


Python database.MySQLDatabase类代码示例

本文整理汇总了Python中wsd.database.MySQLDatabase的典型用法代码示例。如果您正苦于以下问题:Python MySQLDatabase类的具体用法?Python MySQLDatabase怎么用?Python MySQLDatabase使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MySQLDatabase类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: table_parser

    def table_parser(self, file_name, root):

        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        db_build_view = db.get_build_view()

        cursor = db_build_view._cursor

        # setup logging
        LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
        LOGGING_PATH = 'tmp/tableclasses-dbinsert.log'
        logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')

        html_parser = WikipediaHTMLTableParser()
        zip_file_path = os.path.join(root, file_name)
        html = self.zip2html(zip_file_path)
        html_parser.feed(html.decode('utf-8'))
        source_article_id = file_name.split('_')[1]
        try:
            fed_parser = WikipediaFedTextParser(html_parser.get_data())
            table_classes = fed_parser.table_classes(None)
            table_classes = list(set(table_classes))
            for table_class in table_classes:
               self.insert_table_class(source_article_id, table_class, cursor)
        except KeyError:
            db_build_view._db_connection.rollback()
            logging.error('KeyError FedTextParser source article id: %s ' % source_article_id)
        db_build_view.commit()
        db_build_view.reset_cache()
开发者ID:trovdimi,项目名称:wikilinks,代码行数:28,代码来源:tableclassinserter.py

示例2: req

def req():
    # Get URLs from a text file, remove white space.
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    articles = db_worker_view.retrieve_all_articles()
    #articles = db_worker_view.retrieve_all_articles_questionmark()
    # measure time
    start = time.clock()
    start_time_iteration = start
    iteration_number = 483
    for i, article in enumerate(articles):
        # print some progress
        if i % 10000 == 0:
            #print time for the iteration
            seconds = time.clock() - start_time_iteration
            m, s = divmod(seconds, 60)
            h, m = divmod(m, 60)
            print "Number of crawled articles: %d. Total time for last iteration of 10000 articles: %d:%02d:%02d" % (i, h, m, s)
            start_time_iteration = time.clock()
            iteration_number += 1

        # Thread pool.
        # Blocks other threads (more than the set limit).
        pool.acquire(blocking=True)
        # Create a new thread.
        # Pass each URL (i.e. u parameter) to the worker function.
        t = threading.Thread(target=worker, args=(MEDIAWIKI_API_ENDPOINT+urllib.quote(article['title'])+'/'+str(article['rev_id']), article, iteration_number))

        # Start the newly create thread.
        t.start()
    seconds = time.clock() - start
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    print "Total time: %d:%02d:%02d" % (h, m, s)
开发者ID:linksuccess,项目名称:linksuccess,代码行数:34,代码来源:crawler.py

示例3: _evaluate_disambiguations

    def _evaluate_disambiguations(self):
        INPUT_FILE = self.read_path('Please enter the path of the samples file [.xml]', default='./tmp/samples.xml')
        LOGGING_PATH = self.read_path('Please enter the path of the logging file [.log]', default='./tmp/evaluation3.log', must_exist=False)
        
        CONTINUE = self.read_yes_no('This process might take from several minutes to several hours.\nDo you want to continue?')

        if not CONTINUE:
            print '# Aborting...'
            return

        print '# Starting evaluation...'
        # setup logging
        LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
        logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')

        # connecting to db
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        work_view = db.get_work_view()

        # measure time
        start = time.clock()

        evaluator = Evaluator(INPUT_FILE, work_view)
        result = evaluator.evaluate_disambiguations()

        seconds = round (time.clock() - start)
        print 'Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60)
        print 'Evaluation done! - precision: %d%%, recall: %d%%' % (round(result['precision']*100), round(result['recall']*100))
开发者ID:plaufer,项目名称:wikiwsd,代码行数:28,代码来源:evaluator.py

示例4: build_links_position_table

def build_links_position_table():
    """creates up the basic database structure
    """
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    connection = db._create_connection()
    cursor = connection.cursor()

    cursor.execute('CREATE TABLE `redirects_candidates` ('
                      '`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,'
                      '`source_article_id` BIGINT UNSIGNED NOT NULL,'
                      '`target_article_id` BIGINT UNSIGNED NULL,'
                      '`target_article_name` VARCHAR(1000) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
                      ' target_position_in_text INT UNSIGNED NOT NULL,'
                      ' target_position_in_text_only INT UNSIGNED,'
                      ' target_position_in_section INT UNSIGNED,'
                      ' target_position_in_section_in_text_only INT UNSIGNED,'
                      ' section_name VARCHAR(1000) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
                      ' section_number INT UNSIGNED,'
                      ' target_position_in_table INT UNSIGNED,'
                      ' table_number INT UNSIGNED,'
                      ' table_css_class VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin,'
                      ' table_css_style VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin,'
                      ' target_x_coord_1920_1080 INT UNSIGNED DEFAULT NULL,'
                      ' target_y_coord_1920_1080 INT UNSIGNED DEFAULT NULL ,'
                      'INDEX(`target_article_id`),'
                      'INDEX(`source_article_id`)'
                  ') ENGINE=InnoDB;')
    connection.close()
开发者ID:trovdimi,项目名称:wikilinks,代码行数:28,代码来源:startredirectsinserter.py

示例5: pickle_vis_data_pandas

def pickle_vis_data_pandas():
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()


    df = pd.read_sql('select source_article_id, target_article_id, target_y_coord_1920_1080, target_x_coord_1920_1080, visual_region from link_features', conn)
    print len(df)

    no_dup = df.sort(['source_article_id','target_y_coord_1920_1080','target_x_coord_1920_1080']).groupby(["source_article_id", "target_article_id"]).first()
    print len(no_dup)

    feature = no_dup.loc[no_dup['visual_region']=='lead']
    print len(feature)
    feature.reset_index(inplace=True)


    feature = no_dup.loc[no_dup['visual_region']=='infobox']
    print len(feature)
    feature.reset_index(inplace=True)
    feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/infobox.tsv', sep='\t', index=False)

    feature = no_dup.loc[no_dup['visual_region']=='navbox']
    print len(feature)
    feature.reset_index(inplace=True)
    feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/navbox.tsv', sep='\t', index=False)

    feature = no_dup.loc[no_dup['visual_region']=='left-body']
    print len(feature)
    feature.reset_index(inplace=True)
    feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/left-body.tsv', sep='\t',index=False)

    feature = no_dup.loc[no_dup['visual_region']=='body']
    print len(feature)
    feature.reset_index(inplace=True)
    feature[['source_article_id','target_article_id']].to_csv('/home/ddimitrov/tmp/body.tsv', sep='\t',index=False)
开发者ID:trovdimi,项目名称:wikilinks,代码行数:35,代码来源:pickle_data.py

示例6: run

    def run(self):
        self.print_title('This is the interactive runner program')
        self.create_tmp_if_not_exists()

        INPUT_FILE = self.read_path('Please enter the path of the input file [.txt]', default='./tmp/input.txt')
        OUTPUT_FILE = self.read_path('Please enter the path of the output file [.html]', default='./tmp/output.html', must_exist=False)
        LOGGING_PATH = self.read_path('Please enter the path of the logging file [.log]', default='./tmp/runner.log', must_exist=False)


        print '# Starting runner...'
        # setup logging
        LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
        logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')

        # measure time
        start = time.clock()

        # connect to db
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        work_view = db.get_work_view()

        # read input
        f = open(INPUT_FILE, 'r')
        text = f.read()
        text = text.replace(' ', ' ')
        f.close()

        # create dummy article
        article = {}
        article['type'] = 'article'
        article['id'] = None
        article['title'] = None
        article['text'] = text
        article['links'] = []

        # identify links
        link_detector = LinkDetector(work_view)
        link_detector.detect_links(article)
        # identify terms
        #term_identifier = TermIdentifier()
        #article = term_identifier.identify_terms(text)

        # find possible meanings
        meaning_finder = MeaningFinder(work_view)
        meaning_finder.find_meanings(article)

        # calculate relatedness
        relatedness_calculator = RelatednessCalculator(work_view)

        # decide for meaning
        decider = Decider(relatedness_calculator)
        decider.decide(article)

        # output results
        html_outputter = HTMLOutputter()
        html_outputter.output(article, OUTPUT_FILE)

        seconds = round (time.clock() - start)
        print 'Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60)
开发者ID:plaufer,项目名称:wikiwsd,代码行数:59,代码来源:runner.py

示例7: pickle_correlations_zeros

def pickle_correlations_zeros():
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()

    print 'read'
    df = pd.read_sql('select source_article_id, target_article_id, IFNULL(counts, 0) as counts from link_features group by source_article_id, target_article_id', conn)
    print 'group'
    article_counts = df.groupby(by=["target_article_id"])['counts'].sum().reset_index()
    print 'write to file'
    article_counts[["target_article_id","counts"]].to_csv(TMP+'article_counts.tsv', sep='\t', index=False)
开发者ID:trovdimi,项目名称:wikilinks,代码行数:10,代码来源:weighted_pagerank.py

示例8: __init__

    def __init__(self, path):
        #os.environ["DISPLAY"]=":1"
        print path
        os.environ["DISPLAY"]=":1"
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        self.db_build_view = db.get_build_view()
        self.cursor = self.db_build_view._cursor

        self.app = QApplication(sys.argv)
        self.path = path
开发者ID:trovdimi,项目名称:wikilinks,代码行数:10,代码来源:redirectscandidatespostioninserter.py

示例9: pickle_aggregated_counts_distribution

def pickle_aggregated_counts_distribution():

    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    cursor = db_worker_view._cursor
    results = {}
    try:
        cursor.execute('select sum(counts) from clickstream_derived_internal_links group by prev_id;')
        result = cursor.fetchall()
        results['source_article']=result
    except MySQLdb.Error, e:
        print e
开发者ID:linksuccess,项目名称:linksuccess,代码行数:12,代码来源:click_distributions.py

示例10: build_page_length_table

def build_page_length_table():
    """creates up the basic database structure
    """
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    connection = db._create_connection()
    cursor = connection.cursor()

    cursor.execute('CREATE TABLE `redirects_candidates_page_length` ('
                      '`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY,'
                      ' page_length_1920_1080 INT UNSIGNED DEFAULT NULL'
                  ') ENGINE=InnoDB;')
    connection.close()
开发者ID:trovdimi,项目名称:wikilinks,代码行数:12,代码来源:startredirectsinserter.py

示例11: _create_structure

    def _create_structure(self):

        # measure time
        start = time.clock()

        # creating structure
        db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
        db.build()

        seconds = round (time.clock() - start)
        logging.info('Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60))
        print 'Finished after %02d:%02d minutes' % (seconds / 60, seconds % 60)
开发者ID:linksuccess,项目名称:linksuccess,代码行数:12,代码来源:builder.py

示例12: pickle_category_counts_distribution

def pickle_category_counts_distribution():
    results =  {}
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    cursor = db_worker_view._cursor
    for category in ['lead', 'infobox', 'body', 'left-body', 'navbox']:
        try:
            cursor.execute('select counts from link_features where counts is not null and visual_region=%s;', (category,))
            result = cursor.fetchall()
            results[category] = result
        except MySQLdb.Error, e:
            print e
开发者ID:linksuccess,项目名称:linksuccess,代码行数:12,代码来源:click_distributions.py

示例13: correlations

def correlations(network_name):
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    conn = db._create_connection()
    cursor = conn.cursor()
    # wikipedia  graph  structural statistics

    results = None
    try:
        results = cursor.execute('select c.curr_id,  sum(c.counts) as counts from clickstream_derived c where c.link_type_derived= %s  group by c.curr_id;', ("internal-link",))
        results = cursor.fetchall()


    except MySQLdb.Error, e:
        print ('error retrieving xy coord for all links links %s (%d)' % (e.args[1], e.args[0]))
开发者ID:trovdimi,项目名称:wikilinks,代码行数:14,代码来源:weighted_pagerank.py

示例14: links_heatmap

def links_heatmap():
    #http://stackoverflow.com/questions/2369492/generate-a-heatmap-in-matplotlib-using-a-scatter-data-set
    # Get URLs from a text file, remove white space.
    print 'loading'
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    db_worker_view = db.get_work_view()
    coords = db_worker_view.retrieve_all_links_coords()
    print 'coord loaded'
    x=[]
    y=[]

    page_lenghts = db_worker_view.retrieve_all_page_lengths()
    print 'lenghts loaded'
    for coord in coords:
        x_normed = float(coord['x'])/float(1920)
        y_normed = float(coord['y'])/float(page_lenghts[coord['source_article_id']])
        if  x_normed <=1.0 and y_normed <=1.0:
            x.append(x_normed)
            y.append(y_normed)



    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
    extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]]

    fig_size = (2.4, 2)
    #fig_size = (3.5, 3)
    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap, extent=extent, origin='upper', norm=LogNorm(), cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Log Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_lognormed_self_loop.pdf')

    plt.clf()
    plt.figure(figsize=fig_size)
    plt.grid(True)

    plt.imshow(heatmap , extent=extent, origin='upper', norm=Normalize(),cmap=plt.get_cmap('jet'))
    plt.colorbar()
    #plt.title("Links Heatmap Normalized")

    plt.show()
    plt.savefig('output/links_heatmap_normed_self_loop.pdf')

    print "done"
开发者ID:linksuccess,项目名称:linksuccess,代码行数:50,代码来源:heatmaps.py

示例15: build_table

def build_table():
    """creates up the basic database structure
    """
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    connection = db._create_connection()
    cursor = connection.cursor()

    cursor.execute('CREATE TABLE `table_css_class` ('
                      '`id` BIGINT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,'
                      '`source_article_id` BIGINT UNSIGNED NOT NULL,'
                      ' css_class VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,'
                      'INDEX(`source_article_id`)'
                  ') ENGINE=InnoDB;')
    connection.close()
开发者ID:trovdimi,项目名称:wikilinks,代码行数:14,代码来源:tableclassinserter.py


注:本文中的wsd.database.MySQLDatabase类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。