本文整理汇总了 Python 中 wsd.database.MySQLDatabase.get_build_view 方法的典型用法代码示例。如果您正苦于以下问题:Python MySQLDatabase.get_build_view 方法的具体用法?Python MySQLDatabase.get_build_view 怎么用?Python MySQLDatabase.get_build_view 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 wsd.database.MySQLDatabase 的用法示例。
在下文中一共展示了 MySQLDatabase.get_build_view 方法的 3 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: table_parser
# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_build_view [as 别名]
def table_parser(self, file_name, root):
    """Extract the table classes of one zipped article and insert them.

    The archive at ``os.path.join(root, file_name)`` is converted to HTML
    with ``self.zip2html``, fed through ``WikipediaHTMLTableParser`` and
    ``WikipediaFedTextParser``, and every distinct table class found is
    passed to ``self.insert_table_class`` together with the article id.

    Args:
        file_name: Name of the zipped article file. The article id is taken
            from the second ``'_'``-separated field of this name.
        root: Directory that contains ``file_name``.

    A ``KeyError`` raised while building the text parser or inserting is
    logged and the database connection is rolled back. The build view is
    committed and its cache reset in either case.
    """
    database = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    build_view = database.get_build_view()
    db_cursor = build_view._cursor

    # Set up file logging; filemode 'w' opens the log file for overwriting.
    log_format = '%(levelname)s:\t%(asctime)-15s %(message)s'
    log_path = 'tmp/tableclasses-dbinsert.log'
    logging.basicConfig(
        filename=log_path,
        filemode='w',
        format=log_format,
        level=logging.DEBUG,
    )

    table_html_parser = WikipediaHTMLTableParser()
    # NOTE(review): zip2html presumably returns UTF-8 encoded bytes, since the
    # result is decoded below -- confirm against its definition.
    raw_html = self.zip2html(os.path.join(root, file_name))
    table_html_parser.feed(raw_html.decode('utf-8'))

    source_article_id = file_name.split('_')[1]
    try:
        text_parser = WikipediaFedTextParser(table_html_parser.get_data())
        # A set drops duplicate class names so each is inserted only once.
        for class_name in set(text_parser.table_classes(None)):
            self.insert_table_class(source_article_id, class_name, db_cursor)
    except KeyError:
        build_view._db_connection.rollback()
        logging.error('KeyError FedTextParser source article id: %s ' % source_article_id)

    build_view.commit()
    build_view.reset_cache()
示例2: __init__
# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_build_view [as 别名]
def __init__(self, path):
    """Set up the display, database build view and Qt application.

    Args:
        path: Value printed on construction and stored as ``self.path``.
    """
    print(path)
    # The DISPLAY variable is set before the QApplication below is created.
    os.environ["DISPLAY"] = ":1"

    database = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    build_view = database.get_build_view()
    self.db_build_view = build_view
    self.cursor = build_view._cursor

    self.app = QApplication(sys.argv)
    self.path = path
示例3: _extract_articles
# 需要导入模块: from wsd.database import MySQLDatabase [as 别名]
# 或者: from wsd.database.MySQLDatabase import get_build_view [as 别名]
def _extract_articles(self):
    """Read the wiki dump and insert its articles through worker threads.

    A ``WikipediaReader`` thread fills a bounded queue from
    ``WIKI_DUMP_XML_FILE`` and ``ArticleInserter`` threads, each given its
    own build view, consume it. The method blocks until the reader, the
    queue and all inserters are done, then prints the elapsed wall-clock
    time as ``mm:ss``.
    """
    # These settings were once prompted for interactively (dump path, queue
    # size, thread count, confirmation); they are now fixed values.
    input_file = WIKI_DUMP_XML_FILE
    max_articles_in_queue = 200
    num_threads = 1
    proceed = True

    if not proceed:
        print('Aborting...')
        return

    # Measure wall-clock time. time.clock() reports processor time on Unix,
    # which under-counts while this thread is blocked in join(), and it no
    # longer exists in Python 3.8+.
    start = time.time()

    # Connect to the database and create the article queue.
    db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
    article_queue = Queue.Queue(maxsize=max_articles_in_queue)

    # Create the reader and the inserter threads.
    reader = WikipediaReader(input_file, article_queue, extract_text=False)
    inserters = [
        ArticleInserter(article_queue, db.get_build_view())
        for _ in range(num_threads)
    ]

    # Start the reader first, then the inserters.
    reader.start()
    for inserter in inserters:
        inserter.start()

    # Wait for the reader to finish and the queue to drain before asking
    # the inserters to stop.
    reader.join()
    article_queue.join()
    for inserter in inserters:
        inserter.end()
    for inserter in inserters:
        inserter.join()

    elapsed = int(round(time.time() - start))
    minutes, seconds = divmod(elapsed, 60)
    print('Finished after %02d:%02d minutes' % (minutes, seconds))