本文整理汇总了Python中store.Store.store_author方法的典型用法代码示例。如果您正苦于以下问题：Python Store.store_author方法的具体用法？Python Store.store_author怎么用？Python Store.store_author使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类store.Store的用法示例。
在下文中一共展示了Store.store_author方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: write_to_queue
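# Required import: from store import Store [as alias]
# Note: store_author is a method of the Store class, so the generated hint
# "from store.Store import store_author [as alias]" is not a usable import;
# call store_author on a Store instance instead.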
class DP:
# Authors that have already been queued for crawling. This list is defined at
# class level and appended to through self.author_list, so the same list
# object is shared by every DP instance.
author_list = []
# Authors that are never queued for crawling, for various reasons
skip_list = ['[deleted]']
# Internal sqlite3 store; None until seed() assigns a Store instance
store = None
def write_to_queue(self, data, prefix='tmp'):
    """Write ``data`` to a new, uniquely named file in the DP queue directory.

    The file is created with ``tempfile.mkstemp`` inside
    ``<tmpdir>/dp/queue``.

    Args:
        data: Text written as the entire content of the new file.
        prefix: Filename prefix handed to ``mkstemp``.

    Returns:
        The base name (without the directory part) of the created file.
    """
    fh, filename = tempfile.mkstemp(dir=os.path.join(tmpdir, 'dp', 'queue'), prefix=prefix)
    # mkstemp returns an OS-level descriptor; release it and reopen the file
    # by name so the content goes through a regular file object.
    os.close(fh)
    # The context manager closes the file even if write() raises.
    with open(filename, 'w') as fp:
        fp.write(data)
    return os.path.split(filename)[1]
def seed(self):
    """Open the backing store and enqueue the initial crawl entry.

    Returns:
        The base name of the queue file that holds the seed entry, as
        returned by ``write_to_queue``.
    """
    backing_store = Store('/collection/sharvey/reddit/')
    # Assign before opening so self.store is set even if open() fails.
    self.store = backing_store
    backing_store.open()
    print('Created seed queue')
    seed_file = self.write_to_queue('a,t3_1u4kuf', 'tmp_a_')
    return seed_file
def process_author(self, abspath, filename):
    """Load one author JSON file and persist its listing elements.

    Args:
        abspath: Directory that contains the file.
        filename: Name of the JSON file inside ``abspath``.

    Returns:
        Always an empty list; this step queues no follow-up work.
    """
    # The context manager closes the file even when the JSON is malformed.
    with open(os.path.join(abspath, filename)) as fp:
        blob = json.load(fp)
    elements = parser.extract_listing_elements(blob)
    self.store.store_author(elements)
    return []
def process_snapshot(self, abspath, filename):
    """Process one staged JSON file and queue any follow-up work.

    The part of ``filename`` before the first underscore selects the
    handling:

    * ``'a'``: the file holds ``posts`` and ``nav``. Every post whose
      ``created_utc`` date falls within ``datestart``..``dateend`` is queued
      as ``p,<id>``; the next page is queued as ``a,<after>`` unless a post
      older than ``datestart`` was seen or there is no next page.
    * ``'p'``: the file holds ``post`` and ``comments``. The snapshot is
      stored and, when ``crawl_author`` is set, each author not seen before
      is queued as ``u,<author>``.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation had been stripped - confirm against the original file.

    Args:
        abspath: Directory that contains the file.
        filename: Name of the JSON file inside ``abspath``.

    Returns:
        A list of base names of the queue files that were written; an empty
        list when nothing was queued or the file type is not handled.
    """
    filetype = filename.split('_')
    # The context manager closes the file even when the JSON is malformed.
    with open(os.path.join(abspath, filename)) as fp:
        blob = json.load(fp)

    if filetype[0] == 'a':
        posts = blob['posts']
        nav = blob['nav']
        start_hit = False
        queue_file_list = []
        queue_list = []
        for sube in posts:
            utctime = int(sube['created_utc'])
            # Compare dates as YYYYMMDD integers (UTC).
            sttime = int(time.strftime('%Y%m%d', time.gmtime(utctime)))
            if sttime > int(dateend):
                continue
            if sttime < int(datestart):
                # Stop at the first post older than the window; presumably
                # the listing is ordered newest-first - TODO confirm.
                start_hit = True
                break
            queue_list.append('p,' + sube['id'])
        # NOTE(review): a 'tmp_p_' file is written even when no post matched
        # (it is then empty); kept as in the original.
        queue_file_list.append(self.write_to_queue('\n'.join(queue_list), 'tmp_p_'))
        if not start_hit and nav['after'] is not None:
            queue_file_list.append(self.write_to_queue('a,' + nav['after'], 'tmp_a_'))
        return queue_file_list

    if filetype[0] == 'p':
        post = blob['post']
        comments = blob['comments']
        self.store.store_snapshot(post, comments)
        if crawl_author:
            queue_file_list = []
            # The post author is handled first, then each comment author.
            for entry in [post] + list(comments):
                author = entry['author']
                if author in self.author_list or author in self.skip_list:
                    continue
                queue_file_list.append(self.write_to_queue('u,' + author, 'tmp_u_'))
                self.author_list.append(author)
            return queue_file_list

    return []
def process_snapshots(self, abspath, filename_list):
    """Load several post snapshot files and store them in one batch.

    Each file is expected to contain ``post`` and ``comments`` keys; the
    pairs are collected and passed to ``store_batch_snapshot`` in a single
    call.

    NOTE(review): the original ended with an unfinished ``if crawl_author:``
    branch that only created an unused empty list. Author queueing is not
    implemented for batches, and the method returns ``None``.

    Args:
        abspath: Directory that contains the files.
        filename_list: Names of the JSON files inside ``abspath``.
    """
    post_tuples = []
    for filename in filename_list:
        # The context manager closes the file even when the JSON is malformed.
        with open(os.path.join(abspath, filename)) as fp:
            blob = json.load(fp)
        post_tuples.append((blob['post'], blob['comments']))
    self.store.store_batch_snapshot(post_tuples)
def run(self):
seedfile = self.seed()
os.rename(os.path.join(tmpdir, 'dp', 'queue', seedfile), os.path.join(tmpdir, 'server', 'queue', seedfile))
sleepcount = 0
while True:
for filename in os.listdir(os.path.join(tmpdir, 'dp', 'staging')):
sleepcount = 0
self.store.open()
prefix = filename.split('.')[0]
absfilename = os.path.join(tmpdir, 'dp', 'staging', filename)
abspath = os.path.join(tmpdir, 'dp', 'staging', prefix)
os.mkdir(abspath)
#.........这里部分代码省略.........