当前位置: 首页>>代码示例>>Python>>正文


Python Store.store_snapshot方法代码示例

本文整理汇总了Python中store.Store.store_snapshot方法的典型用法代码示例。如果您正苦于以下问题：Python Store.store_snapshot方法的具体用法？Python Store.store_snapshot怎么用？Python Store.store_snapshot使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类store.Store的用法示例。


在下文中一共展示了Store.store_snapshot方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: write_to_queue

# 需要导入模块: from store import Store [as 别名]
# 或者: from store.Store import store_snapshot [as 别名]
class DP:
    # list of authors that are remembered
    author_list = []
    # list of authors to skip crawling for various reasons
    skip_list = ['[deleted]']
    # internal sqlite3 store
    store = None
    
    def write_to_queue(self, data, prefix='tmp'):
        """Write ``data`` to a new, uniquely named file in the dp queue directory.

        The file is created with ``tempfile.mkstemp`` inside
        ``<tmpdir>/dp/queue`` (``tmpdir`` is a module-level name).

        Args:
            data: Text to write into the queue file.
            prefix: File name prefix handed to ``tempfile.mkstemp``.

        Returns:
            The base name (without directory) of the file that was created.
        """
        queue_dir = os.path.join(tmpdir, 'dp', 'queue')
        fd, path = tempfile.mkstemp(dir=queue_dir, prefix=prefix)
        # Reuse the descriptor mkstemp already opened instead of closing it and
        # reopening the path; the with-block closes it even if write() raises.
        with os.fdopen(fd, 'w') as fp:
            fp.write(data)
        return os.path.basename(path)

    def seed(self):
        """Open the backing store and create the initial queue file.

        Assigns a new ``Store`` to ``self.store``, opens it, and writes the
        seed entry ``'a,t3_1u4kuf'`` to a queue file prefixed ``tmp_a_``.

        Returns:
            The base name of the seed queue file, as returned by
            ``write_to_queue``.
        """
        self.store = Store('/collection/sharvey/reddit/')
        self.store.open()
        # Parenthesised single-argument form prints the same text on Python 2
        # and is also valid syntax on Python 3.
        print('Created seed queue')
        return self.write_to_queue('a,t3_1u4kuf', 'tmp_a_')

    def process_author(self, abspath, filename):
        """Load one staged author JSON file and save its elements to the store.

        The parsed JSON is passed to ``parser.extract_listing_elements`` and
        the result is handed to ``self.store.store_author``.

        Args:
            abspath: Directory containing the staged file.
            filename: Name of the staged file inside ``abspath``.

        Returns:
            Always an empty list; this method creates no queue files.
        """
        # The with-block closes the file even when the JSON is malformed.
        with open(os.path.join(abspath, filename)) as fp:
            blob = json.load(fp)
        elements = parser.extract_listing_elements(blob)
        self.store.store_author(elements)
        return []

    def process_snapshot(self, abspath, filename):
        """Process one staged JSON file and return the queue files it produced.

        The kind of file is the part of ``filename`` before the first
        underscore:

        * ``'a'`` -- a listing with ``posts`` and ``nav`` keys. Each post whose
          ``created_utc`` day (UTC, formatted ``YYYYMMDD``) lies between the
          module-level ``datestart`` and ``dateend`` is queued as a
          ``p,<id>`` line in one ``tmp_p_`` file. If no post older than
          ``datestart`` was seen and ``nav['after']`` is not None, the next
          listing page is queued in a ``tmp_a_`` file.
        * ``'p'`` -- a single post with ``post`` and ``comments`` keys. It is
          saved through ``self.store.store_snapshot``. When the module-level
          ``crawl_author`` is truthy, a ``tmp_u_`` file containing
          ``u,<author>`` is queued for the post author and every comment
          author found in neither ``author_list`` nor ``skip_list``; each
          queued author is appended to ``author_list``.

        Any other kind is ignored.

        Args:
            abspath: Directory containing the staged file.
            filename: Name of the staged file inside ``abspath``.

        Returns:
            A list of queue file names created via ``write_to_queue``; empty
            when nothing was queued.
        """
        kind = filename.split('_')[0]
        # The with-block closes the file even when the JSON is malformed.
        with open(os.path.join(abspath, filename)) as fp:
            blob = json.load(fp)

        if kind == 'a':
            posts = blob['posts']
            nav = blob['nav']
            reached_start = False
            queue_list = []
            for entry in posts:
                day = int(time.strftime('%Y%m%d', time.gmtime(int(entry['created_utc']))))
                if day > int(dateend):
                    # Newer than the requested window: skip it, keep scanning.
                    continue
                if day < int(datestart):
                    # NOTE(review): stopping here presumably relies on the
                    # listing being ordered newest first -- confirm.
                    reached_start = True
                    break
                queue_list.append('p,' + entry['id'])
            # The post queue file is written even when no post matched.
            queue_file_list = [self.write_to_queue('\n'.join(queue_list), 'tmp_p_')]
            if not reached_start and nav['after'] is not None:
                queue_file_list.append(self.write_to_queue('a,' + nav['after'], 'tmp_a_'))
            return queue_file_list

        if kind == 'p':
            post = blob['post']
            comments = blob['comments']
            self.store.store_snapshot(post, comments)

            if crawl_author:
                queue_file_list = []
                # The post author is checked first, then each comment author.
                for item in [post] + list(comments):
                    author = item['author']
                    if author in self.author_list or author in self.skip_list:
                        continue
                    queue_file_list.append(self.write_to_queue('u,' + author, 'tmp_u_'))
                    self.author_list.append(author)
                return queue_file_list
        return []

    def process_snapshots(self, abspath, filename_list):
        """Load several staged post files and store them in one batch.

        Each file is parsed as JSON and its ``post`` and ``comments`` values
        are collected as a tuple; the collected list is then passed to
        ``self.store.store_batch_snapshot`` in a single call.

        Args:
            abspath: Directory containing the staged files.
            filename_list: Names of the staged files inside ``abspath``.

        Returns:
            None. Unlike ``process_snapshot``, no queue files are created.
        """
        post_tuples = []
        for filename in filename_list:
            # The with-block closes each file even when the JSON is malformed.
            with open(os.path.join(abspath, filename)) as fp:
                blob = json.load(fp)
            post_tuples.append((blob['post'], blob['comments']))
        self.store.store_batch_snapshot(post_tuples)
        # NOTE(review): the original ended with an unfinished
        # "if crawl_author: queue_file_list = []" stub that had no effect;
        # author crawling is not implemented for the batch path.

    def run(self):
        seedfile = self.seed()
        os.rename(os.path.join(tmpdir, 'dp', 'queue', seedfile), os.path.join(tmpdir, 'server', 'queue', seedfile))
        sleepcount = 0
        while True:
            for filename in os.listdir(os.path.join(tmpdir, 'dp', 'staging')):
                sleepcount = 0
                self.store.open()
                prefix = filename.split('.')[0]
                absfilename = os.path.join(tmpdir, 'dp', 'staging', filename)
                abspath = os.path.join(tmpdir, 'dp', 'staging', prefix)

                os.mkdir(abspath)
#.........这里部分代码省略.........
开发者ID:aleboz,项目名称:reddit-crawler,代码行数:103,代码来源:crawler.py


注:本文中的store.Store.store_snapshot方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。