

Python conf.settings Method Code Examples

This article collects typical usage examples of scrapy.conf.settings in Python. If you have been wondering what conf.settings does, how to use it, or where to find real-world examples, the curated snippets below should help. You can also explore further usage examples from the scrapy.conf module.


Twelve code examples of conf.settings are shown below, sorted by popularity by default. Upvote the ones you like or find useful; your feedback helps the system recommend better Python code examples.
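Before the individual examples, here is a minimal sketch of the pattern they all share: import the module-level settings object from scrapy.conf and look up configuration keys by name. The MONGODB_* keys mirror the examples below and the values are placeholders; note also that scrapy.conf is deprecated in modern Scrapy, where crawler.settings (obtained via from_crawler) is the recommended replacement.

# A minimal sketch of the shared pattern (key names mirror the examples below;
# the fallback value is a placeholder).
# Note: scrapy.conf is deprecated in modern Scrapy; prefer crawler.settings in new code.
from scrapy.conf import settings

host = settings['MONGODB_HOST']              # plain dict-style lookup
port = settings.get('MONGODB_PORT', 27017)   # .get() accepts a fallback default
print(host, port)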

Example 1: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: from pymongo import MongoClient
def __init__(self):
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        name = settings['MONGODB_DBNAME']
        client = MongoClient(host=host,port=port)
        db = client[name]
        self.col = db[settings['MONGODB_DOCNAME']] 
Author: WiseDoge, Project: crawler_examples, Lines: 9, Source: pipelines.py
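A pipeline like Example 1 reads all of its connection details from the project's settings.py. A minimal sketch of what those entries might look like is shown below; only the key names come from the example, the values are placeholders.

# settings.py (hypothetical values; only the key names are taken from Example 1)
MONGODB_HOST = '127.0.0.1'
MONGODB_PORT = 27017
MONGODB_DBNAME = 'scrapy_demo'
MONGODB_DOCNAME = 'items'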

Example 2: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import pymongo
def __init__(self):
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        db_name = settings['MONGODB_DBNAME']
        client = pymongo.MongoClient(host=host, port=port)
        db = client[db_name]
        self.post = db[settings['MONGODB_DOCNAME']] 
Author: kingname, Project: SourceCodeOfBook, Lines: 9, Source: pipelines.py

Example 3: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import pymongo
def __init__(self):
        self.db = pymongo.MongoClient()[settings['MONGODB_DB']]
        self.handler = None 
Author: kingname, Project: SourceCodeOfBook, Lines: 5, Source: pipelines.py

Example 4: process_error

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def process_error(self, item):
        if not self.handler:
            self.handler = self.db[settings['MONGODB_ERROR']]
        self.handler.insert_one(dict(item)) 
Author: kingname, Project: SourceCodeOfBook, Lines: 6, Source: pipelines.py

Example 5: process_request

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import random
def process_request(self, request, spider):
        proxy = random.choice(settings['PROXIES'])
        request.meta['proxy'] = proxy 
Author: kingname, Project: SourceCodeOfBook, Lines: 5, Source: middlewares.py
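Example 5 picks a random proxy from a PROXIES list defined in the project settings and attaches it to each outgoing request. Below is a hedged sketch of what that setting and the middleware registration might look like; the PROXIES key comes from the example, while the values and the middleware path are placeholders.

# settings.py (hypothetical values; only the PROXIES key is taken from Example 5)
PROXIES = [
    'http://127.0.0.1:8888',
    'http://user:password@proxy.example.com:3128',
]

# The middleware must also be enabled; this module path is a placeholder.
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.ProxyMiddleware': 543,
}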

Example 6: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import pymongo
# Note: pymongo.Connection was removed in PyMongo 3.x; pymongo.MongoClient is the modern equivalent
def __init__(self):
        self.server = settings['MONGODB_SERVER']
        self.port = settings['MONGODB_PORT']
        self.db = settings['MONGODB_DB']
        self.col = settings['MONGODB_COLLECTION']
        connection = pymongo.Connection(self.server, self.port)
        db = connection[self.db]
        self.collection = db[self.col] 
Author: lamjack, Project: LotteryTicket, Lines: 10, Source: pipelines.py

Example 7: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import pymongo
def __init__(self):
        conn = pymongo.Connection(
            settings['MONGO_CONF']['host'],
            settings['MONGO_CONF']['port']
        )
        db = conn[settings['MONGO_CONF']['db']]
        self.news_collection = db[settings['MONGO_CONF']['collection']] 
Author: BillBillBillBill, Project: NewsCrawler, Lines: 9, Source: pipelines.py

Example 8: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: import pymongo
def __init__(self):
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        dbname = settings['MONGODB_DBNAME']
        docname = settings['MONGODB_DOCNAME']
        self.client = pymongo.MongoClient(host=host,port=port)
        db = self.client[dbname]
        db[docname].ensure_index('casedocid', unique=True)  # make the document ID a unique index to avoid duplicate inserts (ensure_index is deprecated; create_index is the current call)
        self.post = db[docname] 
Author: Henryhaohao, Project: Wenshu_Spider, Lines: 11, Source: pipelines.py

Example 9: __init__

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
        import pymongo
        connection = pymongo.Connection(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
        self.db = connection[settings['MONGODB_DB']]
        self.collection = self.db[settings['MONGODB_COLLECTION']]
        if self.__get_uniq_key() is not None:
            self.collection.create_index(self.__get_uniq_key(), unique=True) 
Author: openslack, Project: openslack-crawler, Lines: 9, Source: pipelines.py

Example 10: process_item

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: from scrapy import log
# Note: collection.insert()/update() and scrapy.log are legacy APIs; current PyMongo uses insert_one/replace_one and current Scrapy uses spider.logger
def process_item(self, item, spider):
        if self.__get_uniq_key() is None:
            self.collection.insert(dict(item))
        else:
            self.collection.update(
                {self.__get_uniq_key(): item[self.__get_uniq_key()]},
                dict(item),
                upsert=True)
        log.msg("Item wrote to MongoDB database %s/%s" %
                (settings['MONGODB_DB'], settings['MONGODB_COLLECTION']),
                level=log.DEBUG, spider=spider)
        return item 
Author: openslack, Project: openslack-crawler, Lines: 14, Source: pipelines.py

Example 11: __get_uniq_key

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __get_uniq_key(self):
        if not settings['MONGODB_UNIQ_KEY'] or settings['MONGODB_UNIQ_KEY'] == "":
            return None
        return settings['MONGODB_UNIQ_KEY'] 
Author: openslack, Project: openslack-crawler, Lines: 6, Source: pipelines.py
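Examples 9 to 11 come from the same pipeline: when MONGODB_UNIQ_KEY is set, a unique index is created on that key and process_item upserts on it; when it is empty, items are plainly inserted. A hypothetical settings.py fragment for that pipeline is sketched below; the key names are taken from the examples, the values are placeholders.

# settings.py (hypothetical values; only the key names are taken from Examples 9-11)
MONGODB_SERVER = 'localhost'
MONGODB_PORT = 27017
MONGODB_DB = 'crawler'
MONGODB_COLLECTION = 'items'
MONGODB_UNIQ_KEY = 'url'   # set to '' or leave unset to fall back to plain inserts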

Example 12: process_item

# Required module: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
# This snippet also needs: from pymongo.errors import DuplicateKeyError
def process_item(self, item, spider):
        '''Insert the item into MongoDB.'''
        try:
            data = dict(item)
            self.post.insert_one(data)
            return item
        except DuplicateKeyError:
            # a duplicate key means the record already exists; catch the error and move on
            spider.logger.debug('Duplicate key error collection')
            return item


# 2. Asynchronous item storage - does not work! No data gets inserted. (Reference: https://zhuanlan.zhihu.com/p/44003499)
# from twisted.internet import defer, reactor
# class WenshuPipeline(object):
#     def __init__(self, mongo_host, mongo_port, mongo_db, mongo_doc):
#         self.mongo_host = mongo_host
#         self.mongo_port = mongo_port
#         self.mongo_db = mongo_db
#         self.mongo_doc = mongo_doc
#
#     @classmethod
#     def from_crawler(cls, crawler):
#         return cls(
#             mongo_host=crawler.settings.get('MONGODB_HOST'),
#             mongo_port=crawler.settings.get('MONGODB_PORT'),
#             mongo_db=crawler.settings.get('MONGODB_DBNAME'),
#             mongo_doc=crawler.settings.get('MONGODB_DOCNAME'),
#         )
#
#     def open_spider(self, spider):
#         self.client = pymongo.MongoClient(host=self.mongo_host,port=self.mongo_port)
#         self.mongodb = self.client[self.mongo_db]
#         self.mongodb[self.mongo_doc].create_index('id', unique=True)  # create a unique index to avoid inserting duplicate data
#
#     def close_spider(self, spider):
#         self.client.close()
#
#     # the part below is the key point
#     @defer.inlineCallbacks
#     def process_item(self, item, spider):
#         out = defer.Deferred()
#         reactor.callInThread(self._insert, item, out, spider)
#         yield out
#         defer.returnValue(item)
#         return item
#
#     def _insert(self, item, out, spider):
#         time.sleep(10)
#         try:
#             self.mongodb[self.mongo_doc].insert_one(dict(item))
#             reactor.callFromThread(out.callback, item)
#         except DuplicateKeyError:
#             # a duplicate key means the record already exists; catch the error
#             spider.logger.debug('duplicate key error collection')
#             reactor.callFromThread(out.callback, item) 
Author: Henryhaohao, Project: Wenshu_Spider, Lines: 58, Source: pipelines.py
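Examples 8 and 12 come from the same project: the unique index on casedocid created in Example 8 is what raises the DuplicateKeyError that Example 12 swallows. The commented-out asynchronous version above reportedly failed to insert any data. One commonly used alternative is to return a Deferred from process_item via twisted.internet.threads.deferToThread, so the blocking PyMongo call runs in Twisted's thread pool. The sketch below illustrates that pattern under the same setting and index names; it is an assumption-based alternative, not the original author's fix, and the class name is hypothetical.

# A hedged sketch of a thread-pool based alternative (not from the original project).
import pymongo
from pymongo.errors import DuplicateKeyError
from twisted.internet.threads import deferToThread


class MongoThreadPipeline(object):  # hypothetical class name
    def open_spider(self, spider):
        settings = spider.crawler.settings
        self.client = pymongo.MongoClient(settings.get('MONGODB_HOST'),
                                          settings.getint('MONGODB_PORT'))
        db = self.client[settings.get('MONGODB_DBNAME')]
        self.collection = db[settings.get('MONGODB_DOCNAME')]
        self.collection.create_index('id', unique=True)  # same unique index as the commented version above

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        # Scrapy waits on the returned Deferred; the blocking insert runs off the reactor thread.
        return deferToThread(self._insert, item, spider)

    def _insert(self, item, spider):
        try:
            self.collection.insert_one(dict(item))
        except DuplicateKeyError:
            spider.logger.debug('duplicate key error collection')
        return item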


Note: The scrapy.conf.settings examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Please do not reproduce this article without permission.