This article collects typical usage examples of scrapy.conf.settings in Python. If you have been wondering what scrapy.conf.settings does, how to use it, or where to find working examples, the curated snippets below should help. You may also want to look further into the containing module, scrapy.conf.
The following presents 12 code examples that use conf.settings, drawn from real Scrapy projects and ordered by popularity.
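A note before the examples: scrapy.conf is a legacy API. The module-level settings singleton it exposes was deprecated long ago (around Scrapy 1.0), and recent releases have removed it; modern code reaches the same values through crawler.settings. A minimal sketch of both access styles (MyPipeline and the use of DOWNLOAD_TIMEOUT are illustrative, not taken from the examples below):

# Legacy style, used throughout the examples on this page
from scrapy.conf import settings
timeout = settings.get('DOWNLOAD_TIMEOUT')

# Modern style: receive the per-crawler settings via from_crawler
class MyPipeline(object):
    def __init__(self, timeout):
        self.timeout = timeout

    @classmethod
    def from_crawler(cls, crawler):
        return cls(timeout=crawler.settings.get('DOWNLOAD_TIMEOUT'))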
Example 1: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    # Read the MongoDB connection parameters from the project settings
    host = settings['MONGODB_HOST']
    port = settings['MONGODB_PORT']
    name = settings['MONGODB_DBNAME']
    client = MongoClient(host=host, port=port)  # requires: from pymongo import MongoClient
    db = client[name]
    self.col = db[settings['MONGODB_DOCNAME']]
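The MONGODB_* keys above are not built-in Scrapy settings; each project defines them in its own settings.py. A plausible configuration, with placeholder values, might look like:

# settings.py (hypothetical values)
MONGODB_HOST = '127.0.0.1'
MONGODB_PORT = 27017
MONGODB_DBNAME = 'scrapy_db'
MONGODB_DOCNAME = 'items'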
Example 2: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    host = settings['MONGODB_HOST']
    port = settings['MONGODB_PORT']
    db_name = settings['MONGODB_DBNAME']
    client = pymongo.MongoClient(host=host, port=port)  # requires: import pymongo
    db = client[db_name]
    self.post = db[settings['MONGODB_DOCNAME']]
Example 3: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    # MongoClient() with no arguments connects to localhost:27017
    self.db = pymongo.MongoClient()[settings['MONGODB_DB']]
    self.handler = None
Example 4: process_error
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def process_error(self, item):
    # Lazily bind the error collection on first use
    if not self.handler:
        self.handler = self.db[settings['MONGODB_ERROR']]
    self.handler.insert_one(dict(item))
Example 5: process_request
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def process_request(self, request, spider):
    # Route each request through a randomly chosen proxy
    proxy = random.choice(settings['PROXIES'])  # requires: import random
    request.meta['proxy'] = proxy
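For this downloader middleware to take effect, the PROXIES list must exist in settings.py and the middleware must be registered in DOWNLOADER_MIDDLEWARES. A sketch with placeholder proxy addresses and a hypothetical module path:

# settings.py (hypothetical values)
PROXIES = [
    'http://10.10.1.1:8080',
    'http://10.10.1.2:8080',
]
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.RandomProxyMiddleware': 543,
}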
Example 6: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    self.server = settings['MONGODB_SERVER']
    self.port = settings['MONGODB_PORT']
    self.db = settings['MONGODB_DB']
    self.col = settings['MONGODB_COLLECTION']
    # pymongo.Connection was removed in pymongo 3.x; MongoClient is its replacement
    connection = pymongo.MongoClient(self.server, self.port)
    db = connection[self.db]
    self.collection = db[self.col]
Example 7: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    # pymongo.Connection was removed in pymongo 3.x; MongoClient is its replacement
    conn = pymongo.MongoClient(
        settings['MONGO_CONF']['host'],
        settings['MONGO_CONF']['port']
    )
    db = conn[settings['MONGO_CONF']['db']]
    self.news_collection = db[settings['MONGO_CONF']['collection']]
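Unlike the flat MONGODB_* keys elsewhere on this page, this pipeline reads one nested dict from the settings. The assumed settings.py entry, with placeholder values:

# settings.py (hypothetical values)
MONGO_CONF = {
    'host': '127.0.0.1',
    'port': 27017,
    'db': 'news_db',
    'collection': 'news',
}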
Example 8: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    host = settings['MONGODB_HOST']
    port = settings['MONGODB_PORT']
    dbname = settings['MONGODB_DBNAME']
    docname = settings['MONGODB_DOCNAME']
    self.client = pymongo.MongoClient(host=host, port=port)
    db = self.client[dbname]
    # Make the document ID (casedocid) a unique index to avoid inserting duplicate data
    # (ensure_index was removed in pymongo 3.x in favor of create_index)
    db[docname].create_index('casedocid', unique=True)
    self.post = db[docname]
Example 9: __init__
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __init__(self):
    import pymongo
    # pymongo.Connection was removed in pymongo 3.x; MongoClient is its replacement
    connection = pymongo.MongoClient(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
    self.db = connection[settings['MONGODB_DB']]
    self.collection = self.db[settings['MONGODB_COLLECTION']]
    if self.__get_uniq_key() is not None:
        self.collection.create_index(self.__get_uniq_key(), unique=True)
Example 10: process_item
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def process_item(self, item, spider):
    if self.__get_uniq_key() is None:
        self.collection.insert_one(dict(item))  # insert() is deprecated in pymongo 3.x
    else:
        # Upsert keyed on the configured unique field: replace the matching
        # document if one exists, insert otherwise
        self.collection.replace_one(
            {self.__get_uniq_key(): item[self.__get_uniq_key()]},
            dict(item),
            upsert=True)
    # Legacy Scrapy logging API; requires: from scrapy import log
    log.msg("Item written to MongoDB database %s/%s" %
            (settings['MONGODB_DB'], settings['MONGODB_COLLECTION']),
            level=log.DEBUG, spider=spider)
    return item
Example 11: __get_uniq_key
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def __get_uniq_key(self):
    # An unset or empty MONGODB_UNIQ_KEY means "no unique key"; `not`
    # already covers both None and "", so a separate == "" test is redundant
    if not settings['MONGODB_UNIQ_KEY']:
        return None
    return settings['MONGODB_UNIQ_KEY']
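Examples 9-11 appear to belong to the same pipeline: MONGODB_UNIQ_KEY drives both the unique index created in __init__ and the upsert filter in process_item, and leaving it unset or empty switches the pipeline to plain inserts. The assumed setting, with a placeholder value:

# settings.py (hypothetical value)
MONGODB_UNIQ_KEY = 'url'  # unset or '' falls back to plain inserts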
Example 12: process_item
# Required import: from scrapy import conf [as alias]
# Or: from scrapy.conf import settings [as alias]
def process_item(self, item, spider):
    '''Insert the scraped item.'''
    try:
        data = dict(item)
        self.post.insert_one(data)
        return item
    except DuplicateKeyError:  # requires: from pymongo.errors import DuplicateKeyError
        # A matching unique index value means duplicate data; log and skip
        spider.logger.debug('Duplicate key error collection')
        return item
# 2. Asynchronous item storage - does not work! No data gets inserted!
#    (reference: https://zhuanlan.zhihu.com/p/44003499)
# from twisted.internet import defer, reactor
# class WenshuPipeline(object):
#     def __init__(self, mongo_host, mongo_port, mongo_db, mongo_doc):
#         self.mongo_host = mongo_host
#         self.mongo_port = mongo_port
#         self.mongo_db = mongo_db
#         self.mongo_doc = mongo_doc
#
#     @classmethod
#     def from_crawler(cls, crawler):
#         return cls(
#             mongo_host=crawler.settings.get('MONGODB_HOST'),
#             mongo_port=crawler.settings.get('MONGODB_PORT'),
#             mongo_db=crawler.settings.get('MONGODB_DBNAME'),
#             mongo_doc=crawler.settings.get('MONGODB_DOCNAME'),
#         )
#
#     def open_spider(self, spider):
#         self.client = pymongo.MongoClient(host=self.mongo_host, port=self.mongo_port)
#         self.mongodb = self.client[self.mongo_db]
#         self.mongodb[self.mongo_doc].create_index('id', unique=True)  # unique index to avoid inserting duplicate data
#
#     def close_spider(self, spider):
#         self.client.close()
#
#     # The operations below are the key part
#     @defer.inlineCallbacks
#     def process_item(self, item, spider):
#         out = defer.Deferred()
#         reactor.callInThread(self._insert, item, out, spider)
#         yield out
#         defer.returnValue(item)
#
#     def _insert(self, item, out, spider):
#         time.sleep(10)
#         try:
#             self.mongodb[self.mongo_doc].insert_one(dict(item))
#             reactor.callFromThread(out.callback, item)
#         except DuplicateKeyError:
#             # A matching unique index value means duplicate data; log and skip
#             spider.logger.debug('duplicate key error collection')
#             reactor.callFromThread(out.callback, item)
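A commonly used pattern for asynchronous storage (a sketch under the same assumptions as the block above, not code from the original source) is to return a Deferred directly from process_item: Scrapy waits on the returned Deferred, and twisted.internet.threads.deferToThread runs the blocking pymongo call on Twisted's thread pool without a hand-rolled Deferred:

# 3. Asynchronous alternative (sketch): reuses the open_spider/close_spider
#    setup from the class above; requires: from twisted.internet import threads
def process_item(self, item, spider):
    # Scrapy treats a returned Deferred as "item pending"; its callback
    # value (the item returned by _insert) is passed on to the next stage
    return threads.deferToThread(self._insert, item, spider)

def _insert(self, item, spider):
    # Runs in a worker thread, so the blocking insert does not stall the reactor
    try:
        self.mongodb[self.mongo_doc].insert_one(dict(item))
    except DuplicateKeyError:
        spider.logger.debug('duplicate key error collection')
    return item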