本文整理匯總了Python中scrapy.conf.settings方法的典型用法代碼示例。如果您正苦於以下問題:Python conf.settings方法的具體用法?Python conf.settings怎麽用?Python conf.settings使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類scrapy.conf
的用法示例。
在下文中一共展示了conf.settings方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Open a MongoDB connection and bind the target collection.

    Connection parameters come from the Scrapy settings object:
    MONGODB_HOST, MONGODB_PORT, MONGODB_DBNAME and MONGODB_DOCNAME.
    """
    mongo_host = settings['MONGODB_HOST']
    mongo_port = settings['MONGODB_PORT']
    database = MongoClient(host=mongo_host, port=mongo_port)[settings['MONGODB_DBNAME']]
    self.col = database[settings['MONGODB_DOCNAME']]
示例2: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Connect to MongoDB and bind the collection used by the pipeline.

    Reads MONGODB_HOST, MONGODB_PORT, MONGODB_DBNAME and MONGODB_DOCNAME
    from the Scrapy settings.
    """
    mongo_client = pymongo.MongoClient(
        host=settings['MONGODB_HOST'],
        port=settings['MONGODB_PORT'],
    )
    database = mongo_client[settings['MONGODB_DBNAME']]
    self.post = database[settings['MONGODB_DOCNAME']]
示例3: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Bind the database named by MONGODB_DB (default client parameters).

    The error-collection handle is not created here; it is initialised
    lazily the first time process_error() runs.
    """
    client = pymongo.MongoClient()
    self.db = client[settings['MONGODB_DB']]
    self.handler = None
示例4: process_error
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def process_error(self, item):
    """Persist a failed item into the MONGODB_ERROR collection.

    The collection handle is created lazily on first use and cached on
    ``self.handler``.
    """
    # Must compare with None: pymongo Collection objects do not implement
    # truth-value testing — ``if not self.handler`` raises
    # NotImplementedError once the handle has been created.
    if self.handler is None:
        self.handler = self.db[settings['MONGODB_ERROR']]
    self.handler.insert_one(dict(item))
示例5: process_request
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def process_request(self, request, spider):
    """Downloader-middleware hook: attach a random proxy from settings['PROXIES']."""
    chosen = random.choice(settings['PROXIES'])
    request.meta['proxy'] = chosen
示例6: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Read MongoDB settings and bind the target collection.

    Uses MONGODB_SERVER, MONGODB_PORT, MONGODB_DB and MONGODB_COLLECTION
    from the Scrapy settings.
    """
    self.server = settings['MONGODB_SERVER']
    self.port = settings['MONGODB_PORT']
    self.db = settings['MONGODB_DB']
    self.col = settings['MONGODB_COLLECTION']
    # pymongo.Connection was removed in PyMongo 3.0; MongoClient is the
    # supported replacement with the same positional (host, port) signature.
    connection = pymongo.MongoClient(self.server, self.port)
    db = connection[self.db]
    self.collection = db[self.col]
示例7: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Open the news collection described by settings['MONGO_CONF'].

    MONGO_CONF is expected to be a dict with 'host', 'port', 'db' and
    'collection' keys.
    """
    mongo_conf = settings['MONGO_CONF']
    # pymongo.Connection was removed in PyMongo 3.0; use MongoClient instead.
    conn = pymongo.MongoClient(
        mongo_conf['host'],
        mongo_conf['port']
    )
    db = conn[mongo_conf['db']]
    self.news_collection = db[mongo_conf['collection']]
示例8: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Connect to MongoDB and bind the document collection.

    A unique index on 'casedocid' is (re)created so that inserting a
    duplicate judgement document raises DuplicateKeyError instead of
    storing it twice.
    """
    host = settings['MONGODB_HOST']
    port = settings['MONGODB_PORT']
    dbname = settings['MONGODB_DBNAME']
    docname = settings['MONGODB_DOCNAME']
    self.client = pymongo.MongoClient(host=host, port=port)
    db = self.client[dbname]
    # ensure_index() was removed in PyMongo 3.0; create_index() is a
    # drop-in replacement and is a no-op when an identical index exists.
    db[docname].create_index('casedocid', unique=True)
    self.post = db[docname]
示例9: __init__
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __init__(self):
    """Connect to MongoDB and create the unique index, if one is configured."""
    import pymongo  # local import kept from the original snippet
    # pymongo.Connection was removed in PyMongo 3.0; MongoClient replaces it
    # with the same positional (host, port) call signature.
    connection = pymongo.MongoClient(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
    self.db = connection[settings['MONGODB_DB']]
    self.collection = self.db[settings['MONGODB_COLLECTION']]
    if self.__get_uniq_key() is not None:
        self.collection.create_index(self.__get_uniq_key(), unique=True)
示例10: process_item
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def process_item(self, item, spider):
    """Store the item in MongoDB and return it.

    If a unique key is configured (MONGODB_UNIQ_KEY), the item replaces
    any existing document with the same key value (upsert); otherwise it
    is inserted as a new document.
    """
    uniq_key = self.__get_uniq_key()
    if uniq_key is None:
        # insert() was removed in PyMongo 3.x; insert_one() is the
        # supported single-document replacement.
        self.collection.insert_one(dict(item))
    else:
        # replace_one(..., upsert=True) supersedes the removed
        # update(filter, full_document, upsert=True) call: replace the
        # matching document or create it if absent.
        self.collection.replace_one(
            {uniq_key: item[uniq_key]},
            dict(item),
            upsert=True)
    # scrapy.log.msg was removed from Scrapy; spider.logger is the current
    # logging API (and matches the style used elsewhere in this file).
    spider.logger.debug(
        "Item wrote to MongoDB database %s/%s",
        settings['MONGODB_DB'], settings['MONGODB_COLLECTION'])
    return item
示例11: __get_uniq_key
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def __get_uniq_key(self):
    """Return the configured unique-key field name, or None if unset.

    Both a missing setting and an empty string count as "not configured".
    The original's explicit ``== ""`` comparison was redundant, because an
    empty string is already falsy.
    """
    return settings['MONGODB_UNIQ_KEY'] or None
示例12: process_item
# 需要導入模塊: from scrapy import conf [as 別名]
# 或者: from scrapy.conf import settings [as 別名]
def process_item(self, item, spider):
    """Insert the item; documents violating the unique index are skipped.

    Duplicate documents are detected via DuplicateKeyError raised by the
    unique index and are logged at debug level instead of being re-stored.
    """
    try:
        self.post.insert_one(dict(item))
    except DuplicateKeyError:
        # Same unique index value already stored: log and move on.
        spider.logger.debug('Duplicate key error collection')
    return item
# 2.異步存儲item - 不行!插入不了數據! (參考:https://zhuanlan.zhihu.com/p/44003499)
# from twisted.internet import defer, reactor
# class WenshuPipeline(object):
# def __init__(self, mongo_host, mongo_port, mongo_db, mongo_doc):
# self.mongo_host = mongo_host
# self.mongo_port = mongo_port
# self.mongo_db = mongo_db
# self.mongo_doc = mongo_doc
#
# @classmethod
# def from_crawler(cls, crawler):
# return cls(
# mongo_host=crawler.settings.get('MONGODB_HOST'),
# mongo_port=crawler.settings.get('MONGODB_PORT'),
# mongo_db=crawler.settings.get('MONGODB_DBNAME'),
# mongo_doc=crawler.settings.get('MONGODB_DOCNAME'),
# )
#
# def open_spider(self, spider):
# self.client = pymongo.MongoClient(host=self.mongo_host,port=self.mongo_port)
# self.mongodb = self.client[self.mongo_db]
# self.mongodb[self.mongo_doc].create_index('id', unique=True) # 創建索引,避免插入數據
#
# def close_spider(self, spider):
# self.client.close()
#
# # 下麵的操作是重點
# @defer.inlineCallbacks
# def process_item(self, item, spider):
# out = defer.Deferred()
# reactor.callInThread(self._insert, item, out, spider)
# yield out
# defer.returnValue(item)
# return item
#
# def _insert(self, item, out, spider):
# time.sleep(10)
# try:
# self.mongodb[self.mongo_doc].insert_one(dict(item))
# reactor.callFromThread(out.callback, item)
# except DuplicateKeyError:
# # 索引相同,即為重複數據,捕獲錯誤
# spider.logger.debug('duplicate key error collection')
# reactor.callFromThread(out.callback, item)