本文整理汇总了Python中mongo_connector.doc_managers.formatters.DefaultDocumentFormatter类的典型用法代码示例。如果您正苦于以下问题：Python DefaultDocumentFormatter类的具体用法？Python DefaultDocumentFormatter怎么用？Python DefaultDocumentFormatter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DefaultDocumentFormatter类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             attachment_field="content", **kwargs):
    """Build the Elasticsearch client and record the manager settings.

    Recognised keyword arguments:
      clientOptions -- dict of extra keyword arguments for Elasticsearch().
      aws -- dict that enables AWS request signing; may carry
             'access_id', 'secret_key' and 'region'.

    Raises ConfigurationError when 'aws' is given but the AWS extras are
    not installed.
    """
    # Copy so the signing options added below never leak back into the
    # caller's clientOptions dict.
    client_options = dict(kwargs.get('clientOptions', {}))
    if 'aws' in kwargs:
        if not _HAS_AWS:
            raise ConfigurationError('aws extras must be installed to sign Elasticsearch requests')
        aws_args = kwargs['aws']
        if 'access_id' in aws_args and 'secret_key' in aws_args:
            aws = aws_session.Session(
                aws_access_key_id=aws_args['access_id'],
                aws_secret_access_key=aws_args['secret_key'])
        else:
            aws = aws_session.Session()
        credentials = aws.get_credentials()
        # Fall back to us-east-1 when neither the session nor the 'aws'
        # settings name a region; a plain ['region'] lookup raised KeyError.
        region = aws.region_name or aws_args.get('region') or 'us-east-1'
        aws_auth = AWSV4Sign(credentials, region, 'es')
        client_options['http_auth'] = aws_auth
        client_options['use_ssl'] = True
        client_options['verify_certs'] = True
        client_options['connection_class'] = es_connection.RequestsHttpConnection
    self.elastic = Elasticsearch(
        hosts=[url], **client_options)
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    # None and 0 both skip the run_auto_commit() call.
    if self.auto_commit_interval not in [None, 0]:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
示例2: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Open the graph at `url` and remember the connector settings.

    The optional 'clientOptions' keyword argument is stored as-is on
    self.kwargs (None when it was not supplied).
    """
    self.graph = Graph(url)

    # Plain configuration values handed in by the caller.
    self.auto_commit_interval = auto_commit_interval
    self.unique_key = unique_key
    self.chunk_size = chunk_size

    self._formatter = DefaultDocumentFormatter()
    self.kwargs = kwargs.get("clientOptions")
示例3: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Connect to Elasticsearch at `url` and store the manager settings.

    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    self.elastic = Elasticsearch(hosts=[url])
    self.auto_commit_interval = auto_commit_interval
    # Every document is indexed under this type unless it is changed.
    self.doc_type = 'string'
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
示例4: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='uid', chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Open the Neo4j graph at `url` and store the connector settings.

    The optional 'clientOptions' keyword argument is stored on self.kwargs.
    self.authorization_token holds the base64 encoding of the NEO4J_AUTH
    environment variable.

    Raises TypeError when NEO4J_AUTH is not set.
    """
    self.graph = Graph(url)
    self.url = url
    self.auto_commit_interval = auto_commit_interval
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    self._formatter = DefaultDocumentFormatter()
    self.kwargs = kwargs.get("clientOptions")
    raw_auth = os.getenv('NEO4J_AUTH')
    if raw_auth is None:
        # b64encode(None) already raised TypeError; keep the type but say why.
        raise TypeError("NEO4J_AUTH environment variable is not set")
    if not isinstance(raw_auth, bytes):
        # b64encode() needs bytes; on Python 3 os.getenv() returns text.
        raw_auth = raw_auth.encode('utf-8')
    self.authorization_token = base64.b64encode(raw_auth)
示例5: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             **kwargs):
    """Connect to Elasticsearch at `url` and store the manager settings.

    meta_index_name and meta_type are kept on the instance alongside the
    commit interval, unique key and chunk size.
    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    self.elastic = Elasticsearch(hosts=[url])
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
示例6: __init__
def __init__(
    self,
    url,
    auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
    unique_key="_id",
    chunk_size=DEFAULT_MAX_BULK,
    meta_index_name="mongodb_meta",
    meta_type="mongodb_meta",
    attachment_field="content",
    **kwargs
):
    """Build the Elasticsearch client, bulk buffer and auto-committer.

    `url` may be a single host or a list of hosts.

    Recognised keyword arguments:
      clientOptions -- dict of extra keyword arguments for Elasticsearch().
      aws -- settings passed to create_aws_auth() to sign requests.
      autoSendInterval -- overrides DEFAULT_SEND_INTERVAL.

    Raises errors.InvalidConfiguration when 'aws' is given but the AWS
    extras are not installed.
    """
    # Copy so the options added below do not modify the caller's dict.
    client_options = dict(kwargs.get("clientOptions", {}))
    if "aws" in kwargs:
        if not _HAS_AWS:
            raise errors.InvalidConfiguration(
                "aws extras must be installed to sign Elasticsearch "
                "requests. Install with: "
                "pip install elastic2-doc-manager[aws]"
            )
        client_options["http_auth"] = create_aws_auth(kwargs["aws"])
        client_options["use_ssl"] = True
        client_options["verify_certs"] = True
        client_options["connection_class"] = es_connection.RequestsHttpConnection
    # isinstance() also accepts list subclasses, which the exact type
    # comparison used to wrap in a second list.
    if not isinstance(url, list):
        url = [url]
    self.elastic = Elasticsearch(hosts=url, **client_options)
    self._formatter = DefaultDocumentFormatter()
    self.BulkBuffer = BulkBuffer(self)
    # As bulk operation can be done in another thread
    # lock is needed to prevent access to BulkBuffer
    # while committing documents to Elasticsearch
    # It is because BulkBuffer might get outdated
    # docs from Elasticsearch if bulk is still ongoing
    self.lock = threading.Lock()
    self.auto_commit_interval = auto_commit_interval
    self.auto_send_interval = kwargs.get("autoSendInterval", DEFAULT_SEND_INTERVAL)
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
    self.auto_commiter = AutoCommiter(
        self, self.auto_send_interval, self.auto_commit_interval
    )
    self.auto_commiter.start()
示例7: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             attachment_field="content", **kwargs):
    """Create the Elasticsearch client and store the manager settings.

    The client comes from self._create_elasticsearch_client(), which is
    given `url` and the 'clientOptions' keyword argument (an empty dict
    when absent). run_auto_commit() is called unless
    auto_commit_interval is None or 0.
    """
    client_options = kwargs.get('clientOptions', {})
    self.elastic = self._create_elasticsearch_client(url, client_options)
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
示例8: __init__
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             attachment_field="content", **kwargs):
    """Build the Elasticsearch client and bulk buffer, then store settings.

    `url` may be a single host or a list of hosts. Sniffing options are
    defaulted on unless 'clientOptions' already sets them.

    Recognised keyword arguments:
      clientOptions -- dict of extra keyword arguments for Elasticsearch().
      aws -- settings passed to create_aws_auth() to sign requests.

    Raises errors.InvalidConfiguration when 'aws' is given but the AWS
    extras are not installed.
    """
    # Copy first: the setdefault() calls and AWS options below would
    # otherwise write into the caller's clientOptions dict.
    client_options = dict(kwargs.get('clientOptions', {}))
    client_options.setdefault('sniff_on_start', True)
    client_options.setdefault('sniff_on_connection_fail', True)
    client_options.setdefault('sniffer_timeout', 60)
    if 'aws' in kwargs:
        if not _HAS_AWS:
            raise errors.InvalidConfiguration(
                'aws extras must be installed to sign Elasticsearch '
                'requests. Install with: '
                'pip install elastic2-doc-manager[aws]')
        client_options['http_auth'] = create_aws_auth(kwargs['aws'])
        client_options['use_ssl'] = True
        client_options['verify_certs'] = True
        client_options['connection_class'] = \
            es_connection.RequestsHttpConnection
    # isinstance() also accepts list subclasses, which the exact type
    # comparison used to wrap in a second list.
    if not isinstance(url, list):
        url = [url]
    self.elastic = Elasticsearch(hosts=url, **client_options)
    self._formatter = DefaultDocumentFormatter()
    self.BulkBuffer = BulkBuffer(self)
    # As bulk operation can be done in another thread
    # lock is needed to prevent access to BulkBuffer
    # while committing documents to Elasticsearch
    # It is because BulkBuffer might get outdated
    # docs from Elasticsearch if bulk is still ongoing
    self.lock = Lock()
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    if self.auto_commit_interval not in [None, 0]:
        self.run_auto_commit()
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
示例9: DocManager
class DocManager(DocManagerBase):
"""Elasticsearch implementation of the DocManager interface.
Receives documents from an OplogThread and takes the appropriate actions on
Elasticsearch.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             **kwargs):
    """Connect to Elasticsearch at `url` and store the manager settings.

    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    self.elastic = Elasticsearch(hosts=[url])
    self.auto_commit_interval = auto_commit_interval
    # Every document is indexed under this type unless it is changed.
    self.doc_type = 'string'
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
def apply_update(self, doc, update_spec):
    """Return the document that results from applying update_spec to doc.

    A spec with neither "$set" nor "$unset" is a full replacement and is
    returned unchanged, so ns and _ts are not copied back in from doc.
    Any other spec is delegated to the base class.
    """
    uses_operators = "$set" in update_spec or "$unset" in update_spec
    if uses_operators:
        return super(DocManager, self).apply_update(doc, update_spec)
    return update_spec
@wrap_exceptions
def update(self, doc, update_spec):
    """Apply update_spec to the stored document matching doc's _id.

    Commits first, fetches the stored document from the index named by
    doc['ns'], applies the update and upserts the result.
    Returns the updated document.
    """
    self.commit()
    stored = self.elastic.get(index=doc['ns'], id=str(doc['_id']))
    updated = self.apply_update(stored['_source'], update_spec)
    # _id is immutable in MongoDB, so the stored value is still valid.
    updated['_id'] = stored['_id']
    # upsert() expects the metadata fields inside the document, so copy
    # them over from doc (needed until they become separate arguments
    # in 2.x).
    for meta_key in ('ns', '_ts'):
        updated[meta_key] = doc[meta_key]
    self.upsert(updated)
    # upsert() has stripped the metadata again: only _id and the
    # _source fields are left.
    return updated
@wrap_exceptions
def upsert(self, doc):
    """Index doc in Elasticsearch and record its metadata.

    'ns', '_id' and '_ts' are popped from doc; the remaining fields are
    indexed under doc['ns'] and the metadata goes to the meta index.
    On return doc holds '_id' again, as a string.
    """
    doc_type = self.doc_type
    index = doc.pop('ns')
    # The id goes in the request, so it need not be repeated in the source.
    doc_id = str(doc.pop("_id"))
    metadata = {"ns": index, "_ts": doc.pop("_ts")}

    # Source document.
    source_body = self._formatter.format_document(doc)
    self.elastic.index(index=index, doc_type=doc_type,
                       body=source_body, id=doc_id,
                       refresh=(self.auto_commit_interval == 0))

    # Metadata document, stored under the same id.
    meta_body = bson.json_util.dumps(metadata)
    self.elastic.index(index=self.meta_index_name, doc_type=self.meta_type,
                       body=meta_body, id=doc_id,
                       refresh=(self.auto_commit_interval == 0))

    # _id belongs to the original document, so put it back.
    doc['_id'] = doc_id
@wrap_exceptions
def bulk_upsert(self, docs):
"""Insert multiple documents into Elasticsearch."""
def docs_to_upsert():
doc = None
for doc in docs:
# Remove metadata and redundant _id
index = doc.pop("ns")
doc_id = str(doc.pop("_id"))
timestamp = doc.pop("_ts")
document_action = {
"_index": index,
"_type": self.doc_type,
"_id": doc_id,
"_source": self._formatter.format_document(doc)
}
document_meta = {
"_index": self.meta_index_name,
"_type": self.meta_type,
"_id": doc_id,
"_source": {
"ns": index,
"_ts": timestamp
}
}
yield document_action
#.........这里部分代码省略.........
示例10: DocManager
class DocManager(DocManagerBase):
"""Elasticsearch implementation of the DocManager interface.
Receives documents from an OplogThread and takes the appropriate actions on
Elasticsearch.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             attachment_field="content", **kwargs):
    """Connect to Elasticsearch and store the manager settings.

    `url` is turned into a host list by self._get_hosts(). The
    'clientOptions' keyword argument supplies extra Elasticsearch()
    arguments; the request timeout is 60 unless it sets 'timeout'.
    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    hosts = self._get_hosts(url)
    # Apply the 60s timeout as a default: passing timeout=60 alongside a
    # clientOptions 'timeout' raised a duplicate-keyword TypeError.
    client_options = dict(kwargs.get('clientOptions', {}))
    client_options.setdefault('timeout', 60)
    self.elastic = Elasticsearch(hosts=hosts, **client_options)
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    if self.auto_commit_interval not in [None, 0]:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
def _get_hosts(self, url):
if isinstance(url, list):
return url
elif isinstance(url, str):
return url.split(',')
else:
raise errors.ConnectionFailed("Invalid URI for Elastic")
def _index_and_mapping(self, namespace):
"""Helper method for getting the index and type from a namespace."""
index, doc_type = namespace.split('.', 1)
return index.lower(), doc_type
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
def apply_update(self, doc, update_spec):
    """Return the document that results from applying update_spec to doc.

    A spec with neither "$set" nor "$unset" is a full replacement and is
    returned unchanged, so ns and _ts are not copied back in from doc.
    Any other spec is delegated to the base class.
    """
    uses_operators = "$set" in update_spec or "$unset" in update_spec
    if uses_operators:
        return super(DocManager, self).apply_update(doc, update_spec)
    return update_spec
@wrap_exceptions
def handle_command(self, doc, namespace, timestamp):
    """Mirror a MongoDB command document onto Elasticsearch.

    dropDatabase deletes every index the database maps to, create puts a
    mapping, and drop deletes a mapping. renameCollection is unsupported
    and raises errors.OperationFailed.
    """
    database = namespace.split('.', 1)[0]

    if doc.get('dropDatabase'):
        for mapped_db in self.command_helper.map_db(database):
            self.elastic.indices.delete(index=mapped_db.lower())

    if doc.get('renameCollection'):
        raise errors.OperationFailed(
            "elastic_doc_manager does not support renaming a mapping.")

    if doc.get('create'):
        # `database` is rebound here on purpose: the 'drop' branch below
        # sees the mapped name, exactly as before.
        database, collection = self.command_helper.map_collection(
            database, doc['create'])
        if database and collection:
            mapping_body = {"_source": {"enabled": True}}
            self.elastic.indices.put_mapping(
                index=database.lower(), doc_type=collection,
                body=mapping_body)

    if doc.get('drop'):
        database, collection = self.command_helper.map_collection(
            database, doc['drop'])
        if database and collection:
            self.elastic.indices.delete_mapping(index=database.lower(),
                                                doc_type=collection)
@wrap_exceptions
def update(self, document_id, update_spec, namespace, timestamp):
    """Apply update_spec to the stored document with id document_id.

    Commits first, fetches the stored document from the index and type
    derived from namespace, applies the update and upserts the result.
    Returns the updated document.
    """
    self.commit()
    index, doc_type = self._index_and_mapping(namespace)
    stored = self.elastic.get(index=index, doc_type=doc_type,
                              id=u(document_id))
    updated = self.apply_update(stored['_source'], update_spec)
    # _id is immutable in MongoDB, so the stored value is still valid.
    updated['_id'] = stored['_id']
    self.upsert(updated, namespace, timestamp)
    # upsert() has stripped the metadata: only _id and the _source
    # fields are left.
    return updated
@wrap_exceptions
def upsert(self, doc, namespace, timestamp):
"""Insert a document into Elasticsearch."""
index, doc_type = self._index_and_mapping(namespace)
# No need to duplicate '_id' in source document
doc_id = u(doc.pop("_id"))
metadata = {
"ns": namespace,
#.........这里部分代码省略.........
示例11: DocManager
class DocManager(DocManagerBase):
"""
Neo4j implementation for the DocManager. Receives documents and
communicates with Neo4j Server.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key="_id", chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Open the graph at `url` and remember the connector settings.

    The optional 'clientOptions' keyword argument is stored as-is on
    self.kwargs (None when it was not supplied).
    """
    self.graph = Graph(url)

    # Plain configuration values handed in by the caller.
    self.auto_commit_interval = auto_commit_interval
    self.unique_key = unique_key
    self.chunk_size = chunk_size

    self._formatter = DefaultDocumentFormatter()
    self.kwargs = kwargs.get("clientOptions")
def apply_id_constraint(self, doc_types):
    """Execute a uniqueness constraint on `_id` for each label in doc_types."""
    template = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d._id IS UNIQUE"
    for label in doc_types:
        self.graph.cypher.execute(template.format(doc_type=label))
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
@wrap_exceptions
def upsert(self, doc, namespace, timestamp):
    """Write one document to Neo4j in a single transaction.

    '_id' is popped from doc, the rest is formatted, and the node and
    relationship statements produced by NodesAndRelationshipsBuilder are
    appended to one transaction and committed.
    """
    _, doc_type = self._index_and_mapping(namespace)
    doc_id = u(doc.pop("_id"))
    metadata = {"_ts": timestamp}
    formatted = self._formatter.format_document(doc)
    builder = NodesAndRelationshipsBuilder(formatted, doc_type, doc_id, metadata)
    self.apply_id_constraint(builder.doc_types)

    tx = self.graph.cypher.begin()
    # Nodes first, then the relationships between them.
    for statement, params in builder.query_nodes.items():
        tx.append(statement, params)
    for statement, params in builder.relationships_query.items():
        tx.append(statement, params)
    tx.commit()
@wrap_exceptions
def bulk_upsert(self, docs, namespace, timestamp):
    """Write many documents to Neo4j, one transaction per chunk.

    `docs` may be any iterable. Each transaction holds the statements of
    up to self.chunk_size documents; a non-positive chunk size puts all
    documents in one transaction. Transactions without any document are
    never committed.
    """
    from itertools import islice

    # iter() lets callers pass lists as well as iterators; next() on a
    # plain list raised TypeError.
    doc_iter = iter(docs)
    chunk_size = self.chunk_size

    def iterate_chunks():
        """Yield one filled, uncommitted transaction per chunk."""
        while True:
            tx = self.graph.cypher.begin()
            metadata = {"_ts": timestamp}
            # islice() takes at most chunk_size documents. Only exhaustion
            # of doc_iter ends a chunk early, so a StopIteration raised
            # while building statements is no longer mistaken for the end
            # of input.
            if chunk_size > 0:
                chunk = islice(doc_iter, chunk_size)
            else:
                chunk = doc_iter
            queued = 0
            for doc in chunk:
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = u(doc.pop("_id"))
                doc = self._formatter.format_document(doc)
                builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
                self.apply_id_constraint(builder.doc_types)
                for statement, params in builder.query_nodes.items():
                    tx.append(statement, params)
                for statement, params in builder.relationships_query.items():
                    tx.append(statement, params)
                queued += 1
            if queued:
                yield tx
            # A short (or unbounded) chunk means the input is exhausted.
            # This also ends the loop for chunk_size <= 0, which used to
            # spin forever.
            if chunk_size <= 0 or queued < chunk_size:
                return

    for tx in iterate_chunks():
        tx.commit()
@wrap_exceptions
def update(self, document_id, update_spec, namespace, timestamp):
    """Apply update_spec to the node(s) of document_id in one transaction.

    The statements come from NodesAndRelationshipsUpdater.run_update().
    """
    doc_id = u(document_id)
    tx = self.graph.cypher.begin()
    _, doc_type = self._index_and_mapping(namespace)
    updater = NodesAndRelationshipsUpdater()
    updater.run_update(update_spec, doc_id, doc_type)
    # Each entry maps statement text to its parameters.
    for statement_map in updater.statements_with_params:
        for statement, params in statement_map.items():
            tx.append(statement, params)
    tx.commit()
@wrap_exceptions
def remove(self, document_id, namespace, timestamp):
    """Delete the Document node with this id, together with its relationships."""
    doc_id = u(document_id)
    # The result is unused, but the call stays so a malformed namespace
    # still fails here.
    self._index_and_mapping(namespace)
    delete_statement = "MATCH (d:Document) WHERE d._id={doc_id} OPTIONAL MATCH (d)-[r]-() DELETE d, r"
    tx = self.graph.cypher.begin()
    tx.append(delete_statement, {"doc_id": doc_id})
    tx.commit()
@wrap_exceptions
def search(self, start_ts, end_ts):
statement = "MATCH (d:Document) WHERE d._ts>={start_ts} AND d._ts<={end_ts} RETURN d".format(
start_ts=start_ts, end_ts=end_ts
#.........这里部分代码省略.........
示例12: DocManager
class DocManager(DocManagerBase):
"""Elasticsearch implementation of the DocManager interface.
Receives documents from an OplogThread and takes the appropriate actions on
Elasticsearch.
"""
def __init__(
    self,
    url,
    auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
    unique_key="_id",
    chunk_size=DEFAULT_MAX_BULK,
    meta_index_name="mongodb_meta",
    meta_type="mongodb_meta",
    attachment_field="content",
    **kwargs
):
    """Build the Elasticsearch client, bulk buffer and auto-committer.

    `url` may be a single host or a list of hosts.

    Recognised keyword arguments:
      clientOptions -- dict of extra keyword arguments for Elasticsearch().
      aws -- settings passed to create_aws_auth() to sign requests.
      autoSendInterval -- overrides DEFAULT_SEND_INTERVAL.

    Raises errors.InvalidConfiguration when 'aws' is given but the AWS
    extras are not installed.
    """
    # Copy so the options added below do not modify the caller's dict.
    client_options = dict(kwargs.get("clientOptions", {}))
    if "aws" in kwargs:
        if not _HAS_AWS:
            raise errors.InvalidConfiguration(
                "aws extras must be installed to sign Elasticsearch "
                "requests. Install with: "
                "pip install elastic2-doc-manager[aws]"
            )
        client_options["http_auth"] = create_aws_auth(kwargs["aws"])
        client_options["use_ssl"] = True
        client_options["verify_certs"] = True
        client_options["connection_class"] = es_connection.RequestsHttpConnection
    # isinstance() also accepts list subclasses, which the exact type
    # comparison used to wrap in a second list.
    if not isinstance(url, list):
        url = [url]
    self.elastic = Elasticsearch(hosts=url, **client_options)
    self._formatter = DefaultDocumentFormatter()
    self.BulkBuffer = BulkBuffer(self)
    # As bulk operation can be done in another thread
    # lock is needed to prevent access to BulkBuffer
    # while committing documents to Elasticsearch
    # It is because BulkBuffer might get outdated
    # docs from Elasticsearch if bulk is still ongoing
    self.lock = threading.Lock()
    self.auto_commit_interval = auto_commit_interval
    self.auto_send_interval = kwargs.get("autoSendInterval", DEFAULT_SEND_INTERVAL)
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
    self.auto_commiter = AutoCommiter(
        self, self.auto_send_interval, self.auto_commit_interval
    )
    self.auto_commiter.start()
def _index_and_mapping(self, namespace):
"""Helper method for getting the index and type from a namespace."""
index, doc_type = namespace.split(".", 1)
return index.lower(), doc_type
def stop(self):
    """Join the auto-committer, then commit whatever is still buffered.

    auto_commit_interval is set to 0 before the final commit() call.
    """
    self.auto_commiter.join()
    self.auto_commit_interval = 0
    # Push out the documents that remain in the buffer.
    self.commit()
def apply_update(self, doc, update_spec):
    """Return the document that results from applying update_spec to doc.

    A spec with neither "$set" nor "$unset" is a full replacement and is
    returned unchanged, so ns and _ts are not copied back in from doc.
    Any other spec is delegated to the base class.
    """
    uses_operators = "$set" in update_spec or "$unset" in update_spec
    if uses_operators:
        return super(DocManager, self).apply_update(doc, update_spec)
    return update_spec
@wrap_exceptions
def handle_command(self, doc, namespace, timestamp):
# Flush buffer before handle command
self.commit()
db = namespace.split(".", 1)[0]
if doc.get("dropDatabase"):
dbs = self.command_helper.map_db(db)
for _db in dbs:
self.elastic.indices.delete(index=_db.lower())
if doc.get("renameCollection"):
raise errors.OperationFailed(
"elastic_doc_manager does not support renaming a mapping."
)
if doc.get("create"):
db, coll = self.command_helper.map_collection(db, doc["create"])
if db and coll:
self.elastic.indices.put_mapping(
index=db.lower(), doc_type=coll, body={"_source": {"enabled": True}}
)
if doc.get("drop"):
db, coll = self.command_helper.map_collection(db, doc["drop"])
if db and coll:
#.........这里部分代码省略.........
示例13: DocManager
class DocManager(DocManagerBase):
"""Elasticsearch implementation of the DocManager interface.
Receives documents from an OplogThread and takes the appropriate actions on
Elasticsearch.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Connect to Elasticsearch at `url` and store the manager settings.

    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    self.elastic = Elasticsearch(hosts=[url])
    self.auto_commit_interval = auto_commit_interval
    # Every document is indexed under this type unless it is changed.
    self.doc_type = 'string'
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
@wrap_exceptions
def handle_command(self, doc, namespace_set):
    """Handle a 'drop' command by deleting the matching Elasticsearch index.

    Nothing happens when namespace_set is empty, when doc carries no
    'drop' value, or when "<db>.<collection>" is not in namespace_set.
    The database name is the part of doc['ns'] before the first dot.
    """
    logging.debug("ES:handle_command")
    if not namespace_set:
        return
    db, _ = doc['ns'].split(".", 1)
    # Commands other than 'drop' have no such key; .get() lets them fall
    # through the emptiness check below instead of raising KeyError.
    coll = doc.get('drop')
    if coll in [None, ""]:
        return
    index = db + "." + coll
    if index in namespace_set:
        logging.debug("ES: received drop for %s", index)
        self.elastic.indices.delete(index)
@wrap_exceptions
def update(self, doc, update_spec):
    """Apply update_spec to the stored document matching doc's _id.

    The stored document is fetched from the index named by doc['ns'],
    updated and upserted. Returns the updated document.
    """
    stored = self.elastic.get(index=doc['ns'], id=str(doc['_id']))
    updated = self.apply_update(stored['_source'], update_spec)
    # _id is immutable in MongoDB, so the stored value is still valid.
    updated['_id'] = stored['_id']
    self.upsert(updated)
    return updated
@wrap_exceptions
def upsert(self, doc):
    """Index doc in the Elasticsearch index named by doc['ns'].

    '_id' is popped while the document is formatted and then restored
    as a string.
    """
    index = doc['ns']
    doc_type = self.doc_type
    # The id goes in the request, so it need not be repeated in the source.
    doc_id = str(doc.pop("_id"))
    source_body = self._formatter.format_document(doc)
    self.elastic.index(index=index, doc_type=doc_type,
                       body=source_body, id=doc_id,
                       refresh=(self.auto_commit_interval == 0))
    # Restore the id so the caller's document keeps its '_id' key.
    doc['_id'] = doc_id
@wrap_exceptions
def bulk_upsert(self, docs):
    """Index many documents through streaming_bulk().

    Failed items are logged, not raised. An empty `docs` sequence is
    silently ignored. commit() is called afterwards when
    auto_commit_interval is 0.
    """
    def docs_to_upsert():
        """Yield one bulk action per document in docs."""
        last_doc = None
        for last_doc in docs:
            target_index = last_doc["ns"]
            doc_id = str(last_doc.pop("_id"))
            action = {
                "_index": target_index,
                "_type": self.doc_type,
                "_id": doc_id,
                "_source": self._formatter.format_document(last_doc)
            }
            yield action
        # Reached with nothing iterated (or with a falsy last document).
        if not last_doc:
            raise errors.EmptyDocsError(
                "Cannot upsert an empty sequence of "
                "documents into Elastic Search")

    try:
        bulk_kwargs = {}
        if self.chunk_size > 0:
            bulk_kwargs['chunk_size'] = self.chunk_size
        results = streaming_bulk(client=self.elastic,
                                 actions=docs_to_upsert(),
                                 **bulk_kwargs)
        for succeeded, response in results:
            if succeeded:
                continue
            logging.error(
                "Could not bulk-upsert document "
                "into ElasticSearch: %r" % response)
        if self.auto_commit_interval == 0:
            self.commit()
    except errors.EmptyDocsError:
        # This can happen when mongo-connector starts up, there is no
        # config file, but nothing to dump
        pass
#.........这里部分代码省略.........
示例14: DocManager
class DocManager(DocManagerBase):
"""Elasticsearch implementation of the DocManager interface.
Receives documents from an OplogThread and takes the appropriate actions on
Elasticsearch.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
             meta_index_name="mongodb_meta", meta_type="mongodb_meta",
             attachment_field="content", **kwargs):
    """Connect to Elasticsearch at `url` and store the manager settings.

    Recognised keyword arguments:
      clientOptions -- dict of extra keyword arguments for Elasticsearch().
      routing -- dict stored on self.routing (empty when absent).

    run_auto_commit() is called unless auto_commit_interval is None or 0.
    """
    client_options = kwargs.get('clientOptions', {})
    self.elastic = Elasticsearch(hosts=[url], **client_options)
    self.auto_commit_interval = auto_commit_interval
    self.meta_index_name = meta_index_name
    self.meta_type = meta_type
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    self.routing = kwargs.get('routing', {})
    auto_commit_enabled = self.auto_commit_interval not in (None, 0)
    if auto_commit_enabled:
        self.run_auto_commit()
    self._formatter = DefaultDocumentFormatter()
    self.has_attachment_mapping = False
    self.attachment_field = attachment_field
def _index_and_mapping(self, namespace):
"""Helper method for getting the index and type from a namespace."""
index, doc_type = namespace.split('.', 1)
return index.lower(), doc_type
def _get_parent_id(self, doc_type, doc):
"""Get parent ID from doc"""
if doc_type in self.routing:
if '_parent' in doc:
return doc.pop('_parent')
parent_field = self.routing[doc_type].get('parentField')
if not parent_field:
return None
parent_id = doc.pop(parent_field) if parent_field in doc else None
return self._formatter.transform_value(parent_id)
def _search_doc_by_id(self, index, doc_type, doc_id):
    """Return the single hit for doc_id in index/doc_type, or None.

    None is returned unless the search reports exactly one hit.
    """
    ids_query = {
        'query': {
            'ids': {
                'type': doc_type,
                'values': [u(doc_id)]
            }
        }
    }
    result = self.elastic.search(index=index, doc_type=doc_type,
                                 body=ids_query)
    hits = result['hits']
    if hits['total'] != 1:
        return None
    return hits['hits'][0]
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
def apply_update(self, doc, update_spec):
    """Return the document that results from applying update_spec to doc.

    A spec with neither "$set" nor "$unset" is a full replacement and is
    returned unchanged, so ns and _ts are not copied back in from doc.
    Any other spec is delegated to the base class.
    """
    uses_operators = "$set" in update_spec or "$unset" in update_spec
    if uses_operators:
        return super(DocManager, self).apply_update(doc, update_spec)
    return update_spec
@wrap_exceptions
def handle_command(self, doc, namespace, timestamp):
db = namespace.split('.', 1)[0]
if doc.get('dropDatabase'):
dbs = self.command_helper.map_db(db)
for _db in dbs:
self.elastic.indices.delete(index=_db.lower())
if doc.get('renameCollection'):
raise errors.OperationFailed(
"elastic_doc_manager does not support renaming a mapping.")
if doc.get('create'):
db, coll = self.command_helper.map_collection(db, doc['create'])
if db and coll:
self.elastic.indices.put_mapping(
index=db.lower(), doc_type=coll,
body={
"_source": {"enabled": True}
})
if doc.get('drop'):
db, coll = self.command_helper.map_collection(db, doc['drop'])
if db and coll:
# This will delete the items in coll, but not get rid of the
# mapping.
warnings.warn("Deleting all documents of type %s on index %s."
"The mapping definition will persist and must be"
"removed manually." % (coll, db))
#.........这里部分代码省略.........
示例15: DocManager
class DocManager(DocManagerBase):
"""
Neo4j implementation for the DocManager. Receives documents and
communicates with Neo4j Server.
"""
def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
             unique_key='uid', chunk_size=DEFAULT_MAX_BULK, **kwargs):
    """Open the Neo4j graph at `url` and store the connector settings.

    The optional 'clientOptions' keyword argument is stored on self.kwargs.
    self.authorization_token holds the base64 encoding of the NEO4J_AUTH
    environment variable.

    Raises TypeError when NEO4J_AUTH is not set.
    """
    self.graph = Graph(url)
    self.url = url
    self.auto_commit_interval = auto_commit_interval
    self.unique_key = unique_key
    self.chunk_size = chunk_size
    self._formatter = DefaultDocumentFormatter()
    self.kwargs = kwargs.get("clientOptions")
    raw_auth = os.getenv('NEO4J_AUTH')
    if raw_auth is None:
        # b64encode(None) already raised TypeError; keep the type but say why.
        raise TypeError("NEO4J_AUTH environment variable is not set")
    if not isinstance(raw_auth, bytes):
        # b64encode() needs bytes; on Python 3 os.getenv() returns text.
        raw_auth = raw_auth.encode('utf-8')
    self.authorization_token = base64.b64encode(raw_auth)
def apply_id_constraint(self, doc_types):
    """Execute a uniqueness constraint on `uid` for each upper-cased label."""
    template = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d.uid IS UNIQUE"
    for label in doc_types:
        statement = template.format(doc_type=label.upper())
        self.graph.cypher.execute(statement)
def stop(self):
    """Disable auto-commit by resetting auto_commit_interval to None."""
    self.auto_commit_interval = None
@wrap_exceptions
def upsert(self, doc, namespace, timestamp):
    """Write one document to Neo4j, then index the resulting nodes.

    'uid' is popped from doc, the rest is formatted, and every statement
    produced by NodesAndRelationshipsBuilder is appended to a single
    transaction. A failed commit is logged and swallowed; when the
    commit returns a truthy result, its node ids are passed to
    create_geospatial_indices().
    """
    index, doc_type = self._index_and_mapping(namespace)
    doc_id = u(doc.pop("uid"))
    metadata = {"_ts": timestamp}
    doc = self._formatter.format_document(doc)
    builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
    self.apply_id_constraint(builder.doc_types)
    tx = self.graph.cypher.begin()
    for statement, params in builder.query_nodes.items():
        tx.append(statement, params)
    # Parameterless statements from the builder's cypher list.
    for query in builder.cypher_list:
        tx.append(query)
    for relationship, params in builder.relationships_query:
        tx.append(relationship, params)
    for statement in builder.statements_with_params:
        for key, params in statement.items():
            tx.append(key, params)
    commit_result = None
    try:
        commit_result = tx.commit()
        # The bare `print` statement was debugging output and a syntax
        # error on Python 3; log the result at debug level instead.
        LOG.debug('%r', commit_result)
    except Exception as e:
        # Best effort: a failed commit must not abort the connector.
        # `e.message` does not exist on every exception, so log `e`
        # itself, with the traceback.
        LOG.exception('%s', e)
    if commit_result:
        nodeids_list = self._get_nodeids(commit_result)
        self.create_geospatial_indices(nodeids_list)
def _get_nodeids(self, commit_result):
node_id_list = []
a = len(commit_result)
for i in range(len(commit_result)):
res = commit_result.pop(0)
records = res.records
if not records:
continue
for record in records:
node_ids = list(record.__values__)
node_id_list.extend(node_ids)
return node_id_list
def create_geospatial_indices(self, node_ids_list):
    """
    Creates geo spatial indices on the node ids.

    Sets ids on the nodes, creates the 'geom' layer with a 'point'
    geometry on the 'lat'/'lon' properties, and adds the nodes to it.

    :param node_ids_list: list of node ids
    """
    layer_name = 'geom'
    lat = 'lat'
    lon = 'lon'
    geometry_type = 'point'
    self._set_id_to_nodeid(node_ids_list)
    self._create_layer(layer_name, lat, lon)
    self._add_geometry(layer_name, geometry_type, lat, lon)
    result = self._add_node_to_layer(node_ids_list, layer_name)
    # logging substitutes arguments with %-formatting; the former '{}'
    # placeholder made the log record fail to format.
    LOG.info('Geospatial index creation response %r', result)
def _set_id_to_nodeid(self, node_ids_list):
# TODO: We may want it to change to label name
"""
Set id on basis of node ids
:param node_ids_list:
:param label_name:
:return:
"""
tx = self.graph.cypher.begin()
for count, nodeid in enumerate(node_ids_list, 1):
#.........这里部分代码省略.........