当前位置: 首页>>代码示例>>Python>>正文


Python formatters.DefaultDocumentFormatter类代码示例

本文整理汇总了Python中mongo_connector.doc_managers.formatters.DefaultDocumentFormatter的典型用法代码示例。如果您正苦于以下问题:Python DefaultDocumentFormatter类的具体用法?Python DefaultDocumentFormatter怎么用?Python DefaultDocumentFormatter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DefaultDocumentFormatter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 attachment_field="content", **kwargs):
        client_options = kwargs.get('clientOptions', {})
        if 'aws' in kwargs:
            if _HAS_AWS is False:
                raise ConfigurationError('aws extras must be installed to sign Elasticsearch requests')
            aws_args = kwargs.get('aws', {'region': 'us-east-1'})
            aws = aws_session.Session()
            if 'access_id' in aws_args and 'secret_key' in aws_args:
                aws = aws_session.Session(
                    aws_access_key_id = aws_args['access_id'],
                    aws_secret_access_key = aws_args['secret_key'])
            credentials = aws.get_credentials()
            region = aws.region_name or aws_args['region']
            aws_auth = AWSV4Sign(credentials, region, 'es')
            client_options['http_auth'] = aws_auth
            client_options['use_ssl'] = True
            client_options['verify_certs'] = True
            client_options['connection_class'] = es_connection.RequestsHttpConnection
        self.elastic = Elasticsearch(
            hosts=[url], **client_options)
        self.auto_commit_interval = auto_commit_interval
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

        self.has_attachment_mapping = False
        self.attachment_field = attachment_field
开发者ID:sha1sum,项目名称:elastic-doc-manager,代码行数:34,代码来源:elastic_doc_manager.py

示例2: __init__

 def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
   
   self.graph = Graph(url)
   self.auto_commit_interval = auto_commit_interval
   self.unique_key = unique_key
   self.chunk_size = chunk_size
   self._formatter = DefaultDocumentFormatter()
   self.kwargs = kwargs.get("clientOptions")
开发者ID:dsjennin,项目名称:neo4j_doc_manager,代码行数:9,代码来源:neo4j_doc_manager.py

示例3: __init__

 def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
              unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
     self.elastic = Elasticsearch(hosts=[url])
     self.auto_commit_interval = auto_commit_interval
     self.doc_type = 'string'  # default type is string, change if needed
     self.unique_key = unique_key
     self.chunk_size = chunk_size
     if self.auto_commit_interval not in [None, 0]:
         self.run_auto_commit()
     self._formatter = DefaultDocumentFormatter()
开发者ID:gwaller,项目名称:mongo-connector,代码行数:10,代码来源:elastic_doc_manager.py

示例4: __init__

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='uid', chunk_size=DEFAULT_MAX_BULK, **kwargs):

        self.graph = Graph(url)
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")
        self.authorization_token = base64.b64encode(os.getenv('NEO4J_AUTH'))
开发者ID:mayank-chutani,项目名称:mongo-connector,代码行数:11,代码来源:neo4j_doc_manager.py

示例5: __init__

 def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
              unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
              meta_index_name="mongodb_meta", meta_type="mongodb_meta",
              **kwargs):
     self.elastic = Elasticsearch(hosts=[url])
     self.auto_commit_interval = auto_commit_interval
     self.meta_index_name = meta_index_name
     self.meta_type = meta_type
     self.unique_key = unique_key
     self.chunk_size = chunk_size
     if self.auto_commit_interval not in [None, 0]:
         self.run_auto_commit()
     self._formatter = DefaultDocumentFormatter()
开发者ID:izzui,项目名称:mongo-connector,代码行数:13,代码来源:elastic_doc_manager.py

示例6: __init__

    def __init__(
        self,
        url,
        auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
        unique_key="_id",
        chunk_size=DEFAULT_MAX_BULK,
        meta_index_name="mongodb_meta",
        meta_type="mongodb_meta",
        attachment_field="content",
        **kwargs
    ):
        client_options = kwargs.get("clientOptions", {})
        if "aws" in kwargs:
            if not _HAS_AWS:
                raise errors.InvalidConfiguration(
                    "aws extras must be installed to sign Elasticsearch "
                    "requests. Install with: "
                    "pip install elastic2-doc-manager[aws]"
                )
            client_options["http_auth"] = create_aws_auth(kwargs["aws"])
            client_options["use_ssl"] = True
            client_options["verify_certs"] = True
            client_options["connection_class"] = es_connection.RequestsHttpConnection
        if type(url) is not list:
            url = [url]
        self.elastic = Elasticsearch(hosts=url, **client_options)

        self._formatter = DefaultDocumentFormatter()
        self.BulkBuffer = BulkBuffer(self)

        # As bulk operation can be done in another thread
        # lock is needed to prevent access to BulkBuffer
        # while commiting documents to Elasticsearch
        # It is because BulkBuffer might get outdated
        # docs from Elasticsearch if bulk is still ongoing
        self.lock = threading.Lock()

        self.auto_commit_interval = auto_commit_interval
        self.auto_send_interval = kwargs.get("autoSendInterval", DEFAULT_SEND_INTERVAL)
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self.has_attachment_mapping = False
        self.attachment_field = attachment_field
        self.auto_commiter = AutoCommiter(
            self, self.auto_send_interval, self.auto_commit_interval
        )
        self.auto_commiter.start()
开发者ID:mongodb-labs,项目名称:elastic2-doc-manager,代码行数:49,代码来源:elastic2_doc_manager.py

示例7: __init__

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 attachment_field="content", **kwargs):
        self.elastic = self._create_elasticsearch_client(url, kwargs.get('clientOptions', {}))
        self.auto_commit_interval = auto_commit_interval
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

        self.has_attachment_mapping = False
        self.attachment_field = attachment_field
开发者ID:luisobo,项目名称:elastic2-doc-manager,代码行数:16,代码来源:elastic2_doc_manager.py

示例8: __init__

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 attachment_field="content", **kwargs):
        client_options = kwargs.get('clientOptions', {})
        client_options.setdefault('sniff_on_start', True)
        client_options.setdefault('sniff_on_connection_fail', True)
        client_options.setdefault('sniffer_timeout', 60)
        if 'aws' in kwargs:
            if not _HAS_AWS:
                raise errors.InvalidConfiguration(
                    'aws extras must be installed to sign Elasticsearch '
                    'requests. Install with: '
                    'pip install elastic2-doc-manager[aws]')
            client_options['http_auth'] = create_aws_auth(kwargs['aws'])
            client_options['use_ssl'] = True
            client_options['verify_certs'] = True
            client_options['connection_class'] = \
                es_connection.RequestsHttpConnection
        if type(url) is not list:
            url = [url]
        self.elastic = Elasticsearch(hosts=url, **client_options)

        self._formatter = DefaultDocumentFormatter()
        self.BulkBuffer = BulkBuffer(self)

        # As bulk operation can be done in another thread
        # lock is needed to prevent access to BulkBuffer
        # while commiting documents to Elasticsearch
        # It is because BulkBuffer might get outdated
        # docs from Elasticsearch if bulk is still ongoing
        self.lock = Lock()

        self.auto_commit_interval = auto_commit_interval
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()

        self.has_attachment_mapping = False
        self.attachment_field = attachment_field
开发者ID:sliwinski-milosz,项目名称:elastic2-doc-manager,代码行数:43,代码来源:elastic2_doc_manager.py

示例9: DocManager

class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 **kwargs):
        self.elastic = Elasticsearch(hosts=[url])
        self.auto_commit_interval = auto_commit_interval
        self.doc_type = 'string'  # default type is string, change if needed
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    def apply_update(self, doc, update_spec):
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    @wrap_exceptions
    def update(self, doc, update_spec):
        """Apply updates given in update_spec to the document whose id
        matches that of doc.
        """
        self.commit()
        document = self.elastic.get(index=doc['ns'],
                                    id=str(doc['_id']))
        updated = self.apply_update(document['_source'], update_spec)
        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = document['_id']
        # Add metadata fields back into updated, for the purposes of
        # calling upsert(). Need to do this until these become separate
        # arguments in 2.x
        updated['ns'] = doc['ns']
        updated['_ts'] = doc['_ts']
        self.upsert(updated)
        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc):
        """Insert a document into Elasticsearch."""
        doc_type = self.doc_type
        index = doc.pop('ns')
        # No need to duplicate '_id' in source document
        doc_id = str(doc.pop("_id"))
        metadata = {
            "ns": index,
            "_ts": doc.pop("_ts")
        }
        # Index the source document
        self.elastic.index(index=index, doc_type=doc_type,
                           body=self._formatter.format_document(doc), id=doc_id,
                           refresh=(self.auto_commit_interval == 0))
        # Index document metadata
        self.elastic.index(index=self.meta_index_name, doc_type=self.meta_type,
                           body=bson.json_util.dumps(metadata), id=doc_id,
                           refresh=(self.auto_commit_interval == 0))
        # Leave _id, since it's part of the original document
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs):
        """Insert multiple documents into Elasticsearch."""
        def docs_to_upsert():
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index = doc.pop("ns")
                doc_id = str(doc.pop("_id"))
                timestamp = doc.pop("_ts")
                document_action = {
                    "_index": index,
                    "_type": self.doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {
                        "ns": index,
                        "_ts": timestamp
                    }
                }
                yield document_action
#.........这里部分代码省略.........
开发者ID:gwecho,项目名称:mongo-connector,代码行数:101,代码来源:elastic_doc_manager.py

示例10: DocManager

class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 attachment_field="content", **kwargs):
        hosts = self._get_hosts(url)
        self.elastic = Elasticsearch(
            hosts=hosts, timeout=60, **kwargs.get('clientOptions', {}))
        self.auto_commit_interval = auto_commit_interval
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

        self.has_attachment_mapping = False
        self.attachment_field = attachment_field

    def _get_hosts(self, url):
        if isinstance(url, list):
            return url
        elif isinstance(url, str):
            return url.split(',')
        else:
            raise errors.ConnectionFailed("Invalid URI for Elastic")

    def _index_and_mapping(self, namespace):
        """Helper method for getting the index and type from a namespace."""
        index, doc_type = namespace.split('.', 1)
        return index.lower(), doc_type

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    def apply_update(self, doc, update_spec):
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        db = namespace.split('.', 1)[0]
        if doc.get('dropDatabase'):
            dbs = self.command_helper.map_db(db)
            for _db in dbs:
                self.elastic.indices.delete(index=_db.lower())

        if doc.get('renameCollection'):
            raise errors.OperationFailed(
                "elastic_doc_manager does not support renaming a mapping.")

        if doc.get('create'):
            db, coll = self.command_helper.map_collection(db, doc['create'])
            if db and coll:
                self.elastic.indices.put_mapping(
                    index=db.lower(), doc_type=coll,
                    body={
                        "_source": {"enabled": True}
                    })

        if doc.get('drop'):
            db, coll = self.command_helper.map_collection(db, doc['drop'])
            if db and coll:
                self.elastic.indices.delete_mapping(index=db.lower(),
                                                    doc_type=coll)

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Apply updates given in update_spec to the document whose id
        matches that of doc.
        """
        self.commit()
        index, doc_type = self._index_and_mapping(namespace)
        document = self.elastic.get(index=index, doc_type=doc_type,
                                    id=u(document_id))
        updated = self.apply_update(document['_source'], update_spec)
        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = document['_id']
        self.upsert(updated, namespace, timestamp)
        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Insert a document into Elasticsearch."""
        index, doc_type = self._index_and_mapping(namespace)
        # No need to duplicate '_id' in source document
        doc_id = u(doc.pop("_id"))
        metadata = {
            "ns": namespace,
#.........这里部分代码省略.........
开发者ID:quintstoffers,项目名称:elastic-doc-manager,代码行数:101,代码来源:elastic_doc_manager.py

示例11: DocManager

class DocManager(DocManagerBase):
    """
  Neo4j implementation for the DocManager. Receives documents and 
  communicates with Neo4j Server.
  """

    def __init__(
        self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key="_id", chunk_size=DEFAULT_MAX_BULK, **kwargs
    ):

        self.graph = Graph(url)
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")

    def apply_id_constraint(self, doc_types):
        for doc_type in doc_types:
            constraint = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d._id IS UNIQUE".format(doc_type=doc_type)
            self.graph.cypher.execute(constraint)

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Inserts a document into Neo4j."""
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("_id"))
        metadata = {"_ts": timestamp}
        doc = self._formatter.format_document(doc)
        builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
        self.apply_id_constraint(builder.doc_types)
        tx = self.graph.cypher.begin()
        for statement in builder.query_nodes.keys():
            tx.append(statement, builder.query_nodes[statement])
        for relationship in builder.relationships_query.keys():
            tx.append(relationship, builder.relationships_query[relationship])
        tx.commit()

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        def iterate_chunks():
            more_chunks = True

            while more_chunks:
                tx = self.graph.cypher.begin()
                metadata = {"_ts": timestamp}
                for i in range(self.chunk_size):
                    try:
                        doc = next(docs)
                        index, doc_type = self._index_and_mapping(namespace)
                        doc_id = u(doc.pop("_id"))
                        doc = self._formatter.format_document(doc)
                        builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
                        self.apply_id_constraint(builder.doc_types)
                        for statement in builder.query_nodes.keys():
                            tx.append(statement, builder.query_nodes[statement])
                        for relationship in builder.relationships_query.keys():
                            tx.append(relationship, builder.relationships_query[relationship])
                    except StopIteration:
                        more_chunks = False
                        if i > 0:
                            yield tx
                        break
                if more_chunks:
                    yield tx

        for tx in iterate_chunks():
            tx.commit()

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        doc_id = u(document_id)
        tx = self.graph.cypher.begin()
        index, doc_type = self._index_and_mapping(namespace)
        updater = NodesAndRelationshipsUpdater()
        updater.run_update(update_spec, doc_id, doc_type)
        for statement in updater.statements_with_params:
            for key in statement.keys():
                tx.append(key, statement[key])
        tx.commit()

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Removes a document from Neo4j."""
        doc_id = u(document_id)
        index, doc_type = self._index_and_mapping(namespace)
        params_dict = {"doc_id": doc_id}
        tx = self.graph.cypher.begin()
        statement = "MATCH (d:Document) WHERE d._id={doc_id} OPTIONAL MATCH (d)-[r]-() DELETE d, r"
        tx.append(statement, params_dict)
        tx.commit()

    @wrap_exceptions
    def search(self, start_ts, end_ts):
        statement = "MATCH (d:Document) WHERE d._ts>={start_ts} AND d._ts<={end_ts} RETURN d".format(
            start_ts=start_ts, end_ts=end_ts
#.........这里部分代码省略.........
开发者ID:alibahsisoglu,项目名称:neo4j_doc_manager,代码行数:101,代码来源:neo4j_doc_manager.py

示例12: DocManager

class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(
        self,
        url,
        auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
        unique_key="_id",
        chunk_size=DEFAULT_MAX_BULK,
        meta_index_name="mongodb_meta",
        meta_type="mongodb_meta",
        attachment_field="content",
        **kwargs
    ):
        client_options = kwargs.get("clientOptions", {})
        if "aws" in kwargs:
            if not _HAS_AWS:
                raise errors.InvalidConfiguration(
                    "aws extras must be installed to sign Elasticsearch "
                    "requests. Install with: "
                    "pip install elastic2-doc-manager[aws]"
                )
            client_options["http_auth"] = create_aws_auth(kwargs["aws"])
            client_options["use_ssl"] = True
            client_options["verify_certs"] = True
            client_options["connection_class"] = es_connection.RequestsHttpConnection
        if type(url) is not list:
            url = [url]
        self.elastic = Elasticsearch(hosts=url, **client_options)

        self._formatter = DefaultDocumentFormatter()
        self.BulkBuffer = BulkBuffer(self)

        # As bulk operation can be done in another thread
        # lock is needed to prevent access to BulkBuffer
        # while commiting documents to Elasticsearch
        # It is because BulkBuffer might get outdated
        # docs from Elasticsearch if bulk is still ongoing
        self.lock = threading.Lock()

        self.auto_commit_interval = auto_commit_interval
        self.auto_send_interval = kwargs.get("autoSendInterval", DEFAULT_SEND_INTERVAL)
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self.has_attachment_mapping = False
        self.attachment_field = attachment_field
        self.auto_commiter = AutoCommiter(
            self, self.auto_send_interval, self.auto_commit_interval
        )
        self.auto_commiter.start()

    def _index_and_mapping(self, namespace):
        """Helper method for getting the index and type from a namespace."""
        index, doc_type = namespace.split(".", 1)
        return index.lower(), doc_type

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commiter.join()
        self.auto_commit_interval = 0
        # Commit any remaining docs from buffer
        self.commit()

    def apply_update(self, doc, update_spec):
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        # Flush buffer before handle command
        self.commit()
        db = namespace.split(".", 1)[0]
        if doc.get("dropDatabase"):
            dbs = self.command_helper.map_db(db)
            for _db in dbs:
                self.elastic.indices.delete(index=_db.lower())

        if doc.get("renameCollection"):
            raise errors.OperationFailed(
                "elastic_doc_manager does not support renaming a mapping."
            )

        if doc.get("create"):
            db, coll = self.command_helper.map_collection(db, doc["create"])
            if db and coll:
                self.elastic.indices.put_mapping(
                    index=db.lower(), doc_type=coll, body={"_source": {"enabled": True}}
                )

        if doc.get("drop"):
            db, coll = self.command_helper.map_collection(db, doc["drop"])
            if db and coll:
#.........这里部分代码省略.........
开发者ID:mongodb-labs,项目名称:elastic2-doc-manager,代码行数:101,代码来源:elastic2_doc_manager.py

示例13: DocManager

class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
        self.elastic = Elasticsearch(hosts=[url])
        self.auto_commit_interval = auto_commit_interval
        self.doc_type = 'string'  # default type is string, change if needed
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def handle_command(self, doc, namespace_set):
        """Handle database and other command operations"""
        logging.debug ("ES:handle_command")
	
        if namespace_set:
            db, cmd_ns = doc['ns'].split(".", 1)
            coll = doc['drop']
            if coll not in [None, ""]:
                index = db+"."+coll
                if index in namespace_set:
                    logging.debug ("ES: received drop for " + index)
                    self.elastic.indices.delete(index)
	
    @wrap_exceptions
    def update(self, doc, update_spec):
        """Apply updates given in update_spec to the document whose id
        matches that of doc.
        """
        document = self.elastic.get(index=doc['ns'],
                                    id=str(doc['_id']))
        updated = self.apply_update(document['_source'], update_spec)
        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = document['_id']
        self.upsert(updated)
        return updated

    @wrap_exceptions
    def upsert(self, doc):
        """Insert a document into Elasticsearch."""
        doc_type = self.doc_type
        index = doc['ns']
        # No need to duplicate '_id' in source document
        doc_id = str(doc.pop("_id"))
        self.elastic.index(index=index, doc_type=doc_type,
                           body=self._formatter.format_document(doc), id=doc_id,
                           refresh=(self.auto_commit_interval == 0))
        # Don't mutate doc argument
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs):
        """Insert multiple documents into Elasticsearch."""
        def docs_to_upsert():
            doc = None
            for doc in docs:
                index = doc["ns"]
                doc_id = str(doc.pop("_id"))
                yield {
                    "_index": index,
                    "_type": self.doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
            if not doc:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")
        try:
            kw = {}
            if self.chunk_size > 0:
                kw['chunk_size'] = self.chunk_size

            responses = streaming_bulk(client=self.elastic,
                                       actions=docs_to_upsert(),
                                       **kw)

            for ok, resp in responses:
                if not ok:
                    logging.error(
                        "Could not bulk-upsert document "
                        "into ElasticSearch: %r" % resp)
            if self.auto_commit_interval == 0:
                self.commit()
        except errors.EmptyDocsError:
            # This can happen when mongo-connector starts up, there is no
            # config file, but nothing to dump
            pass
#.........这里部分代码省略.........
开发者ID:MicroFocus,项目名称:mongo-connector,代码行数:101,代码来源:elastic_doc_manager.py

示例14: DocManager

class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 attachment_field="content", **kwargs):
        self.elastic = Elasticsearch(
            hosts=[url], **kwargs.get('clientOptions', {}))
        self.auto_commit_interval = auto_commit_interval
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self.routing = kwargs.get('routing', {})
        if self.auto_commit_interval not in [None, 0]:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

        self.has_attachment_mapping = False
        self.attachment_field = attachment_field

    def _index_and_mapping(self, namespace):
        """Helper method for getting the index and type from a namespace."""
        index, doc_type = namespace.split('.', 1)
        return index.lower(), doc_type

    def _get_parent_id(self, doc_type, doc):
        """Get parent ID from doc"""
        if doc_type in self.routing:
            if '_parent' in doc:
                return doc.pop('_parent')

            parent_field = self.routing[doc_type].get('parentField')

            if not parent_field:
                return None

            parent_id = doc.pop(parent_field) if parent_field in doc else None
            return self._formatter.transform_value(parent_id)

    def _search_doc_by_id(self, index, doc_type, doc_id):
        """Search document in Elasticsearch by _id"""
        result = self.elastic.search(index=index, doc_type=doc_type,
                                     body={
                                         'query': {
                                             'ids': {
                                                 'type': doc_type,
                                                 'values': [u(doc_id)]
                                             }
                                         }
                                     })
        if result['hits']['total'] == 1:
            return result['hits']['hits'][0]
        else:
            return None

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    def apply_update(self, doc, update_spec):
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        db = namespace.split('.', 1)[0]
        if doc.get('dropDatabase'):
            dbs = self.command_helper.map_db(db)
            for _db in dbs:
                self.elastic.indices.delete(index=_db.lower())

        if doc.get('renameCollection'):
            raise errors.OperationFailed(
                "elastic_doc_manager does not support renaming a mapping.")

        if doc.get('create'):
            db, coll = self.command_helper.map_collection(db, doc['create'])
            if db and coll:
                self.elastic.indices.put_mapping(
                    index=db.lower(), doc_type=coll,
                    body={
                        "_source": {"enabled": True}
                    })

        if doc.get('drop'):
            db, coll = self.command_helper.map_collection(db, doc['drop'])
            if db and coll:
                # This will delete the items in coll, but not get rid of the
                # mapping.
                warnings.warn("Deleting all documents of type %s on index %s."
                              "The mapping definition will persist and must be"
                              "removed manually." % (coll, db))
#.........这里部分代码省略.........
开发者ID:mallegrini,项目名称:elastic2-doc-manager,代码行数:101,代码来源:elastic2_doc_manager.py

示例15: DocManager

class DocManager(DocManagerBase):
    """
    Neo4j implementation for the DocManager. Receives documents and
    communicates with Neo4j Server.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='uid', chunk_size=DEFAULT_MAX_BULK, **kwargs):

        self.graph = Graph(url)
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")
        self.authorization_token = base64.b64encode(os.getenv('NEO4J_AUTH'))

    def apply_id_constraint(self, doc_types):
        for doc_type in doc_types:
            doc_type = doc_type.upper()
            constraint = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d.uid IS UNIQUE".format(doc_type=doc_type)
            self.graph.cypher.execute(constraint)

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Inserts a document into Neo4j."""
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("uid"))
        metadata = { "_ts": timestamp }
        doc = self._formatter.format_document(doc)
        builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
        self.apply_id_constraint(builder.doc_types)
        tx = self.graph.cypher.begin()
        for statement in builder.query_nodes.keys():
            tx.append(statement, builder.query_nodes[statement])
        for query in builder.cypher_list:
            tx.append(query)
            # Adding cyphers from cypher list
        for relationship, params in builder.relationships_query:
            tx.append(relationship, params)
        for statement in builder.statements_with_params:
            for key in statement.keys():
                tx.append(key, statement[key])
        commit_result = None
        try:
            commit_result = tx.commit()
            print commit_result
        except Exception as e:
            LOG.error('{}'.format(e.message))
            pass

        if commit_result:
            nodeids_list = self._get_nodeids(commit_result)
            self.create_geospatial_indices(nodeids_list)

    def _get_nodeids(self, commit_result):
        node_id_list = []
        a = len(commit_result)
        for i in range(len(commit_result)):
            res = commit_result.pop(0)
            records = res.records
            if not records:
                continue
            for record in records:
                node_ids = list(record.__values__)
                node_id_list.extend(node_ids)
        return node_id_list

    def create_geospatial_indices(self, node_ids_list):
        """
        Creates geo spatial indices on the node ids
        :param node_ids_list:  list of node ids
        """
        layer_name = 'geom'
        lat = 'lat'
        lon = 'lon'
        geometry_type = 'point'
        self._set_id_to_nodeid(node_ids_list)
        # if_layer = self.if_layer_exists(layer_name)
        # if if_layer:
        self._create_layer(layer_name, lat, lon)
        self._add_geometry(layer_name, geometry_type, lat, lon)
        result = self._add_node_to_layer(node_ids_list, layer_name)
        LOG.info('Geospatial index creation response {}', repr(result))

    def _set_id_to_nodeid(self, node_ids_list):
        # TODO: We may want it to change to label name
        """
        Set id on basis of node ids
        :param node_ids_list:
        :param label_name:
        :return:
        """
        tx = self.graph.cypher.begin()
        for count, nodeid in enumerate(node_ids_list, 1):
#.........这里部分代码省略.........
开发者ID:mayank-chutani,项目名称:mongo-connector,代码行数:101,代码来源:neo4j_doc_manager.py


注:本文中的mongo_connector.doc_managers.formatters.DefaultDocumentFormatter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。