当前位置: 首页>>代码示例>>Python>>正文


Python DefaultDocumentFormatter.format_document方法代码示例

本文整理汇总了Python中mongo_connector.doc_managers.formatters.DefaultDocumentFormatter.format_document方法的典型用法代码示例。如果您正苦于以下问题:Python DefaultDocumentFormatter.format_document方法的具体用法?Python DefaultDocumentFormatter.format_document怎么用?Python DefaultDocumentFormatter.format_document使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mongo_connector.doc_managers.formatters.DefaultDocumentFormatter的用法示例。


在下文中一共展示了DefaultDocumentFormatter.format_document方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]
class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
                 meta_index_name="mongodb_meta", meta_type="mongodb_meta",
                 **kwargs):
        """Create the Elasticsearch client and record the manager settings.

        Args:
            url: Host passed to ``Elasticsearch`` as the single ``hosts`` entry.
            auto_commit_interval: Stored on the instance. Any value other
                than ``None`` or ``0`` makes the constructor call
                ``run_auto_commit()``.
            unique_key: Stored as ``self.unique_key``.
            chunk_size: Stored as ``self.chunk_size``.
            meta_index_name: Stored as ``self.meta_index_name``.
            meta_type: Stored as ``self.meta_type``.
            **kwargs: Accepted but not read by this constructor.
        """
        self.elastic = Elasticsearch(hosts=[url])

        # Plain configuration, kept exactly as supplied by the caller.
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self.meta_index_name = meta_index_name
        self.meta_type = meta_type
        self.auto_commit_interval = auto_commit_interval
        self.doc_type = 'string'  # default type is string, change if needed

        periodic_commit_wanted = self.auto_commit_interval not in [None, 0]
        if periodic_commit_wanted:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

    def stop(self):
        """Turn off auto-commit by resetting ``auto_commit_interval`` to None."""
        self.auto_commit_interval = None

    def apply_update(self, doc, update_spec):
        """Return the document produced by applying ``update_spec`` to ``doc``.

        A spec that carries a ``$set`` or ``$unset`` key is delegated to the
        parent class implementation. Any other spec is returned unchanged.
        """
        uses_operators = "$set" in update_spec or "$unset" in update_spec
        if uses_operators:
            return super(DocManager, self).apply_update(doc, update_spec)
        # Don't try to add ns and _ts fields back in from doc
        return update_spec

    @wrap_exceptions
    def update(self, doc, update_spec):
        """Apply ``update_spec`` to the stored document matching ``doc``.

        ``self.commit()`` is called first, then the current document is
        fetched from the index named by ``doc['ns']`` using ``str(doc['_id'])``.
        The updated source is re-indexed through ``upsert()`` and returned.
        """
        self.commit()
        stored = self.elastic.get(index=doc['ns'], id=str(doc['_id']))
        updated = self.apply_update(stored['_source'], update_spec)

        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = stored['_id']

        # upsert() still expects the metadata fields inside the document,
        # so copy them over from the incoming doc. Need to do this until
        # these become separate arguments in 2.x
        updated['ns'] = doc['ns']
        updated['_ts'] = doc['_ts']
        self.upsert(updated)

        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc):
        """Index ``doc`` and a companion metadata document.

        ``ns``, ``_id`` and ``_ts`` are popped from ``doc``. ``ns`` names the
        target index and ``_id`` (as a string) becomes the Elasticsearch id.
        ``ns`` and ``_ts`` are written as a separate document with the same
        id in ``self.meta_index_name``. On return ``doc`` contains the
        stringified ``_id`` again, but no longer ``ns`` or ``_ts``.
        """
        mapping_type = self.doc_type
        target_index = doc.pop('ns')
        # No need to duplicate '_id' in source document
        doc_id = str(doc.pop("_id"))
        timestamp = doc.pop("_ts")

        # Both writes request an immediate refresh only when the interval is 0.
        refresh_now = self.auto_commit_interval == 0

        # Index the source document
        self.elastic.index(
            index=target_index,
            doc_type=mapping_type,
            body=self._formatter.format_document(doc),
            id=doc_id,
            refresh=refresh_now,
        )

        # Index document metadata
        meta_body = bson.json_util.dumps({"ns": target_index, "_ts": timestamp})
        self.elastic.index(
            index=self.meta_index_name,
            doc_type=self.meta_type,
            body=meta_body,
            id=doc_id,
            refresh=refresh_now,
        )

        # Leave _id, since it's part of the original document
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs):
        """Insert multiple documents into Elasticsearch."""
        def docs_to_upsert():
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index = doc.pop("ns")
                doc_id = str(doc.pop("_id"))
                timestamp = doc.pop("_ts")
                document_action = {
                    "_index": index,
                    "_type": self.doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {
                        "ns": index,
                        "_ts": timestamp
                    }
                }
                yield document_action
#.........这里部分代码省略.........
开发者ID:gwecho,项目名称:mongo-connector,代码行数:103,代码来源:elastic_doc_manager.py

示例2: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]
class DocManager(DocManagerBase):
    """
  Neo4j implementation for the DocManager. Receives documents and 
  communicates with Neo4j Server.
  """

    def __init__(
        self,
        url,
        auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
        unique_key="_id",
        chunk_size=DEFAULT_MAX_BULK,
        **kwargs
    ):
        """Connect to the graph at ``url`` and record the manager settings.

        Args:
            url: Passed to ``Graph``.
            auto_commit_interval: Stored as ``self.auto_commit_interval``.
            unique_key: Stored as ``self.unique_key``.
            chunk_size: Stored as ``self.chunk_size``.
            **kwargs: Only the ``clientOptions`` entry is read. It is stored
                as ``self.kwargs`` (``None`` when absent).
        """
        self.graph = Graph(url)

        self.chunk_size = chunk_size
        self.unique_key = unique_key
        self.auto_commit_interval = auto_commit_interval

        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")

    def apply_id_constraint(self, doc_types):
        """Execute a uniqueness constraint on ``_id`` for each label in ``doc_types``.

        One Cypher statement is executed per entry through
        ``self.graph.cypher.execute``.
        """
        template = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d._id IS UNIQUE"
        for label in doc_types:
            self.graph.cypher.execute(template.format(doc_type=label))

    def stop(self):
        """Turn off auto-commit by resetting ``auto_commit_interval`` to None."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Write one document to Neo4j inside a single transaction.

        ``_id`` is popped from ``doc`` and the rest is run through the
        formatter. A ``NodesAndRelationshipsBuilder`` then supplies the node
        and relationship statements, which are appended to one Cypher
        transaction and committed. ``timestamp`` is handed to the builder as
        ``{"_ts": timestamp}``.
        """
        _, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("_id"))
        formatted = self._formatter.format_document(doc)
        builder = NodesAndRelationshipsBuilder(
            formatted, doc_type, doc_id, {"_ts": timestamp}
        )
        self.apply_id_constraint(builder.doc_types)

        tx = self.graph.cypher.begin()
        # Node statements go first, relationship statements after them.
        for statement, params in builder.query_nodes.items():
            tx.append(statement, params)
        for statement, params in builder.relationships_query.items():
            tx.append(statement, params)
        tx.commit()

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Insert many documents into Neo4j in transactions of bounded size.

        Args:
            docs: Any iterable of documents (a list works as well as an
                iterator). Each document must contain ``_id``, which is
                popped from it.
            namespace: Passed to ``_index_and_mapping`` to obtain the doc type.
            timestamp: Handed to the builder as ``{"_ts": timestamp}``.

        Documents are grouped into transactions of at most ``self.chunk_size``
        documents, and each transaction is committed before the next one is
        opened. A ``chunk_size`` that is ``None``, zero or negative means
        "no limit": all documents go into one transaction. No transaction is
        opened when there are no documents left to write.
        """
        import itertools

        # iter() makes plain sequences acceptable; next() on a list would fail.
        doc_iter = iter(docs)
        # A non-positive size would otherwise yield empty chunks forever.
        has_limit = bool(self.chunk_size) and self.chunk_size > 0
        limit = self.chunk_size if has_limit else None
        exhausted = object()

        while True:
            chunk = itertools.islice(doc_iter, limit)
            first = next(chunk, exhausted)
            if first is exhausted:
                break

            tx = self.graph.cypher.begin()
            metadata = {"_ts": timestamp}
            for doc in itertools.chain([first], chunk):
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = u(doc.pop("_id"))
                doc = self._formatter.format_document(doc)
                builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
                self.apply_id_constraint(builder.doc_types)
                for statement, params in builder.query_nodes.items():
                    tx.append(statement, params)
                for relationship, params in builder.relationships_query.items():
                    tx.append(relationship, params)
            tx.commit()

            if limit is None:
                # An unlimited chunk has consumed the whole input.
                break

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Translate ``update_spec`` into Cypher statements and commit them.

        A ``NodesAndRelationshipsUpdater`` produces the statements. Each entry
        of its ``statements_with_params`` is a mapping from statement to
        parameters, and every pair is appended to one transaction.
        ``timestamp`` is not used here.
        """
        doc_id = u(document_id)
        tx = self.graph.cypher.begin()
        _, doc_type = self._index_and_mapping(namespace)

        updater = NodesAndRelationshipsUpdater()
        updater.run_update(update_spec, doc_id, doc_type)
        for statement_map in updater.statements_with_params:
            for statement, params in statement_map.items():
                tx.append(statement, params)
        tx.commit()

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Delete the ``Document`` node with the given id and its relationships.

        ``namespace`` is still resolved through ``_index_and_mapping`` but the
        result is not used by the query. ``timestamp`` is not used.
        """
        doc_id = u(document_id)
        # Result intentionally discarded; the call is kept so a namespace the
        # helper rejects still fails here.
        _index, _doc_type = self._index_and_mapping(namespace)

        tx = self.graph.cypher.begin()
        tx.append(
            "MATCH (d:Document) WHERE d._id={doc_id} OPTIONAL MATCH (d)-[r]-() DELETE d, r",
            {"doc_id": doc_id},
        )
        tx.commit()

    @wrap_exceptions
    def search(self, start_ts, end_ts):
        statement = "MATCH (d:Document) WHERE d._ts>={start_ts} AND d._ts<={end_ts} RETURN d".format(
            start_ts=start_ts, end_ts=end_ts
#.........这里部分代码省略.........
开发者ID:alibahsisoglu,项目名称:neo4j_doc_manager,代码行数:103,代码来源:neo4j_doc_manager.py

示例3: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]

#.........这里部分代码省略.........
            db, coll = self.command_helper.map_collection(db, doc['drop'])
            if db and coll:
                self.elastic.indices.delete_mapping(index=db.lower(),
                                                    doc_type=coll)

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Apply ``update_spec`` to the stored document with ``document_id``.

        ``self.commit()`` is called first. The current document is then
        fetched from the index and type derived from ``namespace``, updated
        via ``apply_update`` and re-indexed through ``upsert()``. The updated
        document is returned.
        """
        self.commit()
        index, doc_type = self._index_and_mapping(namespace)
        stored = self.elastic.get(
            index=index, doc_type=doc_type, id=u(document_id)
        )

        updated = self.apply_update(stored['_source'], update_spec)
        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = stored['_id']
        self.upsert(updated, namespace, timestamp)

        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Index ``doc`` and a companion metadata document.

        ``_id`` is popped from ``doc`` while indexing and put back (converted
        by ``u()``) before returning. The metadata document holds the
        original ``namespace`` and ``timestamp`` and is written to
        ``self.meta_index_name`` under the same id.
        """
        index, doc_type = self._index_and_mapping(namespace)
        # No need to duplicate '_id' in source document
        doc_id = u(doc.pop("_id"))
        # Both writes request an immediate refresh only when the interval is 0.
        refresh_now = self.auto_commit_interval == 0

        # Index the source document, using lowercase namespace as index name.
        self.elastic.index(
            index=index,
            doc_type=doc_type,
            body=self._formatter.format_document(doc),
            id=doc_id,
            refresh=refresh_now,
        )

        # Index document metadata with original namespace (mixed upper/lower).
        meta_body = bson.json_util.dumps({"ns": namespace, "_ts": timestamp})
        self.elastic.index(
            index=self.meta_index_name,
            doc_type=self.meta_type,
            body=meta_body,
            id=doc_id,
            refresh=refresh_now,
        )

        # Leave _id, since it's part of the original document
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Insert multiple documents into Elasticsearch."""
        def docs_to_upsert():
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = u(doc.pop("_id"))
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {
                        "ns": namespace,
                        "_ts": timestamp
                    }
                }
开发者ID:quintstoffers,项目名称:elastic-doc-manager,代码行数:70,代码来源:elastic_doc_manager.py

示例4: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]

#.........这里部分代码省略.........
            document = self.BulkBuffer.get_from_sources(
                index, doc_type, str(document_id)
            )
        if document:
            # Document source collected from local buffer
            # Perform apply_update on it and then it will be
            # ready for commiting to Elasticsearch
            updated = self.apply_update(document, update_spec)
            # _id is immutable in MongoDB, so won't have changed in update
            updated["_id"] = document_id
            self.upsert(updated, namespace, timestamp)
        else:
            # Document source needs to be retrieved from Elasticsearch
            # before performing update. Pass update_spec to upsert function
            updated = {"_id": document_id}
            self.upsert(updated, namespace, timestamp, update_spec)
        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp, update_spec=None):
        """Build index actions for ``doc`` and its metadata and pass them on.

        Two actions are prepared: one for the formatted document, and one for
        a metadata document (``namespace`` and ``timestamp`` serialized with
        ``bson.json_util.dumps``) stored in ``self.meta_index_name`` under the
        same id. Both go to ``self.index`` with ``doc`` and ``update_spec``.
        ``_id`` is popped from ``doc`` for the duration and restored as a
        string afterwards.
        """
        index, doc_type = self._index_and_mapping(namespace)
        # No need to duplicate '_id' in source document
        doc_id = str(doc.pop("_id"))
        meta_source = bson.json_util.dumps({"ns": namespace, "_ts": timestamp})

        # Index the source document, using lowercase namespace as index name.
        document_action = {
            "_op_type": "index",
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc),
        }
        # Index document metadata with original namespace (mixed upper/lower).
        metadata_action = {
            "_op_type": "index",
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": meta_source,
        }
        self.index(document_action, metadata_action, doc, update_spec)

        # Leave _id, since it's part of the original document
        doc["_id"] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Insert multiple documents into Elasticsearch."""

        def docs_to_upsert():
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = str(doc.pop("_id"))
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc),
                }
                document_meta = {
开发者ID:mongodb-labs,项目名称:elastic2-doc-manager,代码行数:70,代码来源:elastic2_doc_manager.py

示例5: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]
class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
        """Create the Elasticsearch client and record the manager settings.

        Args:
            url: Host passed to ``Elasticsearch`` as the single ``hosts`` entry.
            auto_commit_interval: Stored on the instance. Any value other
                than ``None`` or ``0`` makes the constructor call
                ``run_auto_commit()``.
            unique_key: Stored as ``self.unique_key``.
            chunk_size: Stored as ``self.chunk_size``.
            **kwargs: Accepted but not read by this constructor.
        """
        self.elastic = Elasticsearch(hosts=[url])

        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self.auto_commit_interval = auto_commit_interval
        self.doc_type = 'string'  # default type is string, change if needed

        periodic_commit_wanted = self.auto_commit_interval not in [None, 0]
        if periodic_commit_wanted:
            self.run_auto_commit()
        self._formatter = DefaultDocumentFormatter()

    def stop(self):
        """Turn off auto-commit by resetting ``auto_commit_interval`` to None."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def handle_command(self, doc, namespace_set):
        """Handle database and other command operations.

        Only collection drops are acted on. When ``doc`` has a non-empty
        ``drop`` entry and ``<db>.<collection>`` is a member of
        ``namespace_set``, the Elasticsearch index of that name is deleted.
        Every other command, and any call with an empty ``namespace_set``,
        is ignored.

        Args:
            doc: Command document. ``ns`` is read only for drop commands and
                its part before the first ``.`` is used as the database name.
            namespace_set: Collection of ``db.collection`` names eligible for
                deletion.
        """
        logging.debug("ES:handle_command")

        if not namespace_set:
            return

        # Commands other than "drop" carry no 'drop' key; reading it with
        # doc['drop'] would raise KeyError for them.
        coll = doc.get('drop')
        if coll in (None, ""):
            return

        # Only the database part of ns is needed to build the index name.
        db = doc['ns'].split(".", 1)[0]
        index = db + "." + coll
        if index in namespace_set:
            logging.debug("ES: received drop for " + index)
            self.elastic.indices.delete(index)
	
    @wrap_exceptions
    def update(self, doc, update_spec):
        """Apply ``update_spec`` to the stored document matching ``doc``.

        The current document is fetched from the index named by ``doc['ns']``
        using ``str(doc['_id'])``, updated via ``apply_update`` and re-indexed
        through ``upsert()``. The updated document is returned.
        """
        stored = self.elastic.get(index=doc['ns'], id=str(doc['_id']))
        updated = self.apply_update(stored['_source'], update_spec)

        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = stored['_id']
        # NOTE(review): upsert() reads updated['ns'], so the stored _source
        # is presumably expected to still carry 'ns' - confirm.
        self.upsert(updated)
        return updated

    @wrap_exceptions
    def upsert(self, doc):
        """Index ``doc`` into the Elasticsearch index named by ``doc['ns']``.

        ``_id`` is popped so it is not duplicated in the indexed body, used
        (as a string) as the Elasticsearch id, and written back to ``doc``
        afterwards. ``ns`` is read but left in the document.
        """
        mapping_type = self.doc_type
        target_index = doc['ns']
        # No need to duplicate '_id' in source document
        doc_id = str(doc.pop("_id"))

        # Refresh immediately only when the auto-commit interval is 0.
        refresh_now = self.auto_commit_interval == 0
        self.elastic.index(
            index=target_index,
            doc_type=mapping_type,
            body=self._formatter.format_document(doc),
            id=doc_id,
            refresh=refresh_now,
        )

        # Don't mutate doc argument
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs):
        """Stream ``docs`` into Elasticsearch with ``streaming_bulk``.

        Each document is turned into a bulk action targeting the index named
        by its ``ns`` field, with ``_id`` popped from the document. Failed
        items are logged and do not stop the run. An empty ``docs`` sequence
        is tolerated silently. When ``auto_commit_interval`` is 0,
        ``commit()`` is called after the stream has been consumed.
        """
        def docs_to_upsert():
            # Stays None (or another falsy value) only if nothing usable was
            # seen, which is how the emptiness check below works.
            doc = None
            for doc in docs:
                target_index = doc["ns"]
                doc_id = str(doc.pop("_id"))
                action = {
                    "_index": target_index,
                    "_type": self.doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
                yield action
            if not doc:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")

        try:
            # Forward chunk_size only when it is a positive number.
            bulk_options = {}
            if self.chunk_size > 0:
                bulk_options['chunk_size'] = self.chunk_size

            outcomes = streaming_bulk(client=self.elastic,
                                      actions=docs_to_upsert(),
                                      **bulk_options)
            for succeeded, resp in outcomes:
                if succeeded:
                    continue
                logging.error(
                    "Could not bulk-upsert document "
                    "into ElasticSearch: %r" % resp)

            if self.auto_commit_interval == 0:
                self.commit()
        except errors.EmptyDocsError:
            # This can happen when mongo-connector starts up, there is no
            # config file, but nothing to dump
            pass
#.........这里部分代码省略.........
开发者ID:MicroFocus,项目名称:mongo-connector,代码行数:103,代码来源:elastic_doc_manager.py

示例6: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]

#.........这里部分代码省略.........
            # ID available in our update request though.
            document = self._search_doc_by_id(index, doc_type, document_id)
            if document is None:
                LOG.error('Could not find document with ID "%s" in Elasticsearch to apply update', u(document_id))
                return None
        else:
            document = self.elastic.get(index=index, doc_type=doc_type,
                                        id=u(document_id))

        updated = self.apply_update(document['_source'], update_spec)
        # _id is immutable in MongoDB, so won't have changed in update
        updated['_id'] = document['_id']
        if '_parent' in document:
            updated['_parent'] = document['_parent']
        self.upsert(updated, namespace, timestamp)
        # upsert() strips metadata, so only _id + fields in _source still here
        return updated

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Index ``doc`` (optionally under a parent) plus its metadata.

        ``_id`` is popped from ``doc`` while indexing and restored before
        returning. When ``_get_parent_id`` yields a value other than ``None``
        it is passed to Elasticsearch as ``parent``. The metadata document
        (``namespace`` and ``timestamp``) is written to
        ``self.meta_index_name`` under the same id.
        """
        index, doc_type = self._index_and_mapping(namespace)
        # No need to duplicate '_id' in source document
        doc_id = u(doc.pop("_id"))
        metadata = {"ns": namespace, "_ts": timestamp}
        parent_id = self._get_parent_id(doc_type, doc)

        # Index the source document, using lowercase namespace as index name.
        index_args = {
            "index": index,
            "doc_type": doc_type,
            "body": self._formatter.format_document(doc),
            "id": doc_id,
            "refresh": (self.auto_commit_interval == 0),
        }
        # The parent argument is sent only when a parent id was found.
        if parent_id is not None:
            index_args["parent"] = parent_id
        self.elastic.index(**index_args)

        # Index document metadata with original namespace (mixed upper/lower).
        self.elastic.index(
            index=self.meta_index_name,
            doc_type=self.meta_type,
            body=bson.json_util.dumps(metadata),
            id=doc_id,
            refresh=(self.auto_commit_interval == 0),
        )

        # Leave _id, since it's part of the original document
        doc['_id'] = doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Insert multiple documents into Elasticsearch."""
        def docs_to_upsert():
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = u(doc.pop("_id"))
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc)
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
开发者ID:mallegrini,项目名称:elastic2-doc-manager,代码行数:70,代码来源:elastic2_doc_manager.py

示例7: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]
class DocManager(DocManagerBase):
    """
    Neo4j implementation for the DocManager. Receives documents and
    communicates with Neo4j Server.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='uid', chunk_size=DEFAULT_MAX_BULK, **kwargs):
        """Connect to the graph at ``url`` and record the manager settings.

        Args:
            url: Passed to ``Graph`` and also kept as ``self.url``.
            auto_commit_interval: Stored as ``self.auto_commit_interval``.
            unique_key: Stored as ``self.unique_key``.
            chunk_size: Stored as ``self.chunk_size``.
            **kwargs: Only the ``clientOptions`` entry is read. It is stored
                as ``self.kwargs`` (``None`` when absent).

        The ``NEO4J_AUTH`` environment variable is base64-encoded and stored
        as ``self.authorization_token``.
        """
        self.url = url
        self.graph = Graph(url)

        self.chunk_size = chunk_size
        self.unique_key = unique_key
        self.auto_commit_interval = auto_commit_interval

        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")

        # NOTE(review): os.getenv returns None when NEO4J_AUTH is unset, and
        # b64encode does not accept None - confirm the variable is always set.
        raw_credentials = os.getenv('NEO4J_AUTH')
        self.authorization_token = base64.b64encode(raw_credentials)

    def apply_id_constraint(self, doc_types):
        """Execute a uniqueness constraint on ``uid`` for each label in ``doc_types``.

        Every label is upper-cased before it is placed into the statement.
        One Cypher statement is executed per entry through
        ``self.graph.cypher.execute``.
        """
        template = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d.uid IS UNIQUE"
        for raw_label in doc_types:
            label = raw_label.upper()
            self.graph.cypher.execute(template.format(doc_type=label))

    def stop(self):
        """Turn off auto-commit by resetting ``auto_commit_interval`` to None."""
        self.auto_commit_interval = None

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Inserts a document into Neo4j and indexes the resulting nodes.

        ``uid`` is popped from ``doc`` and the rest is run through the
        formatter. A ``NodesAndRelationshipsBuilder`` supplies the statements,
        which are appended to one transaction in this order: node queries,
        the builder's ``cypher_list``, relationship queries, then
        ``statements_with_params``. A failing commit is logged and the method
        returns without raising. A truthy commit result is passed to
        ``_get_nodeids`` and on to ``create_geospatial_indices``.

        Args:
            doc: Document to insert. Must contain ``uid``.
            namespace: Resolved through ``_index_and_mapping`` for the doc type.
            timestamp: Handed to the builder as ``{"_ts": timestamp}``.
        """
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("uid"))
        metadata = {"_ts": timestamp}
        doc = self._formatter.format_document(doc)
        builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id, metadata)
        self.apply_id_constraint(builder.doc_types)

        tx = self.graph.cypher.begin()
        for statement, params in builder.query_nodes.items():
            tx.append(statement, params)
        # Adding cyphers from cypher list
        for query in builder.cypher_list:
            tx.append(query)
        for relationship, params in builder.relationships_query:
            tx.append(relationship, params)
        for statement in builder.statements_with_params:
            for key, params in statement.items():
                tx.append(key, params)

        try:
            commit_result = tx.commit()
        except Exception as e:
            # Best effort: a failed commit is reported, not propagated.
            # str(e) replaces e.message, which does not exist on Python 3.
            LOG.error('%s', e)
            return

        # Replaces a leftover Python 2 `print` debug statement.
        LOG.debug('%s', commit_result)

        if commit_result:
            nodeids_list = self._get_nodeids(commit_result)
            self.create_geospatial_indices(nodeids_list)

    def _get_nodeids(self, commit_result):
        node_id_list = []
        a = len(commit_result)
        for i in range(len(commit_result)):
            res = commit_result.pop(0)
            records = res.records
            if not records:
                continue
            for record in records:
                node_ids = list(record.__values__)
                node_id_list.extend(node_ids)
        return node_id_list

    def create_geospatial_indices(self, node_ids_list):
        """
        Creates geo spatial indices on the node ids.

        Runs four steps in order: ``_set_id_to_nodeid``, ``_create_layer``,
        ``_add_geometry`` and ``_add_node_to_layer``. The result of the last
        step is logged. The layer is always named ``geom``, uses the
        ``lat``/``lon`` properties and the ``point`` geometry type.

        :param node_ids_list:  list of node ids
        """
        layer_name = 'geom'
        lat = 'lat'
        lon = 'lon'
        geometry_type = 'point'
        self._set_id_to_nodeid(node_ids_list)
        # NOTE(review): the layer is created on every call; a previous
        # existence check (if_layer_exists) was disabled - confirm that
        # repeated creation is acceptable.
        self._create_layer(layer_name, lat, lon)
        self._add_geometry(layer_name, geometry_type, lat, lon)
        result = self._add_node_to_layer(node_ids_list, layer_name)
        # Logging uses %-style placeholders; '{}' with a positional argument
        # makes the record fail to format.
        LOG.info('Geospatial index creation response %s', repr(result))

    def _set_id_to_nodeid(self, node_ids_list):
        # TODO: We may want it to change to label name
        """
        Set id on basis of node ids
        :param node_ids_list:
        :param label_name:
        :return:
        """
        tx = self.graph.cypher.begin()
        for count, nodeid in enumerate(node_ids_list, 1):
#.........这里部分代码省略.........
开发者ID:mayank-chutani,项目名称:mongo-connector,代码行数:103,代码来源:neo4j_doc_manager.py

示例8: DocManager

# 需要导入模块: from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter [as 别名]
# 或者: from mongo_connector.doc_managers.formatters.DefaultDocumentFormatter import format_document [as 别名]
class DocManager(DocManagerBase):
    """Neo4j implementation for the DocManager.

    Receives documents and communicates with Neo4j Server.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
        """Connect to the graph at ``url`` and record the manager settings.

        Only the ``clientOptions`` entry of ``kwargs`` is read. It is stored
        as ``self.kwargs`` (``None`` when absent).
        """
        self.graph = Graph(url)
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = unique_key
        self.chunk_size = chunk_size
        self._formatter = DefaultDocumentFormatter()
        self.kwargs = kwargs.get("clientOptions")

    def apply_id_constraint(self, doc_types):
        """Execute a uniqueness constraint on ``_id`` for each given label."""
        for doc_type in doc_types:
            constraint = "CREATE CONSTRAINT ON (d:`{doc_type}`) ASSERT d._id IS UNIQUE".format(doc_type=doc_type)
            self.graph.cypher.execute(constraint)

    def stop(self):
        """Stop the auto-commit thread."""
        self.auto_commit_interval = None

    @staticmethod
    def _append_builder_statements(tx, builder):
        """Append the builder's node and relationship statements to ``tx``."""
        for statement, params in builder.query_nodes.items():
            tx.append(statement, params)
        for relationship, params in builder.relationships_query.items():
            tx.append(relationship, params)

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Inserts a document into Neo4j.

        ``_id`` is popped from ``doc`` and the rest is run through the
        formatter before the builder creates the statements. ``timestamp``
        is accepted for interface compatibility but is not passed to the
        builder.
        """
        _, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("_id"))
        doc = self._formatter.format_document(doc)
        builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id)
        self.apply_id_constraint(builder.doc_types)
        tx = self.graph.cypher.begin()
        self._append_builder_statements(tx, builder)
        tx.commit()

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Insert multiple documents into Neo4j.

        The statements for every document are appended to one transaction,
        which is committed once at the end. ``timestamp`` is accepted for
        interface compatibility but is not passed to the builder.

        NOTE(review): an earlier comment stated "Maximum chunk size is 1000.
        Transaction blocks won't have more than 1000 statements.", but no
        chunking happens here and ``self.chunk_size`` is not consulted -
        confirm whether a limit is enforced elsewhere.
        """
        tx = self.graph.cypher.begin()
        for doc in docs:
            _, doc_type = self._index_and_mapping(namespace)
            doc_id = u(doc.pop("_id"))
            doc = self._formatter.format_document(doc)
            builder = NodesAndRelationshipsBuilder(doc, doc_type, doc_id)
            self.apply_id_constraint(builder.doc_types)
            self._append_builder_statements(tx, builder)
        tx.commit()

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Translate ``update_spec`` into Cypher statements and commit them.

        Each entry of the updater's ``statements_with_params`` is a mapping
        from statement to parameters. ``timestamp`` is not used.
        """
        doc_id = u(document_id)
        tx = self.graph.cypher.begin()
        _, doc_type = self._index_and_mapping(namespace)
        updater = NodesAndRelationshipsUpdater()
        updater.run_update(update_spec, doc_id, doc_type)
        for statement in updater.statements_with_params:
            for key, params in statement.items():
                tx.append(key, params)
        tx.commit()

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Removes a document from Neo4j.

        Deletes the ``Document`` node whose ``_id`` matches, together with
        its relationships. ``timestamp`` is not used.
        """
        doc_id = u(document_id)
        # The result is unused by the query; the call is kept so that a
        # namespace without a '.' still raises here, as before.
        self._index_and_mapping(namespace)
        params_dict = {"doc_id": doc_id}
        tx = self.graph.cypher.begin()
        statement = "MATCH (d:Document) WHERE d._id={doc_id} OPTIONAL MATCH (d)-[r]-() DELETE d, r"
        tx.append(statement, params_dict)
        tx.commit()

    def search(self, start_ts, end_ts):
        """Not implemented: only logs that it was called and returns None."""
        LOG.error("Search")

    def commit(self):
        """Not implemented: only logs that it was called and returns None."""
        LOG.error("Commit")

    def get_last_doc(self):
        """Not implemented: only logs that it was called and returns None."""
        LOG.error("get last doc")

    def handle_command(self, doc, namespace, timestamp):
        """Stub: extracts the database name from ``namespace`` and does nothing else."""
        # NOTE(review): db is computed but never used - command handling
        # appears to be unfinished.
        db = namespace.split('.', 1)[0]

    def _index_and_mapping(self, namespace):
        """Helper method for getting the index and type from a namespace.

        Splits ``namespace`` on the first ``.`` and returns the lower-cased
        first part together with the unchanged remainder. Raises
        ``ValueError`` when ``namespace`` contains no ``.``.
        """
        index, doc_type = namespace.split('.', 1)
        return index.lower(), doc_type
开发者ID:hannelita,项目名称:neo4j_doc_manager,代码行数:101,代码来源:neo4j_doc_manager.py


注:本文中的mongo_connector.doc_managers.formatters.DefaultDocumentFormatter.format_document方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。