本文整理汇总了Python中elasticsearch.helpers.BulkIndexError方法的典型用法代码示例。如果您正苦于以下问题:Python helpers.BulkIndexError方法的具体用法?Python helpers.BulkIndexError怎么用?Python helpers.BulkIndexError使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elasticsearch.helpers
的用法示例。
在下文中一共展示了helpers.BulkIndexError方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: save
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def save(self):
    """Bulk-index the buffered actions, logging failed block ids on error.

    Returns a summary string on success; returns None when there is
    nothing to index or when the bulk call fails.
    """
    block_count = sum(1 for act in self.actions if act["_type"] == "b")
    tx_count = sum(1 for act in self.actions if act["_type"] == "tx")
    if not self.actions:
        return
    try:
        helpers.bulk(self.elastic, self.actions)
    except helpers.BulkIndexError as exc:
        print("Issue with {} blocks:\n{}\n".format(block_count, exc))
        # Record the id of every block action so the failure can be replayed.
        for act in self.actions:
            if act["_type"] == "b":
                logging.error("block: " + str(act["_id"]))
    else:
        return "{} blocks and {} transactions indexed".format(
            block_count, tx_count
        )
示例2: update
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def update(self):
    """
    Updates the entire index.
    We do this in batch to optimize performance. NB: Requires automatic IDs.
    """
    def batch(queryset, func, chunk=100, start=0):
        # Walk the queryset in pk-ordered windows of `chunk` rows so the
        # whole table is never materialized at once.
        if not queryset.exists():
            return
        try:
            # NOTE(review): `last().pk` is re-queried on every iteration;
            # assumes an integer-like, mostly-dense pk — TODO confirm.
            while start < queryset.order_by("pk").last().pk:
                func(
                    queryset.filter(pk__gt=start, pk__lte=start + chunk).iterator()
                )
                start += chunk
        except TypeError:
            # pk arithmetic failed (e.g. non-integer primary keys);
            # fall back to indexing the entire queryset in one pass.
            func(queryset.all().iterator())
    def prepare(result):
        # Reduce a prepared document to the triple the backend expects.
        prepared = self.prepare(result)
        return prepared["content_type"], prepared["pk"], prepared["data"]
    def update_bulk(result_set):
        try:
            self.get_backend().update_many(map(prepare, result_set))
        except BulkIndexError as e:
            # A failed batch is logged but does not abort the remaining batches.
            log.critical(e)
    batch(self.get_queryset(), update_bulk)
示例3: test_bulk_index_error_handling
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def test_bulk_index_error_handling(self):
    """Check that 404 and 409 errors are appropriately ignored"""
    from elasticsearch import helpers

    engine = mock.Mock()
    fake_plugin = fake_plugins.FakeSimplePlugin(es_engine=engine)
    idx_helper = helper.IndexingHelper(fake_plugin)

    bulk_path = 'searchlight.elasticsearch.plugins.helper.helpers.bulk'

    # A 404 on delete (document already gone) must be swallowed.
    with mock.patch(bulk_path) as bulk_mock:
        bulk_mock.side_effect = helpers.BulkIndexError(
            "1 document(s) failed to index",
            [{'delete': {"_id": "1", "error": "Some error", "status": 404,
                         "exception": helpers.TransportError()}}]
        )
        idx_helper.delete_documents([{'_id': '1'}])
        self.assertEqual(1, bulk_mock.call_count)

    # A 409 version conflict on index must likewise be ignored.
    with mock.patch(bulk_path) as bulk_mock:
        bulk_mock.side_effect = helpers.BulkIndexError(
            "1 document(s) failed to index",
            [{'index': {"_id": "1",
                        "error": {
                            "type": "version_conflict_engine_exception"},
                        "status": 409}}]
        )
        idx_helper.save_documents([{'id': '1'}])
        self.assertEqual(1, bulk_mock.call_count)
示例4: test_rule_update_exception
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def test_rule_update_exception(self):
    """Verify save_document retries on 409 and bails after the retry limit."""
    # Set up the return documents.
    payload = _secgrouprule_fixture(ID1, TENANT1)
    doc = {'_source': {'security_group_rules': [], 'id': 1},
           '_version': 1}

    handler = self.plugin.get_notification_handler()

    with mock.patch.object(self.plugin.index_helper,
                           'get_document') as mock_get:
        with mock.patch.object(self.plugin.index_helper,
                               'save_document') as mock_save:
            mock_get.return_value = doc
            exc_obj = helpers.BulkIndexError(
                "Version conflict", [{'index': {
                    "_id": "1", "error": "Some error", "status": 409}}]
            )

            # 1 retry (exception).
            mock_save.side_effect = [exc_obj, {}]
            handler.create_or_update_rule(
                'security_group_rule.create.end', payload, None)
            # 1 retry + 1 success = 2 calls.
            self.assertEqual(2, mock_get.call_count)
            self.assertEqual(2, mock_save.call_count)

            # 24 retries (exceptions) that exceed the retry limit.
            # Not all retries will be used.
            mock_get.reset_mock()
            mock_save.reset_mock()
            mock_save.side_effect = [exc_obj] * 24 + [{}]
            handler.create_or_update_rule(
                'security_group_rule.create.end', payload, None)
            # Verify we bailed out after 20 retries.
            self.assertEqual(20, mock_get.call_count)
            self.assertEqual(20, mock_save.call_count)
示例5: create
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def create(self, docs):
    """Bulk-create *docs* (an iterable of (pid, doc) pairs) in the index.

    Bulk errors are delegated to ``manage_bulk_err``; the index is
    refreshed afterwards either way.
    """
    def actions():
        # Lazily emit one 'create' action per incoming document.
        for pid, doc in docs:
            yield {
                '_index': self.index,
                '_type': self.dbname,
                '_op_type': 'create',
                '_id': pid,
                '_source': doc,
            }
    try:
        bulk(self.es, actions())
    except BulkIndexError as exc:
        self.manage_bulk_err(exc)
    self.es.indices.refresh(index=self.index)
示例6: save_documents
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def save_documents(self, documents):
    """Bulk-index *documents*, stamping the legacy ``_type`` field first.

    Bulk indexing failures are logged, not raised.
    """
    # ES library still requires _type to be set
    for doc in documents:
        doc['_type'] = DOCUMENT_TYPE
    try:
        bulk(self.es_connection, documents)
    except BulkIndexError as e:
        logger.error("Error bulk indexing: " + str(e))
示例7: remove
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def remove(self, doc_type, doc_ids, **kwargs):
    """ Implements call to remove the documents from the index """
    try:
        # ignore is flagged as an unexpected-keyword-arg; ES python client documents that it can be used
        # pylint: disable=unexpected-keyword-arg
        delete_actions = []
        for doc_id in doc_ids:
            log.debug("Removing document of type %s and index %s", doc_type, doc_id)  # lint-amnesty, pylint: disable=unicode-format-string
            delete_actions.append({
                '_op_type': 'delete',
                "_index": self.index_name,
                "_type": doc_type,
                "_id": doc_id,
            })
        bulk(self._es, delete_actions, **kwargs)
    except BulkIndexError as ex:
        # A 404 (document already gone) is acceptable; anything else re-raises.
        real_failures = [err for err in ex.errors if err['delete']['status'] != 404]
        if real_failures:
            log.exception("An error occurred while removing documents from the index.")
            raise
# A few disabled pylint violations here:
# This procedure takes each of the possible input parameters and builds the query with each argument
# I tried doing this in separate steps, but IMO it makes it more difficult to follow instead of less
# So, reasoning:
#
# too-many-arguments: We have all these different parameters to which we
# wish to pay attention, it makes more sense to have them listed here
# instead of burying them within kwargs
#
# too-many-locals: I think this counts all the arguments as well, but
# there are some local variables used herein that are there for transient
# purposes and actually promote the ease of understanding
#
# too-many-branches: There's a lot of logic on the 'if I have this
# optional argument then...'. Reasoning goes back to its easier to read
# the (somewhat linear) flow rather than to jump up to other locations in code
示例8: test_remove_failure_bulk
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def test_remove_failure_bulk(self):
    """ the remove operation should fail """
    failing_id = 'test_id'
    failing_type = 'test_doc'
    # A 500 is not in the ignorable set, so remove() must propagate it.
    bulk_error = {'delete': {
        'status': 500, '_type': failing_type, '_index': 'test_index', '_version': 1, 'found': True, '_id': failing_id
    }}
    simulated = BulkIndexError('Simulated error', [bulk_error])
    with patch('search.elastic.bulk', side_effect=simulated):
        with self.assertRaises(BulkIndexError):
            self.searcher.remove("test_doc", ["test_id"])
示例9: test_es_if_exists_append_es_type_coerce_error
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def test_es_if_exists_append_es_type_coerce_error(self):
    """Appending a value that overflows an overridden ES type must raise."""
    # First write forces column 'a' into the narrow 'byte' mapping.
    df1 = pandas_to_eland(
        pd_df,
        es_client=ES_TEST_CLIENT,
        es_dest_index="test-index",
        es_if_exists="append",
        es_refresh=True,
        es_type_overrides={"a": "byte"},
    )
    assert_pandas_eland_frame_equal(pd_df, df1)

    overflow_df = pd.DataFrame(
        {
            "a": [128],  # This value is too large for 'byte'
            "b": [-1.0],
            "c": ["A"],
            "d": [dt],
        },
        index=["3"],
    )

    with pytest.raises(BulkIndexError) as e:
        pandas_to_eland(
            overflow_df,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
        )

    # Assert that the value 128 caused the index error
    assert "Value [128] is out of range for a byte" in str(e.value)
示例10: test_rule_delete_exception
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def test_rule_delete_exception(self):
    """Verify delete_rule retries on 409 and bails after the retry limit."""
    # Set up the return documents.
    payload = {'security_group_rule_id': ID1}
    doc_get = {'_source': {'security_group_rules': [], 'id': 1},
               '_version': 1}
    doc_nest = {'hits': {'hits': [{
        '_id': 123456789,
        '_source': {'security_group_rules': []},
        '_version': 1}]}}

    handler = self.plugin.get_notification_handler()

    with mock.patch.object(self.plugin.index_helper,
                           'get_docs_by_nested_field') as mo_nest:
        with mock.patch.object(self.plugin.index_helper,
                               'get_document') as mock_get:
            with mock.patch.object(self.plugin.index_helper,
                                   'save_document') as mock_save:
                mo_nest.return_value = doc_nest
                mock_get.return_value = doc_get
                exc_obj = helpers.BulkIndexError(
                    "Version conflict", [{'index': {
                        "_id": "1", "error": "Some error", "status": 409}}]
                )

                # 1 retry (exception).
                mock_save.side_effect = [exc_obj, {}]
                handler.delete_rule(
                    'security_group_rule.delete.end', payload, None)
                # 1 retry + 1 success = 2 calls.
                self.assertEqual(1, mo_nest.call_count)
                self.assertEqual(1, mock_get.call_count)
                self.assertEqual(2, mock_save.call_count)

                # 24 retries (exceptions) that exceed the retry limit.
                # Not all retries will be used.
                mo_nest.reset_mock()
                mock_get.reset_mock()
                mock_save.reset_mock()
                mock_save.side_effect = [exc_obj] * 24 + [{}]
                handler.delete_rule(
                    'security_group_rule.delete.end', payload, None)
                # Verify we bailed out after 20 retries.
                self.assertEqual(1, mo_nest.call_count)
                self.assertEqual(20, mock_get.call_count)
                self.assertEqual(20, mock_save.call_count)
示例11: _update_server_group_members
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def _update_server_group_members(self, sg_id, member_id, delete=False):
    # The notification only carries a single member, so this is a
    # read-modify-write of an existing server group document. Other
    # workers may be modifying the same ES document concurrently, which
    # requires an aggressive retry policy keyed on the "version" field:
    # after a conflict we re-read the latest document and try again, and
    # after SERVERGROUP_RETRIES attempts we admit failure and stop.
    LOG.debug("Updating server group member information for %s", sg_id)
    for attempt in range(SERVERGROUP_RETRIES):
        # Fetch the current state of the server group document.
        doc = self.index_helper.get_document(sg_id)
        if not doc:
            return
        body = doc['_source']
        if not body or 'members' not in body:
            return
        if delete:
            body['members'] = [m for m in body['members'] if m != member_id]
        else:
            body['members'].append(member_id)
        try:
            # Optimistic lock: write with the next expected version number.
            self.index_helper.save_document(body, version=doc['_version'] + 1)
            return body
        except helpers.BulkIndexError as e:
            if e.errors[0]['index']['status'] != 409:
                raise
            # Conflict error, retry with a fresh copy of the document.
        if attempt == (SERVERGROUP_RETRIES - 1):
            LOG.error('Error updating server group member %(id)s:'
                      ' Too many retries' % {'id': member_id})
示例12: save_documents
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def save_documents(self, documents, versions=None, index=None):
    """Send list of serialized documents into search engine.

    Version-conflict (409) bulk errors are logged and ignored, since they
    only mean a newer copy of the document is already indexed; any other
    bulk error or request error is re-raised.

    Warning: Index vs Alias usage.
    Listeners [plugins/*/notification_handlers.py]:
        When the plugin listeners are indexing documents, we will want
        to use the normal ES alias for their resource group. In this case
        the index parameter will not be set. Listeners are by far the most
        common usage case.
    Re-Indexing [plugins/base.py::index_initial_data()]:
        When we are re-indexing we will want to use the new ES index.
        Bypassing the alias means we will not send duplicate documents
        to the old index. In this case the index will be set. Re-indexing
        is an event that will rarely happen.
    """
    use_index = index if index else self.alias_name
    for document in documents:
        # NOTE: In Elasticsearch 2.0 field names cannot contain '.', change
        # '.' to '__'.
        utils.replace_dots_in_field_names(document)
    try:
        result = helpers.bulk(
            client=self.engine,
            index=use_index,
            doc_type=self.document_type,
            chunk_size=self.index_chunk_size,
            actions=self._prepare_actions(documents, versions))
    except helpers.BulkIndexError as e:
        err_msg = []
        for err in e.errors:
            # Anything other than a version conflict is a real failure.
            if (err['index']['error']['type'] !=
                    VERSION_CONFLICT_EXCEPTION and
                    err['index']['status'] != 409):
                raise
            err_msg.append("id %(_id)s: %(error)s" % err['index'])
        LOG.warning('Version conflict %s' % ';'.join(err_msg))
        result = 0
    except es_exc.RequestError as e:
        if _is_multiple_alias_exception(e):
            LOG.error("Alias [%(a)s] with multiple indexes error" %
                      {'a': self.alias_name})
            self._index_alias_multiple_indexes_bulk(documents=documents,
                                                    versions=versions)
            result = 0
        else:
            # Bug fix: previously any other RequestError was silently
            # swallowed here and `result` was left unbound, so the
            # LOG.debug below raised NameError and masked the real error.
            raise
    LOG.debug("Indexing result: %s", result)
示例13: delete_rule
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import BulkIndexError [as 别名]
def delete_rule(self, event_type, payload, timestamp):
    """Remove a deleted security group rule from its parent group document.

    Retries on ES version conflicts (409) up to SECGROUP_RETRIES times,
    re-reading the document by id between attempts; any other bulk error
    is re-raised. Returns a pipeline.IndexItem on success, or None when
    the containing document cannot be found or has no rules field.
    """
    # See comment for create_or_update_rule() for details.
    rule_id = payload['security_group_rule_id']
    LOG.debug("Updating security group rule information for %s", rule_id)
    field = 'security_group_rules'
    # Read, modify, write of an existing security group.
    # To avoid a race condition, we are searching for the document
    # in a round-about way. Outside of the retry loop, we will
    # search for the document and save the document ID. This way we
    # do not need to search inside the loop. We will access the document
    # directly by the ID which will always return the latest version.
    orig_doc = self.index_helper.get_docs_by_nested_field(
        "security_group_rules", "id", rule_id, version=True)
    if not orig_doc:
        return
    doc_id = orig_doc['hits']['hits'][0]['_id']
    doc = orig_doc['hits']['hits'][0]
    for attempts in range(SECGROUP_RETRIES):
        body = doc['_source']
        if not body or field not in body:
            return
        # Drop the deleted rule from the group's rule list.
        body[field] = \
            list(filter(lambda r: r['id'] != rule_id, body[field]))
        version = doc['_version']
        try:
            # Optimistic lock: write with the next expected version number.
            version += 1
            self.index_helper.save_document(body, version=version)
            return pipeline.IndexItem(self.index_helper.plugin,
                                      event_type,
                                      payload,
                                      body)
        except helpers.BulkIndexError as e:
            if e.errors[0]['index']['status'] == 409:
                # Conflict. Retry with new version.
                doc = self.index_helper.get_document(doc_id)
                if not doc:
                    return
            else:
                raise
        if attempts == (SECGROUP_RETRIES - 1):
            LOG.error('Error deleting security group rule %(id)s:'
                      ' Too many retries' % {'id': rule_id})