本文整理汇总了Python中SolrClient.SolrClient.load_document_by_id方法的典型用法代码示例。如果您正苦于以下问题:Python SolrClient.load_document_by_id方法的具体用法?Python SolrClient.load_document_by_id怎么用?Python SolrClient.load_document_by_id使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SolrClient.SolrClient的用法示例。
在下文中一共展示了SolrClient.load_document_by_id方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Integrator
# Required import: from SolrClient import SolrClient [as alias]
# Or: from SolrClient.SolrClient import load_document_by_id [as alias]
class Integrator(object):
"""
Provide integration with KNow Knowledge Portal as scheduled batch job
"""
def __init__(self):
    """
    Create the logger and the remote and local Solr clients used by the batch job.

    The credentials for the remote Solr server are read from the
    ``REMOTE_SOLR_USERNAME`` and ``REMOTE_SOLR_PASSWORD`` environment variables.
    When a variable is unset, the previously hard-coded value is used, so
    existing deployments behave exactly as before.
    """
    import os  # imported locally so the block does not depend on the file's import header

    self._logger = logging.getLogger(__name__)
    # NOTE(review): the fallback values below are the credentials that used to be
    # embedded directly in the call. They are kept only for backward compatibility;
    # rotate them and drop the fallbacks once every deployment sets the variables.
    remote_username = os.environ.get("REMOTE_SOLR_USERNAME", "admin")
    remote_password = os.environ.get("REMOTE_SOLR_PASSWORD", "ekicas71")
    self.remote_solr_client = SolrClient(remote_solr_server,
                                         username=remote_username,
                                         password=remote_password)
    self.local_solr_client = SolrClient(local_solr_server)
def batch_processing_product_issue_attachments(self):
    """
    Page through the remote Solr documents matching ``attachment_ids_txt:*``
    and hand each page to ``batch_indexing_documents``.

    The first page is requested once: its ``numFound`` value drives the paging
    loop and its ``docs`` are indexed as the first batch, so no page is fetched
    twice.

    :return: None
    :raises IntegrationException: re-raised unchanged after being logged when
        indexing a page fails
    """
    self._logger.info("starting to retrieving attachement urls and batch indexing textual attachments ...")
    query = 'attachment_ids_txt:*'
    batch_num = 10
    response = self.remote_solr_client.load_documents_by_custom_query(query, start=0,
                                                                      rows=batch_num)
    total_num = response['numFound']
    self._logger.info("total number of document with attachments: [%s]", total_num)
    for start_index in range(0, total_num, batch_num):
        if start_index:
            # Page 0 is already in hand from the request above; only later
            # pages need another round trip to the remote server.
            response = self.remote_solr_client.load_documents_by_custom_query(query,
                                                                              start=start_index,
                                                                              rows=batch_num)
        docs = response['docs']
        try:
            self.batch_indexing_documents(docs)
        except IntegrationException:
            # logger.exception keeps the same message but also records the
            # traceback, which the plain error() call discarded.
            self._logger.exception("error batch processing while indexing!")
            raise
        else:
            self._logger.info("batch indexing documents. progress [%s]", start_index)
    self._logger.info("complete batch processing of documents. Documents has been indexed completely.")
def batch_indexing_documents(self, docs):
"""
batch process a number of attachments associated with product issue
:param docs: dictionary, Solr document objects
:return:
"""
self._logger.info("batch processing and indexing [%s] product issues ..." % len(docs))
docs_to_index = []
for doc in docs:
prod_issue_doc_id = doc['id']
attachment_ids = doc['attachment_ids_txt'] if 'attachment_ids_txt' in doc else ''
# domain specific metadata
prod_issue = doc[
'product_issue_details#productIssue_s'] if 'product_issue_details#productIssue_s' in doc else ''
product = doc['product_issue_details#product_s'] if 'product_issue_details#product_s' in doc else ''
prod_issue_location = doc[
'product_issue_details#location_s'] if 'product_issue_details#location_s' in doc else ''
prod_issue_owner = doc['product_issue_details#owner_s'] if 'product_issue_details#owner_s' in doc else ''
location_type = doc['location#type_s'] if 'location#type_s' in doc else ''
location_local_name = doc['location#localName_s'] if 'location#localName_s' in doc else ''
metadata_dict = {"literal.product_issue_details#productIssue_s": prod_issue,
"literal.product_issue_details#product_s": product,
"literal.product_issue_details#location_s": prod_issue_location,
"literal.location#type_s": location_type,
"literal.product_issue_details#owner_s": prod_issue_owner,
"literal.location#localName_s": location_local_name,
"literal.prod_issue_doc_id_s": prod_issue_doc_id}
for attachment_id in attachment_ids:
attachment_url = self.request_attachment_url_by_id(attachment_id)
if not is_url_accessible(attachment_url):
self._logger.warn("The attachment [%s] is not accessible.", attachment_url)
continue
if is_image(attachment_url):
self._logger.warn("The attachment [%s] is image. Skip for indexing", attachment_url)
continue
existing_doc = self.local_solr_client.load_document_by_id(attachment_url)
try:
if existing_doc is None:
self._logger.debug("current doc is not exist. Indexing now...")
self.local_solr_client.update_document_by_url(attachment_url, metadata=metadata_dict)
self._logger.debug("new doc is indexed.")
else:
# if current doc is existed
# update existing doc with possible new metadata
self._logger.debug("current doc is exist. update existing index now...")
existing_doc.update(metadata_dict)
self.local_solr_client.update_document_by_url(attachment_url, metadata=existing_doc)
self._logger.debug("updating of existing doc is complete.")
except SolrError as solrError:
error_message = str(solrError)
self._logger.error(error_message)
#.........这里部分代码省略.........