本文整理汇总了Python中pysolr.Solr.add方法的典型用法代码示例。如果您正苦于以下问题:Python Solr.add方法的具体用法?Python Solr.add怎么用?Python Solr.add使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysolr.Solr
的用法示例。
在下文中一共展示了Solr.add方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Processor
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class Processor(object):
    """Indexes plain-text documents into a Solr server.

    Each input file becomes one Solr document whose ``id`` is the SHA-1
    hash of ``"<site-id>-<title>"``.
    """

    def __init__(self, solr_server_url):
        # One connection, reused for every process() call.
        self.server = Solr(solr_server_url)

    def process(self, fname):
        """Read *fname*, extract its title and content, and add it to Solr.

        The document URL is derived from the file's base name; the first
        non-blank line is the title, the remaining non-decoration lines
        form the content.
        """
        base, _ = os.path.splitext(os.path.basename(fname))
        url = DOCUMENT_URL + base + '.html'
        # 'with' guarantees the handle is closed even if parsing raises
        # (the original leaked the file object on error). next(fp) is the
        # 2.6+/3.x spelling of the py2-only fp.next().
        with open(fname) as fp:
            title = None
            while not title:
                title = next(fp).strip()
            # Accumulate parts and join once instead of quadratic `+=`.
            parts = []
            for line in fp:
                s = line.strip()
                # Skip reST-style decoration lines (rules/underlines).
                if s and not s.startswith(('**', '==', '--')):
                    parts.append(s)
            content = ''.join(parts)
        document_id = u"%s-%s" % (DOCUMENT_SITE_ID, title)
        # Lazy %-args instead of eager string formatting in the log call.
        logging.info("new document: %s", document_id)
        t = os.path.getmtime(fname)
        doc = {
            'id': hashlib.sha1(document_id.encode('utf-8')).hexdigest(),
            'site': DOCUMENT_SITE_ID,
            'url': url,
            'title': title,
            'content': content,
            'last_modified': datetime.datetime.fromtimestamp(t),
        }
        self.server.add([doc])
示例2: SolrSearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SolrSearchBackend(BaseSearchBackend):
    """Haystack search backend built on pysolr."""

    # Words reserved by Solr for special use.
    RESERVED_WORDS = ("AND", "NOT", "OR", "TO")

    # Characters reserved by Solr for special use.
    # The '\\' must come first, so as not to overwrite the other slash replacements.
    RESERVED_CHARACTERS = ("\\", "+", "-", "&&", "||", "!", "(", ")",
                           "{", "}", "[", "]", "^", '"', "~", "*", "?", ":")

    def __init__(self, connection_alias, **connection_options):
        super(SolrSearchBackend, self).__init__(connection_alias, **connection_options)
        # PEP 8: `x not in y` rather than `not x in y`.
        if "URL" not in connection_options:
            raise ImproperlyConfigured(
                "You must specify a 'URL' in your settings for connection '%s'." % connection_alias
            )
        self.conn = Solr(connection_options["URL"], timeout=self.timeout)
        self.log = logging.getLogger("haystack")

    def update(self, index, iterable, commit=True):
        """Prepare every object in *iterable* and add the batch to Solr.

        The try/except is now per-object: previously a single
        UnicodeDecodeError silently discarded every remaining object in
        the chunk.
        """
        docs = []
        for obj in iterable:
            try:
                docs.append(index.full_prepare(obj))
            except UnicodeDecodeError:
                sys.stderr.write("Chunk failed.\n")
        if docs:
            try:
                self.conn.add(docs, commit=commit, boost=index.get_field_weights())
            # `except X as e` replaces the py2-only `except X, e`.
            except (IOError, SolrError) as e:
                self.log.error("Failed to add documents to Solr: %s", e)
示例3: SolrSearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SolrSearchBackend(BaseSearchBackend):
    """Haystack search backend built on pysolr, honouring the
    COMMIT_UPDATES connection option."""

    # Words reserved by Solr for special use.
    RESERVED_WORDS = (
        'AND',
        'NOT',
        'OR',
        'TO',
    )

    # Characters reserved by Solr for special use.
    # The '\\' must come first, so as not to overwrite the other slash replacements.
    RESERVED_CHARACTERS = (
        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
        '[', ']', '^', '"', '~', '*', '?', ':',
    )

    def __init__(self, connection_alias, **connection_options):
        super(SolrSearchBackend, self).__init__(connection_alias, **connection_options)
        # PEP 8: `x not in y` rather than `not x in y`.
        if 'URL' not in connection_options:
            raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
        self.connection_options = connection_options
        self.conn = Solr(connection_options['URL'], timeout=self.timeout)
        self.log = logging.getLogger('haystack')

    def update(self, index, iterable, commit=None):
        """Index every object in *iterable*.

        When *commit* is None, fall back to the connection's
        COMMIT_UPDATES option (default True).
        """
        docs = []
        # Identity comparison for the None sentinel (was `commit == None`).
        if commit is None:
            commit = self.connection_options.get('COMMIT_UPDATES', True)
        for obj in iterable:
            try:
                docs.append(index.full_prepare(obj))
            except UnicodeDecodeError:
                if not self.silently_fail:
                    raise
                # We'll log the object identifier but won't include the actual object
                # to avoid the possibility of that generating encoding errors while
                # processing the log message:
                self.log.error(u"UnicodeDecodeError while preparing object for update", exc_info=True, extra={
                    "data": {
                        "index": index,
                        "object": get_identifier(obj)
                    }
                })
        if len(docs) > 0:
            try:
                self.conn.add(docs, commit=commit, boost=index.get_field_weights())
            # `except X as e` replaces the py2-only `except X, e`.
            except (IOError, SolrError) as e:
                if not self.silently_fail:
                    raise
                self.log.error("Failed to add documents to Solr: %s", e)
示例4: add_to_solr
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def add_to_solr(response, body):
    """Index a crawled response's *body* in Solr under id "response:<id>"."""
    document = {
        'id': "response:%d" % response.id,
        'name': response.url.url,
        'text': body.encode('utf-8'),
    }
    Solr(settings.SOLR_BASE).add([document])
示例5: SolrSearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SolrSearchBackend(BaseSearchBackend):
    """Haystack search backend built on pysolr with HTTP basic auth."""

    # Words reserved by Solr for special use.
    RESERVED_WORDS = (
        'AND',
        'NOT',
        'OR',
        'TO',
    )

    # Characters reserved by Solr for special use.
    # The '\\' must come first, so as not to overwrite the other slash replacements.
    RESERVED_CHARACTERS = (
        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
        '[', ']', '^', '"', '~', '*', '?', ':',
    )

    def __init__(self, connection_alias, **connection_options):
        super(SolrSearchBackend, self).__init__(connection_alias, **connection_options)
        # PEP 8: `x not in y` rather than `not x in y`.
        if 'URL' not in connection_options:
            raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
        user = connection_options.get("HTTP_AUTH_USERNAME")
        passwd = connection_options.get("HTTP_AUTH_PASSWORD")
        self.conn = Solr(connection_options['URL'], auth=(user, passwd),
                         timeout=self.timeout)
        self.log = logging.getLogger('haystack')

    def update(self, index, iterable, commit=True):
        """Prepare every object in *iterable* and add the batch to Solr.

        The try/except is now per-object: previously, with silently_fail
        set, one UnicodeDecodeError discarded every remaining object in
        the chunk.
        """
        docs = []
        for obj in iterable:
            try:
                docs.append(index.full_prepare(obj))
            except UnicodeDecodeError:
                if not self.silently_fail:
                    raise
                self.log.error("Chunk failed.\n")
        if len(docs) > 0:
            try:
                self.conn.add(docs, commit=commit, boost=index.get_field_weights())
            # `except X as e` replaces the py2-only `except X, e`.
            except (IOError, SolrError) as e:
                if not self.silently_fail:
                    raise
                self.log.error("Failed to add documents to Solr: %s", e)
示例6: SearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SearchBackend(BaseSearchBackend):
    """Legacy (site-based) haystack Solr backend built on pysolr."""

    # Words reserved by Solr for special use.
    RESERVED_WORDS = (
        'AND',
        'NOT',
        'OR',
        'TO',
    )

    # Characters reserved by Solr for special use.
    # The '\\' must come first, so as not to overwrite the other slash replacements.
    RESERVED_CHARACTERS = (
        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
        '[', ']', '^', '"', '~', '*', '?', ':',
    )

    def __init__(self, site=None):
        super(SearchBackend, self).__init__(site)
        if not hasattr(settings, 'HAYSTACK_SOLR_URL'):
            raise ImproperlyConfigured('You must specify a HAYSTACK_SOLR_URL in your settings.')
        timeout = getattr(settings, 'HAYSTACK_SOLR_TIMEOUT', 10)
        self.conn = Solr(settings.HAYSTACK_SOLR_URL, timeout=timeout)
        self.log = logging.getLogger('haystack')

    def update(self, index, iterable, commit=True):
        """Prepare every object in *iterable* and add the batch to Solr.

        The try/except is now per-object: previously, with silently_fail
        set, one UnicodeDecodeError discarded every remaining object in
        the chunk.
        """
        docs = []
        for obj in iterable:
            try:
                docs.append(index.full_prepare(obj))
            except UnicodeDecodeError:
                if not self.silently_fail:
                    raise
                self.log.error("Chunk failed.\n")
        if len(docs) > 0:
            try:
                self.conn.add(docs, commit=commit, boost=index.get_field_weights())
            # `except X as e` replaces the py2-only `except X, e`.
            except (IOError, SolrError) as e:
                if not self.silently_fail:
                    raise
                self.log.error("Failed to add documents to Solr: %s", e)
示例7: solr_save
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def solr_save(self):
    """Save the item to the solr index. Index will be updated if solr_id already exists in the index."""
    # Guard clause: nothing to do when Solr isn't running.
    if not settings.SOLR['running']:
        return False
    connection = Solr(settings.SOLR_URL)
    item_id = str(self.id)
    document = {
        'solr_id': 'Item_' + item_id,
        'id': item_id,
        'class': 'Item',
        'title_t': self.title,
        'description_t': self.description,
        'lat_f': str(self.lat),
        'lng_f': str(self.lng),
    }
    connection.add([document])
    return True
示例8: SolrSearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SolrSearchBackend(BaseSearchBackend):
    """Haystack search backend built on pysolr."""

    # Words reserved by Solr for special use.
    RESERVED_WORDS = ("AND", "NOT", "OR", "TO")

    # Characters reserved by Solr for special use.
    # The '\\' must come first, so as not to overwrite the other slash replacements.
    RESERVED_CHARACTERS = ("\\", "+", "-", "&&", "||", "!", "(", ")",
                           "{", "}", "[", "]", "^", '"', "~", "*", "?", ":")

    def __init__(self, connection_alias, **connection_options):
        super(SolrSearchBackend, self).__init__(connection_alias, **connection_options)
        # PEP 8: `x not in y` rather than `not x in y`.
        if "URL" not in connection_options:
            raise ImproperlyConfigured(
                "You must specify a 'URL' in your settings for connection '%s'." % connection_alias
            )
        self.conn = Solr(connection_options["URL"], timeout=self.timeout)
        self.log = logging.getLogger("haystack")

    def update(self, index, iterable, commit=True):
        """Prepare each object individually (so one bad object doesn't
        abort the chunk) and add the batch to Solr."""
        docs = []
        for obj in iterable:
            try:
                docs.append(index.full_prepare(obj))
            except UnicodeDecodeError:
                if not self.silently_fail:
                    raise
                # We'll log the object identifier but won't include the actual object
                # to avoid the possibility of that generating encoding errors while
                # processing the log message:
                self.log.error(
                    u"UnicodeDecodeError while preparing object for update",
                    exc_info=True,
                    extra={"data": {"index": index, "object": get_identifier(obj)}},
                )
        if len(docs) > 0:
            try:
                self.conn.add(docs, commit=commit, boost=index.get_field_weights())
            # `except X as e` replaces the py2-only `except X, e`.
            except (IOError, SolrError) as e:
                if not self.silently_fail:
                    raise
                self.log.error("Failed to add documents to Solr: %s", e)
示例9: add_doc
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def add_doc(self, id):
    """Build the Solr document for the single record *id* and add it."""
    connection = Solr(settings.SOLR_URL)
    self._build_group_tree()
    record_ids = [id]
    # Preload related rows in bulk so _record_to_solr doesn't hit the DB
    # per relation.
    media = self._preload_related(Media, record_ids)
    field_values = self._preload_related(FieldValue, record_ids, related=2)
    groups = self._preload_related(CollectionItem, record_ids)
    core_fields = dict(
        (f, f.get_equivalent_fields())
        for f in Field.objects.filter(standard__prefix='dc')
    )
    record = Record.objects.filter(id__in=record_ids)[0]
    doc = self._record_to_solr(
        record,
        core_fields,
        groups.get(record.id, []),
        field_values.get(record.id, []),
        media.get(record.id, []),
    )
    connection.add([doc])
示例10: reindex_resources
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def reindex_resources(dbname, url=settings.SOLR_URL, printit=False):
    """Rebuild the Solr index for every Resource.

    Clears the entire index first, then re-adds resource documents in
    batches of settings.SOLR_BATCH_SIZE (default 100).
    """
    # logger.error("indexing resources:")
    if printit:
        # Single-string print works under both Python 2 and 3.
        print('CLEARING SOLR INDEX: %s' % url)
    conn = Solr(url)
    conn.delete(q='*:*')
    batch_size = getattr(settings, 'SOLR_BATCH_SIZE', 100)
    if printit:
        print('Indexing %s Resources... (batch: %s)'
              % (Resource.objects.count(), batch_size))
    docs = []
    for res in Resource.objects:
        docs.extend(res.index())
        # Flush only when a full batch has accumulated. The original
        # tested `i % batch_size == 0`, which fired immediately at i == 0
        # (a batch of one) and counted resources rather than documents.
        if len(docs) >= batch_size:
            conn.add(docs)
            docs = []
    # Don't send an empty add when the total was an exact multiple.
    if docs:
        conn.add(docs)
示例11: _index_products
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def _index_products(products, request, delete=False):
    """Indexes given products.
    """
    conn = Solr(SOLR_ADDRESS)
    if delete:
        conn.delete(q='*:*')
    docs = []
    for product in products:
        # Just index the default variant of a "Product with Variants"
        if product.is_product_with_variants():
            product = product.get_default_variant()
            if product is None:
                continue
        # Categories
        categories = [category.name for category in product.get_categories()]
        # Manufacturer
        manufacturer = product.manufacturer
        manufacturer_name = manufacturer.name if manufacturer else ""
        docs.append({
            "id" : product.id,
            "name" : product.get_name(),
            "price" : product.get_price(request),
            "categories" : categories,
            "keywords" : product.get_meta_keywords(),
            "manufacturer" : manufacturer_name,
            "sku_manufacturer" : product.sku_manufacturer,
            "description" : product.description,
        })
    conn.add(docs)
示例12: reindex_resources
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
def reindex_resources(url=settings.SOLR_URL, printit=False):
    """Rebuild the Solr index for every Resource.

    Deletes only documents of the resource type (res_type), then re-adds
    resource documents in batches of settings.SOLR_BATCH_SIZE.
    """
    # logger.error("indexing resources:")
    from resources.models import Resource
    if printit:
        # Single-string print works under both Python 2 and 3.
        print('CLEARING SOLR INDEX for Resources: %s' % url)
    conn = Solr(url)
    conn.delete(q='res_type:%s' % settings.SOLR_RES)
    batch_size = getattr(settings, 'SOLR_BATCH_SIZE', 100)
    if printit:
        print('Indexing %s Resources... (batch: %s)'
              % (Resource.objects.count(), batch_size))
    docs = []
    for res in Resource.objects:
        entry = res.index()
        if entry:
            docs.extend(entry)
        # Flush only when a full batch has accumulated. The original
        # tested `i % batch_size == 0`, which fired immediately at i == 0
        # (a batch of one) and counted resources rather than documents.
        if len(docs) >= batch_size:
            conn.add(docs)
            docs = []
    # Don't send an empty add when the total was an exact multiple.
    if docs:
        conn.add(docs)
示例13: documents
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
output[header] = newTime.isoformat() + 'Z'
except:
# print col
print lineno
validRow = False
# print newTime.isoformat()
elif header == 'fb_assoc':
output[header] = col.strip().split(' ')
elif header == 'geoloc':
try:
cleanCol = col.replace('geolocation{latitude=','').replace('longitude=','').replace('}','').replace(', ',',')
# print cleanCol
if cleanCol != 'null':
output[header] = cleanCol
except:
print lineno
validRow = False
else:
output[header] = col
if validRow:
data.append(output)
# update the index every 10000 documents (reduces overhead)
if i > (10000*index):
conn.add(data)
data = []
index = index + 1
i = i + 1
if data:
conn.add(data)
示例14: SearchBackend
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class SearchBackend(BaseSearchBackend):
def __init__(self):
    """Open the pysolr connection configured in Django settings."""
    if not hasattr(settings, "HAYSTACK_SOLR_URL"):
        raise ImproperlyConfigured("You must specify a HAYSTACK_SOLR_URL in your settings.")
    # DRL_TODO: handle the connection more gracefully, especially when
    # the backend is down.
    self.conn = Solr(settings.HAYSTACK_SOLR_URL)
def update(self, index, iterable, commit=True):
    """Build a Solr document per object and add the whole batch.

    The try/except is per-object: previously a single
    UnicodeDecodeError silently discarded every remaining object in the
    chunk (and the dead ``pass`` after the write is gone).
    """
    docs = []
    for obj in iterable:
        try:
            doc = {
                "id": self.get_identifier(obj),
                "django_ct_s": "%s.%s" % (obj._meta.app_label, obj._meta.module_name),
                "django_id_s": force_unicode(obj.pk),
            }
            doc.update(index.prepare(obj))
            docs.append(doc)
        except UnicodeDecodeError:
            sys.stderr.write("Chunk failed.\n")
    self.conn.add(docs, commit=commit)
def remove(self, obj, commit=True):
    """Delete *obj*'s document from the Solr index."""
    self.conn.delete(id=self.get_identifier(obj), commit=commit)
def clear(self, models=[], commit=True):
    """Remove indexed documents — for the given *models*, or everything."""
    if models:
        queries = [
            "django_ct_s:%s.%s" % (model._meta.app_label, model._meta.module_name)
            for model in models
        ]
        self.conn.delete(q=" OR ".join(queries), commit=commit)
    else:
        # *:* matches all docs in Solr
        self.conn.delete(q="*:*", commit=commit)
    # Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99
    self.conn.optimize()
def search(
self,
query_string,
sort_by=None,
start_offset=0,
end_offset=None,
fields="",
highlight=False,
facets=None,
date_facets=None,
query_facets=None,
narrow_queries=None,
):
if len(query_string) == 0:
return []
kwargs = {"fl": "* score"}
if fields:
kwargs["fl"] = fields
if sort_by is not None:
kwargs["sort"] = sort_by
if start_offset is not None:
kwargs["start"] = start_offset
if end_offset is not None:
kwargs["rows"] = end_offset
if highlight is True:
kwargs["hl"] = "true"
kwargs["hl.fragsize"] = "200"
if facets is not None:
kwargs["facet"] = "on"
kwargs["facet.field"] = facets
if date_facets is not None:
kwargs["facet"] = "on"
kwargs["facet.date"] = date_facets.keys()
for key, value in date_facets.items():
# Date-based facets in Solr kinda suck.
kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get("start_date"))
kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get("end_date"))
kwargs["f.%s.facet.date.gap" % key] = value.get("gap")
if query_facets is not None:
kwargs["facet"] = "on"
kwargs["facet.query"] = ["%s:%s" % (field, value) for field, value in query_facets.items()]
if narrow_queries is not None:
kwargs["fq"] = list(narrow_queries)
#.........这里部分代码省略.........
示例15: DocManager
# 需要导入模块: from pysolr import Solr [as 别名]
# 或者: from pysolr.Solr import add [as 别名]
class DocManager():
"""The DocManager class creates a connection to the backend engine and
adds/removes documents, and in the case of rollback, searches for them.
The reason for storing id/doc pairs as opposed to doc's is so that multiple
updates to the same doc reflect the most up to date version as opposed to
multiple, slightly different versions of a doc.
"""
def __init__(self, url, auto_commit=False, unique_key='_id', **kwargs):
    """Verify Solr URL and establish a connection.
    """
    self.solr = Solr(url)
    self.auto_commit = auto_commit
    self.unique_key = unique_key
    # Filled in by build_fields() from the live Solr schema.
    self.field_list = []
    self.dynamic_field_list = []
    self.build_fields()
    if auto_commit:
        self.run_auto_commit()
def _parse_fields(self, result, field_name):
""" If Schema access, parse fields and build respective lists
"""
field_list = []
for key, value in result.get('schema', {}).get(field_name, {}).items():
if key not in field_list:
field_list.append(key)
return field_list
def build_fields(self):
    """Fetch the Solr schema and cache declared and dynamic field names."""
    raw = self.solr._send_request('get', ADMIN_URL)
    schema = decoder.decode(raw)
    # NOTE(review): the trailing comma makes field_list a 1-tuple
    # wrapping the list — clean_doc() indexes it with [0], so this is
    # preserved as-is; confirm before "fixing" it.
    self.field_list = self._parse_fields(schema, 'fields'),
    self.dynamic_field_list = self._parse_fields(schema, 'dynamicFields')
def clean_doc(self, doc):
""" Cleans a document passed in to be compliant with the Solr as
used by Solr. This WILL remove fields that aren't in the schema, so
the document may actually get altered.
"""
if not self.field_list:
return doc
fixed_doc = {}
doc[self.unique_key] = doc["_id"]
for key, value in doc.items():
if key in self.field_list[0]:
fixed_doc[key] = value
# Dynamic strings. * can occur only at beginning and at end
else:
for field in self.dynamic_field_list:
if field[0] == '*':
regex = re.compile(r'\w%s\b' % (field))
else:
regex = re.compile(r'\b%s\w' % (field))
if regex.match(key):
fixed_doc[key] = value
return fixed_doc
def stop(self):
""" Stops the instance
"""
self.auto_commit = False
def upsert(self, doc):
    """Update or insert one mongo document (a plain dict) into Solr.

    Raises errors.OperationFailed when Solr rejects the add.
    """
    cleaned = self.clean_doc(doc)
    try:
        self.solr.add([cleaned], commit=True)
    except SolrError:
        raise errors.OperationFailed(
            "Could not insert %r into Solr" % bsjson.dumps(doc))
def bulk_upsert(self, docs):
    """Update or insert multiple documents into Solr.

    *docs* may be any iterable of mongo documents; cleaning is lazy via
    a generator so the batch is never materialised twice.
    """
    try:
        self.solr.add((self.clean_doc(d) for d in docs), commit=True)
    except SolrError:
        raise errors.OperationFailed(
            "Could not bulk-insert documents into Solr")
def remove(self, doc):
"""Removes documents from Solr
The input is a python dictionary that represents a mongo document.
"""
#.........这里部分代码省略.........