This article collects typical usage examples of the Python class collective.solr.indexer.SolrIndexProcessor. If you are wondering what the SolrIndexProcessor class is for, how to use it, or what working code that uses it looks like, the curated examples below should help.
Shown below are 11 code examples of the SolrIndexProcessor class, sorted by popularity by default.
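All of the examples share one core pattern: look up the Solr connection manager utility, wrap it in a SolrIndexProcessor, and hand it objects to index or unindex. Here is a minimal sketch of that pattern, assuming a registered ISolrConnectionManager utility and using `obj` as a purely illustrative stand-in for any indexable content object:

from zope.component import queryUtility
from collective.solr.indexer import SolrIndexProcessor
from collective.solr.interfaces import ISolrConnectionManager

manager = queryUtility(ISolrConnectionManager)  # the registered utility
proc = SolrIndexProcessor(manager)

proc.index(obj)    # sends an "add" request with the object's data
proc.unindex(obj)  # sends a "delete" request for the object's unique key
manager.getConnection().commit()  # make the changes searchable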
Example 1: reindex
def reindex(self, batch=1000, skip=0):
    """ find all contentish objects (meaning all objects derived from one
        of the catalog mixin classes) and (re)index them """
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    zodb_conn = self.context._p_jar
    log = self.mklog()
    log("reindexing solr catalog...\n")
    if skip:
        log("skipping indexing of %d object(s)...\n" % skip)
    real = timer()  # real time
    lap = timer()  # real lap time (for intermediate commits)
    cpu = timer(clock)  # cpu time
    processed = 0
    schema = manager.getSchema()
    key = schema.uniqueKey
    updates = {}  # dict to hold data to be updated
    flush = notimeout(lambda: conn.flush())

    def checkPoint():
        for boost_values, data in updates.values():
            conn.add(boost_values=boost_values, **data)
        updates.clear()
        msg = "intermediate commit (%d items processed, " \
              "last batch in %s)...\n" % (processed, lap.next())
        log(msg)
        logger.info(msg)
        flush()
        zodb_conn.cacheGC()

    cpi = checkpointIterator(checkPoint, batch)
    count = 0
    for path, obj in findObjects(self.context):
        if indexable(obj):
            if getOwnIndexMethod(obj, "indexObject") is not None:
                log("skipping indexing of %r via private method.\n" % obj)
                continue
            count += 1
            if count <= skip:
                continue
            data, missing = proc.getData(obj)
            prepareData(data)
            if not missing:
                value = data.get(key, None)
                if value is not None:
                    updates[value] = (boost_values(obj, data), data)
                    processed += 1
                    cpi.next()
            else:
                log("missing data, skipping indexing of %r.\n" % obj)
    checkPoint()
    conn.commit()
    log("solr index rebuilt.\n")
    msg = "processed %d items in %s (%s cpu time)."
    msg = msg % (processed, real.next(), cpu.next())
    log(msg)
    logger.info(msg)
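The batching above hinges on checkpointIterator, which triggers the checkPoint callback every `batch` iterations; the explicit trailing checkPoint() call flushes the final, partial batch. A standalone sketch of that pattern (a hypothetical re-implementation in Python 3 syntax, not collective.solr's own code):

def checkpoint_iterator(callback, interval):
    """Yield indefinitely, invoking `callback` every `interval` steps."""
    count = 0
    while True:
        count += 1
        if count % interval == 0:
            callback()
        yield

# usage: advance the iterator once per processed item
cpi = checkpoint_iterator(lambda: print('commit!'), interval=1000)
for item in range(2500):
    next(cpi)  # 'commit!' fires after items 1000 and 2000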
Example 2: cleanup
def cleanup(self, batch=1000):
    """ remove entries from solr that don't have a corresponding Zope
        object or have a different UID than the real object """
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    log = self.mklog(use_std_log=True)
    log('cleaning up solr index...\n')
    key = manager.getSchema().uniqueKey
    start = 0
    resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
    res = resp.results()
    log('%s items in solr catalog\n' % resp.response.numFound)
    deleted = 0
    reindexed = 0
    while len(res) > 0:
        for flare in res:
            try:
                ob = PloneFlare(flare).getObject()
            except Exception as err:
                log('Error getting object, removing: %s (%s)\n' % (
                    flare['path_string'], err))
                conn.delete(flare[key])
                deleted += 1
                continue
            if not IUUIDAware.providedBy(ob):
                log('Object %s of type %s does not support uuids, skipping.\n' %
                    ('/'.join(ob.getPhysicalPath()), ob.meta_type))
                continue
            uuid = IUUID(ob)
            if uuid != flare[key]:
                log('indexed under wrong UID, removing: %s\n' %
                    flare['path_string'])
                conn.delete(flare[key])
                deleted += 1
                realob_res = SolrResponse(conn.search(q='%s:%s' %
                    (key, uuid))).results()
                if len(realob_res) == 0:
                    log('no sane entry for last object, reindexing\n')
                    data, missing = proc.getData(ob)
                    prepareData(data)
                    if not missing:
                        boost = boost_values(ob, data)
                        conn.add(boost_values=boost, **data)
                        reindexed += 1
                    else:
                        log(' missing data, cannot index.\n')
        log('handled batch of %d items, committing\n' % len(res))
        conn.commit()
        start += batch
        resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
        res = resp.results()
    msg = 'solr cleanup finished, %s item(s) removed, %s item(s) reindexed\n' \
        % (deleted, reindexed)
    log(msg)
    logger.info(msg)
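Note how the cleanup pages through the entire index with Solr's `start`/`rows` parameters. Stripped of the per-document checks, the paging skeleton looks like this (reusing `conn`, `batch`, and SolrResponse exactly as in the example above):

start = 0
resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
res = resp.results()
while len(res) > 0:
    for flare in res:
        pass  # inspect, delete, or reindex each stored document here
    conn.commit()
    start += batch  # move the window to the next page
    resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
    res = resp.results()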
Example 3: testTwoRequests
def testTwoRequests(self):
    mngr = SolrConnectionManager(active=True)
    proc = SolrIndexProcessor(mngr)
    output = fakehttp(mngr.getConnection(), getData('schema.xml'),
                      getData('add_response.txt'))
    proc.index(self.foo)
    mngr.closeConnection()
    self.assertEqual(len(output), 2)
    self.failUnless(output.get().startswith(self.schema_request))
    self.assertEqual(sortFields(output.get()), getData('add_request.txt'))
Example 4: RobustnessTests
class RobustnessTests(TestCase):

    layer = COLLECTIVE_SOLR_MOCK_REGISTRY_FIXTURE

    def setUp(self):
        self.mngr = SolrConnectionManager()
        self.mngr.setHost(active=True)
        self.conn = self.mngr.getConnection()
        self.proc = SolrIndexProcessor(self.mngr)
        self.log = []  # catch log messages...

        def logger(*args):
            self.log.extend(args)
        logger_indexer.warning = logger
        config = getConfig()
        config.atomic_updates = True

    def tearDown(self):
        self.mngr.closeConnection()
        self.mngr.setHost(active=False)

    def testIndexingWithUniqueKeyMissing(self):
        # fake schema response
        fakehttp(self.conn, getData('simple_schema.xml'))
        # read and cache the schema
        self.mngr.getSchema()
        response = getData('add_response.txt')
        output = fakehttp(self.conn, response)  # fake add response
        foo = Foo(id='500', name='foo')
        # indexing sends data
        self.proc.index(foo)
        # nothing happened...
        self.assertEqual(len(output), 0)
        self.assertEqual(self.log, [
            'schema is missing unique key, skipping indexing of %r', foo])

    def testUnindexingWithUniqueKeyMissing(self):
        # fake schema response
        fakehttp(self.conn, getData('simple_schema.xml'))
        # read and cache the schema
        self.mngr.getSchema()
        response = getData('delete_response.txt')
        # fake delete response
        output = fakehttp(self.conn, response)
        foo = Foo(id='500', name='foo')
        # unindexing sends data
        self.proc.unindex(foo)
        # nothing happened...
        self.assertEqual(len(output), 0)
        self.assertEqual(self.log, [
            'schema is missing unique key, skipping unindexing of %r', foo])
Example 5: testExtraRequest
def testExtraRequest(self):
    # basically the same as `testThreeRequests`, except it
    # tests adding fake responses consecutively
    mngr = SolrConnectionManager(active=True)
    proc = SolrIndexProcessor(mngr)
    conn = mngr.getConnection()
    output = fakehttp(conn, getData('schema.xml'))
    fakemore(conn, getData('add_response.txt'))
    proc.index(self.foo)
    fakemore(conn, getData('delete_response.txt'))
    proc.unindex(self.foo)
    mngr.closeConnection()
    self.assertEqual(len(output), 3)
    self.failUnless(output.get().startswith(self.schema_request))
    self.assertEqual(sortFields(output.get()), getData('add_request.txt'))
    self.assertEqual(output.get(), getData('delete_request.txt'))
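Examples 3 and 5 lean on the test helpers fakehttp and fakemore. Judging purely from their use here, fakehttp installs a fake HTTP transport preloaded with canned responses and returns a buffer of the captured requests (read back via output.get()), while fakemore queues additional responses on an already-faked connection. A sketch of another test in the same style, under those assumptions:

def testIndexThenUnindex(self):
    mngr = SolrConnectionManager(active=True)
    proc = SolrIndexProcessor(mngr)
    conn = mngr.getConnection()
    # queue one canned response per expected request
    output = fakehttp(conn, getData('schema.xml'),
                      getData('add_response.txt'),
                      getData('delete_response.txt'))
    foo = Foo(id='500', name='foo')
    proc.index(foo)    # consumes the schema and add responses
    proc.unindex(foo)  # consumes the delete response
    mngr.closeConnection()
    self.assertEqual(len(output), 3)  # schema, add, and delete requests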
Example 6: setUp
def setUp(self):
    provideUtility(SolrConnectionConfig(), ISolrConnectionConfig)
    self.mngr = SolrConnectionManager()
    self.mngr.setHost(active=True)
    conn = self.mngr.getConnection()
    fakehttp(conn, getData('schema.xml'))  # fake schema response
    self.mngr.getSchema()  # read and cache the schema
    self.proc = SolrIndexProcessor(self.mngr)
Example 7: setUp
def setUp(self):
    self.mngr = SolrConnectionManager()
    self.mngr.setHost(active=True)
    conn = self.mngr.getConnection()
    fakehttp(conn, getData('schema.xml'))  # fake schema response
    self.mngr.getSchema()  # read and cache the schema
    self.proc = SolrIndexProcessor(self.mngr)
    config = getConfig()
    config.atomic_updates = True
Example 8: setUp
def setUp(self):
    provideUtility(SolrConnectionConfig(), ISolrConnectionConfig)
    self.mngr = SolrConnectionManager()
    self.mngr.setHost(active=True)
    self.conn = self.mngr.getConnection()
    self.proc = SolrIndexProcessor(self.mngr)
    self.log = []  # catch log messages...

    def logger(*args):
        self.log.extend(args)
    logger_indexer.warning = logger
Example 9: sync
def sync(self, batch=1000):
    """Sync the Solr index with the portal catalog. Records contained
    in the catalog but not in Solr will be indexed, and records not
    contained in the catalog will be removed.
    """
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    key = queryUtility(ISolrConnectionManager).getSchema().uniqueKey
    zodb_conn = self.context._p_jar
    catalog = getToolByName(self.context, "portal_catalog")
    getIndex = catalog._catalog.getIndex
    modified_index = getIndex("modified")
    uid_index = getIndex(key)
    log = self.mklog()
    real = timer()  # real time
    lap = timer()  # real lap time (for intermediate commits)
    cpu = timer(clock)  # cpu time
    # get Solr status
    query = "+%s:[* TO *]" % key
    response = conn.search(q=query, rows=MAX_ROWS, fl="%s modified" % key)
    # avoid creating DateTime instances
    simple_unmarshallers = unmarshallers.copy()
    simple_unmarshallers["date"] = parse_date_as_datetime
    flares = SolrResponse(response, simple_unmarshallers)
    response.close()
    solr_results = {}
    solr_uids = set()

    def _utc_convert(value):
        t_tup = value.utctimetuple()
        return (((t_tup[0] * 12 + t_tup[1]) * 31 + t_tup[2]) * 24 +
                t_tup[3]) * 60 + t_tup[4]

    for flare in flares:
        uid = flare[key]
        solr_uids.add(uid)
        solr_results[uid] = _utc_convert(flare["modified"])
    # get catalog status
    cat_results = {}
    cat_uids = set()
    for uid, rid in uid_index._index.items():
        cat_uids.add(uid)
        cat_results[uid] = rid
    # differences
    index = cat_uids.difference(solr_uids)
    solr_uids.difference_update(cat_uids)
    unindex = solr_uids
    processed = 0
    flush = notimeout(lambda: conn.flush())

    def checkPoint():
        msg = "intermediate commit (%d items processed, " \
              "last batch in %s)...\n" % (processed, lap.next())
        log(msg)
        logger.info(msg)
        flush()
        zodb_conn.cacheGC()

    cpi = checkpointIterator(checkPoint, batch)
    # look up objects
    uid_rid_get = cat_results.get
    rid_path_get = catalog._catalog.paths.get
    catalog_traverse = catalog.unrestrictedTraverse

    def lookup(uid, rid=None, uid_rid_get=uid_rid_get,
               rid_path_get=rid_path_get, catalog_traverse=catalog_traverse):
        if rid is None:
            rid = uid_rid_get(uid)
        if not rid:
            return None
        if not isinstance(rid, int):
            rid = tuple(rid)[0]
        path = rid_path_get(rid)
        if not path:
            return None
        try:
            obj = catalog_traverse(path)
        except AttributeError:
            return None
        return obj

    log('processing %d "unindex" operations next...\n' % len(unindex))
    op = notimeout(lambda uid: conn.delete(id=uid))
    for uid in unindex:
        obj = lookup(uid)
        if obj is None:
            op(uid)
            processed += 1
            cpi.next()
        else:
            log("not unindexing existing object %r.\n" % uid)
    log('processing %d "index" operations next...\n' % len(index))
    op = notimeout(lambda obj: proc.index(obj))
    for uid in index:
        obj = lookup(uid)
        if indexable(obj):
            op(obj)
            processed += 1
            cpi.next()
        else:
            # ......... rest of the code omitted .........
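At its core, the sync boils down to a set difference between the catalog's UIDs and Solr's: UIDs present only in the catalog get indexed, UIDs present only in Solr get unindexed. The same reconciliation logic, isolated from the example with toy data:

cat_uids = {'uid-1', 'uid-2', 'uid-3'}   # as built from uid_index
solr_uids = {'uid-2', 'uid-3', 'uid-4'}  # as built from the Solr flares

index = cat_uids.difference(solr_uids)   # {'uid-1'}: only in the catalog
solr_uids.difference_update(cat_uids)    # drop shared UIDs in place
unindex = solr_uids                      # {'uid-4'}: only in Solr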
Example 10: reindex
def reindex(self, batch=1000, skip=0, limit=0):
    """ find all contentish objects (meaning all objects derived from one
        of the catalog mixin classes) and (re)index them """
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    zodb_conn = self.context._p_jar
    log = self.mklog()
    log('reindexing solr catalog...\n')
    if skip:
        log('skipping indexing of %d object(s)...\n' % skip)
    if limit:
        log('limiting indexing to %d object(s)...\n' % limit)
    real = timer()  # real time
    lap = timer()  # real lap time (for intermediate commits)
    cpu = timer(clock)  # cpu time
    processed = 0
    schema = manager.getSchema()
    key = schema.uniqueKey
    updates = {}  # dict to hold data to be updated
    flush = notimeout(lambda: conn.flush())

    def checkPoint():
        for boost_values, data in updates.values():
            adder = data.pop('_solr_adder')
            adder(conn, boost_values=boost_values, **data)
        updates.clear()
        msg = 'intermediate commit (%d items processed, ' \
              'last batch in %s)...\n' % (processed, lap.next())
        log(msg)
        logger.info(msg)
        flush()
        zodb_conn.cacheGC()

    cpi = checkpointIterator(checkPoint, batch)
    count = 0
    for path, obj in findObjects(self.context):
        if ICheckIndexable(obj)():
            count += 1
            if count <= skip:
                continue
            data, missing = proc.getData(obj)
            prepareData(data)
            if not missing:
                value = data.get(key, None)
                if value is not None:
                    log('indexing %r\n' % obj)
                    pt = data.get('portal_type', 'default')
                    adder = queryAdapter(obj, ISolrAddHandler, name=pt)
                    if adder is None:
                        adder = DefaultAdder(obj)
                    data['_solr_adder'] = adder
                    updates[value] = (boost_values(obj, data), data)
                    processed += 1
                    cpi.next()
            else:
                log('missing data, skipping indexing of %r.\n' % obj)
            if limit and count >= (skip + limit):
                break
    checkPoint()
    conn.commit()
    log('solr index rebuilt.\n')
    msg = 'processed %d items in %s (%s cpu time).'
    msg = msg % (processed, real.next(), cpu.next())
    log(msg)
    logger.info(msg)
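Unlike Example 1, this variant dispatches each add through an ISolrAddHandler adapter looked up by portal type, falling back to DefaultAdder. Given only the call signature visible above (adder(conn, boost_values=..., **data)), a custom handler could look roughly like this; the base class and registration details are assumptions, not taken from collective.solr:

from zope.component import provideAdapter
from zope.interface import Interface, implementer
from collective.solr.interfaces import ISolrAddHandler

@implementer(ISolrAddHandler)
class NewsItemAdder(object):
    """Hypothetical add handler for a 'News Item' portal type."""

    def __init__(self, context):
        self.context = context

    def __call__(self, conn, **data):
        boost_values = data.pop('boost_values', None)
        data['is_news'] = True  # enrich the document before sending it
        conn.add(boost_values=boost_values, **data)

# register under the portal type name used for the adapter lookup above
provideAdapter(NewsItemAdder, adapts=(Interface,),
               provides=ISolrAddHandler, name='News Item')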
Example 11: QueueIndexerTests
class QueueIndexerTests(TestCase):

    def setUp(self):
        provideUtility(SolrConnectionConfig(), ISolrConnectionConfig)
        self.mngr = SolrConnectionManager()
        self.mngr.setHost(active=True)
        conn = self.mngr.getConnection()
        fakehttp(conn, getData('schema.xml'))  # fake schema response
        self.mngr.getSchema()  # read and cache the schema
        self.proc = SolrIndexProcessor(self.mngr)

    def tearDown(self):
        self.mngr.closeConnection()
        self.mngr.setHost(active=False)

    def testPrepareData(self):
        data = {'allowedRolesAndUsers': [
            'user:test_user_1_', 'user:portal_owner']}
        prepareData(data)
        self.assertEqual(
            data,
            {
                'allowedRolesAndUsers': [
                    'user$test_user_1_',
                    'user$portal_owner'
                ]
            }
        )

    def testLanguageParameterHandling(self):
        # empty strings are replaced...
        data = {'Language': ['en', '']}
        prepareData(data)
        self.assertEqual(data, {'Language': ['en', 'any']})
        data = {'Language': ''}
        prepareData(data)
        self.assertEqual(data, {'Language': 'any'})
        # for other indices this shouldn't happen...
        data = {'Foo': ['en', '']}
        prepareData(data)
        self.assertEqual(data, {'Foo': ['en', '']})

    def testIndexObject(self):
        response = getData('add_response.txt')
        # fake add response
        output = fakehttp(self.mngr.getConnection(), response)
        # indexing sends data
        self.proc.index(Foo(id='500', name='python test doc'))
        self.assertEqual(sortFields(str(output)), getData('add_request.txt'))

    def testIndexAccessorRaises(self):
        response = getData('add_response.txt')
        # fake add response
        output = fakehttp(self.mngr.getConnection(), response)

        def brokenfunc():
            raise ValueError
        self.proc.index(Foo(id='500', name='python test doc',
                            text=brokenfunc))  # indexing sends data
        self.assertEqual(sortFields(str(output)), getData('add_request.txt'))

    def testPartialIndexObject(self):
        foo = Foo(id='500', name='foo', price=42.0)
        # first index all attributes...
        response = getData('add_response.txt')
        output = fakehttp(self.mngr.getConnection(), response)
        self.proc.index(foo)
        self.assert_(str(output).find(
            '<field name="price">42.0</field>') > 0, '"price" data not found')
        # then only a subset...
        response = getData('add_response.txt')
        output = fakehttp(self.mngr.getConnection(), response)
        self.proc.index(foo, attributes=['id', 'name'])
        output = str(output)
        self.assert_(
            output.find('<field name="name">foo</field>') > 0,
            '"name" data not found'
        )
        # at this point we'd normally check for a partial update:
        # self.assertEqual(output.find('price'), -1, '"price" data found?')
        # self.assertEqual(output.find('42'), -1, '"price" data found?')
        # however, until SOLR-139 has been implemented (re)index operations
        # always need to provide data for all attributes in the schema...
        self.assert_(
            output.find('<field name="price">42.0</field>') > 0,
            '"price" data not found'
        )

    def testDateIndexing(self):
        foo = Foo(id='zeidler', name='andi', cat='nerd',
                  timestamp=DateTime('May 11 1972 03:45 GMT'))
        response = getData('add_response.txt')
        # fake add response
        output = fakehttp(self.mngr.getConnection(), response)
        self.proc.index(foo)
        required = '<field name="timestamp">1972-05-11T03:45:00.000Z</field>'
        self.assert_(str(output).find(required) > 0, '"date" data not found')

    def testDateIndexingWithPythonDateTime(self):
        foo = Foo(id='gerken', name='patrick', cat='nerd',
                  # ......... rest of the code omitted .........