本文整理汇总了Python中seo_pysolr.Solr.search方法的典型用法代码示例。如果您正苦于以下问题:Python Solr.search方法的具体用法?Python Solr.search怎么用?Python Solr.search使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类seo_pysolr.Solr的用法示例。
在下文中一共展示了Solr.search方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: clear_solr
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
def clear_solr(buid):
    """Delete all jobs for a given business unit/job source.

    Logs the number of documents indexed for ``buid`` before issuing the
    delete, then logs completion.

    Inputs:
    :buid: The ID of the business unit whose jobs are removed from Solr.
    """
    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    # Restrict the count to this business unit so the logged number matches
    # what the delete below actually removes (an unfiltered "*:*" query
    # would report the size of the whole index).
    hits = conn.search(q="*:*", fq="buid:%s" % buid, rows=1, mlt="false",
                       facet="false").hits
    logging.info("BUID:%s - SOLR - Deleting all %s jobs", buid, hits)
    conn.delete(q="buid:%s" % buid)
    logging.info("BUID:%s - SOLR - All jobs deleted.", buid)
示例2: bread_box_title_heading
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
def bread_box_title_heading(title_slug_value, jobs=None):
    """Return a display title for ``title_slug_value``.

    Lookup order:
      1. the first entry of ``jobs`` whose ``title_slug`` equals the slug;
      2. the first document of a Solr search on ``title_slug``;
      3. the slug itself with dashes replaced by spaces, title-cased.

    Inputs:
    :title_slug_value: The slug to resolve. A falsy value yields ``None``.
    :jobs: Optional sequence of objects exposing ``title_slug`` and
        ``title`` attributes.

    Returns:
    The resolved title, or ``None`` when no slug was supplied.
    """
    if not title_slug_value:
        return None

    # The first job is part of the iteration, so no separate check of
    # jobs[0] is required.
    for job in jobs or ():
        if title_slug_value == job.title_slug:
            return job.title

    # Try searching solr for a matching title.
    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    search_terms = {
        'q': u'title_slug:%s' % title_slug_value,
        'fl': 'title, title_slug',
        'rows': 1,
    }
    try:
        res = conn.search(**search_terms)
    except SolrError:
        # Poorly formatted title_slug_values can sometimes cause Solr errors.
        res = None

    # Guard against an empty result list before indexing into it.
    docs = res.docs if res is not None else []
    if docs and docs[0].get('title_slug') == title_slug_value:
        return docs[0]['title']

    # The slug is known to be non-empty here, so derive a title from it.
    return title_slug_value.replace('-', ' ').title()
示例3: remove_expired_jobs
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
def remove_expired_jobs(buid, active_jobs, upload_chunk_size=1024):
    """
    Given a job source id and a list of active jobs for that job source,
    remove the jobs on solr that are not among the active jobs.

    Inputs:
    :buid: The business unit / job source ID used to filter the index.
    :active_jobs: Iterable of active jobs. Each entry is either a dict
        carrying an 'id' key or the bare id value itself.
    :upload_chunk_size: Maximum number of ids placed in one delete query.

    Returns:
    The set of ids that were in the index for ``buid`` but not among the
    active jobs.
    """
    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    # First ask only for the hit count, then fetch that many rows.
    count = conn.search("*:*", fq="buid:%s" % buid, facet="false",
                        mlt="false").hits
    # Only the 'id' field is read below, so do not pull whole documents.
    old_jobs = conn.search("*:*", fq="buid:%s" % buid, fl="id",
                           facet="false", rows=count, mlt="false").docs
    # Accept both job dicts and plain ids so existing callers of either
    # shape keep working.
    active_ids = set(j['id'] if isinstance(j, dict) else j
                     for j in active_jobs)
    old_ids = set(j['id'] for j in old_jobs)
    expired = old_ids - active_ids
    # Delete in chunks to keep each query string bounded.
    for jobs in chunk(list(expired), upload_chunk_size):
        query = "id:(%s)" % " OR ".join(str(x) for x in jobs)
        conn.delete(q=query)
    return expired
示例4: _solr_results_chunk
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
def _solr_results_chunk(tup, buid, step):
    """
    Fetch one page of UIDs for a business unit from the Solr index.

    ``tup`` is a (start_index, stop_index) tuple; only the start index is
    used, as the query offset, while ``step`` sets the page size. Returns
    the set of 'uid' values present in the returned documents.
    """
    solr = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    offset = tup[0]
    response = solr.search("*:*", fq="buid:%s" % buid, fl="uid", rows=step,
                           start=offset, facet="false", mlt="false")
    uids = set()
    for doc in response.docs:
        # Documents without a 'uid' field are ignored.
        if 'uid' in doc:
            uids.add(doc['uid'])
    return uids
示例5: SiteTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
class SiteTestCase(DirectSEOBase):
"""
We're adding these tests to ensure unicode jobs descriptions and titles
make it through the import process and work with high-level features.
We should use http requests wherever possible since it's difficult to
predict which modules will have issues with unicode.
"""
def setUp(self):
    """Empty the local Solr index and build the site fixtures."""
    super(SiteTestCase, self).setUp()

    # Every test starts from an empty index on the local Solr instance.
    self.conn = Solr('http://127.0.0.1:8983/solr/seo')
    self.conn.delete(q="*:*")

    self.businessunit = factories.BusinessUnitFactory(id=0)
    self.buid = self.businessunit.id
    feed_name = 'dseo_feed_%s.xml' % self.buid
    self.filepath = os.path.join(import_jobs.DATA_DIR, feed_name)

    # Replace any existing sites with a single known one.
    SeoSite.objects.all().delete()
    self.site = factories.SeoSiteFactory(id=1)

    self.configuration = factories.ConfigurationFactory(status=2)
    self.configuration.save()

    # Attach only the configuration created above to the site.
    site_configurations = self.site.configurations
    site_configurations.clear()
    site_configurations.add(self.configuration)
def tearDown(self):
    """Run the base teardown, then delete every document in the index."""
    super(SiteTestCase, self).tearDown()
    match_all = "*:*"
    self.conn.delete(q=match_all)
def test_unicode_title(self):
# Test imports
group = factories.GroupFactory()
self.site.group = group
self.site.business_units.add(self.businessunit)
self.site.save()
import_jobs.update_solr(self.buid, download=False, delete_feed=False,
data_dir='seo/tests/data/')
solr_jobs = self.conn.search("*:*")
resp = self.client.get('/')
self.assertEqual(resp.context['total_jobs_count'], solr_jobs.hits)
# test standard facets against Haystack query
standard_cf = factories.CustomFacetFactory.build(
# default facet will return both jobs
name="Keyword Facet",
group=group,
show_production=True)
standard_cf.save()
standard_cf.keyword.add(u'Ключевые')
standard_cf.save()
standard_site_facet = factories.SeoSiteFacetFactory(
seosite=self.site,
customfacet=standard_cf,
facet_type=factories.SeoSiteFacet.STANDARD)
standard_site_facet.save()
# test standard facets against Haystack query
standard_cf2 = factories.CustomFacetFactory.build(
# default facet will return both jobs
name='Country Facet',
country='United States',
group=group,
show_production=True)
standard_cf2.save()
standard_site_facet2 = factories.SeoSiteFacetFactory(
seosite=self.site,
customfacet=standard_cf2,
facet_type=factories.SeoSiteFacet.STANDARD)
standard_site_facet2.save()
resp = self.client.get('/keyword-facet/new-jobs/',
HTTP_HOST=self.site.domain, follow=True)
sqs = DESearchQuerySet().filter(text=u'Ключевые')
self.assertEqual(len(resp.context['default_jobs']), sqs.count())
for facet_widget in resp.context['widgets']:
# Ensure that no standard facet has more results than current
# search results
for count_tuple in facet_widget.items:
self.assertTrue(sqs.count() >= count_tuple[1])
# Test default site facets against PySolr query
from django.core.cache import cache
cache.clear()
default_cf = factories.CustomFacetFactory.build(
name="Default Facet",
title=u"Специалист",
group=group,
show_production=True)
default_cf.save()
default_site_facet = factories.SeoSiteFacetFactory(
seosite=self.site,
facet_type=factories.SeoSiteFacet.DEFAULT,
customfacet=default_cf)
default_site_facet.save()
resp = self.client.get('/jobs/', HTTP_HOST=self.site.domain,
follow=True)
total_jobs = resp.context['total_jobs_count']
solr_jobs = self.conn.search(q=u"title:Специалист")
self.assertEqual(total_jobs, solr_jobs.hits)
self.assertEqual(len(resp.context['default_jobs']), total_jobs)
#.........这里部分代码省略.........
示例6: update_solr
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
def update_solr(buid, download=True, force=True, set_title=False,
delete_feed=True, data_dir=DATA_DIR, clear_cache=False):
"""
Update the Solr master index with the data contained in a feed file
for a given buid/jsid.
This is meant to be a standalone function such that the state of the
Solr index is not tied to the state of the database.
Inputs:
:buid: An integer; the ID for a particular business unit.
:download: Boolean. If False, this process will not download a new
feedfile, but instead use the one on disk. Should only be false for
the purposes of our test suite.
:force: Boolean. If True, every job seen in the feed file will be
updated in the index. Otherwise, only the jobs seen in the feed file
but not seen in the index will be updated. This latter option will
soon be deprecated.
Returns:
A 2-tuple consisting of the number of jobs added and the number deleted.
Writes/Modifies:
Job data found in the feed file is used to modify the Solr index. This
includes adds & deletes. (Solr does not have a discrete equivalent to
SQL's UPDATE; by adding a document with the same UID as a document in
the index, the equivalent of an update operation is performed.)
"""
if download:
filepath = download_feed_file(buid, data_dir=data_dir)
else:
# Get current worker process id, to prevent race conditions.
try:
p = current_process()
process_id = p.index
except:
process_id = 0
filepath = os.path.join(data_dir, str(process_id), FEED_FILE_PREFIX + str(buid) +
'.xml')
bu = BusinessUnit.objects.get(id=buid)
try:
co = bu.company_set.all()[0]
except IndexError:
co = None
jobfeed = DEv2JobFeed(filepath, jsid=buid, markdown=bu.enable_markdown,
company=co)
# If the feed file did not pass validation, return. The return value is
# '(0, 0)' to match what's returned on a successful parse.
if jobfeed.errors:
error = jobfeed.error_messages
logging.error("BUID:%s - Feed file has failed validation on line %s. "
"Exception: %s" % (buid, error['line'],
error['exception']))
raise FeedImportError(error)
# A dictionary of uids
jobs = jobfeed.jobparse()
# Build a set of all the UIDs for all those instances.
job_uids = set([long(i.get('uid')) for i in jobs if i.get('uid')])
conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
step1 = 1024
# Get the count of all the results in the Solr index for this BUID.
hits = conn.search("*:*", fq="buid:%s" % buid, facet="false",
mlt="false").hits
# Create (start-index, stop-index) tuples to facilitate handling results
# in ``step1``-sized chunks. So if ``hits`` returns 2048 results,
# ``job_slices`` will look like ``[(0,1024), (1024, 2048)]``. Those
# values are then used to slice up the total results.
#
# This was put in place because part of the logic to figuring out what
# jobs to delete from and add jobs to the Solr index is using set
# algebra. We convert the total list of UIDs in the index and the UIDs
# in the XML feed to sets, then compare them via ``.difference()``
# (seen below). However for very large feed files, say 10,000+ jobs,
# this process was taking so long that the connection would time out. To
# address this problem we break up the comparisons as described above.
# This results in more requests but it alleviates the connection timeout
# issue.
job_slices = slices(range(hits), step=step1)
results = [_solr_results_chunk(tup, buid, step1) for tup in job_slices]
solr_uids = reduce(lambda x, y: x | y, results) if results else set()
# Return the job UIDs that are in the Solr index but not in the feed
# file.
solr_del_uids = solr_uids.difference(job_uids)
if not force:
# Return the job UIDs that are in the feed file but not in the Solr
# index.
solr_add_uids = job_uids.difference(solr_uids)
# ``jobfeed.solr_jobs()`` yields a list of dictionaries. We want to
# filter out any dictionaries whose "uid" key is not in
# ``solr_add_uids``. This is because by default we only want to add
# new documents (which each ``solr_jobs()`` dictionary represents),
# not update.
add_docs = filter(lambda x: int(x.get("uid", 0)) in solr_add_uids,
jobfeed.solr_jobs())
else:
#.........这里部分代码省略.........
示例7: DESolrSearchBackend
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
class DESolrSearchBackend(SolrSearchBackend):
def __init__(self, connection_alias, **connection_options):
    """
    Initialise the parent backend, then set ``self.conn`` to a Solr
    connection that carries HTTP auth credentials.

    Inputs:
    :HTTP_AUTH_USERNAME: Username used for http authentication
    :HTTP_AUTH_PASSWORD: Password used for http authentication
    """
    super(DESolrSearchBackend, self).__init__(connection_alias,
                                              **connection_options)
    # Either credential is None when its option is not supplied.
    credentials = (connection_options.get("HTTP_AUTH_USERNAME"),
                   connection_options.get("HTTP_AUTH_PASSWORD"))
    self.conn = Solr(connection_options['URL'], auth=credentials,
                     timeout=self.timeout)
@log_query
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
fields='', highlight=False, facets=None, date_facets=None,
query_facets=None, narrow_queries=None, spelling_query=None,
within=None, dwithin=None, distance_point=None,
limit_to_registered_models=None, result_class=None,
facet_mincount=None, facet_limit=None, facet_prefix=None,
facet_sort=None, facet_offset=None, bf=None, **kwargs):
"""
Overrides both search() and build_search_kwargs().
"""
if len(query_string) == 0:
return {
'results': [],
'hits': 0,
}
kwargs = {
'fl': '* score',
'mlt': 'false'
}
if fields:
if isinstance(fields, (list, set)):
fields = " ".join(fields)
kwargs['fl'] = fields
# This code was causing sort_by to break, but we're keeping it as a
# reference in case we want to enable geographic sorting in the future.
# Haystack does have an order_by_distance function, so this code might
# not be necessary
# Jason McLaughlin 10/30/2012
# geo_sort = False
# if sort_by is not None:
# if sort_by in ['distance asc', 'distance desc'] and distance_point:
# # Do the geo-enabled sort.
# lng, lat = distance_point['point'].get_coords()
# kwargs['sfield'] = distance_point['field']
# kwargs['pt'] = '%s,%s' % (lat, lng)
# geo_sort = True
#
# if sort_by == 'distance asc':
# kwargs['sort'] = 'geodist() asc'
# else:
# kwargs['sort'] = 'geodist() desc'
# else:
# if sort_by.startswith('distance '):
# warnings.warn("In order to sort by distance, "
# "you must call the '.distance(...)' "
# "method.")
if sort_by is not None:
# Regular sorting.
kwargs['sort'] = sort_by
if bf is not None:
kwargs['bf'] = bf
if start_offset is not None:
kwargs['start'] = start_offset
if end_offset is not None:
kwargs['rows'] = end_offset - start_offset
if highlight is True:
kwargs['hl'] = 'true'
kwargs['hl.fragsize'] = '100'
kwargs['hl.snippets'] = '2'
kwargs['hl.mergeContiguous'] = 'true'
kwargs['hl.simple.pre'] = '<b>'
kwargs['hl.simple.post'] = '</b>'
if self.include_spelling is True:
kwargs['spellcheck'] = 'true'
kwargs['spellcheck.collate'] = 'true'
kwargs['spellcheck.count'] = 1
if spelling_query:
kwargs['spellcheck.q'] = spelling_query
if facets is not None:
kwargs['facet'] = 'on'
kwargs['facet.field'] = facets.keys()
for facet_field, options in facets.items():
#.........这里部分代码省略.........
示例8: JobFeedTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
#.........这里部分代码省略.........
self.assertNotEqual(job['html_description'].find('<li>'), -1)
self.assertNotEqual(job['html_description'].find('</li>'), -1)
def test_no_null_values(self):
    """No parsed job may carry the literal string 'null' as its city."""
    feed = DEv2JobFeed('seo/tests/data/dseo_feed_0.markdown.xml', jsid=0)
    for parsed_job in feed.solr_jobs():
        self.assertNotEqual(parsed_job['city'], 'null')
def test_dev2_feed(self):
    """A downloaded feed exposes the expected jsid, source name and size."""
    feed = DEv2JobFeed(download_feed_file(self.buid_id))
    parsed = feed.jobparse()
    self.assertEqual(feed.jsid, self.buid_id)
    self.assertEqual(feed.job_source_name, self.businessunit.title)
    self.assertEqual(len(parsed), self.numjobs)
def test_mocids(self):
    """
    Tests that mocid fields exist when jobs are imported from a feed and
    added to a solr connection.
    """
    feed_path = download_feed_file(self.buid_id)
    feed_jobs = DEv2JobFeed(feed_path).solr_jobs()
    # This writes to the index behind self.conn, which is configured
    # outside this method.
    # NOTE(review): presumably the hardcoded local Solr instance, so that
    # production data is never altered -- confirm in setUp.
    self.conn.add(feed_jobs)
    # Count this business unit's documents that match "-mocid:[* TO *]".
    filter_query = "buid:%s -mocid:[* TO *]" % self.buid_id
    response = self.conn.search(q="*:*", fq=filter_query)
    self.assertEqual(response.hits, self.numjobs)
    for feed_job in feed_jobs:
        self.assertTrue('mocid' in feed_job)
def test_empty_feed(self):
    """
    Test that the schema for the v2 DirectEmployers feed file schema
    allows for empty feed files.
    """
    feed = DEv2JobFeed(self.emptyfeed)
    # jobparse() would throw an exception if the schema treated empty
    # feed files as invalid, so a zero-length result shows it accepts them.
    parsed = feed.jobparse()
    self.assertEqual(len(parsed), 0)
def test_empty_solr(self):
    """
    Tests for the proper behavior when encountering a job-less, but
    otherwise valid, feed file. The proper behavior is to delete any
    jobs associated with that BusinessUnit from the Solr index.
    """
    # Normal download-and-parse operation on a feed file with jobs.
    update_solr(self.buid_id)
    results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
    self.assertEqual(results.hits, self.numjobs)

    # Download-and-parse operation on a feed file with no jobs. Expected
    # behavior is to delete all jobs.
    # NOTE(review): _get_feedfile() presumably puts the empty feed file in
    # place on disk -- confirm against its definition.
    self._get_feedfile()
    update_solr(self.buid_id, download=False)

    # Verify the documented outcome: no jobs remain for this business
    # unit once the empty feed has been processed.
    results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
    self.assertEqual(results.hits, 0)
示例9: LoadETLTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
class LoadETLTestCase(DirectSEOBase):
    """Tests for loading jobs from a jobsfs zipfile and filtering them."""

    fixtures = ['countries.json']

    def setUp(self):
        """Empty the Solr index and read the jobs from the test zipfile."""
        self.solr_settings = {
            'default': {'URL': 'http://127.0.0.1:8983/solr/seo'}
        }
        self.solr = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
        self.solr.delete(q="*:*")

        self.guid = 'ce2ca701-eeca-4c13-96ba-e6bde9cb7060'
        self.zipfile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data',
            'ActiveDirectory_ce2ca701-eeca-4c13-96ba-e6bde9cb7060.zip')

        # Zip archives are binary data; open in 'rb' so the bytes are not
        # altered by text-mode handling.
        with open(self.zipfile, 'rb') as zf:
            self.jobs = list(get_jobs_from_zipfile(zf, self.guid))

        self.businessunit = BusinessUnitFactory(id=0)
        self.buid = self.businessunit.id
        self.name = "Test"

    def tearDown(self):
        pass

    @patch('import_jobs.get_jobsfs_zipfile')
    def test_update_job_source(self, mock_jobsfs):
        """update_job_source indexes the jobs and updates the job count."""
        zip_handle = open(self.zipfile, 'rb')
        # Close the handle even when an assertion below fails.
        self.addCleanup(zip_handle.close)
        mock_jobsfs.return_value = zip_handle

        count = self.solr.search('*:*').hits
        self.assertEqual(count, 0, "Jobs for buid in solr before the test. Cannot guarantee correct behavior.")
        self.assertEqual(self.businessunit.associated_jobs, 4, "Initial Job Count does not match the factory")

        update_job_source(self.guid, self.buid, self.name)

        count = self.solr.search('buid:%s' % self.buid).hits
        # Note the job count being one low here is due to one job being
        # filtered out due to include_in_index_bit
        self.assertEqual(count, 38, "38 Jobs not in solr after call to update job source. Found %s" % count)

        # Re-read the value from the database and report that same value on
        # failure; the in-memory self.businessunit is stale after the import.
        stored_count = BusinessUnit.objects.get(id=self.buid).associated_jobs
        self.assertEqual(stored_count, 38,
                         "Job Count not updated after imports: Should be 38 was %s" % stored_count)

    def test_filtering_on_includeinindex_bit(self):
        """Test that filtering on the include_in_index bit works"""
        # Prove we have the expected number of jobs in the zipfile itself.
        self.assertEqual(len(self.jobs), 39,
                         "Expected to find 39 jobs in the test zipfile, instead found %s" % len(self.jobs))

        # Prove that filtering works.
        filtered_jobs = list(filter_current_jobs(self.jobs, self.businessunit))
        self.assertEqual(len(filtered_jobs), 38,
                         "filter_current_jobs should rmeove jobs with the includeinindex bit set, "
                         "it's expected to return %s. Instead it returned %s" % (38, len(filtered_jobs)))

    def test_businessunit_ignore_includeinindex(self):
        """Test that filtering on the include_in_index bit can be overridden on a per business unit basis."""
        # Set ignore_includeinindex on the test BusinessUnit
        self.businessunit.ignore_includeinindex = True
        self.businessunit.save()

        # Prove we have the expected number of jobs in the zipfile itself.
        self.assertEqual(len(self.jobs), 39,
                         "Expected to find 39 jobs in the test zipfile, instead found %s" % len(self.jobs))

        # Prove that filtering works.
        filtered_jobs = list(filter_current_jobs(self.jobs, self.businessunit))
        self.assertEqual(len(filtered_jobs), 39,
                         "filter_current_jobs should ignore the includeinindex bit, returning 39 jobs. "
                         "Instead returned %s." % len(filtered_jobs))
示例10: ImportJobsTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import search [as 别名]
#.........这里部分代码省略.........
add_company(bu1)
companies = bu1.company_set.all()
self.assertEqual(len(companies), 1)
co = companies[0]
self.assertEqual(co.name, bu1.title)
# Add the 2nd business unit
add_company(bu2)
# Both units should be attached to that company
self.assertEqual(bu1.company_set.all()[0], bu2.company_set.all()[0])
self.assertEqual(bu1.company_set.all().count(), 1)
self.assertIn(bu1, co.job_source_ids.all())
self.assertIn(bu2, co.job_source_ids.all())
self.assertEqual(co.name, bu1.title)
self.assertEqual(co.name, bu2.title)
bu2.title = "New company name"
add_company(bu1)
add_company(bu2)
self.assertEqual(len(co.job_source_ids.all()), 1)
self.assertNotEqual(bu1.company_set.all(), bu2.company_set.all())
self.assertEqual(co.name, bu1.title)
self.assertEqual(len(bu2.company_set.all()), 1)
co2 = bu2.company_set.all()[0]
self.assertEqual(co2.name, bu2.title)
self.assertNotEqual(co2.name, bu1.title)
self.assertNotEqual(co.name, bu2.title)
def test_duplicate_company(self):
    """
    After add_company runs on a business unit whose title equals an
    existing company's name, that company is the unit's first related
    company, even though another company was attached beforehand.
    """
    unrelated = CompanyFactory()
    unrelated.save()
    acme = CompanyFactory(name="Acme corp")
    acme.save()

    unit = self.businessunit
    unit.company_set.add(unrelated)
    unit.title = "Acme corp"
    add_company(unit)

    first_related = unit.company_set.all()[0]
    self.assertEqual(first_related, acme)
def test_set_bu_title(self):
    """
    Ensure that if a feedfile for a BusinessUnit comes through, and
    the `title` attribute for that BusinessUnit is not set, that
    `helpers.update_solr` sets the `title` attribute properly.
    """
    bu = BusinessUnit.objects.get(id=self.buid_id)
    bu.title = None
    bu.save()
    # Since the BusinessUnit title is None, the intent is that update_solr
    # will set its title to match the company name found in the feed file.
    # Its return value is not needed here.
    update_solr(self.buid_id)
    # We have to get the updated state of the BusinessUnit instance, since
    # changes to the database won't be reflected by our in-memory version of
    # the data.
    bu = BusinessUnit.objects.get(id=self.buid_id)
    # The title attribute should now equal the initial value established in
    # the setUp method.
    self.assertEqual(self.businessunit.title, bu.title)
def test_add_company(self):
    """
    Create environment to test for every possible case--

    - Existing relationship but the name is different                 pk=10
    - No existing relationship, but the company exists in the database (as
      established by the BusinessUnit title matching a company name)  pk=11
    - No relationship and the company is not in the database          pk=12

    Start with 2 Company objects and 3 BusinessUnit objects
    End up with 3 Company objects and 3 BusinessUnit objects
    """
    # Cover pks 10, 11 and 12 as described above. (range(10, 4) is empty,
    # which would make this test pass without checking anything.)
    for i in range(10, 13):
        # Model lookups go through the ``objects`` manager.
        add_company(BusinessUnit.objects.get(id=i))

        # NOTE(review): assumes Company pks mirror the BusinessUnit pks
        # (10-12) -- confirm against the fixtures.
        company = Company.objects.get(id=i)

        # The names of the BU and the Co should be the same
        self.assertEqual(BusinessUnit.objects.get(id=i).title,
                         company.name,
                         msg="Company names do not match")
        # ensure the relationship was formed
        self.assertIn(company,
                      BusinessUnit.objects.get(id=i).company_set.all(),
                      msg="Company is not related to job feed")
def test_remove_expired_jobs(self):
    """
    remove_expired_jobs deletes the indexed jobs that are not among the
    active jobs and returns their ids.

    Indexed ids are seo.2 .. seo.9 and active ids are seo.0 .. seo.3, so
    the six ids seo.4 .. seo.9 are expected to be removed.
    """
    buid = 12345
    active_jobs = [{'id': 'seo.%s' % i, 'buid': buid} for i in range(4)]
    old_jobs = [{'id': 'seo.%s' % i, 'buid': buid} for i in range(2, 10)]

    with self.settings(HAYSTACK_CONNECTIONS=self.solr_settings):
        self.solr.add(old_jobs)
        self.solr.commit()

        # remove_expired_jobs reads j['id'] from every active job, so pass
        # the job dicts themselves rather than a list of bare ids.
        removed = remove_expired_jobs(buid, active_jobs)
        self.assertEqual(len(removed), 6, "Removed jobs %s" % removed)

        ids = [d['id'] for d in self.solr.search('*:*').docs]
        # Check each expired id individually; testing whether a list of
        # ints is an element of ``ids`` could never fail.
        for i in range(4, 10):
            self.assertNotIn('seo.%s' % i, ids)