本文整理汇总了Python中seo_pysolr.Solr.add方法的典型用法代码示例。如果您正苦于以下问题：Python Solr.add方法的具体用法？Python Solr.add怎么用？Python Solr.add使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类seo_pysolr.Solr的用法示例。
在下文中一共展示了Solr.add方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_jobs
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
def add_jobs(jobs, upload_chunk_size=1024):
    """
    Loads a solr-ready json list of jobs into solr.

    inputs:
        :jobs: A list of solr-ready, json-formatted jobs.
        :upload_chunk_size: Maximum number of jobs sent to solr in a single
            ``add`` call. Capped at 64 when any job belongs to buid 19389.

    outputs:
        The number of jobs loaded into solr.

    """
    att_buid = 19389
    att_chunk_size = 64

    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    num_jobs = len(jobs)

    # AT&T showed that large numbers of MOCs can cause import issues due to
    # the size of documents. Therefore, when processing AT&T lower the
    # document chunk size.
    if any(int(job.get('buid', 0)) == att_buid for job in jobs):
        logger.warning("AT&T has large amounts of mapped_mocs, that cause "
                       "problems. Reducing chunk size.")
        # min() so that a caller who already asked for a smaller chunk size
        # does not get it silently raised to 64.
        upload_chunk_size = min(upload_chunk_size, att_chunk_size)

    # Chunk them
    for job_group in chunk(jobs, upload_chunk_size):
        conn.add(list(job_group))
    return num_jobs
示例2: add_jobs
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
def add_jobs(jobs, upload_chunk_size=1024):
    """
    Push solr-ready jobs into solr in batches and report what was sent.

    inputs:
        :jobs: A list of solr-ready, json-formatted jobs.
        :upload_chunk_size: Batch size handed to ``chunk``.

    outputs:
        A list with the "id" of every job passed to solr, in upload order.

    """
    solr_url = settings.HAYSTACK_CONNECTIONS["default"]["URL"]
    connection = Solr(solr_url)

    loaded_ids = []
    for batch in chunk(jobs, upload_chunk_size):
        # Materialise the batch: it is sent to solr and then read again
        # for its ids.
        documents = list(batch)
        connection.add(documents)
        loaded_ids.extend([document["id"] for document in documents])
    return loaded_ids
示例3: SitemapTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
class SitemapTestCase(DirectSEOBase):
    """
    Sitemap tests. ``setUp`` adds SOLR_FIXTURE to the local ``seo`` Solr core
    and ``tearDown`` deletes everything matching ``*:*`` from it.
    """

    def setUp(self):
        super(SitemapTestCase, self).setUp()
        self.conn = Solr('http://127.0.0.1:8983/solr/seo')
        self.conn.add(SOLR_FIXTURE)

    def test_index(self):
        """GET /sitemap.xml answers with HTTP 200."""
        resp = self.client.get("/sitemap.xml")
        self.assertEqual(resp.status_code, 200)

    def test_no_buid_sitemap(self):
        """
        Test to verify that a sitemap is generated with sites that have no
        BUID.

        """
        site = SeoSite.objects.get(id=1)
        site.business_units = []
        site.save()
        # The dated sitemap URL is built from today's ISO date.
        dt = datetime.date.today().isoformat()
        resp = self.client.get("/sitemap-" + dt + ".xml")
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn("<url>", resp.content)

    def test_noreverse(self):
        """
        Test to ensure that jobs with bad/ugly data do not block the
        creation of a sitemap page, but instead are just skipped over in
        `SolrSitemap.get_urls().`

        This is a regression test. It was prompted by a job in a job feed
        file having "~" in the "city" field. Because our URL pattern
        doesn't recognize that character in its regex, it caused a
        `NoReverseMatch` exception to be thrown. Instead of adding a
        tilde, we want to be able to handle any weird characters not
        specified in our URL config.

        """
        # Sometimes the site settings are messed up from other tests. Ensure
        # that the settings are compatible with actually searching for the
        # jobs we're adding.
        settings.SITE_BUIDS = []
        site = SeoSite.objects.get(pk=1)
        site.business_units = []
        site.save()

        # These are kwargs from the actual error that created this error in
        # the first place.
        kwargs = {
            'location': '~, WV',
            'title': '911 Coordinator',
            'uid': '25901630'
        }
        # Copy the fixture entry so the shared SOLR_FIXTURE is not mutated.
        job = dict(SOLR_FIXTURE[0])
        job.update(kwargs)
        self.conn.add([job])

        dt = datetime.date.today().isoformat()
        resp = self.client.get("/sitemap-" + dt + ".xml")
        self.assertEqual(resp.status_code, 200)
        self.assertIn("<url>", resp.content)

    def tearDown(self):
        super(SitemapTestCase, self).tearDown()
        self.conn.delete("*:*")
示例4: update_solr
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
def update_solr(buid, download=True, force=True, set_title=False,
delete_feed=True, data_dir=DATA_DIR, clear_cache=False):
"""
Update the Solr master index with the data contained in a feed file
for a given buid/jsid.
This is meant to be a standalone function such that the state of the
Solr index is not tied to the state of the database.
Inputs:
:buid: An integer; the ID for a particular business unit.
:download: Boolean. If False, this process will not download a new
feedfile, but instead use the one on disk. Should only be false for
the purposes of our test suite.
:force: Boolean. If True, every job seen in the feed file will be
updated in the index. Otherwise, only the jobs seen in the feed file
but not seen in the index will be updated. This latter option will
soon be deprecated.
Returns:
A 2-tuple consisting of the number of jobs added and the number deleted.
Writes/Modifies:
Job data found in the feed file is used to modify the Solr index. This
includes adds & deletes. (Solr does not have a discrete equivalent to
SQL's UPDATE; by adding a document with the same UID as a document in
the index, the equivalent of an update operation is performed.)
"""
if download:
filepath = download_feed_file(buid, data_dir=data_dir)
else:
# Get current worker process id, to prevent race conditions.
try:
p = current_process()
process_id = p.index
except:
process_id = 0
filepath = os.path.join(data_dir, str(process_id), FEED_FILE_PREFIX + str(buid) +
'.xml')
bu = BusinessUnit.objects.get(id=buid)
try:
co = bu.company_set.all()[0]
except IndexError:
co = None
jobfeed = DEv2JobFeed(filepath, jsid=buid, markdown=bu.enable_markdown,
company=co)
# If the feed file did not pass validation, return. The return value is
# '(0, 0)' to match what's returned on a successful parse.
if jobfeed.errors:
error = jobfeed.error_messages
logging.error("BUID:%s - Feed file has failed validation on line %s. "
"Exception: %s" % (buid, error['line'],
error['exception']))
raise FeedImportError(error)
# A dictionary of uids
jobs = jobfeed.jobparse()
# Build a set of all the UIDs for all those instances.
job_uids = set([long(i.get('uid')) for i in jobs if i.get('uid')])
conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
step1 = 1024
# Get the count of all the results in the Solr index for this BUID.
hits = conn.search("*:*", fq="buid:%s" % buid, facet="false",
mlt="false").hits
# Create (start-index, stop-index) tuples to facilitate handling results
# in ``step1``-sized chunks. So if ``hits`` returns 2048 results,
# ``job_slices`` will look like ``[(0,1024), (1024, 2048)]``. Those
# values are then used to slice up the total results.
#
# This was put in place because part of the logic to figuring out what
# jobs to delete from and add jobs to the Solr index is using set
# algebra. We convert the total list of UIDs in the index and the UIDs
# in the XML feed to sets, then compare them via ``.difference()``
# (seen below). However for very large feed files, say 10,000+ jobs,
# this process was taking so long that the connection would time out. To
# address this problem we break up the comparisons as described above.
# This results in more requests but it alleviates the connection timeout
# issue.
job_slices = slices(range(hits), step=step1)
results = [_solr_results_chunk(tup, buid, step1) for tup in job_slices]
solr_uids = reduce(lambda x, y: x | y, results) if results else set()
# Return the job UIDs that are in the Solr index but not in the feed
# file.
solr_del_uids = solr_uids.difference(job_uids)
if not force:
# Return the job UIDs that are in the feed file but not in the Solr
# index.
solr_add_uids = job_uids.difference(solr_uids)
# ``jobfeed.solr_jobs()`` yields a list of dictionaries. We want to
# filter out any dictionaries whose "uid" key is not in
# ``solr_add_uids``. This is because by default we only want to add
# new documents (which each ``solr_jobs()`` dictionary represents),
# not update.
add_docs = filter(lambda x: int(x.get("uid", 0)) in solr_add_uids,
jobfeed.solr_jobs())
else:
#.........这里部分代码省略.........
示例5: JobFeedTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
class JobFeedTestCase(DirectSEOBase):
def setUp(self):
    """
    Build the shared fixtures: business unit 0, a company linked to it, a
    Solr handle, and the paths of the feed files under ``data``.
    """
    super(JobFeedTestCase, self).setUp()

    unit = BusinessUnitFactory(id=0)
    self.businessunit = unit
    self.buid_id = unit.id
    self.numjobs = 14

    module_dir = os.path.dirname(__file__)
    self.testdir = os.path.join(os.path.abspath(module_dir), 'data')

    company = CompanyFactory()
    self.company = company
    company.job_source_ids.add(unit)
    company.save()

    self.conn = Solr("http://127.0.0.1:8983/solr/seo")

    # Attribute name -> feed file name inside ``self.testdir``.
    feed_files = (
        ('emptyfeed', "dseo_feed_0.no_jobs.xml"),
        ('malformed_feed', 'dseo_malformed_feed_0.xml'),
        ('invalid_feed', 'dseo_invalid_feed_0.xml'),
        ('unused_field_feed', 'dseo_feed_1.xml'),
        ('no_onet_feed', 'dseo_feed_no_onets.xml'),
    )
    for attribute, file_name in feed_files:
        setattr(self, attribute, os.path.join(self.testdir, file_name))

    # Ensures DATA_DIR used by import_jobs.download_feed_file exists
    if not os.path.exists(DATA_DIR):
        os.mkdir(DATA_DIR)
def test_company_canonical_microsite(self):
    """Test that canonical microsites is correctly added to solr."""
    field = 'company_canonical_microsite_exact'

    def first_job():
        # Build the feed with the company freshly read from the business
        # unit and hand back the first solr job it produces.
        feed = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])
        return feed.solr_jobs()[0]

    self.assertEqual(first_job()[field], None)

    self.company.canonical_microsite = "http://test.jobs"
    self.company.save()

    self.assertEqual(first_job()[field], 'http://test.jobs')
def test_company_enhanced(self):
    """Test that company enhanced check is correctly added to solr."""
    def first_job():
        # Build the feed with the company freshly read from the business
        # unit and hand back the first solr job it produces.
        feed = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])
        return feed.solr_jobs()[0]

    self.assertFalse(first_job()['company_enhanced'])

    self.company.enhanced = True
    self.company.save()

    self.assertTrue(first_job()['company_enhanced'])
def test_company_member(self):
    """Test that company member check is correctly added to solr."""
    def first_job():
        # Build the feed with the company freshly read from the business
        # unit and hand back the first solr job it produces.
        feed = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])
        return feed.solr_jobs()[0]

    self.assertTrue(first_job()['company_member'])

    self.company.member = False
    self.company.save()

    self.assertFalse(first_job()['company_member'])
def test_company_digital_strategies_customer(self):
# Test that digial strategies customer check is correctly added to solr
results = DEv2JobFeed(
'seo/tests/data/dseo_feed_0.xml',
jsid=self.businessunit.id,
company=self.businessunit.company_set.all()[0])
jobs = results.solr_jobs()
self.assertFalse(jobs[0]['company_digital_strategies_customer'])
self.company.digital_strategies_customer = True
self.company.save()
#.........这里部分代码省略.........
示例6: ImportJobsTestCase
# 需要导入模块: from seo_pysolr import Solr [as 别名]
# 或者: from seo_pysolr.Solr import add [as 别名]
class ImportJobsTestCase(DirectSEOBase):
fixtures = ['import_jobs_testdata.json']
def setUp(self):
    """
    Create business unit 0, record the feed file path derived from its id,
    and open a Solr handle on the default haystack connection URL.
    """
    super(ImportJobsTestCase, self).setUp()

    unit = BusinessUnitFactory(id=0)
    self.businessunit = unit
    self.buid_id = unit.id

    feed_name = 'dseo_feed_%s.xml' % self.buid_id
    self.filepath = os.path.join(DATA_DIR, '0', feed_name)

    self.solr_settings = {
        'default': {
            'URL': 'http://127.0.0.1:8983/solr/seo',
        },
    }

    solr_url = settings.HAYSTACK_CONNECTIONS['default']['URL']
    self.solr = Solr(solr_url)
def tearDown(self):
    """Run the base teardown, then delete everything matching ``*:*``."""
    super(ImportJobsTestCase, self).tearDown()
    everything = '*:*'
    self.solr.delete(q=everything)
def test_solr_rm_feedfile(self):
    """
    After ``update_solr`` has run for the business unit, the feed file at
    ``self.filepath`` must no longer exist.

    """
    update_solr(self.buid_id)
    feed_file_present = os.access(self.filepath, os.F_OK)
    self.assertFalse(feed_file_present)
def test_subsidiary_rename(self):
    """
    Two business units that share a title end up on one company; once the
    second unit's title changes, ``add_company`` moves it to a company of
    its own.
    """
    seed_company = CompanyFactory()
    seed_company.save()

    first_unit = self.businessunit
    first_unit.title = "Acme corp"
    second_unit = BusinessUnitFactory(title=first_unit.title)
    second_unit.save()
    first_unit.company_set.add(seed_company)

    # After add_company the first unit has exactly one company, and that
    # company carries the unit's title.
    add_company(first_unit)
    linked = first_unit.company_set.all()
    self.assertEqual(len(linked), 1)
    shared = linked[0]
    self.assertEqual(shared.name, first_unit.title)

    # Add the 2nd business unit
    add_company(second_unit)

    # Both units should be attached to that company
    self.assertEqual(first_unit.company_set.all()[0],
                     second_unit.company_set.all()[0])
    self.assertEqual(first_unit.company_set.all().count(), 1)
    self.assertIn(first_unit, shared.job_source_ids.all())
    self.assertIn(second_unit, shared.job_source_ids.all())
    self.assertEqual(shared.name, first_unit.title)
    self.assertEqual(shared.name, second_unit.title)

    # Rename the second unit and run add_company for both again.
    second_unit.title = "New company name"
    add_company(first_unit)
    add_company(second_unit)

    # The original company keeps only one unit and the first unit's name.
    self.assertEqual(len(shared.job_source_ids.all()), 1)
    self.assertNotEqual(first_unit.company_set.all(),
                        second_unit.company_set.all())
    self.assertEqual(shared.name, first_unit.title)

    # The second unit now has a single company named after its new title.
    self.assertEqual(len(second_unit.company_set.all()), 1)
    renamed = second_unit.company_set.all()[0]
    self.assertEqual(renamed.name, second_unit.title)
    self.assertNotEqual(renamed.name, first_unit.title)
    self.assertNotEqual(shared.name, second_unit.title)
def test_duplicate_company(self):
    """
    When a company named like the business unit's title already exists,
    the unit's first company after ``add_company`` is that company.
    """
    unrelated = CompanyFactory()
    unrelated.save()
    namesake = CompanyFactory(name="Acme corp")
    namesake.save()

    unit = self.businessunit
    unit.company_set.add(unrelated)
    unit.title = "Acme corp"
    add_company(unit)

    self.assertEqual(unit.company_set.all()[0], namesake)
def test_set_bu_title(self):
    """
    Ensure that if a feedfile for a BusinessUnit comes through, and
    the `title` attribute for that BusinessUnit is not set, that
    `helpers.update_solr` sets the `title` attribute properly.

    """
    bu = BusinessUnit.objects.get(id=self.buid_id)
    bu.title = None
    bu.save()
    # Since the BusinessUnit title is None, the intent is that update_solr
    # will set its title to match the company name found in the feed file.
    # Only the side effect is checked, so the return value is not kept.
    update_solr(self.buid_id)
    # We have to get the updated state of the BusinessUnit instance, since
    # changes to the database won't be reflected by our in-memory version of
    # the data.
    bu = BusinessUnit.objects.get(id=self.buid_id)
    # The title attribute should now equal the initial value established in
    # the setUp method.
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual(self.businessunit.title, bu.title)
def test_add_company(self):
"""
Create environment to test for every possible case--
- Existing relationship but the name is different pk=10
#.........这里部分代码省略.........