

Python Solr.add Method Code Examples

This article collects typical usage examples of the seo_pysolr.Solr.add method in Python. If you are wondering what Solr.add does, how it is used, or what real-world examples of it look like, the curated code examples below may help. You can also explore further usage examples of seo_pysolr.Solr, the class this method belongs to.


Six code examples of the Solr.add method are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
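All six examples share the same basic call pattern: construct a Solr connection from a URL, then pass a list of document dictionaries to add(). As a minimal sketch of that pattern, assuming seo_pysolr follows the standard pysolr-style interface (the core URL and document fields below are hypothetical placeholders):

# Minimal usage sketch. Assumes seo_pysolr exposes the standard pysolr-style
# API: Solr(url) plus add(list_of_documents). The core URL and the document
# fields here are placeholders, not taken from any of the examples below.
from seo_pysolr import Solr

conn = Solr('http://127.0.0.1:8983/solr/seo')

# Each document is a plain dict of solr-ready fields; add() indexes the batch.
conn.add([
    {'id': '1', 'uid': '1', 'buid': 0, 'title': 'Example Job'},
])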

Example 1: add_jobs

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
def add_jobs(jobs, upload_chunk_size=1024):
    """
    Loads a solr-ready json list of jobs into solr.

    inputs:
        :jobs: A list of solr-ready, json-formatted jobs.

    outputs:
        The number of jobs loaded into solr.
    """
    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    num_jobs = len(jobs)
    # AT&T showed that large numbers of MOCs can cause import issues due to
    # the size of the documents. Therefore, lower the document chunk size
    # when processing AT&T jobs.
    for job in jobs:
        if int(job.get('buid', 0)) == 19389:
            logger.warning("AT&T has large numbers of mapped_mocs that cause "
                           "problems. Reducing chunk size.")
            upload_chunk_size = 64
            break

    # Chunk them
    jobs = chunk(jobs, upload_chunk_size)
    for job_group in jobs:
        conn.add(list(job_group))
    return num_jobs
Developer: AstroMatchDynamics, Project: MyJobs, Lines: 28, Source: import_jobs.py

Example 2: add_jobs

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
def add_jobs(jobs, upload_chunk_size=1024):
    """
    Loads a solr-ready json list of jobs into solr.

    inputs:
        :jobs: A list of solr-ready, json-formatted jobs.

    outputs:
        The ids of jobs loaded into solr.
    """
    conn = Solr(settings.HAYSTACK_CONNECTIONS["default"]["URL"])

    # Chunk them
    jobs = chunk(jobs, upload_chunk_size)
    job_ids = list()
    for job_group in jobs:
        job_group = list(job_group)
        conn.add(job_group)
        job_ids.extend(j["id"] for j in job_group)
    return job_ids
Developer: DirectEmployers, Project: MyJobs, Lines: 22, Source: solr.py
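Both add_jobs examples above rely on a chunk helper that is not reproduced on this page. The following is only a plausible sketch, inferred from the call sites (each yielded group is materialized with list() and passed straight to conn.add):

# Hypothetical sketch of the `chunk` helper used in Examples 1 and 2; its
# actual implementation is not shown on this page. Inferred behavior: yield
# successive groups of at most `size` items from the input.
from itertools import islice

def chunk(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        group = list(islice(it, size))
        if not group:
            return
        yield group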

Example 3: SitemapTestCase

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
class SitemapTestCase(DirectSEOBase):
    def setUp(self):
        super(SitemapTestCase, self).setUp()
        self.conn = Solr('http://127.0.0.1:8983/solr/seo')
        self.conn.add(SOLR_FIXTURE)

    def test_index(self):
        resp = self.client.get("/sitemap.xml")
        self.assertEqual(resp.status_code, 200)
        
    def test_no_buid_sitemap(self):
        """
        Test to verify that a sitemap is generated with sites that have no
        BUID.
        
        """
        site = SeoSite.objects.get(id=1)
        site.business_units = []
        site.save()
        today = datetime.datetime.today()
        dt = datetime.date(*today.timetuple()[0:3]).isoformat()
        resp = self.client.get("/sitemap-" + dt + ".xml")
        self.assertTrue("<url>" in resp.content)

    def test_noreverse(self):
        """
        Test to ensure that jobs with bad/ugly data do not block the
        creation of a sitemap page, but instead are just skipped over in
        `SolrSitemap.get_urls()`.

        This is a regression test. It was prompted by a job in a job feed
        file having "~" in the "city" field. Because our URL pattern
        doesn't recognize that character in its regex, it caused a
        `NoReverseMatch` exception to be thrown. Instead of adding a
        tilde, we want to be able to handle any weird characters not
        specified in our URL config.
        
        """
        # Sometimes the site settings are messed up from other tests. Ensure
        # that the settings are compatible with actually searching for the
        # jobs we're adding.
        settings.SITE_BUIDS = []
        site = SeoSite.objects.get(pk=1)
        site.business_units = []
        site.save()

        # These are the kwargs from the actual job that triggered this error
        # in the first place.
        kwargs = {
            'location': '~, WV',
            'title': '911 Coordinator',
            'uid': '25901630'
        }
        job = dict(SOLR_FIXTURE[0])
        job.update(kwargs)
        self.conn.add([job])
        today = datetime.datetime.now()
        dt = today.date().isoformat()
        resp = self.client.get("/sitemap-" + dt + ".xml")
        self.assertEqual(resp.status_code, 200)
        self.assertTrue("<url>" in resp.content)
        
    def tearDown(self):
        super(SitemapTestCase, self).tearDown()
        self.conn.delete("*:*")
Developer: AstroMatchDynamics, Project: MyJobs, Lines: 67, Source: test_sitemap.py
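The test above imports a SOLR_FIXTURE constant that is not reproduced on this page. A hypothetical minimal fixture, consistent with how the test uses it (a list of solr-ready job documents whose uid, title, and location fields feed the sitemap URL pattern), might look like:

# Hypothetical stand-in for the SOLR_FIXTURE imported by the test; the real
# fixture is not shown here. Field values are invented for illustration.
SOLR_FIXTURE = [
    {
        'id': '25901629',            # assumed unique document id
        'uid': '25901629',
        'buid': 0,
        'title': 'Retail Associate',
        'location': 'Charleston, WV',
    },
]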

Example 4: update_solr

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
def update_solr(buid, download=True, force=True, set_title=False,
                delete_feed=True, data_dir=DATA_DIR, clear_cache=False):
    """
    Update the Solr master index with the data contained in a feed file
    for a given buid/jsid.

    This is meant to be a standalone function such that the state of the
    Solr index is not tied to the state of the database.

    Inputs:
    :buid: An integer; the ID for a particular business unit.
    :download: Boolean. If False, this process will not download a new
    feed file, but will instead use the one on disk. Should only be False
    for the purposes of our test suite.
    :force: Boolean. If True, every job seen in the feed file will be
    updated in the index. Otherwise, only the jobs seen in the feed file
    but not seen in the index will be updated. This latter option will
    soon be deprecated.

    Returns:
    A 2-tuple consisting of the number of jobs added and the number deleted.

    Writes/Modifies:
    Job data found in the feed file is used to modify the Solr index. This
    includes adds & deletes. (Solr does not have a discrete equivalent to
    SQL's UPDATE; by adding a document with the same UID as a document in
    the index, the equivalent of an update operation is performed.)

    """
    if download:
        filepath = download_feed_file(buid, data_dir=data_dir)
    else:
        # Get current worker process id, to prevent race conditions.
        try:
            p = current_process()
            process_id = p.index
        except Exception:
            process_id = 0
        filepath = os.path.join(data_dir, str(process_id), FEED_FILE_PREFIX + str(buid) +
                                '.xml')
    bu = BusinessUnit.objects.get(id=buid)
    try:
        co = bu.company_set.all()[0]
    except IndexError:
        co = None
    jobfeed = DEv2JobFeed(filepath, jsid=buid, markdown=bu.enable_markdown,
                          company=co)
    # If the feed file did not pass validation, return. The return value is
    # '(0, 0)' to match what's returned on a successful parse.
    if jobfeed.errors:
        error = jobfeed.error_messages
        logging.error("BUID:%s - Feed file has failed validation on line %s. "
                      "Exception: %s" % (buid, error['line'],
                                         error['exception']))
        raise FeedImportError(error)

    # A dictionary of uids
    jobs = jobfeed.jobparse()

    # Build a set of all the UIDs for all those instances.
    job_uids = set([long(i.get('uid')) for i in jobs if i.get('uid')])
    conn = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    step1 = 1024

    # Get the count of all the results in the Solr index for this BUID.
    hits = conn.search("*:*", fq="buid:%s" % buid, facet="false",
                       mlt="false").hits
    # Create (start-index, stop-index) tuples to facilitate handling results
    # in ``step1``-sized chunks. So if ``hits`` returns 2048 results,
    # ``job_slices`` will look like ``[(0,1024), (1024, 2048)]``. Those
    # values are then used to slice up the total results.
    #
    # This was put in place because part of the logic to figuring out what
    # jobs to delete from and add jobs to the Solr index is using set
    # algebra. We convert the total list of UIDs in the index and the UIDs
    # in the XML feed to sets, then compare them via ``.difference()``
    # (seen below). However for very large feed files, say 10,000+ jobs,
    # this process was taking so long that the connection would time out. To
    # address this problem we break up the comparisons as described above.
    # This results in more requests but it alleviates the connection timeout
    # issue.
    job_slices = slices(range(hits), step=step1)
    results = [_solr_results_chunk(tup, buid, step1) for tup in job_slices]
    solr_uids = reduce(lambda x, y: x | y, results) if results else set()
    # Return the job UIDs that are in the Solr index but not in the feed
    # file.
    solr_del_uids = solr_uids.difference(job_uids)

    if not force:
        # Return the job UIDs that are in the feed file but not in the Solr
        # index.
        solr_add_uids = job_uids.difference(solr_uids)
        # ``jobfeed.solr_jobs()`` yields a list of dictionaries. We want to
        # filter out any dictionaries whose "uid" key is not in
        # ``solr_add_uids``. This is because by default we only want to add
        # new documents (which each ``solr_jobs()`` dictionary represents),
        # not update.
        add_docs = filter(lambda x: int(x.get("uid", 0)) in solr_add_uids,
                          jobfeed.solr_jobs())
    else:
#.........some code omitted here.........
Developer: DirectEmployers, Project: MyJobs, Lines: 103, Source: __init__.py
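The long comment in this example describes breaking range(hits) into (start, stop) index windows via a slices helper, which is not shown on this page. A minimal sketch matching the behavior the comment describes, offered as an assumption rather than the project's actual implementation:

# Hypothetical sketch of the `slices` helper referenced in Example 4.
# Inferred behavior from the comment: cover `seq` with (start, stop)
# index pairs of width `step`, the last pair clipped to len(seq).
def slices(seq, step):
    """Return (start, stop) index pairs covering `seq` in `step`-sized windows."""
    return [(start, min(start + step, len(seq)))
            for start in range(0, len(seq), step)]

# e.g. slices(range(2048), step=1024) -> [(0, 1024), (1024, 2048)]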

Example 5: JobFeedTestCase

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
class JobFeedTestCase(DirectSEOBase):

    def setUp(self):
        super(JobFeedTestCase, self).setUp()
        self.businessunit = BusinessUnitFactory(id=0)
        self.buid_id = self.businessunit.id
        self.numjobs = 14
        self.testdir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 
                                    'data')
        self.company = CompanyFactory()
        self.company.job_source_ids.add(self.businessunit)
        self.company.save()
        self.conn = Solr("http://127.0.0.1:8983/solr/seo")
        self.emptyfeed = os.path.join(self.testdir, "dseo_feed_0.no_jobs.xml")
        self.malformed_feed = os.path.join(self.testdir, 'dseo_malformed_feed_0.xml')
        self.invalid_feed = os.path.join(self.testdir, 'dseo_invalid_feed_0.xml')
        self.unused_field_feed = os.path.join(self.testdir, 'dseo_feed_1.xml')
        self.no_onet_feed = os.path.join(self.testdir, 'dseo_feed_no_onets.xml')

        # Ensures DATA_DIR used by import_jobs.download_feed_file exists
        data_path = DATA_DIR
        if not os.path.exists(data_path):
            os.mkdir(data_path)

    def test_company_canonical_microsite(self):
        # Test that the canonical microsite is correctly added to solr
        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertEqual(jobs[0]['company_canonical_microsite_exact'], None)

        self.company.canonical_microsite = "http://test.jobs"
        self.company.save()

        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertEqual(jobs[0]['company_canonical_microsite_exact'],
                         'http://test.jobs')

    def test_company_enhanced(self):
        # Test that company enhanced check is correctly added to solr
        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertFalse(jobs[0]['company_enhanced'])

        self.company.enhanced = True
        self.company.save()

        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertTrue(jobs[0]['company_enhanced'])

    def test_company_member(self):
        # Test that company member check is correctly added to solr
        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])
        jobs = results.solr_jobs()
        self.assertTrue(jobs[0]['company_member'])

        self.company.member = False
        self.company.save()

        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertFalse(jobs[0]['company_member'])

    def test_company_digital_strategies_customer(self):
        # Test that digital strategies customer check is correctly added to solr
        results = DEv2JobFeed(
            'seo/tests/data/dseo_feed_0.xml',
            jsid=self.businessunit.id,
            company=self.businessunit.company_set.all()[0])

        jobs = results.solr_jobs()
        self.assertFalse(jobs[0]['company_digital_strategies_customer'])

        self.company.digital_strategies_customer = True
        self.company.save()

#.........some code omitted here.........
Developer: AstroMatchDynamics, Project: MyJobs, Lines: 103, Source: test_xmlparse.py

Example 6: ImportJobsTestCase

# Required import: from seo_pysolr import Solr [as alias]
# Or alternatively: from seo_pysolr.Solr import add [as alias]
class ImportJobsTestCase(DirectSEOBase):
    fixtures = ['import_jobs_testdata.json']

    def setUp(self):
        super(ImportJobsTestCase, self).setUp()
        self.businessunit = BusinessUnitFactory(id=0)
        self.buid_id = self.businessunit.id        
        self.filepath = os.path.join(DATA_DIR, '0', 'dseo_feed_%s.xml' % self.buid_id)
        self.solr_settings = {
            'default': {'URL': 'http://127.0.0.1:8983/solr/seo'}
        }
        self.solr = Solr(settings.HAYSTACK_CONNECTIONS['default']['URL'])

    def tearDown(self):
        super(ImportJobsTestCase, self).tearDown()
        self.solr.delete(q='*:*')

    def test_solr_rm_feedfile(self):
        """
        Test that at the end of Solr parsing, the feed file is deleted.
        
        """
        update_solr(self.buid_id)
        self.assertFalse(os.access(self.filepath, os.F_OK))

    def test_subsidiary_rename(self):
        company1 = CompanyFactory()
        company1.save()
        bu1 = self.businessunit
        bu1.title = "Acme corp"
        bu2 = BusinessUnitFactory(title=bu1.title)
        bu2.save()
        self.businessunit.company_set.add(company1)

        # Test that a company was created for both business units
        add_company(bu1)
        companies = bu1.company_set.all()
        self.assertEqual(len(companies), 1)
        co = companies[0]
        self.assertEqual(co.name, bu1.title)

        # Add the 2nd business unit
        add_company(bu2)

        # Both units should be attached to that company
        self.assertEqual(bu1.company_set.all()[0], bu2.company_set.all()[0])
        self.assertEqual(bu1.company_set.all().count(), 1) 
        self.assertIn(bu1, co.job_source_ids.all())
        self.assertIn(bu2, co.job_source_ids.all())
        self.assertEqual(co.name, bu1.title)
        self.assertEqual(co.name, bu2.title)

        bu2.title = "New company name"
        add_company(bu1)
        add_company(bu2)
        self.assertEqual(len(co.job_source_ids.all()), 1)
        self.assertNotEqual(bu1.company_set.all(), bu2.company_set.all())
        self.assertEqual(co.name, bu1.title)
        self.assertEqual(len(bu2.company_set.all()), 1)
        co2 = bu2.company_set.all()[0]
        self.assertEqual(co2.name, bu2.title)
        self.assertNotEqual(co2.name, bu1.title)
        self.assertNotEqual(co.name, bu2.title)

    def test_duplicate_company(self):
        company1 = CompanyFactory()
        company1.save()
        company2 = CompanyFactory(name="Acme corp")
        company2.save()
        self.businessunit.company_set.add(company1)
        self.businessunit.title = "Acme corp"
        add_company(self.businessunit)
        self.assertEqual(self.businessunit.company_set.all()[0], company2)

    def test_set_bu_title(self):
        """
        Ensure that if a feedfile for a BusinessUnit comes through, and
        the `title` attribute for that BusinessUnit is not set, that
        `helpers.update_solr` sets the `title` attribute properly.

        """
        bu = BusinessUnit.objects.get(id=self.buid_id)
        bu.title = None
        bu.save()
        # Since the BusinessUnit title is None, the intent is that update_solr
        # will set its title to match the company name found in the feed file.
        results = update_solr(self.buid_id)
        # We have to get the updated state of the BusinessUnit instance, since
        # changes to the database won't be reflected by our in-memory version of
        # the data.
        bu = BusinessUnit.objects.get(id=self.buid_id)
        # The title attribute should now equal the initial value established in
        # the setUp method.
        self.assertEqual(self.businessunit.title, bu.title)

    def test_add_company(self):
        """
        Create environment to test for every possible case--
        
         - Existing relationship but the name is different                 pk=10
#.........some code omitted here.........
Developer: vfulco, Project: MyJobs, Lines: 103, Source: test_import_jobs.py


Note: The seo_pysolr.Solr.add method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the License of the corresponding project. Do not reproduce without permission.