当前位置: 首页>>代码示例>>Python>>正文


Python model.HarvestJob类代码示例

本文整理汇总了Python中ckanext.harvest.model.HarvestJob的典型用法代码示例。如果您正苦于以下问题：Python HarvestJob类的具体用法？Python HarvestJob怎么用？Python HarvestJob使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了HarvestJob类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_gather

 def test_gather(self):
     """Run the gather stage for a saved CMDI harvest source.

     The test makes no explicit assertion: it passes as long as
     ``gather_stage`` does not raise.
     """
     cmdi_source = HarvestSource(url="http://localhost/test_cmdi", type="cmdi")
     cmdi_source.save()

     gather_job = HarvestJob(source=cmdi_source)
     gather_job.save()

     # Replace the harvester's client with the fake one before gathering.
     self.harvester.client = _FakeClient()
     self.harvester.gather_stage(gather_job)
开发者ID:LondonAppDev,项目名称:ckanext-oaipmh,代码行数:7,代码来源:test_unit.py

示例2: test_zaincremental_harvester

    def test_zaincremental_harvester(self):
        """Gather and fetch through a mocked OAI-PMH client in incremental mode.

        A harvest job whose source config is ``{"incremental":"True"}`` is
        created, a package with a future-dated revision is added to the
        'roger' group, and the gather and fetch stages are run. The fetched
        harvest object content must be JSON with a non-empty ``records``
        entry.
        """
        client = CKANServer()
        metadata_registry = metadata.MetadataRegistry()
        metadata_registry.registerReader('oai_dc', oai_dc_reader)
        metadata_registry.registerWriter('oai_dc', oai_dc_writer)
        serv = BatchingServer(client, metadata_registry=metadata_registry)
        # NOTE(review): this rebinds the module attribute and never restores
        # it, so the mock stays in place for whatever runs afterwards. Left
        # as-is because other tests may rely on it -- confirm before changing.
        oaipmh.client.Client = mock.Mock(return_value=ServerClient(serv, metadata_registry))

        harv = OAIPMHHarvester()
        harvest_job = HarvestJob()
        harvest_job.source = HarvestSource()
        harvest_job.source.title = "Test"
        harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
        harvest_job.gather_started = datetime.now() + timedelta(days=1)
        harvest_job.source.config = '{"incremental":"True"}'
        harvest_job.source.type = "OAI-PMH"
        Session.add(harvest_job)

        # The package revision is dated one day after the job's gather start.
        rev = model.repo.new_revision()
        rev.timestamp = datetime.now() + timedelta(days=2)
        pkg = Package(name='footest', revision=rev)
        Session.add(pkg)
        pkg.save()

        # assumes a group named 'roger' already exists in the test data
        # -- TODO confirm
        roger = Group.get('roger')
        roger.add_package_by_name('footest')
        Session.add(roger)
        roger.save()

        gathered = harv.gather_stage(harvest_job)
        # Fail with a clear assertion instead of an error from gathered[0]
        # when nothing was gathered.
        self.assertTrue(gathered)
        harvest_object = HarvestObject.get(gathered[0])
        harv.fetch_stage(harvest_object)
        harvobj = json.loads(harvest_object.content)
        self.assertTrue(harvobj['records'])
开发者ID:florenthemmi,项目名称:ckanext-oaipmh,代码行数:31,代码来源:test_oai.py

示例3: harvest_job_create

def harvest_job_create(context,data_dict):
    '''
    Creates a harvest job for an existing, active harvest source.

    :param source_id: id of the harvest source to create a job for
        (read from ``data_dict['source_id']``; a missing key raises KeyError)
    :type source_id: string

    :returns: the new job as produced by ``harvest_job_dictize``
    :raises NotFound: if no harvest source matches ``source_id``
    :raises Exception: if the source is not active
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports a job
        for this source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create',context,data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # assumes `log` is a standard logging.Logger, where `warn` is a
        # deprecated alias of `warning`
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s', source_id)
        raise Exception('Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for this source
    exists = _check_for_existing_jobs(context, source_id)
    if exists:
        log.warning('There is already an unrun job %r for this source %s', exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source

    job.save()
    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job,context)
开发者ID:CodeForAfricaLabs,项目名称:ckanext-harvest,代码行数:29,代码来源:create.py

示例4: harvest_job_create

def harvest_job_create(context,data_dict):
    '''
    Creates a harvest job for an existing, active harvest source that has no
    job with status "New".

    :param source_id: id of the harvest source to create a job for
        (read from ``data_dict['source_id']``; a missing key raises KeyError)
    :type source_id: string

    :returns: the new job as produced by ``harvest_job_dictize``
    :raises NotFound: if no harvest source matches ``source_id``
    :raises HarvestError: if the source is not active, or if
        ``harvest_job_list`` returns any "New" job for the source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create',context,data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # assumes `log` is a standard logging.Logger, where `warn` is a
        # deprecated alias of `warning`
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s', source_id)
        raise HarvestError('Can not create jobs on inactive sources')

    # Check if there already is an unrun job for this source. A separate
    # dict is used for the query so the caller's data_dict name is not
    # rebound inside this function.
    unrun_filter = {
        'source_id': source_id,
        'status': u'New',
    }
    exists = harvest_job_list(context, unrun_filter)
    if exists:
        log.warning('There is already an unrun job %r for this source %s', exists, source_id)
        raise HarvestError('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source

    job.save()
    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job,context)
开发者ID:tbalaz,项目名称:test,代码行数:33,代码来源:create.py

示例5: harvest_job_abort

def harvest_job_abort(context, data_dict):
    '''
    Aborts a harvest job. Given a harvest source_id, it looks for the latest
    job and (assuming it is not already Finished) marks it as Finished. It
    also marks any of that job's harvest objects that are neither COMPLETE
    nor ERROR as "ERROR", so any left in limbo are cleaned up. Does not
    actually stop running any queued harvest fetches/objects.

    :param source_id: the name or id of the harvest source with a job to abort
    :type source_id: string

    :returns: the aborted job as produced by ``harvest_job_dictize``
    :raises NotFound: if the source has no jobs
    '''

    check_access('harvest_job_abort', context, data_dict)

    model = context['model']

    source_id = data_dict.get('source_id')
    source = harvest_source_show(context, {'id': source_id})

    # HarvestJob set status to 'Finished'
    # Do not use harvest_job_list since it can use a lot of memory
    last_job = model.Session.query(HarvestJob) \
                    .filter_by(source_id=source['id']) \
                    .order_by(HarvestJob.created.desc()).first()
    if not last_job:
        raise NotFound('Error: source has no jobs')
    job = get_action('harvest_job_show')(context,
                                         {'id': last_job.id})

    if job['status'] != 'Finished':
        # i.e. New or Running
        job_obj = HarvestJob.get(job['id'])
        job_obj.status = new_status = 'Finished'
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"',
                 job['status'], new_status)
    else:
        # The value logged is the job id, so the message names the job
        # (the previous wording called it a source).
        log.info('Harvest job unchanged. Job %s status is: "%s"',
                 job['id'], job['status'])

    # HarvestObjects set to ERROR.
    # The job is fetched again here rather than reusing the object from the
    # branch above, which only exists when the status was changed.
    job_obj = HarvestJob.get(job['id'])
    for obj in job_obj.objects:
        if obj.state not in ('COMPLETE', 'ERROR'):
            old_state = obj.state
            obj.state = 'ERROR'
            log.info('Harvest object changed state from "%s" to "%s": %s',
                     old_state, obj.state, obj.id)
        else:
            log.info('Harvest object not changed from "%s": %s',
                     obj.state, obj.id)
    model.repo.commit_and_remove()

    # Fetched once more after the second commit_and_remove() before dictizing.
    job_obj = HarvestJob.get(job['id'])
    return harvest_job_dictize(job_obj, context)
开发者ID:coreyerickson,项目名称:ckanext-harvest,代码行数:56,代码来源:update.py

示例6: test_0harvester_url_error

 def test_0harvester_url_error(self):
     """The gather stage returns None for the source URL "http://foo"."""
     self.harv = MetadataHarvester()
     self.harv.config = "{}"
     harvest_job = HarvestJob()
     harvest_job.source = HarvestSource()
     harvest_job.source.title = "Test"
     harvest_job.source.url = "http://foo"
     harvest_job.source.type = "Metadata"
     # NOTE(review): `realopen` is defined outside this block; presumably it
     # is the saved, unmocked urlopen -- confirm.
     urllib2.urlopen = realopen
     # assertIsNone checks identity and reports the actual value on failure,
     # unlike the deprecated assert_(x == None).
     self.assertIsNone(self.harv.gather_stage(harvest_job))
开发者ID:kata-csc,项目名称:ckanext-metadata,代码行数:10,代码来源:test_metadataharvester.py

示例7: test_import

    def test_import(self):
        """Import two CMDI fixtures, check the packages, then delete one.

        Steps:
        1. Import ``cmdi_1.xml`` and check core package fields and its PID.
        2. Import ``cmdi_2.xml`` and check temporal coverage and license.
        3. Run the import stage on a harvest object whose report status is
           "deleted" and check the package state becomes 'deleted'.
        """
        def _error_text(obj):
            # Joined error messages, used as the failure message below.
            return u"\n".join(unicode(error.message) for error in (obj.errors or []))

        source = HarvestSource(url="http://localhost/test_cmdi", type="cmdi")
        source.save()
        job = HarvestJob(source=source)
        job.save()

        harvest_object = self._run_import("cmdi_1.xml", job)

        self.assertEqual(len(harvest_object.errors), 0, _error_text(harvest_object))

        package = get_action('package_show')({'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730180'})

        self.assertEqual(package.get('id', None), 'http://urn.fi/urn:nbn:fi:lb-20140730180')
        self.assertEqual(package.get('name', None), 'urn-nbn-fi-lb-20140730180')
        self.assertEqual(package.get('notes', None), u'{"eng": "Test description"}')
        self.assertEqual(package.get('version', None), '2012-09-07')
        self.assertEqual(package.get('title', []), '{"eng": "Longi Corpus"}')
        self.assertEqual(package.get('license_id', None), 'undernegotiation')

        provider = config['ckan.site_url']
        expected_pid = {u'id': u'http://islrn.org/resources/248-895-085-557-0',
                        u'provider': provider,
                        u'type': u'metadata'}

        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn(expected_pid, package.get('pids'))

        model.Session.flush()

        harvest_object = self._run_import("cmdi_2.xml", job)

        self.assertEqual(len(harvest_object.errors), 0, _error_text(harvest_object))

        package = get_action('package_show')({'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730186'})

        self.assertEqual(package['temporal_coverage_begin'], '1880')
        self.assertEqual(package['temporal_coverage_end'], '1939')
        self.assertEqual(package.get('license_id', None), 'other')

        # Delete package: a content-less harvest object with report status
        # "deleted" that points at the package imported from cmdi_2.xml.
        harvest_object = HarvestObject()
        harvest_object.content = None
        harvest_object.id = "test-cmdi-delete"
        harvest_object.guid = "test-cmdi-delete"
        harvest_object.source = job.source
        harvest_object.harvest_source_id = None
        harvest_object.job = job
        harvest_object.package_id = package.get('id')
        harvest_object.report_status = "deleted"
        harvest_object.save()

        self.harvester.import_stage(harvest_object)

        model.Session.flush()
        self.assertEqual(model.Package.get(package['id']).state, 'deleted')
开发者ID:LondonAppDev,项目名称:ckanext-oaipmh,代码行数:53,代码来源:test_unit.py

示例8: _create_harvester_info

 def _create_harvester_info(self, config=True):
     """Build an OAI-PMH harvest job and harvester for use in tests.

     :param config: when true, the source gets the config '{"query": ""}';
         otherwise the source config is left unset by this method.
     :returns: tuple ``(harvest_job, harvester)``
     """
     # The returned revision is not used here; only the call is kept.
     model.repo.new_revision()
     harvester = OAIPMHHarvester()

     job = HarvestJob()
     source = HarvestSource()
     job.source = source
     source.title = "Test"
     source.url = "http://helda.helsinki.fi/oai/request"
     if config:
         source.config = '{"query": ""}'
     source.type = "OAI-PMH"

     Session.add(job)
     return job, harvester
开发者ID:florenthemmi,项目名称:ckanext-oaipmh,代码行数:12,代码来源:test_oai.py

示例9: test_harvester_1gather_ddi

 def test_harvester_1gather_ddi(self):
     """Gathering with a mocked urlopen yields one item via DDIHarvester."""
     self.harv = MetadataHarvester()
     self.harv.config = "{}"
     harvest_job = HarvestJob()
     harvest_job.source = HarvestSource()
     harvest_job.source.title = "Test"
     harvest_job.source.url = "http://foo"
     harvest_job.source.type = "Metadata"
     # NOTE(review): rebinds urllib2.urlopen globally and does not restore
     # it in this method -- confirm whether other tests rely on that.
     urllib2.urlopen = mock.Mock(side_effect=self._side_effect_ddi_datas)
     self.gathered = self.harv.gather_stage(harvest_job)
     # Dedicated assertions report the actual values on failure, unlike the
     # deprecated assert_(<bool expression>).
     self.assertEqual(len(self.gathered), 1)
     self.assertIsInstance(self.harv.harvester, DDIHarvester)
开发者ID:kata-csc,项目名称:ckanext-metadata,代码行数:12,代码来源:test_metadataharvester.py

示例10: _create_harvester

 def _create_harvester(self, config=True):
     """Build a DDI harvester and a harvest job added to the session.

     :param config: when true the source config is the empty string,
         otherwise it is None.
     :returns: tuple ``(harvester, harvest_job)``
     """
     harvester = DDIHarvester()
     harvester.config = "{}"

     job = HarvestJob()
     source = HarvestSource()
     job.source = source
     source.title = "Test"
     source.url = "http://foo"
     source.config = '' if config else None
     source.type = "DDI"

     Session.add(job)
     return harvester, job
开发者ID:ugeuder-kata,项目名称:ckanext-ddi,代码行数:14,代码来源:test_ddiharvester.py

示例11: harvest_send_job_to_gather_queue

def harvest_send_job_to_gather_queue(context, data_dict):
    '''
    Sends a harvest job to the gather queue.

    The job's status is set to "Running" and saved before the message is
    published.

    :param id: the id of the harvest job
    :type id: string

    :returns: the job as produced by ``harvest_job_dictize``
    :raises toolkit.ValidationError: if the job's source is not active
    '''
    log.info('Send job to gather queue: %r', data_dict)

    job_id = logic.get_or_bust(data_dict, 'id')
    job = toolkit.get_action('harvest_job_show')(
        context, {'id': job_id})

    check_access('harvest_send_job_to_gather_queue', context, job)

    # Check the source is active. This runs before the publisher is
    # obtained, so a rejected request does not leave a publisher that was
    # opened and never used.
    source = harvest_source_show(context, {'id': job['source_id']})
    if not source['active']:
        raise toolkit.ValidationError('Source is not active')

    # gather queue
    publisher = get_gather_publisher()

    job_obj = HarvestJob.get(job['id'])
    job_obj.status = job['status'] = u'Running'
    job_obj.save()
    publisher.send({'harvest_job_id': job['id']})
    log.info('Sent job %s to the gather queue', job['id'])

    return harvest_job_dictize(job_obj, context)
开发者ID:AQUACROSS,项目名称:ckanext-harvest,代码行数:30,代码来源:update.py

示例12: harvest_job_exists

def harvest_job_exists(value, context):
    '''
    Looks up a harvest job by ``value`` and returns it.

    :raises Invalid: if ``HarvestJob.get`` returns nothing for ``value``
    '''
    job = HarvestJob.get(value)
    if job:
        return job

    raise Invalid('Harvest Job with id %r does not exist.' % str(value))
开发者ID:AQUACROSS,项目名称:ckanext-harvest,代码行数:7,代码来源:validators.py

示例13: harvest_job_report

def harvest_job_report(context, data_dict):
    '''
    Builds an error report for a harvest job.

    :param id: the id of the harvest job (read from ``data_dict['id']``)

    :returns: a dict of the form::

        {
            'gather_errors': [{'message': ...}, ...],
            'object_errors': {
                <harvest_object_id>: {
                    'guid': ...,
                    'errors': [{'message': ..., 'line': ..., 'type': ...}],
                    'original_url': ...,   # only when a URL is available
                },
            },
        }

    :raises NotFound: if no job matches the id
    '''

    check_access('harvest_job_show', context, data_dict)

    model = context['model']
    # Named job_id so the builtin `id` is not shadowed.
    job_id = data_dict.get('id')

    job = HarvestJob.get(job_id)
    if not job:
        raise NotFound

    # Gather errors, newest first
    gather_error = harvest_model.HarvestGatherError
    q = model.Session.query(gather_error) \
        .join(harvest_model.HarvestJob) \
        .filter(gather_error.harvest_job_id == job.id) \
        .order_by(gather_error.created.desc())

    report = {
        'gather_errors': [{'message': error.message} for error in q.all()],
        'object_errors': {}
    }

    # Object errors

    # Check if the harvester for this job's source has a method for returning
    # the URL to the original document. There is deliberately no early exit:
    # if several harvesters match, the last one providing the method is used.
    original_url_builder = None
    for harvester in PluginImplementations(IHarvester):
        if harvester.info()['name'] == job.source.type:
            if hasattr(harvester, 'get_original_url'):
                original_url_builder = harvester.get_original_url

    object_error = harvest_model.HarvestObjectError
    q = model.Session.query(object_error, harvest_model.HarvestObject.guid) \
        .join(harvest_model.HarvestObject) \
        .filter(harvest_model.HarvestObject.harvest_job_id == job.id) \
        .order_by(object_error.harvest_object_id)

    object_errors = report['object_errors']
    for error, guid in q.all():
        object_id = error.harvest_object_id
        entry = object_errors.get(object_id)
        if entry is None:
            # First error seen for this harvest object: create its entry and
            # resolve the original URL once.
            entry = object_errors[object_id] = {
                'guid': guid,
                'errors': []
            }
            if original_url_builder:
                url = original_url_builder(object_id)
                if url:
                    entry['original_url'] = url

        entry['errors'].append({
            'message': error.message,
            'line': error.line,
            'type': error.stage
        })

    return report
开发者ID:AQUACROSS,项目名称:ckanext-harvest,代码行数:60,代码来源:get.py

示例14: harvest_job_create

def harvest_job_create(context, data_dict):
    '''
    Creates a Harvest Job for a Harvest Source and runs it (by putting it on
    the gather queue)

    :param source_id: id of the harvest source to create a job for
    :type source_id: string
    :param run: whether to also run it or not (default: True)
    :type run: bool

    :returns: the new job as produced by ``harvest_job_dictize``
    :raises toolkit.NotFound: if no harvest source matches ``source_id``
    :raises HarvestSourceInactiveError: if the source is not active
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports a job
        for this source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']
    run_it = data_dict.get('run', True)

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # assumes `log` is a standard logging.Logger, where `warn` is a
        # deprecated alias of `warning`
        log.warning('Harvest source %s does not exist', source_id)
        raise toolkit.NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s',
                    source_id)
        raise HarvestSourceInactiveError('Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for this
    # source
    exists = _check_for_existing_jobs(context, source_id)
    if exists:
        log.warning('There is already an unrun job %r for this source %s',
                    exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)

    if run_it:
        toolkit.get_action('harvest_send_job_to_gather_queue')(
            context, {'id': job.id})

    return harvest_job_dictize(job, context)
开发者ID:AQUACROSS,项目名称:ckanext-harvest,代码行数:46,代码来源:create.py

示例15: test_harvester_4gather_oaipmh

 def test_harvester_4gather_oaipmh(self):
     """Gathering through a mocked OAI-PMH client yields more than one item
     and selects OAIPMHHarvester."""
     self.harv = MetadataHarvester()
     self.harv.config = "{}"
     harvest_job = HarvestJob()
     harvest_job.source = HarvestSource()
     harvest_job.source.title = "Test"
     harvest_job.source.url = "http://foo"
     harvest_job.source.type = "Metadata"
     client = CKANServer()
     metadata_registry = metadata.MetadataRegistry()
     metadata_registry.registerReader('oai_dc', oai_dc_reader)
     metadata_registry.registerWriter('oai_dc', oai_dc_writer)
     serv = BatchingServer(client, metadata_registry=metadata_registry)
     # NOTE(review): rebinds oaipmh.client.Client globally and does not
     # restore it in this method -- confirm whether other tests rely on that.
     oaipmh.client.Client = mock.Mock(return_value=ServerClient(serv, metadata_registry))
     self.gathered = self.harv.gather_stage(harvest_job)
     # Dedicated assertions report the actual values on failure, unlike the
     # deprecated assert_(<bool expression>).
     self.assertGreater(len(self.gathered), 1)
     self.assertIsInstance(self.harv.harvester, OAIPMHHarvester)
开发者ID:kata-csc,项目名称:ckanext-metadata,代码行数:17,代码来源:test_metadataharvester.py


注:本文中的ckanext.harvest.model.HarvestJob类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。