This page collects typical usage examples of the Python method ckanext.dcat.processors.RDFParser.next_page. If you have been wondering how RDFParser.next_page works or how to use it in practice, the hand-picked code examples below may help. You can also read more about the containing class, ckanext.dcat.processors.RDFParser.
Three code examples of RDFParser.next_page are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
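For orientation before the examples: next_page() inspects the Hydra paging metadata (hydra:PagedCollection) in the parsed document and returns the URL of the following page, or None if there is none. The sketch below is not taken from the test suite; it assumes the collection advertises a hydra:nextPage link, as the last-page test in Example 2 implies, and the URLs are made up for illustration.

from ckanext.dcat.processors import RDFParser

# Hypothetical first page of a paginated catalog that links to page 2
data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:hydra="http://www.w3.org/ns/hydra/core#">
 <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
   <hydra:nextPage>http://example.com/catalog.xml?page=2</hydra:nextPage>
 </hydra:PagedCollection>
</rdf:RDF>
'''

p = RDFParser()
p.parse(data)

# Expected to yield the hydra:nextPage URL; None would mean the last page
print(p.next_page())  # http://example.com/catalog.xml?page=2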
Example 1: test_parse_without_pagination
# Required module: from ckanext.dcat.processors import RDFParser [as alias]
# Or: from ckanext.dcat.processors.RDFParser import next_page [as alias]
def test_parse_without_pagination(self):

    data = '''<?xml version="1.0" encoding="utf-8" ?>
    <rdf:RDF
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
     <rdfs:SomeClass rdf:about="http://example.org">
       <rdfs:label>Some label</rdfs:label>
     </rdfs:SomeClass>
    </rdf:RDF>
    '''

    p = RDFParser()
    p.parse(data)

    # No pagination metadata in the document, so there is no next page
    eq_(p.next_page(), None)
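With no Hydra pagination metadata anywhere in the document, there is no next page to resolve, so next_page() returns None.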
Example 2: test_parse_pagination_last_page
# Required module: from ckanext.dcat.processors import RDFParser [as alias]
# Or: from ckanext.dcat.processors.RDFParser import next_page [as alias]
def test_parse_pagination_last_page(self):

    data = '''<?xml version="1.0" encoding="utf-8" ?>
    <rdf:RDF
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
     xmlns:hydra="http://www.w3.org/ns/hydra/core#">
     <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=3">
       <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
       <hydra:lastPage>http://example.com/catalog.xml?page=3</hydra:lastPage>
       <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">100</hydra:itemsPerPage>
       <hydra:firstPage>http://example.com/catalog.xml?page=1</hydra:firstPage>
       <hydra:previousPage>http://example.com/catalog.xml?page=2</hydra:previousPage>
     </hydra:PagedCollection>
    </rdf:RDF>
    '''

    p = RDFParser()
    p.parse(data)

    # This is the last page of the collection, so there is no next page
    eq_(p.next_page(), None)
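Here the PagedCollection describes the last page of the catalog (it carries firstPage, lastPage and previousPage links but nothing pointing forward), so next_page() again returns None.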
Example 3: gather_stage
# Required module: from ckanext.dcat.processors import RDFParser [as alias]
# Or: from ckanext.dcat.processors.RDFParser import next_page [as alias]
def gather_stage(self, harvest_job):

    log.debug('In DCATRDFHarvester gather_stage')

    rdf_format = None
    if harvest_job.source.config:
        rdf_format = json.loads(harvest_job.source.config).get("rdf_format")

    # Get file contents of first page
    next_page_url = harvest_job.source.url

    guids_in_source = []
    object_ids = []

    while next_page_url:

        for harvester in p.PluginImplementations(IDCATRDFHarvester):
            next_page_url, before_download_errors = harvester.before_download(next_page_url, harvest_job)

            for error_msg in before_download_errors:
                self._save_gather_error(error_msg, harvest_job)

            if not next_page_url:
                return []

        content, rdf_format = self._get_content_and_type(next_page_url, harvest_job, 1, content_type=rdf_format)

        # TODO: store content?
        for harvester in p.PluginImplementations(IDCATRDFHarvester):
            content, after_download_errors = harvester.after_download(content, harvest_job)

            for error_msg in after_download_errors:
                self._save_gather_error(error_msg, harvest_job)

        if not content:
            return []

        # TODO: profiles conf
        parser = RDFParser()

        try:
            parser.parse(content, _format=rdf_format)
        except RDFParserException as e:
            self._save_gather_error('Error parsing the RDF file: {0}'.format(e), harvest_job)
            return []

        for dataset in parser.datasets():
            if not dataset.get('name'):
                dataset['name'] = self._gen_new_name(dataset['title'])

            # Unless already set by the parser, get the owner organization (if any)
            # from the harvest source dataset
            if not dataset.get('owner_org'):
                source_dataset = model.Package.get(harvest_job.source.id)
                if source_dataset.owner_org:
                    dataset['owner_org'] = source_dataset.owner_org

            # Try to get a unique identifier for the harvested dataset
            guid = self._get_guid(dataset)

            if not guid:
                self._save_gather_error('Could not get a unique identifier for dataset: {0}'.format(dataset),
                                        harvest_job)
                continue

            dataset['extras'].append({'key': 'guid', 'value': guid})
            guids_in_source.append(guid)

            obj = HarvestObject(guid=guid, job=harvest_job,
                                content=json.dumps(dataset))
            obj.save()
            object_ids.append(obj.id)

        # Get the URL of the next page (None when this was the last one)
        next_page_url = parser.next_page()
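In this harvester, next_page() is what drives the pagination: each pass of the while loop downloads and parses one catalog page, builds a HarvestObject per dataset it finds, and then asks the parser for the URL of the following page. When the last page has been processed, next_page() returns None and the loop ends.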