本文整理汇总了 Python 中 ckan.model.Package.url 方法的典型用法代码示例。如果您正苦于以下问题：Python Package.url 方法的具体用法？Python Package.url 怎么用？Python Package.url 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ckan.model.Package 的用法示例。
在下文中一共展示了 Package.url 方法的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _oai_dc2ckan
# 需要导入模块: from ckan.model import Package [as 别名]
# 或者: from ckan.model.Package import url [as 别名]
def _oai_dc2ckan(data, namespaces, group, harvest_object):
    """Create or update the CKAN package for one harvested ``oai_dc`` record.

    The package is looked up by the record identifier with ``/`` replaced by
    ``-``; when it does not exist a new one is created and saved. Subject and
    type fields become tags, creator / contributor / publisher / rights /
    identifier fields are collected into ``pkg.extras``, and every HTTP(S)
    identifier is attached as an HTML resource.

    :param data: dict providing ``'identifier'``, ``'package_name'``,
        ``'package_url'`` and ``'metadata'`` (whose ``'oai_dc'`` entry holds
        the Dublin Core fields read below).
    :param namespaces: passed unchanged to the ``_handle_*`` helpers.
    :param group: when truthy, the package is added to it by name.
    :param harvest_object: not used in the visible part of the function.

    NOTE(review): this excerpt is truncated and its block structure was
    reconstructed from a flattened listing -- confirm against upstream.
    """
    model.repo.new_revision()
    identifier = data['identifier']
    metadata_oai_dc = data['metadata']['oai_dc']
    titles = _handle_title(metadata_oai_dc.get('titleNode', []), namespaces)
    # Store title in pkg.title and keep all in extras as well. That way
    # UI will work some way in any case.
    title = titles.get('title_0', identifier)
    name = data['package_name']
    esc_identifier = identifier.replace('/', '-')
    pkg = Package.get(esc_identifier)
    if not pkg:
        pkg = Package(name=name, title=title, id=esc_identifier)
        pkg.save()
        setup_default_user_roles(pkg)
    else:
        log.debug('Updating: %s' % name)
        # Old resources are replaced by new ones if they are still relevant,
        # so mark all existing resources as deleted now.
        for r in pkg.resources:
            r.state = 'deleted'
    # ``extras`` is the same dict object as ``titles``: the title entries stay
    # in the extras on purpose.
    extras = titles
    idx = 0
    for s in ('subject', 'type'):
        for tag in metadata_oai_dc.get(s, []):
            # Turn each subject or type field into its own tag.
            tagi = tag.strip()
            if tagi.startswith('http://www.yso.fi'):
                tags = label_list_yso(tagi)
                extras['tag_source_%i' % idx] = tagi
                idx += 1
            elif tagi.startswith('http://') or tagi.startswith('https://'):
                extras['tag_source_%i' % idx] = tagi
                idx += 1
                tags = []  # URL tags break links in UI.
            else:
                tags = [tagi]
            for tagi in tags:
                tagi = tagi[:100]  # 100 char limit in DB.
                tag_obj = model.Tag.by_name(tagi)
                if not tag_obj:
                    tag_obj = model.Tag(name=tagi)
                    tag_obj.save()
                pkgtag = model.Session.query(model.PackageTag).filter(
                    model.PackageTag.package_id == pkg.id).filter(
                    model.PackageTag.tag_id == tag_obj.id).limit(1).first()
                if pkgtag is None:
                    pkgtag = model.PackageTag(tag=tag_obj, package=pkg)
                    pkgtag.save()  # Avoids duplicates if tags have duplicates.
    lastidx = 0
    for auth in metadata_oai_dc.get('creator', []):
        extras['organization_%d' % lastidx] = ''
        extras['author_%d' % lastidx] = auth
        lastidx += 1
    extras.update(_handle_contributor(
        metadata_oai_dc.get('contributorNode', []), namespaces))
    extras.update(_handle_publisher(
        metadata_oai_dc.get('publisherNode', []), namespaces))
    # This value belongs on the package itself, not in the extras.
    if 'package.maintainer_email' in extras:
        pkg.maintainer_email = extras['package.maintainer_email']
        del extras['package.maintainer_email']
    extras.update(_handle_rights(
        metadata_oai_dc.get('rightsNode', []), namespaces))
    if 'package.license' in extras:
        pkg.license = extras['package.license']
        del extras['package.license']
    # NOTE: 'formatNode' entries are deliberately not turned into resources;
    # the original author observed that doing so made the commit fail.
    # There may be multiple identifiers (URL, ISBN, ...) in the metadata.
    id_idx = 0
    for ident in metadata_oai_dc.get('identifier', []):
        extras['identifier_%i' % id_idx] = ident
        id_idx += 1
    # Check that we have a language.
    lang = metadata_oai_dc.get('language', [])
    if lang and len(lang) and len(lang[0]) > 1:
        pkg.language = lang[0]
    if 'date' in extras:
        pkg.version = extras['date']
        del extras['date']
    pkg.extras = extras
    pkg.url = data['package_url']
    # Metadata may have different identifiers, pick link, if exists.
    # Read with .get() like the extras loop above, so a record without any
    # identifier field does not raise KeyError half-way through the update.
    for ids in metadata_oai_dc.get('identifier', []):
        if ids.startswith('http://') or ids.startswith('https://'):
            pkg.add_resource(ids, name=pkg.title, format='html')
    # All belong to the main group even if they do not belong to any set.
    if group:
        group.add_package_by_name(pkg.name)
    # The rest.
    # description below goes to pkg.notes. I think it should not added here.
    for mdp, metadata in data['metadata'].items():
        for key, value in metadata.items():
            if value is None or len(value) == 0 or key in (
                    'titleNode', 'subject', 'type', 'rightsNode',
                    'publisherNode', 'creator', 'contributorNode',
                    'description', 'identifier', 'language', 'formatNode'):
                continue
            # ......... part of the code is omitted here .........
示例2: _oai_dc2ckan
# 需要导入模块: from ckan.model import Package [as 别名]
# 或者: from ckan.model.Package import url [as 别名]
def _oai_dc2ckan(data, namespaces, group, harvest_object):
    """Create or update the CKAN package for one harvested Dublin Core record.

    The package is looked up by ``data['package_name']`` and created (with
    the record identifier as its id) when missing. Subject and type fields
    become tags; contributor, publisher and rights information plus the
    first value of every remaining metadata field go into ``pkg.extras``.

    :param data: dict providing ``'identifier'``, ``'metadata'``,
        ``'package_name'`` and ``'package_url'``; optionally
        ``'package_resource'`` and ``'package_xml_save'``.
    :param namespaces: passed unchanged to the ``_handle_*`` helpers.
    :param group: not used in the visible part of the function.
    :param harvest_object: when not ``None``, it is linked to the package,
        its content is cleared, it is flagged as current and saved.

    NOTE(review): this excerpt is truncated and its block structure was
    reconstructed from a flattened listing -- confirm against upstream.
    """
    model.repo.new_revision()
    identifier = data['identifier']
    metadata = data['metadata']
    # Store title in pkg.title and keep all in extras as well. That way
    # UI will work some way in any case.
    # Fall back to the whole identifier when the record has no title; the
    # previous ``metadata.get('title', identifier)[0]`` produced only the
    # first character of the identifier and failed on an empty title list.
    titles = metadata.get('title') or []
    title = titles[0] if titles else identifier
    name = data['package_name']
    pkg = Package.get(name)
    if not pkg:
        pkg = Package(name=name, title=title, id=identifier)
        pkg.save()
        setup_default_user_roles(pkg)
    else:
        log.debug('Updating: %s' % name)
        # Old resources are replaced by new ones if they are still relevant,
        # so mark all existing resources as deleted now.
        for r in pkg.resources:
            r.state = 'deleted'
    extras = {}
    idx = 0
    for s in ('subject', 'type',):
        for tag in metadata.get(s, []):
            # Turn each subject or type field into its own tag.
            tagi = tag.strip()
            if tagi.startswith('http://') or tagi.startswith('https://'):
                extras['tag_source_%i' % idx] = tagi
                idx += 1
                tags = []  # URL tags break links in UI.
            else:
                tags = [tagi]
            for tagi in tags:
                tagi = tagi[:100]  # 100 char limit in DB.
                tag_obj = model.Tag.by_name(tagi)
                if not tag_obj:
                    tag_obj = model.Tag(name=tagi)
                    tag_obj.save()
                pkgtag = model.Session.query(model.PackageTag).filter(
                    model.PackageTag.package_id == pkg.id).filter(
                    model.PackageTag.tag_id == tag_obj.id
                ).limit(1).first()
                if pkgtag is None:
                    pkgtag = model.PackageTag(tag=tag_obj, package=pkg)
                    pkgtag.save()  # Avoids duplicates if tags have duplicates.
    extras.update(
        _handle_contributor(metadata.get('contributorNode', []), namespaces))
    extras.update(
        _handle_publisher(metadata.get('publisherNode', []), namespaces))
    # This value belongs on the package itself, not in the extras.
    if 'package.maintainer_email' in extras:
        pkg.maintainer_email = extras['package.maintainer_email']
        del extras['package.maintainer_email']
    extras.update(_handle_rights(metadata.get('rightsNode', []), namespaces))
    if 'package.license' in extras:
        pkg.license = extras['package.license']
        del extras['package.license']
    # Check that we have a language.
    lang = metadata.get('language', [])
    if lang is not None and len(lang) and len(lang[0]) > 1:
        pkg.language = lang[0]
    # The rest.
    # description below goes to pkg.notes. I think it should not added here.
    for key, value in metadata.items():
        if value is None or len(value) == 0 or key in (
                'title',
                'description',
                'publisherNode',
                'contributorNode',
                'formatNode',
                'identifier',
                'source',
                'rightsNode'
        ):
            continue
        extras[key] = value[0]
    notes = ' '.join(metadata.get('description', []))
    # NOTE(review): the second replace is a no-op as written; presumably it
    # was meant to collapse double spaces -- confirm against upstream.
    pkg.notes = notes.replace('\n', ' ').replace(' ', ' ')
    if 'date' in extras:
        pkg.version = extras['date']
        extras['modified'] = extras['date']
        del extras['date']
    pkg.extras = extras
    pkg.url = data['package_url']
    if 'package_resource' in data:
        try:
            ofs = get_ofs()
            ofs.put_stream(BUCKET, data['package_xml_save']['label'],
                           data['package_xml_save']['xml'], {})
            pkg.add_resource(**(data['package_resource']))
        except KeyError:
            # Best effort: a record without the stored-XML details simply
            # gets no extra resource.
            pass
    if harvest_object is not None:
        harvest_object.package_id = pkg.id
        harvest_object.content = None
        harvest_object.current = True
        harvest_object.save()
    # Metadata may have different identifiers, pick link, if exists.
    # See: https://github.com/okfn/ckan/blob/master/ckan/public/base/images/sprite-resource-icons.png
    # ......... part of the code is omitted here .........
示例3: import_stage
# 需要导入模块: from ckan.model import Package [as 别名]
# 或者: from ckan.model.Package import url [as 别名]
def import_stage(self, harvest_object):
    '''
    The import stage will receive a HarvestObject object and will be
    responsible for:
        - performing any necessary action with the fetched object (e.g
          create a CKAN package).
          Note: if this stage creates or updates a package, a reference
          to the package must be added to the HarvestObject.
          Additionally, the HarvestObject must be flagged as current.
        - creating the HarvestObject - Package relation (if necessary)
        - creating and storing any suitable HarvestObjectErrors that may
          occur.
        - returning True if everything went as expected, False otherwise.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found

    NOTE(review): this excerpt is truncated and its block structure was
    reconstructed from a flattened listing -- confirm against upstream.
    '''
    model.repo.new_revision()
    master_data = json.loads(harvest_object.content)
    domain = master_data['domain']
    group = Group.get(domain)
    if not group:
        group = Group(name=domain, description=domain)
    if 'records' in master_data:
        records = master_data['records']
        set_name = master_data['set_name']
        # Characters allowed in a package slug; built once instead of once
        # per character of every title.
        slug_chars = string.ascii_letters + '_'
        for rec in records:
            identifier, metadata, _ = rec
            if metadata:
                name = (metadata['title'][0] if len(metadata['title'])
                        else identifier)
                title = name
                # ASCII-fold the title, lower-case it, use underscores for
                # spaces and cap it at 35 characters before filtering.
                norm_title = (unicodedata.normalize('NFKD', name)
                              .encode('ASCII', 'ignore')
                              .lower().replace(' ', '_')[:35])
                slug = ''.join(e for e in norm_title if e in slug_chars)
                name = slug
                creator = (metadata['creator'][0]
                           if len(metadata['creator']) else '')
                description = (metadata['description'][0]
                               if len(metadata['description']) else '')
                pkg = Package.by_name(name)
                if not pkg:
                    pkg = Package(name=name, title=title)
                extras = {}
                for met in metadata.items():
                    key, value = met
                    if len(value) > 0:
                        if key == 'subject' or key == 'type':
                            # Subjects and types become tags.
                            for tag in value:
                                if tag:
                                    tag = munge_tag(tag[:100])
                                    tag_obj = model.Tag.by_name(tag)
                                    if not tag_obj:
                                        tag_obj = model.Tag(name=tag)
                                    if tag_obj:
                                        pkgtag = model.PackageTag(
                                            tag=tag_obj,
                                            package=pkg)
                                        Session.add(tag_obj)
                                        Session.add(pkgtag)
                        else:
                            # Every other field is flattened into one extra.
                            extras[key] = ' '.join(value)
                pkg.author = creator
                pkg.author_email = creator
                pkg.title = title
                pkg.notes = description
                pkg.extras = extras
                pkg.url = (
                    "%s?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc"
                    % (harvest_object.job.source.url, identifier))
                pkg.save()
                harvest_object.package_id = pkg.id
                Session.add(harvest_object)
                setup_default_user_roles(pkg)
                # The last web identifier becomes the resource URL. Secure
                # links are accepted too; previously only 'http://' matched,
                # so 'https://' identifiers left the resource URL empty.
                url = ''
                for ids in metadata['identifier']:
                    if ids.startswith(('http://', 'https://')):
                        url = ids
                # Unlike the package title, the resource name does not fall
                # back to the identifier.
                title = (metadata['title'][0] if len(metadata['title'])
                         else '')
                pkg.add_resource(url, description=description, name=title)
                group.add_package_by_name(pkg.name)
                subg_name = "%s - %s" % (domain, set_name)
                subgroup = Group.by_name(subg_name)
                if not subgroup:
                    subgroup = Group(name=subg_name, description=subg_name)
                subgroup.add_package_by_name(pkg.name)
                Session.add(group)
                Session.add(subgroup)
                setup_default_user_roles(group)
                setup_default_user_roles(subgroup)
        model.repo.commit()
    else:
        self._save_object_error('Could not receive any objects from fetch!'
                                , harvest_object, stage='Import')
        return False
    # ......... part of the code is omitted here .........
示例4: import_stage
# 需要导入模块: from ckan.model import Package [as 别名]
# 或者: from ckan.model.Package import url [as 别名]
def import_stage(self, harvest_object):
    """Import the metadata received in the fetch stage to a dataset and
    create groups if ones are defined. Fill in metadata from study and
    document description.

    ``harvest_object.content`` is parsed as JSON; the document behind its
    ``"url"`` entry is downloaded and parsed as XML, and the package named
    after the study's ``IDNo`` is created or updated from it.

    :param harvest_object: harvest object whose ``content`` holds the JSON.
    :returns: ``False`` when the content has no URL, the download fails or
        the XML cannot be parsed (the return value of the success path is
        not part of this excerpt).

    NOTE(review): this excerpt is truncated and its block structure was
    reconstructed from a flattened listing -- confirm against upstream.
    """
    try:
        xml_dict = {}
        xml_dict["source"] = harvest_object.content
        udict = json.loads(harvest_object.content)
        if "url" in udict:
            f = urllib2.urlopen(udict["url"]).read()
            ddi_xml = BeautifulSoup(f, "xml")
        else:
            self._save_object_error("No url in content!", harvest_object)
            return False
    except urllib2.URLError:
        self._save_object_error(
            "Could not fetch from url %s!" % udict["url"], harvest_object)
        return False
    except etree.XMLSyntaxError:
        self._save_object_error("Unable to parse XML!", harvest_object)
        return False
    model.repo.new_revision()
    study_descr = ddi_xml.codeBook.stdyDscr
    document_info = ddi_xml.codeBook.docDscr.citation
    # Prefer the study title, fall back to the document title.
    title = study_descr.citation.titlStmt.titl.string
    if not title:
        title = document_info.titlStmt.titl.string
    name = study_descr.citation.titlStmt.IDNo.string
    update = True
    pkg = Package.get(name)
    if not pkg:
        pkg = Package(name=name)
        update = False
    # First available of producer, authoring entity, other identification.
    producer = study_descr.citation.prodStmt.producer
    if not producer:
        producer = study_descr.citation.rspStmt.AuthEnty
    if not producer:
        producer = study_descr.citation.rspStmt.othId
    pkg.author = producer.string
    pkg.maintainer = producer.string
    if study_descr.citation.distStmt.contact:
        pkg.maintainer = study_descr.citation.distStmt.contact.string
    if document_info.titlStmt.IDNo:
        pkg.id = document_info.titlStmt.IDNo.string
    keywords = study_descr.stdyInfo.subject(re.compile("keyword|topcClas"))
    keywords = list(set(keywords))
    for kw in keywords:
        if kw:
            vocab = None
            kw_str = ""
            if kw.string:
                kw_str = kw.string
            if "vocab" in kw.attrs:
                vocab = kw.attrs.get("vocab", None)
            # Prefix the keyword with its vocabulary name when both exist.
            if vocab and kw.string:
                kw_str = vocab + " " + kw.string
            pkg.add_tag_by_name(munge_tag(kw_str))
    if study_descr.stdyInfo.abstract:
        description_array = study_descr.stdyInfo.abstract("p")
    else:
        description_array = study_descr.citation.serStmt.serInfo("p")
    # ``.string`` is None for a paragraph without a single text child;
    # skip those instead of letting ``join`` raise TypeError.
    pkg.notes = "<br />".join(
        [description.string for description in description_array
         if description.string])
    pkg.title = title[:100]
    pkg.url = udict["url"]
    if not update:
        # First import only: store the original XML and link to it.
        ofs = get_ofs()
        nowstr = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
        idno = study_descr.citation.titlStmt.IDNo
        agencyxml = (idno["agency"] if "agency" in idno.attrs else "") + idno.string
        label = "%s/%s.xml" % (nowstr, agencyxml)
        ofs.put_stream(BUCKET, label, f, {})
        fileurl = config.get("ckan.site_url") + h.url_for("storage_file", label=label)
        pkg.add_resource(url=fileurl, description="Original metadata record",
                         format="xml", size=len(f))
        # Look the URI up among the element's attributes, as the other
        # attribute checks in this function do: ``"URI" in tag`` inspects the
        # tag's children, and a missing <holdings> element made it raise.
        holdings = document_info.holdings
        if holdings is not None and "URI" in holdings.attrs:
            holdings_url = holdings["URI"]
        else:
            holdings_url = ""
        pkg.add_resource(url=holdings_url, description=title)
    metas = {}
    descendants = [desc for desc in document_info.descendants] + \
                  [sdesc for sdesc in study_descr.descendants]
    for docextra in descendants:
        if isinstance(docextra, Tag) and docextra:
            # Paragraphs are filed under the name of their parent element.
            if docextra.name == "p":
                docextra.name = docextra.parent.name
            # Only elements with a single text value contribute; repeated
            # names are concatenated with a space.
            if docextra.string:
                if docextra.name not in metas:
                    metas[docextra.name] = docextra.string
                else:
                    metas[docextra.name] += " " + docextra.string
    if ddi_xml.codeBook.dataDscr and not update:
        vars = ddi_xml.codeBook.dataDscr("var")
        heads = self._get_headers()
        c_heads = ["ID", "catValu", "labl", "catStat"]
        f_var = StringIO.StringIO()
        c_var = StringIO.StringIO()
        varwriter = csv.DictWriter(f_var, heads)
        codewriter = csv.DictWriter(c_var, c_heads)
        heading_row = {}
        for head in heads:
            # ......... part of the code is omitted here .........
            pass  # placeholder: the loop body is not part of this excerpt