本文整理汇总了 Python 中 eulfedora.server.Repository.get_objects_with_cmodel 方法的典型用法代码示例。如果您正苦于以下问题:Python Repository.get_objects_with_cmodel 方法的具体用法?Python Repository.get_objects_with_cmodel 怎么用?Python Repository.get_objects_with_cmodel 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 eulfedora.server.Repository 的用法示例。
在下文中一共展示了Repository.get_objects_with_cmodel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Report the missing ARTFL figures referenced by Philologic objects.

    Every object returned for the ``info:fedora/niu-objects:cmodel`` content
    model whose OBJ datastream content contains ``ARTFL-figure-missing`` is
    fetched again over HTTP and parsed; the ``sysid`` of each
    ``<span class="ARTFL-figure-missing">`` is collected.

    Two files are written to the current directory:

    * ``phil_doc.csv``: one line per matching object with the pid, the number
      of missing-figure spans and their ``;``-joined sysids.
    * ``phil_image.csv``: one row per distinct sysid with the number of times
      it occurred across all objects.

    :param argv: command-line arguments; not used by this function.
    """
    repo = Repository(root='%s/fedora/' % HOST, username='%s' % fedoraUser, password='%s' % fedoraPass)
    philologic_pids = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    # The authenticated opener does not depend on the object being processed,
    # so build it once instead of once per matching object.
    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, "%s/fedora" % HOST, fedoraUser, fedoraPass)
    fedora_opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_manager))

    substring = 'ARTFL-figure-missing'
    image_ids = []

    # ``with`` guarantees the per-document report is flushed and closed even
    # if one of the objects raises part-way through the run.
    with open('phil_doc.csv', 'w') as phil_doc:
        for p in philologic_pids:
            philologic = p.getDatastreamObject('OBJ').content
            if substring not in philologic:
                continue

            print('Processing %s' % p)
            url = '%s/fedora/objects/%s/datastreams/OBJ/content' % (HOST, p)
            soup = BeautifulSoup(fedora_opener.open(url), 'html.parser')
            images = [span['sysid'] for span in soup.find_all('span', 'ARTFL-figure-missing')]

            image_ids.extend(images)
            phil_doc.write('%s,%s,%s\n' % (p, len(images), ';'.join(images)))
            print('Successfully processed %s' % p)

    # Tally how often each image id was referenced across all documents.
    d = defaultdict(int)
    for i in image_ids:
        d[i] += 1

    with open('phil_image.csv', 'w') as outfile:
        phil_image = csv.writer(outfile)
        for key, value in d.items():
            phil_image.writerow([key, value])
示例2: main
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Store the tag-stripped OBJ content of each Philologic object as FULL_TEXT.

    For every object returned for the ``info:fedora/niu-objects:cmodel``
    content model, the OBJ datastream content is passed through
    ``strip_tags`` and saved to the object's FULL_TEXT datastream.

    :param argv: command-line arguments; not used by this function.
    """
    # Fedora connection
    fedora = Repository(root='http://localhost:8080/fedora/', username='fedoraAdmin', password='xxxxx')

    # Datastream properties applied to FULL_TEXT, in the order they are set
    full_text_properties = (
        ('label', 'Full text'),
        ('mimetype', 'text/plain'),
        ('versionable', True),
        ('state', 'A'),
        ('checksum_type', 'MD5'),
    )

    # Retrieve the objects by content model and process each one
    for item in fedora.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel'):
        print('Processing %s' % item)

        # Extract the plain text from the OBJ datastream
        plain_text = strip_tags(item.getDatastreamObject('OBJ').content)

        # Fill in FULL_TEXT and save it
        datastream = item.getDatastreamObject('FULL_TEXT')
        for attribute, value in full_text_properties:
            setattr(datastream, attribute, value)
        datastream.content = plain_text
        datastream.save()
示例3: Command
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
class Command(BaseCommand):
    '''Adds every article to the collection aliased as 'oe-collection' and appends
    the article's ARK URI to its Dublin Core identifiers.
    '''
    args = "[netid netid ...]"
    help = __doc__

    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Reports the articles that would be updated but does not save any changes'),
        )

    def handle(self, *args, **options):
        '''Update all objects found for ``Publication.ARTICLE_CONTENT_MODEL``.

        Objects are processed in pages of 100. Objects that do not exist are
        skipped; a failure on one object is reported and does not stop the run.
        With ``--noact`` the changes are made in memory but never saved.
        '''
        self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        # connection to repository
        self.repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_PASSWORD)
        pid_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)
        coll = self.repo.get_object(pid=settings.PID_ALIASES['oe-collection'])

        try:
            articles = Paginator(pid_set, 100)
        except Exception as e:
            self.output(0, "Error paginating items: : %s " % e)
            # Nothing can be processed without a paginator; falling through
            # would only fail with a NameError on the unbound ``articles``.
            return

        # process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                # print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e))
                continue

            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        continue

                    print(coll)
                    print(article.pid)
                    article.collection = coll
                    # the ARK is built from the part of the pid after the namespace
                    ark_uri = '%sark:/25593/%s' % (settings.PIDMAN_HOST, article.pid.split(':')[1])
                    article.dc.content.identifier_list.extend([ark_uri])
                    # honour --noact: the option was declared but previously ignored
                    if not options.get('noact'):
                        article.save()
                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e))

    def output(self, v, msg):
        '''simple function to handle logging output based on verbosity'''
        if self.verbosity >= v:
            self.stdout.write("%s\n" % msg)
示例4: all
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def all():
    """
    Look up every object with the collection content model.

    :returns: the result of ``Repository.get_objects_with_cmodel`` for
        ``CollectionObject.COLLECTION_CONTENT_MODEL``, with each result
        initialised as a
        :class:`~genrepo.collection.models.CollectionObject`
    """
    return Repository().get_objects_with_cmodel(
        CollectionObject.COLLECTION_CONTENT_MODEL,
        type=CollectionObject,
    )
示例5: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **kwargs):
    """Queue an AFF migration task for each disk image and report the results.

    Pids passed in ``kwargs['pids']`` are used as-is. When none are given,
    all objects with the disk image content model are inspected and those
    whose premis object format name is ``AFF`` and that are not yet migrated
    are selected. One celery task is queued per pid; the method waits for all
    of them and prints a summary followed by one line per task.
    """
    verbosity = kwargs.get('verbosity', self.v_normal)
    # pids specified on command-line take precedence
    pids = kwargs.get('pids', [])
    repo = Repository()

    def is_unmigrated_aff(disk_image):
        # check premis to find Disk Images in AFF format;
        # exclude any that have already been migrated
        if not disk_image.provenance.exists:
            return False
        premis = disk_image.provenance.content
        return bool(premis.object and premis.object.format
                    and premis.object.format.name == 'AFF'
                    and not disk_image.migrated)

    # if no pids were specified, find all AFFs
    if not pids:
        candidates = repo.get_objects_with_cmodel(DiskImage.DISKIMAGE_CONTENT_MODEL,
                                                  type=DiskImage)
        for candidate in candidates:
            # objects found by risearch *should* exist, but
            # just in case of discrepancies (hopefully only in QA),
            # ignore non-existent objects
            if not candidate.exists:
                self.stderr.write(self.style.WARNING('%s does not exist' % candidate.pid))
            elif is_unmigrated_aff(candidate):
                pids.append(candidate.pid)

    # create a celery result set and queue conversion of each pid requested
    # or found in fedora
    migration_tasks = celery.result.ResultSet([])
    for pid in pids:
        migration_tasks.add(migrate_aff_diskimage.delay(pid))

    # wait for tasks to complete
    while migration_tasks.waiting():
        try:
            migration_tasks.join()
        except Exception:
            # exceptions from tasks get propagated here, but ignore
            # them and report based on success/failure
            pass

    failure_word = 'some' if migration_tasks.failed() else 'no'
    print('%d migrations completed, %s failures' %
          (migration_tasks.completed_count(), failure_word))

    for task_result in migration_tasks.results:
        outcome = 'Error' if task_result.state == celery.states.FAILURE else 'Success'
        print('%s: %s' % (outcome, task_result.result))
示例6: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **kwargs):
    """Print the pid and content label of every AD1 disk image.

    The image format is read from the premis object format name of each
    object's provenance datastream. Objects for which no format could be
    read produce a warning on stderr when verbosity is at least normal.
    """
    verbosity = kwargs.get('verbosity', self.v_normal)
    disk_images = Repository().get_objects_with_cmodel(DiskImage.DISKIMAGE_CONTENT_MODEL,
                                                       type=DiskImage)
    for disk_image in disk_images:
        # use premis object format to distinguish AD1 disk images
        image_format = None
        if disk_image.provenance.exists:
            premis = disk_image.provenance.content
            if premis.object and premis.object.format:
                image_format = premis.object.format.name

        if image_format == 'AD1':
            print('%s %s' % (disk_image.pid, disk_image.content.label))
        elif image_format is None and verbosity >= self.v_normal:
            self.stderr.write('Warning: %s has no premis object format' % disk_image.pid)
示例7: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **options):
    """Add articles to the 'oe-collection' collection and prepare them for OAI.

    Processes the pids given as positional arguments, or every object found
    for ``Article.ARTICLE_CONTENT_MODEL`` when none are given. Each existing
    article is assigned to the collection, given an OAI item id when it is
    published, and has ``_prep_dc_for_oai()`` applied. Articles are saved
    unless ``--noact`` is set. A summary of the counters is written at the end.

    :raises CommandError: when no username is given or the pid list cannot
        be retrieved.
    """
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # check required options
    if not options['username']:
        raise CommandError('Username is required')
    # an empty password is falsy, so a single truth test covers both cases
    if not options['password']:
        options['password'] = getpass()

    # connection to repository
    repo = Repository(username=options['username'], password=options['password'])
    coll = repo.get_object(pid=settings.PID_ALIASES['oe-collection'])

    try:
        if args:
            # if pids specified, use that list
            pid_set = [repo.get_object(pid=p, type=Article) for p in args]
        else:
            # search for Articles.
            pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)
    except Exception as e:
        raise CommandError('Error getting pid list (%s)' % e)

    try:
        articles = Paginator(pid_set, 20)
        counts['total'] = articles.count
    except Exception as e:
        self.output(0, "Error paginating items: : %s " % e)
        # Without a usable paginator there is nothing to process; carrying on
        # would fail on the unbound ``articles`` or repeat the same error.
        return

    # process all Articles
    for p in articles.page_range:
        try:
            objs = articles.page(p).object_list
        except Exception as e:
            # print error and go to next iteration of loop
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for article in objs:
            try:
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist" % article.pid)
                    counts['skipped'] += 1
                    continue

                self.output(0, "Processing %s" % article.pid)

                # Add to collection
                article.collection = coll
                self.output(1, "Adding %s to collection %s" % (article.pid, coll.pid))
                counts['collection'] += 1

                # Add itemID for OAI
                if article.is_published:
                    article.oai_itemID = "oai:ark:/25593/%s" % article.noid
                    self.output(1, "Adding itemID to %s" % article.pid)
                    counts['itemId'] += 1

                # Modify DC NS
                article._prep_dc_for_oai()
                self.output(1, "Modified DC namespaces for %s" % (article.pid))
                counts['DC'] += 1

                # save article
                if not options['noact']:
                    article.save()
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s " % (article.pid, e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Added to collection: %s\n" % counts['collection'])
    self.stdout.write("Added itemID: %s\n" % counts['itemId'])
    self.stdout.write("Modified DC NS: %s\n" % counts['DC'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
示例8: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **options):
    """Set the content mimetype of Video objects to ``video/quicktime``.

    Pids are read, one per line, from the file named by ``options['file']``.
    When that file is empty, every object found for
    ``Video.VIDEO_CONTENT_MODEL`` is processed instead. Objects are saved
    unless ``--noact`` is set. A summary of the counters is written at the end.

    :raises CommandError: when the username or file option is missing or the
        objects cannot be retrieved.
    """
    # counters
    counts = defaultdict(int)

    # check required options
    if not options['username']:
        raise CommandError('Username is required')
    # an empty password is falsy, so a single truth test covers both cases
    if not options['password']:
        options['password'] = getpass()

    if not options['file']:
        raise CommandError('File is required')

    with open(options['file'], 'r') as myfile:
        data = myfile.read().splitlines()

    # connection to repository
    repo = Repository(username=options['username'], password=options['password'])

    try:
        if data:
            # if pids specified, use that list
            pid_set = [repo.get_object(pid=p, type=Video) for p in data]
        else:
            # search for Videos
            pid_set = repo.get_objects_with_cmodel(Video.VIDEO_CONTENT_MODEL, Video)
    except Exception as e:
        raise CommandError('Error gettings pids (%s)' % e)

    try:
        objects = Paginator(pid_set, 20)
        counts['total'] = objects.count
    except Exception as e:
        self.output("Error paginating items: : %s " % e)
        # Without a usable paginator there is nothing to process; carrying on
        # would fail on the unbound ``objects`` or repeat the same error.
        return

    # process all Objects
    for p in objects.page_range:
        try:
            objs = objects.page(p).object_list
        except Exception as e:
            # print error and go to next iteration of loop
            self.output("Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for a in objs:
            try:
                if not a.exists:
                    self.output("Skipping %s because pid does not exist" % a.pid)
                    counts['skipped'] += 1
                    continue

                self.output("Processing %s" % a.pid)
                a.content.mimetype = 'video/quicktime'

                # save object
                if not options['noact']:
                    a.save("cleanup mimetype")
                    self.output("SAVED %s" % a.pid)
                    counts['saved'] += 1
                counts['processed'] += 1
            except Exception as e:
                self.output("Error processing pid: %s : %s " % (a.pid, e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Total number processed: %s\n" % counts['processed'])
    self.stdout.write("Total number saved: %s\n" % counts['saved'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
示例9: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **options):
    """Build division, author and/or lead-author reports over all articles.

    Every object found for ``Article.ARTICLE_CONTENT_MODEL`` is passed to
    ``self.division``, ``self.author`` and ``self.lead`` according to the
    ``div``, ``author`` and ``lead`` options. The counters those methods
    maintain (``self.div_counts``, ``self.author_counts``,
    ``self.lead_counts``) are then written to ``division_report.csv``,
    ``author_report.csv`` and ``lead_report.csv`` in the current directory.

    :raises CommandError: when none of div/author/lead is requested or no
        username is given.
    """
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # check required options
    if not (options['div'] or options['author'] or options['lead']):
        raise CommandError('At least one of the options div, author or lead is required')

    if not options['username']:
        raise CommandError('Username is required')
    # an empty password is falsy, so a single truth test covers both cases
    if not options['password']:
        options['password'] = getpass()

    # connection to repository
    repo = Repository(username=options['username'], password=options['password'])
    pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)

    try:
        articles = Paginator(pid_set, 100)
        self.counts['total'] = articles.count
    except Exception as e:
        self.output(0, "Error paginating items: : %s " % e)
        # Without a usable paginator there is nothing to process; carrying on
        # would fail on the unbound ``articles`` or repeat the same error.
        return

    # process all Articles
    for p in articles.page_range:
        try:
            objs = articles.page(p).object_list
        except Exception as e:
            # print error and go to next iteration of loop
            self.output(0, "Error getting page: %s : %s " % (p, e))
            self.counts['errors'] += 1
            continue

        for article in objs:
            try:
                if not article.exists:
                    self.output(0, "Skipping %s because pid does not exist" % article.pid)
                    self.counts['skipped'] += 1
                    continue

                self.output(2, "Processing %s" % article.pid)
                if options['div']:
                    self.division(article)
                if options['author']:
                    self.author(article)
                if options['lead']:
                    self.lead(article)
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s " % (article.pid, e))
                self.counts['errors'] += 1

    # write files; each report is opened in a ``with`` block so that it is
    # flushed and closed before the command returns
    if options['div']:
        with open("division_report.csv", 'w') as report:
            writer = csv.writer(report)
            writer.writerow(['Division', 'Count'])
            for key, count in self.div_counts.items():
                writer.writerow([key, count])

    if options['author']:
        with open("author_report.csv", 'w') as report:
            writer = csv.writer(report)
            writer.writerow(['Author', 'Division', 'Department', 'Count'])
            for netid, count in self.author_counts.items():
                try:
                    person = User.objects.get(username=netid).get_profile().esd_data()
                    writer.writerow([person.directory_name, person.division_name, person.department_shortname, count])
                except (User.DoesNotExist, UserProfile.DoesNotExist, EsdPerson.DoesNotExist):
                    self.output(0, "At least one part (User, Profile, ESD) for netid %s could not be found" % netid)

    if options['lead']:
        with open("lead_report.csv", 'w') as report:
            writer = csv.writer(report)
            writer.writerow(['Division', 'Count'])
            for key, count in self.lead_counts.items():
                writer.writerow([key, count])

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % self.counts['total'])
    self.stdout.write("Skipped: %s\n" % self.counts['skipped'])
    self.stdout.write("Errors: %s\n" % self.counts['errors'])
示例10: main
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Replace missing-figure placeholders in Philologic documents with links.

    For every object returned for the ``info:fedora/niu-objects:cmodel``
    content model:

    * existing ``<a class="ARTFL-figure">`` links that start with
      ``/fedora/repository/`` are rewritten to ``/islandora/object/``;
    * each ``<span class="ARTFL-figure-missing">`` is replaced by a link to
      the object whose ``dc:title`` equals the span's ``sysid`` (without its
      file extension), when the resource index finds one;
    * the OBJ datastream is saved and a GSearch index update is requested.

    Figures that could not be resolved are logged per document in
    ``phil_doc_dev.csv`` and tallied per figure in ``phil_image_dev.csv``.

    :param argv: command-line arguments; not used by this function.
    """
    # Connect to repository
    repo = Repository(root='%s/fedora/' % HOST, username='%s' % fedoraUser, password='%s' % fedoraPass)
    # Get philologic pids using content model
    philologic_pids = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    old_prefix = '/fedora/repository/'
    image_ids = []

    # Logging; ``with`` makes sure the report is closed even if an object fails
    with open('phil_doc_dev.csv', 'w') as phil_doc:
        for pid in philologic_pids:
            # Figures of this document that could not be resolved
            images = []

            # Get the OBJ's content as string
            philologic = pid.getDatastreamObject('OBJ').content
            # Take the opportunity to replace deprecated HTML entity reference
            # NOTE(review): the pattern below is a literal character, not an
            # entity reference -- confirm it was not decoded by an export step.
            philologic = re.sub("˙", ".", philologic)
            # Load OBJ content into soup. Must specify html5lib parser, b/c lxml causes fatal exception (memory leak)
            soup = BeautifulSoup(philologic, "html5lib")
            # Find all ARTFL spans and <a>'s
            spans = soup.find_all("span", "ARTFL-figure-missing")
            links = soup.find_all("a", "ARTFL-figure")

            # Replace /fedora/repository with /islandora/object in existing links
            for a in links:
                href = a['href']
                if href.startswith(old_prefix):
                    a['href'] = '/islandora/object/%s' % href[len(old_prefix):]

            for span in spans:
                # Retrieve the sysid and strip the file format.
                title = span['sysid'].split('.')[0]
                # Use sysid as title to send RI query for pid
                # NOTE(review): the title is interpolated into the query
                # unescaped; a title containing a double quote breaks the query.
                results = repo.risearch.sparql_query('select ?pid where {?pid <dc:title> "%s"}' % title)
                try:
                    # sparql_query returns CSV object; next will retrieve first row.
                    # If no results, throw exception and log that image
                    p = next(results)['pid'].replace('info:fedora/', '')
                    # Create <a> tag with @href pointing to object
                    new_tag = soup.new_tag("a", href="/islandora/object/%s/datastream/OBJ/view" % p)
                    # B/c it's a reserved word, we have to add @class separately
                    new_tag['class'] = "ARTFL-figure"
                    # Grab and add the <span> string
                    new_tag.string = span.string
                    # Replace <span> with <a>
                    span.replace_with(new_tag)
                    print("Successfully changed %s in %s" % (title, pid))
                except Exception:
                    # Still best-effort for any per-figure failure, but no
                    # longer swallows KeyboardInterrupt / SystemExit.
                    print("Failed to locate %s in %s" % (title, pid))
                    # Logging
                    images.append(title)

            # Retrieve entire OBJ datastream
            obj = pid.getDatastreamObject('OBJ')
            # Replace OBJ content with soup. Encoding as html to maintain entity references.
            obj.content = soup.encode(formatter="html")
            # Save and we're done.
            obj.save()

            # Because GSearch isn't listening, we have to index the update
            url = '%s/fedoragsearch/rest?operation=updateIndex&action=fromPid&value=%s' % (HOST, pid)
            gsearchOpener.open(url)

            # Rest is all logging not founds and errors
            image_ids.extend(images)
            phil_doc.write('%s,%s,%s\n' % (pid, len(images), ';'.join(images)))

    # Tally how often each unresolved figure was referenced
    d = defaultdict(int)
    for i in image_ids:
        d[i] += 1

    with open('phil_image_dev.csv', 'w') as outfile:
        phil_image = csv.writer(outfile)
        for key, value in d.items():
            phil_image.writerow([key, value])
示例11: Command
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
class Command(BaseCommand):
    # Raw string: the text is reused as the command's help, and a plain
    # string would turn the '\r' below into a real carriage return.
    r'''Fetches `~openemory.publication.models.Article` objects from Fedora and fixes the DC and MODS checksumes:
    1. Replaces '\r' with '' in abstract field.
    2. Save object. Note: this will make a new version of the mods and copy some fields to the DC
    If PIDs are provided in the arguments, that list of pids will be used instead of searching Fedora.
    '''
    args = "[pid pid ...]"
    help = __doc__

    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Reports the pid and total number of Articles that would be processed but does not really do anything.'),
        make_option('--username',
                    action='store',
                    help='Username of fedora user to connect as'),
        make_option('--password',
                    action='store',
                    help='Password for fedora user, password= will prompt for password'),
        make_option('--host',
                    action='store',
                    default=settings.FEDORA_ROOT,
                    help='Hostname of fedora instance. Defaults to localsettings:FEDORA_ROOT.'),
        )

    def handle(self, *args, **options):
        '''Strip carriage returns from article abstracts whose DC checksum is invalid.

        Processes the pids given as positional arguments (only those that are
        articles), or every object found for
        ``Publication.ARTICLE_CONTENT_MODEL`` when none are given. Objects are
        saved unless ``--noact`` is set. A summary of the counters is written
        at the end.

        :raises CommandError: when no username is given.
        '''
        self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        # counters
        counts = defaultdict(int)

        # check required options
        if not options['username']:
            raise CommandError('Username is required')
        # an empty password is falsy, so a single truth test covers both cases
        if not options['password']:
            options['password'] = getpass()

        # connection to repository
        self.repo = Repository(options['host'], username=options['username'],
                               password=options['password'])

        if args:
            # if pids specified, use that list
            article_set = self.get_pids(args)
        else:
            # search for Articles in Fedora.
            article_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)

        # process all Articles
        for a in article_set:
            try:
                self.output(1, "Processing %s" % a.pid)
                mods = a.descMetadata.content
                if (mods.abstract is not None) \
                        and (mods.abstract.text) \
                        and ('\r' in mods.abstract.text) \
                        and (not a.dc.validate_checksum()):
                    mods.abstract.text = mods.abstract.text.replace('\r', '')
                    # save article
                    try:
                        if not options['noact']:
                            a.save("Removing backslash-r to fix checksums")
                    except Exception as e:
                        self.output(0, "Error processing pid: %s : %s " % (a.pid, e))
                        counts['errors'] += 1
                    else:
                        # only count the article as fixed when the save did not fail
                        counts['fixed'] += 1
                else:
                    self.output(1, "Skipping %s" % a.pid)
                    counts['skip'] += 1
            except Exception as e:
                self.output(1, "Error on %s: %s" % (a.pid, e))
                # keep the error total in the summary accurate
                counts['errors'] += 1

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Fixed: %s\n" % counts['fixed'])
        self.stdout.write("Skipped: %s\n" % counts['skip'])
        self.stdout.write("Errors: %s\n" % counts['errors'])

    def get_pids(self, pids):
        '''Yield a Publication for each pid whose first content model is the article model.

        Objects that report no content models are skipped rather than
        raising ``IndexError``.
        '''
        for p in pids:
            obj = self.repo.get_object(pid=p, type=Publication)
            models = obj.get_models()
            if models and str(models[0]) == Publication.ARTICLE_CONTENT_MODEL:
                yield obj
#.........这里部分代码省略.........
示例12: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **options):
    """Rebuild the DC identifier list of articles and clear their DC relations.

    Processes the pids given as positional arguments, or every object found
    for ``Publication.ARTICLE_CONTENT_MODEL`` when none are given. For each
    existing article the DC identifiers are replaced by, in order: the PMC id
    and PMC access URL (when contentMetadata exists and is non-empty), the
    MODS ARK URI (when set) and the pid. The DC relation list is emptied.
    Articles are saved unless ``--noact`` is set.

    :raises CommandError: when no username is given or the pids cannot be
        retrieved.
    """
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # check required options
    if not options['username']:
        raise CommandError('Username is required')
    # an empty password is falsy, so a single truth test covers both cases
    if not options['password']:
        options['password'] = getpass()

    # connection to repository
    repo = Repository(username=options['username'], password=options['password'])

    try:
        if args:
            # if pids specified, use that list
            pid_set = [repo.get_object(pid=p, type=Publication) for p in args]
        else:
            # search for Articles
            pid_set = repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, Article)
    except Exception as e:
        raise CommandError('Error gettings pids (%s)' % e)

    try:
        articles = Paginator(pid_set, 20)
        counts['total'] = articles.count
    except Exception as e:
        self.output(0, "Error paginating items: : %s " % e)
        # Without a usable paginator there is nothing to process; carrying on
        # would fail on the unbound ``articles`` or repeat the same error.
        return

    # process all Articles
    for p in articles.page_range:
        try:
            objs = articles.page(p).object_list
        except Exception as e:
            # print error and go to next iteration of loop
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for article in objs:
            try:
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist" % article.pid)
                    counts['skipped'] += 1
                    continue

                self.output(0, "Processing %s" % article.pid)

                mods = article.descMetadata.content
                nlm = article.contentMetadata.content if article.contentMetadata.exists else None
                identifiers = []

                # PMC info
                if nlm:
                    pmc = nlm.docid
                    pmc_id = 'PMC%s' % pmc
                    access_url = pmc_access_url(pmc)
                    identifiers.extend([pmc_id, access_url])

                if mods.ark_uri:
                    identifiers.append(mods.ark_uri)

                identifiers.append(article.pid)
                article.dc.content.identifier_list = identifiers

                # REMOVE dc.relation
                article.dc.content.relation_list = []

                # save article
                if not options['noact']:
                    article.save()
                    self.output(1, "SAVED")
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s " % (article.pid, e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
示例13: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *pids, **options):
    """Point the ARK targets of volumes at their landing page and PDF urls.

    Processes the pids given on the command line, or every object found for
    ``Volume.VOLUME_CONTENT_MODEL`` when none are given. For each existing
    volume whose DC identifier is an ARK, the unqualified target is set to
    ``self.volume_url(obj)`` and the ``PDF`` qualified target to
    ``self.pdf_url(obj)``. In dry-run mode nothing is updated but the
    counters are still incremented. A one-line summary is written when
    verbosity is at least normal.

    :raises CommandError: when the pid manager client cannot be created.
    """
    dry_run = options.get('dry_run', False)
    verbosity = int(options.get('verbosity', self.v_normal))
    repo = Repository()

    try:
        pidman = DjangoPidmanRestClient()
    except Exception as err:
        # error if pid manager config options not in localsettings
        raise CommandError(err)

    # pids from the command line restrict processing to those objects;
    # otherwise, look for all volume objects in fedora
    if pids:
        volumes = [repo.get_object(pid, type=Volume) for pid in pids]
    else:
        volumes = repo.get_objects_with_cmodel(Volume.VOLUME_CONTENT_MODEL,
                                               type=Volume)

    stats = defaultdict(int)
    chatty = verbosity >= self.v_normal

    for volume in volumes:
        if not volume.exists:
            if chatty:
                self.stdout.write('%s does not exist or is not accessible' % volume.pid)
            stats['skipped'] += 1
            continue

        stats['objs'] += 1

        if not is_ark(volume.dc.content.identifier):
            continue

        noid = parse_ark(volume.dc.content.identifier)['noid']

        try:
            ark_info = pidman.get_ark(noid)
        except Exception as err:
            # requested ARK is not in the configured pid manager
            # (this should ONLY happen in dev/QA)
            if chatty:
                if '404: NOT FOUND' in str(err):
                    self.stdout.write('Error retriving ARK information for %s: Not Found' % volume.pid)
                else:
                    self.stdout.write('Error retriving ARK information for %s' % volume.pid)
            stats['skipped'] += 1
            continue

        # update unqualified ark to resolve to readux volume landing page
        if not dry_run:
            pidman.update_ark_target(noid,
                                     target_uri=self.volume_url(volume),
                                     active=True)

        # count as updated in dry run mode (would be updated)
        stats['updated'] += 1

        # a qualified ARK target is expected for the PDF; update it whether
        # it currently exists or not
        if not dry_run:
            pidman.update_ark_target(noid, 'PDF',
                                     target_uri=self.pdf_url(volume),
                                     active=True)
        # FIXME: catch possible exceptions here?

    # output summary
    if chatty:
        template = 'Processed %(objs)d object%%s; skipped %(skipped)d,%%s updated %(updated)d' % stats
        plural = 's' if stats['objs'] != 1 else ''
        would_have = ' would have' if dry_run else ''
        self.stdout.write(template % (plural, would_have))
示例14: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *args, **options):
self.verbosity = int(options['verbosity']) # 1 = normal, 0 = minimal, 2 = all
self.v_normal = 1
#counters
self.counts = defaultdict(int)
#connection to repository
repo = Repository(username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_MANAGEMENT_PASSWORD)
#Symplectic-Elements setup
self.session = requests.Session()
self.session.auth = (settings.SYMPLECTIC_USER, settings.SYMPLECTIC_PASSWORD)
self.session.verify=False
self.session.stream=True
self.session.headers.update({'Content-Type': 'text/xml'})
self.pub_query_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "publications")
self.pub_create_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "publication/records/manual")
self.relation_create_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "relationships")
#if pids specified, use that list
try:
if len(args) != 0:
pids = list(args)
pid_set = [repo.get_object(pid=p,type=Article) for p in pids]
else:
#search for Articles.
pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)
except Exception as e:
raise CommandError('Error getting pid list (%s)' % e.message)
try:
articles = Paginator(pid_set, 20)
self.counts['total'] = articles.count
except Exception as e:
self.output(0, "Error paginating items: : %s " % (e.message))
#process all Articles
for p in articles.page_range:
try:
objs = articles.page(p).object_list
except Exception as e:
#print error and go to next iteration of loop
self.output(0,"Error getting page: %s : %s " % (p, e.message))
self.counts['errors'] +=1
continue
for article in objs:
try:
if not article.exists:
self.output(1, "Skipping %s because pid does not exist" % article.pid)
self.counts['skipped'] +=1
continue
title = article.descMetadata.content.title_info.title if (article.descMetadata.content.title_info and article.descMetadata.content.title_info.title) else None
if title is None or title == '':
self.output(1, "Skipping %s because OE Title does not exist" % (article.pid))
self.counts['skipped'] +=1
continue
if not article.is_published:
self.output(1, "Skipping %s because pid is not published" % article.pid)
self.counts['skipped'] +=1
continue
# try to detect article by PMC
if article.pmcid and not options['force']:
response = self.session.get(self.pub_query_url, params = {'query' : 'external-identifiers.pmc="PMC%s"' % article.pmcid, 'detail': 'full'})
entries = load_xmlobject_from_string(response.raw.read(), OESympImportArticle).entries
self.output(2, "Query for PMC Match: GET %s %s" % (response.url, response.status_code))
if response.status_code == 200:
if len(entries) >= 1:
self.output(1, "Skipping %s because PMC PMC%s already exists" % (article.pid, article.pmcid))
self.counts['skipped'] +=1
if options['rel']:
symp_pub, relations = article.as_symp(source=entries[0].source, source_id=entries[0].source_id)
self.process_relations(entries[0].source_id, relations, options)
sleep(1)
continue
else:
self.output(1, "Skipping %s because trouble with request %s %s" % (article.pid, response.status_code, entries[0].title))
self.counts['skipped'] +=1
continue
# try to detect article by Title if it does not have PMC
if not options['force']:
response = self.session.get(self.pub_query_url, params = {'query' : 'title~"%s"' % title, 'detail': 'full'})
entries = load_xmlobject_from_string(response.raw.read(), OESympImportArticle).entries
# Accouont for mutiple results
titles = [e.title for e in entries]
self.output(2, "Query for Title Match: GET %s %s" % (response.url, response.status_code))
if response.status_code == 200:
found = False
for t in titles:
success, percent = percent_match(title, t, 90)
self.output(1, "Percent Title Match '%s' '%s' %s " % (title, t, percent))
#.........这里部分代码省略.........
示例15: handle
# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def handle(self, *pids, **options):
# testPid
# settings.PIDMAN_HOST = 'https://testpid.library.emory.edu/' # the web root where we'll ask for pids
# settings.PIDMAN_USER = ''
# settings.PIDMAN_PASSWORD = ''
# settings.PIDMAN_DOMAIN = 'https://testpid.library.emory.edu/domains/18/' # default domain (e.g. when minting pids)
# prodPid
# PIDMAN_HOST = 'https://pidqas.library.emory.edu/'
# get a pidman client
client = DjangoPidmanRestClient()
# testFedora
repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_MANAGEMENT_PASSWORD)
# prodFedora
#repo = Repository('https://fedora.library.emory.edu:8443/fedora/', username='ppppppp', password='pppppp')
# constants
REPOMGMT = Namespace(rdflib.URIRef('info:fedora/fedora-system:def/relations-external#'))
vol_list = repo.get_objects_with_cmodel('info:fedora/emory-control:ScannedVolume-1.0')
print "Found " + str(len(vol_list)) + " books."
# Get a file logger
filename = "ecds/" + str(datetime.datetime.now().strftime("%I-%M-%S %B-%d-%Y")) + ".csv"
f = open(filename, 'w+')
# report all books
f.write("Found " + str(len(vol_list)) + " books.")
f.write("\n")
# report titles
f.write("TYPE,")
f.write("PID,")
f.write("NOID,")
f.write("O_URI,")
f.write("N_URI,")
f.write("PAGE,")
f.write("POST_URI,")
# f.write("POST_PDF_URI,")
f.write("\n")
# go over all books
for vol in vol_list:
volDobj = repo.get_object(vol.pid.rstrip(), type=ScannedVolume)
# get attributes
pid = volDobj.pid
noid = pid.split(":")[1]
try:
pidmanObj = client.get_pid("ark", noid)
except Exception as e:
f.write(str(pid))
f.write("\n")
f.write(str(e))
continue # continue to the next item
oriTargetUri = pidmanObj["targets"][0]["target_uri"]
newTargetUri = oriTargetUri
# if it has emory%3A
if newTargetUri.find("emory%3A") != -1:
newTargetUri = newTargetUri.replace("emory%3A", "emory:")
# if it has readux%3A
if newTargetUri.find("readux%3A") != -1:
newTargetUri = newTargetUri.replace("readux%3A", "emory:")
# if it has readux:
if newTargetUri.find("readux:") != -1:
newTargetUri = newTargetUri.replace("readux:", "emory:")
# if it has webprd001.library.emory.edu/readux
if newTargetUri.find("webprd001.library.emory.edu/readux") != -1:
newTargetUri = newTargetUri.replace("webprd001.library.emory.edu/readux", "testreadux.ecds.emory.edu")
# if it has webprd001.library.emory.edu
if newTargetUri.find("webprd001.library.emory.edu/") != -1:
newTargetUri = newTargetUri.replace("webprd001.library.emory.edu/", "testreadux.ecds.emory.edu/")
# if it has /readux/
if newTargetUri.find("/readux/") != -1:
newTargetUri = newTargetUri.replace("/readux/", "/")
newTargetUri = unicode(newTargetUri)
# log attributes
f.write("BOOK" + ", ")
f.write(str(pid) + ", ")
f.write(str(noid) + ", ")
f.write(str(oriTargetUri) + ", ")
f.write(str(newTargetUri) + ", ")
f.write(str(len(volDobj.pageDObjs)) + ", ")
f.write("\n")
# report attributes
#.........这里部分代码省略.........