本文整理汇总了Python中eulfedora.server.Repository类的典型用法代码示例。如果您正苦于以下问题:Python Repository类的具体用法?Python Repository怎么用?Python Repository使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Repository类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(argv):
    """Write a CSV report of per-object and cumulative datastream size.

    Queries the resource index for every Fedora object together with its
    creation date (ascending by date), sums the ``size`` of each object's
    datastreams and writes one row per object:
    ``pid, created date, object total, running repository total``.

    :param argv: command-line arguments; not read by this function
    """
    # 'wb' is the mode the Python 2 csv module expects.  The ``with`` block
    # guarantees the report is flushed and closed even if a repository
    # call fails part-way through.
    with open("C:/Users/a1691506/Desktop/repo_size.csv", 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_MINIMAL)
        repo = Repository(root='%s/fedora/' % HOST,
                          username='%s' % fedoraUser,
                          password='%s' % fedoraPass)
        risearch = repo.risearch
        query = 'select ?pid ?date where {?pid <fedora-model:hasModel> <info:fedora/fedora-system:FedoraObject-3.0> ; <fedora-model:createdDate> ?date . } ORDER BY ASC(?date)'
        pids = risearch.find_statements(query, language='sparql',
                                        type='tuples', flush=None)
        repo_size = 0
        for dictionary in pids:
            # result values are full URIs; strip the prefix to get the pid
            pid = dictionary['pid'].replace('info:fedora/', '')
            dateCreated = dictionary['date']
            obj = repo.get_object(pid)
            total_size = sum(obj.getDatastreamObject(datastream).size
                             for datastream in obj.ds_list)
            repo_size += total_size
            print("Total size for %s: %s" % (pid, total_size))
            csvwriter.writerow([pid, dateCreated, total_size, repo_size])
示例2: PdfToTextTest
class PdfToTextTest(unittest.TestCase):
    """Exercise ``pdf_to_text`` on a local file and on a Fedora datastream."""

    fixture_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'fixtures')
    pdf_filepath = os.path.join(fixture_dir, 'test.pdf')
    # text expected to be extracted from the fixture PDF
    pdf_text = 'This is a short PDF document to use for testing.'

    def setUp(self):
        """Create a ``TestPdfObject`` in Fedora holding the fixture PDF."""
        self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                               settings.FEDORA_PASSWORD)
        # PDF content is binary: open with 'rb' so the bytes are not
        # altered by text-mode newline translation.  The object is saved
        # inside the block because the datastream content is the open file.
        with open(self.pdf_filepath, 'rb') as pdf:
            self.pdfobj = self.repo.get_object(type=TestPdfObject)
            self.pdfobj.label = 'eulindexer test pdf object'
            self.pdfobj.pdf.content = pdf
            self.pdfobj.save()

    def tearDown(self):
        """Remove the object created in :meth:`setUp`."""
        self.repo.purge_object(self.pdfobj.pid)

    def test_file(self):
        # extract text from a pdf from a file on the local filesystem
        with open(self.pdf_filepath, 'rb') as pdf:
            text = pdf_to_text(pdf)
        self.assertEqual(self.pdf_text, text)

    def test_object_datastream(self):
        # extract text from a pdf datastream in fedora
        pdfobj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
        text = pdf_to_text(pdfobj.pdf.content)
        self.assertEqual(self.pdf_text, text)
示例3: process
def process(self, input):
    """
    Store the input image in a Fedora datastream and pass it through.

    The array is encoded with ``Image.fromarray(...).save`` in the format
    named by the ``format`` parameter, then written to the ``DATASTREAM``
    of the object identified by the ``pid`` parameter.  ``None`` input is
    ignored and ``None`` is returned.

    Raises ``exceptions.NodeError`` when encoding fails with ``IOError``.
    """
    if input is None:
        return
    params = self._params
    repo = Repository(params.get("url"), params.get("username"),
                      params.get("password"))
    try:
        buf = StringIO()
        Image.fromarray(input).save(buf, params.get("format").upper())
    except IOError:
        raise exceptions.NodeError(
            "Error obtaining image buffer in format: %s" %
            params.get("format").upper(), self)
    # NOTE(review): the proxy class is presumably built around the
    # configured dsid; confirm against get_fedora_proxy_class
    proxy_class = get_fedora_proxy_class(params.get("dsid"))
    obj = repo.get_object(params.get("pid"), type=proxy_class)
    datastream = obj.DATASTREAM
    datastream.content = buf
    datastream.label = "Test Ingest Datastream 1"
    datastream.mimetype = "image/%s" % params.get("format")
    obj.save()
    return input
示例4: main
def main(argv):
    """Purge the ``PDF`` datastream from page objects that disseminate one.

    Runs a SPARQL query against the resource index for objects with the
    ``islandora:pageCModel`` content model and a PDF dissemination, then
    purges the ``PDF`` datastream of every pid returned.

    :param argv: command-line arguments; not read by this function
    """
    repo = Repository(root='%s/fedora/' % HOST, username='%s' % fedoraUser,
                      password='%s' % fedoraPass)
    sparql = 'select ?pid where {?pid <fedora-view:disseminates> ?ds . ?pid <fedora-model:hasModel> <info:fedora/islandora:pageCModel> . ?ds <fedora-view:disseminationType> <info:fedora/*/PDF>}'
    rows = repo.risearch.find_statements(sparql, language='sparql',
                                         type='tuples', flush=None)
    for row in rows:
        # every value in the result row is treated as an object URI
        for key in row:
            pid = row[key].replace('info:fedora/', '')
            obj = repo.get_object(pid)
            # Datastream handle is not read below.  An earlier
            # (disabled) size tally was its only consumer.
            pdf = obj.getDatastreamObject("PDF")
            obj.api.purgeDatastream(pid, "PDF")
            obj.save()
            print("Purged PDF for %s" % pid)
示例5: Command
class Command(BaseCommand):
    ''' This command run through all the articles and makes sure that journal titles and publishers match against Sherpa Romeo
    '''
    # NOTE(review): the docstring above doubles as the command's help text
    # (``help = __doc__``) and is left untouched, but it does not describe
    # what ``handle`` does: the code assigns every article to the
    # 'oe-collection' object and appends an ARK URI to its DC identifiers.
    args = "[netid netid ...]"
    help = __doc__
    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Fixed all caps title in articles'),
        )

    def handle(self, *args, **options):
        """Add each article to the collection and record its ARK URI.

        Pages through all objects with the article content model.  Errors
        on a page or on a single article are reported via :meth:`output`
        and processing continues with the next one.
        """
        self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        # connection to repository
        self.repo = Repository(settings.FEDORA_ROOT,
                               username=settings.FEDORA_MANAGEMENT_USER,
                               password=settings.FEDORA_PASSWORD)
        pid_set = self.repo.get_objects_with_cmodel(
            Publication.ARTICLE_CONTENT_MODEL, type=Publication)
        coll = self.repo.get_object(pid=settings.PID_ALIASES['oe-collection'])

        try:
            articles = Paginator(pid_set, 100)
        except Exception as e:
            # str(e) replaces the deprecated ``e.message`` attribute
            self.output(0, "Error paginating items: : %s " % (e))
            # without a paginator there is nothing to iterate; stop here
            # instead of failing with NameError on ``articles`` below
            return

        # process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                # print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e))
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        continue
                    print(coll)
                    print(article.pid)
                    article.collection = coll
                    # ARK name is the part of the pid after the pidspace
                    ark_uri = '%sark:/25593/%s' % (settings.PIDMAN_HOST,
                                                   article.pid.split(':')[1])
                    article.dc.content.identifier_list.extend([ark_uri])
                    article.save()
                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e))

    def output(self, v, msg):
        '''simple function to handle logging output based on verbosity'''
        if self.verbosity >= v:
            self.stdout.write("%s\n" % msg)
示例6: _load_postcard
def _load_postcard(self, label, description, subjects, filename):
    '''Create a postcard object and load to fedora.

    The saved object is appended to ``self.postcards``.

    :param label: object label and dc:title
    :param description: values added to dc:description (passed to
        ``description_list.extend``, so presumably a list -- TODO confirm)
    :param subjects: list of subjects to be set in dc:subject
    :param filename: filename for image content, joined onto
        ``fixture_path``
    '''
    # NOTE: image object init here somewhat redundant with current postcard ingest logic
    repo = Repository()
    obj = repo.get_object(type=ImageObject)
    obj.label = label
    obj.owner = settings.FEDORA_OBJECT_OWNERID
    obj.dc.content.title = obj.label
    obj.dc.content.description_list.extend(description)
    obj.dc.content.subject_list.extend(subjects)
    # common DC for all postcards
    obj.dc.content.type = 'image'
    # FIXME: configure this somewhere?
    obj.dc.content.relation_list.extend([settings.RELATION,
                                         'http://beck.library.emory.edu/greatwar/'])
    # add relation to postcard collection
    obj.rels_ext.content.add((
        URIRef(obj.uri),
        URIRef(MEMBER_OF_COLLECTION),
        URIRef(PostcardCollection.get().uri)
    ))
    # Set file as content of image datastream.  Image data is binary, so
    # open with 'rb'; saving inside the block keeps the file open while
    # its content is in use and closes it afterwards.
    with open(path.join(fixture_path, filename), 'rb') as image_file:
        obj.image.content = image_file
        obj.save()
    self.postcards.append(obj)
示例7: purge_item
def purge_item(item_id):
    """Purge every object whose pid contains ``pitt:<item_id>``.

    Each purged pid is printed.

    NOTE(review): the lookup is a ``contains`` match, so an id that is a
    prefix of another id will presumably match both -- confirm this is
    intended.
    """
    repo = Repository()
    matches = repo.find_objects(pid__contains='pitt:%s' % (item_id,))
    for match in matches:
        repo.purge_object(match.pid)
        print('%s purged' % (match.pid,))
示例8: browse
def browse(request):
    """Browse postcards and display thumbnail images.

    Optionally filters by the ``subject`` query parameter and paginates
    the results 15 per page using the ``page`` query parameter.
    """
    repo = Repository()
    repo.default_object_type = ImageObject
    per_page = 15
    context = {}

    search_opts = postcard_search_opts().copy()
    if 'subject' in request.GET:
        subject = request.GET['subject']
        context['subject'] = subject
        search_opts['subject'] = subject

    found = repo.find_objects(**search_opts)
    paginator = Paginator(list(found), per_page)

    # a non-numeric page number falls back to the first page
    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1

    # If page request (9999) is out of range, deliver last page of results.
    try:
        current_page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        current_page = paginator.page(paginator.num_pages)

    context['postcards_paginated'] = current_page
    return render(request, 'postcards/browse.html', context)
示例9: get
def get(self, request):
    """Handle an unAPI-style request for ``Volume`` formats.

    * no ``id``: render the formats declared on ``Volume``
    * ``id`` only: render the formats available for that item
    * ``id`` and ``format``: return the item in the requested format, or
      an empty 406 response when the item does not offer that format

    :param request: request whose ``id`` and ``format`` query parameters
        are read
    """
    context = {}
    item_id = request.GET.get('id', None)
    fmt = request.GET.get('format', None)
    if item_id is None:
        # display formats for all items
        # NOTE: if multiple classes, should be able to combine the formats
        context['formats'] = Volume.unapi_formats
    else:
        context['id'] = item_id
        repo = Repository(request=self.request)
        # generalized class-based view would need probably a get-item method
        # for repo objects, could use type-inferring repo variant
        obj = repo.get_object(item_id, type=Volume)
        formats = obj.unapi_formats
        if fmt is None:
            # display formats for this item
            context['formats'] = formats
        elif fmt not in formats:
            # ``format`` comes straight from the query string; answer an
            # unknown value with 406 rather than an unhandled KeyError
            return HttpResponse(status=406)
        else:
            current_format = formats[fmt]
            # return requested format for this item
            meth = getattr(obj, current_format['method'])
            return HttpResponse(meth(), content_type=current_format['type'])
    # NOTE: doesn't really even need to be a template, could be generated
    # with eulxml just as easily if that simplifies reuse
    return render(request, 'books/unapi_format.xml', context,
                  content_type='application/xml')
示例10: datastream_etag
def datastream_etag(request, pid, dsid, type=None, repo=None, accept_range_request=False, **kwargs):
    """Etag function for use with
    :class:`django.views.decorators.http.condition`; accepts the same
    arguments as :meth:`~eulfedora.views.raw_datastream`.

    Returns the datastream checksum, or ``None`` when a partial range is
    requested, the datastream does not exist, its checksum type is
    ``DISABLED``, or the repository request fails.
    """
    # if a range is requested and it is not for the entire file,
    # do *NOT* return an etag
    if accept_range_request:
        requested_range = request.META.get("HTTP_RANGE", None)
        if requested_range and requested_range != "bytes=1-":
            return None

    try:
        if repo is None:
            repo = Repository()
        # only pass ``type`` through when the caller supplied one
        lookup_opts = {} if type is None else {"type": type}
        ds = repo.get_object(pid, **lookup_opts).getDatastreamObject(dsid)
        if ds and ds.exists and ds.checksum_type != "DISABLED":
            return ds.checksum
    except RequestFailed:
        # best effort: a failed lookup simply means no etag
        pass
    return None
示例11: postcard_image
def postcard_image(request, pid, size):
    '''Link to postcard image in requested size.

    Responds with a permanent redirect to the image URL exposed by the
    postcard object.  Raises ``Http404`` when the size is not recognised,
    the object does not exist, or the repository request fails.

    :param pid: postcard object pid
    :param size: size to return, one of thumbnail, medium, or large
    '''
    # NOTE: formerly this served out actual image content, via
    # fedora dissemination & djatoka
    # Images now use an IIIF image server; adding redirects here
    # for the benefit of search engines or indexes referencing
    # the old urls

    # object attribute holding the url for each supported size
    url_attr_by_size = {
        'thumbnail': 'thumbnail_url',
        'medium': 'medium_img_url',
        'large': 'large_img_url',
    }
    if size not in url_attr_by_size:
        # previously an unknown size left ``url`` unassigned and failed
        # with UnboundLocalError
        raise Http404
    try:
        repo = Repository()
        obj = repo.get_object(pid, type=ImageObject)
        if not obj.exists:
            raise Http404
        url = getattr(obj, url_attr_by_size[size])
        return HttpResponsePermanentRedirect(url)
    except RequestFailed:
        raise Http404
示例12: handle
def handle(self, *pids, **options):
    """Point page ARKs that still use the old page url at the current one.

    Searches the pid manager for ARKs whose target starts with the old
    ``/books/pages/`` url on the current site, looks up the matching
    ``Page`` object and updates the ARK target to ``page.absolute_url``.
    Counts of updated, missing and errored pages are written to stderr.

    Raises ``CommandError`` if the pid manager client cannot be created.
    """
    # bind a handler for interrupt signal
    signal.signal(signal.SIGINT, self.interrupt_handler)
    verbosity = int(options.get('verbosity', self.v_normal))
    repo = Repository()
    try:
        pidman = DjangoPidmanRestClient()
    except Exception as err:
        # error if pid manager config options not in localsettings
        raise CommandError(err)

    old_page_target = '%s/books/pages/' % Site.objects.get_current().domain
    search_args = {'type':'ark', 'target': old_page_target, 'count': 10}
    # get a small result set to retrieve the total
    total = pidman.search_pids(**search_args)['results_count']
    # then set a larger page size for actual processing
    search_args['count'] = 100

    if verbosity >= self.v_normal:
        print('Found %d total page ARKs with targets to be updated' % total)
    # NOTE(review): progress bar is created regardless of verbosity, since
    # it is updated and finished unconditionally below
    bar_widgets = [Percentage(), ' (', Counter(), ')', Bar(), ETA()]
    pbar = ProgressBar(widgets=bar_widgets, maxval=total).start()

    self.stats = defaultdict(int)
    self.processed = set()
    for ark in self.get_search_results(pidman, search_args):
        self.processed.add(ark['pid'])
        # get fedora pid from target uri
        target_uri = ark['targets'][0]['target_uri']
        _, pid = target_uri.rstrip('/').rsplit('/', 1)
        try:
            page = repo.get_object(pid, type=Page)
            if page.exists:
                # check if volume exists?
                pidman.update_ark_target(ark['pid'],
                                         target_uri=page.absolute_url)
                self.stats['updated'] += 1
            else:
                # this should probably only happen in dev/qa
                if verbosity > self.v_normal:
                    self.stderr.write('Page %s does not exist' % pid)
                self.stats['notfound'] += 1
        except RequestFailed as rf:
            print('Error accessing %s: %s' % (pid, rf))
            self.stats['error'] += 1

        pbar.update(len(self.processed))
        # presumably set by interrupt_handler on SIGINT -- TODO confirm
        if self.interrupted:
            break

    if not self.interrupted:
        pbar.finish()
    # summarize
    summary = 'Updated %(updated)d, %(error)d error(s), %(notfound)d not found'
    self.stderr.write(summary % self.stats)
示例13: main
def main(argv):
    """Populate a ``FULL_TEXT`` datastream on every Philologic object.

    For each object with the ``niu-objects:cmodel`` content model, the
    ``OBJ`` datastream content is passed through ``strip_tags`` and the
    result is saved as the ``FULL_TEXT`` datastream.

    :param argv: command-line arguments; not read by this function
    """
    # Make Fedora connection
    repo = Repository(root='http://localhost:8080/fedora/',
                      username='fedoraAdmin', password='xxxxx')
    # Retrieve objects by content model and process each in turn
    for p in repo.get_objects_with_cmodel(
            cmodel_uri='info:fedora/niu-objects:cmodel'):
        print('Processing %s' % p)
        # Extract the text
        text = strip_tags(p.getDatastreamObject('OBJ').content)
        # Add FULL_TEXT: set datastream properties, then the content
        full_text = p.getDatastreamObject('FULL_TEXT')
        properties = (
            ('label', 'Full text'),
            ('mimetype', 'text/plain'),
            ('versionable', True),
            ('state', 'A'),
            ('checksum_type', 'MD5'),
        )
        for attr, value in properties:
            setattr(full_text, attr, value)
        full_text.content = text
        full_text.save()
示例14: ThesisBase
class ThesisBase(unittest.TestCase):
    """Base class for testing the functionality of the ETD Django
    application."""

    def __init__(self, *args, **kwargs):
        """Initialise the test case with an empty list of ingested pids."""
        unittest.TestCase.__init__(self, *args, **kwargs)
        # pids added to this list are purged again in tearDown
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE

    def setUp(self):
        """Creates a base class instance of an `eulfedora` Repository
        for testing the basic functionality of the ingesting
        a thesis object into a Fedora Repository."""
        self.repo = Repository()
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True

    def tearDown(self):
        """Removes test objects from the repository"""
        for pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(pid)
            except RequestFailed as rf:
                # ``warning`` replaces the deprecated ``warn`` alias;
                # the arguments are formatted by the logging framework
                logger.warning('Error purging test object %s in tear down:%s',
                               pid, rf)
示例15: volume_modified
def volume_modified(request, pid):
    """Last modification time for a single volume.

    Combines the newest Solr index timestamp for the volume with, for
    authenticated users, the creation time of the newest annotation
    visible to them, via ``solrtimestamp_or_datetime``.
    """
    solr = solr_interface()
    volume_query = solr.query(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid)
    results = volume_query.sort_by('-timestamp').field_limit('timestamp')
    # NOTE: using solr indexing timestamp instead of object last modified, since
    # if an object's index has changed it may have been modified,
    # and index timestamp for a volume will be updated when pages are added

    # if a user is logged in, page should show as modified
    # when annotation count changes
    latest_note = None
    if request.user.is_authenticated():
        # NOTE: shouldn't be very expensive to init volume here; not actually
        # making any api calls, just using volume to get volume
        # uri and associated annotations
        vol = Repository().get_object(pid, type=Volume)
        # newest annotation creation for pages in this volume
        visible_notes = vol.annotations().visible_to(request.user)
        latest_note = visible_notes.last_created_time()

    if results.count():
        solrtime = results[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)