当前位置: 首页>>代码示例>>Python>>正文


Python Repository.get_objects_with_cmodel方法代码示例

本文整理汇总了Python中eulfedora.server.Repository.get_objects_with_cmodel方法的典型用法代码示例。如果您正苦于以下问题:Python Repository.get_objects_with_cmodel方法的具体用法?Python Repository.get_objects_with_cmodel怎么用?Python Repository.get_objects_with_cmodel使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在eulfedora.server.Repository的用法示例。


在下文中一共展示了Repository.get_objects_with_cmodel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Report Philologic objects whose OBJ datastream has missing-figure spans.

    For every object with the ``niu-objects:cmodel`` content model whose OBJ
    content contains ``ARTFL-figure-missing``, the OBJ datastream is fetched
    over HTTP, the ``sysid`` of each such span is collected, and one line
    ``pid,count,id;id;...`` is written to ``phil_doc.csv``.  Afterwards the
    number of occurrences of each image id across all documents is written
    to ``phil_image.csv``.

    ``argv`` is accepted but not used.
    """
    repo = Repository(root='%s/fedora/' % HOST, username='%s' % fedoraUser, password='%s' % fedoraPass)

    philologic_pids = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    # The authenticated opener does not depend on the object being processed,
    # so it is built once instead of once per matching object.
    passwordManager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    fedoraAdmin = "%s/fedora" % HOST
    passwordManager.add_password(None, fedoraAdmin, fedoraUser, fedoraPass)
    handler = urllib2.HTTPBasicAuthHandler(passwordManager)
    fedoraOpener = urllib2.build_opener(handler)

    substring = 'ARTFL-figure-missing'
    image_ids = []

    # The context manager guarantees the per-document report is flushed and
    # closed even if processing one of the objects raises.
    with open('phil_doc.csv', 'w') as phil_doc:

        for p in philologic_pids:

            philologic = p.getDatastreamObject('OBJ').content

            if substring not in philologic:
                continue

            print('Processing %s' % p)

            url = '%s/fedora/objects/%s/datastreams/OBJ/content' % (HOST, p)
            soup = BeautifulSoup(fedoraOpener.open(url), 'html.parser')

            # sysid of every span flagged as a missing figure
            images = [span['sysid'] for span in soup.find_all('span', 'ARTFL-figure-missing')]

            image_ids.extend(images)
            phil_doc.write('%s,%s,%s\n' % (p, len(images), ';'.join(images)))

            print('Successfully processed %s' % p)

    # Tally how often each image id was reported across all documents
    d = defaultdict(int)
    for i in image_ids:
        d[i] += 1

    with open('phil_image.csv', 'w') as outfile:

        phil_image = csv.writer(outfile)

        for key, value in d.items():
            phil_image.writerow([key, value])
开发者ID:mshort,项目名称:niu_islandora_migration,代码行数:62,代码来源:philologic_missing_images.py

示例2: main

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Populate the FULL_TEXT datastream of every Philologic object.

    Each object with the ``niu-objects:cmodel`` content model has its OBJ
    content passed through ``strip_tags``; the result is saved as the
    content of the object's FULL_TEXT datastream.  ``argv`` is not used.
    """
    # Open the Fedora connection
    fedora = Repository(root='http://localhost:8080/fedora/', username='fedoraAdmin', password='xxxxx')

    # Look the objects up by content model and visit each one in turn
    for doc in fedora.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel'):

        print('Processing %s' % doc)

        # Text is the OBJ content with the tags stripped
        plain = strip_tags(doc.getDatastreamObject('OBJ').content)

        # Describe the FULL_TEXT datastream, attach the text, then save
        ds = doc.getDatastreamObject('FULL_TEXT')
        ds.label = 'Full text'
        ds.mimetype = 'text/plain'
        ds.versionable = True
        ds.state = 'A'
        ds.checksum_type = 'MD5'
        ds.content = plain
        ds.save()
开发者ID:mshort,项目名称:niu_islandora_migration,代码行数:32,代码来源:philologic_add_text.py

示例3: Command

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
class Command(BaseCommand):
    ''' This command run through all the articles and makes sure that journal titles and publishers match against Sherpa Romeo
    '''
    # NOTE(review): the docstring above is also the command's help text
    # (``help = __doc__``), so it is left untouched here.  It does not match
    # what handle() does (assign a collection and add an ARK identifier);
    # likewise the --noact help string looks copied from another command and
    # the option is never consulted.  Confirm the intended wording/behaviour.
    args = "[netid netid ...]"
    help = __doc__

    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Fixed all caps title in articles'),
        )

    def handle(self, *args, **options):
        '''Add every article to the 'oe-collection' object and record its ARK URI.

        All objects with the article content model are paged through 100 at
        a time.  Each existing article gets its ``collection`` set, an ARK
        URI built from ``settings.PIDMAN_HOST`` and the part of the pid
        after the colon appended to its DC identifiers, and is then saved.
        Errors on a page or on a single article are reported and skipped.
        '''

        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #connection to repository
        self.repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_PASSWORD)
        pid_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)
        coll = self.repo.get_object(pid=settings.PID_ALIASES['oe-collection'])
        try:
            articles = Paginator(pid_set, 100)
        except Exception as e:
            self.output(0, "Error paginating items: : %s " % (e.message))
            # Without a paginator there is nothing to iterate over; stop here
            # instead of failing below with a NameError on ``articles``.
            return

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e.message))
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        continue

                    print(coll)
                    print(article.pid)
                    article.collection = coll
                    ark_uri = '%sark:/25593/%s' % (settings.PIDMAN_HOST, article.pid.split(':')[1])
                    article.dc.content.identifier_list.extend([ark_uri])
                    article.save()

                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e.message))

    def output(self, v, msg):
        '''simple function to handle logging output based on verbosity'''
        if self.verbosity >= v:
            self.stdout.write("%s\n" % msg)
开发者ID:emory-libraries,项目名称:OpenEmory,代码行数:62,代码来源:ark_col_cleanup.py

示例4: all

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
 def all():
     """
     Fetch every object carrying the collection content model, typed as
     :class:`~genrepo.collection.models.CollectionObject`.
     """
     return Repository().get_objects_with_cmodel(
         CollectionObject.COLLECTION_CONTENT_MODEL,
         type=CollectionObject)
开发者ID:emory-libraries,项目名称:genrepo-demo,代码行数:11,代码来源:models.py

示例5: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **kwargs):
        """Queue AFF disk-image migrations and report how they finished.

        Uses the pids given in ``kwargs['pids']`` when present; otherwise
        collects the pid of every existing DiskImage whose premis object
        format name is 'AFF' and which is not flagged as migrated.  One
        ``migrate_aff_diskimage`` task is queued per pid, the command waits
        for all of them, then prints a summary and one line per result.
        """
        verbosity = kwargs.get('verbosity', self.v_normal)

        # explicitly requested pids win over a repository search
        pids = kwargs.get('pids', [])
        repo = Repository()
        if not pids:
            # nothing requested: look for AFF images in the repository
            candidates = repo.get_objects_with_cmodel(
                DiskImage.DISKIMAGE_CONTENT_MODEL, type=DiskImage)
            for image in candidates:
                # risearch hits *should* exist; tolerate discrepancies
                # (hopefully only in QA) by skipping missing objects
                if not image.exists:
                    self.stderr.write(self.style.WARNING('%s does not exist' % image.pid))
                    continue

                if not image.provenance.exists:
                    continue

                # premis tells us the format; skip anything already migrated
                premis = image.provenance.content
                unmigrated_aff = (premis.object
                                  and premis.object.format
                                  and premis.object.format.name == 'AFF'
                                  and not image.migrated)
                if unmigrated_aff:
                    pids.append(image.pid)

        # one queued conversion per pid, tracked in a celery result set
        migration_tasks = celery.result.ResultSet([])
        for pid in pids:
            migration_tasks.add(migrate_aff_diskimage.delay(pid))

        # block until every task has finished
        while migration_tasks.waiting():
            try:
                migration_tasks.join()
            except Exception:
                # task exceptions propagate through join(); they are
                # reported below from each result's state instead
                pass

        completed = migration_tasks.completed_count()
        failure_word = 'some' if migration_tasks.failed() else 'no'
        print('%d migrations completed, %s failures' % (completed, failure_word))

        for outcome in migration_tasks.results:
            if outcome.state == celery.states.FAILURE:
                print('Error: %s' % outcome.result)
            else:
                print('Success: %s' % outcome.result)
开发者ID:emory-libraries,项目名称:TheKeep,代码行数:53,代码来源:migrate_aff_diskimages.py

示例6: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **kwargs):
        """Print the pid and content label of every AD1-format disk image.

        The format name is read from each DiskImage's premis object format.
        Objects without one produce a warning on stderr when verbosity is
        at least ``self.v_normal``.
        """
        verbosity = kwargs.get('verbosity', self.v_normal)
        disk_images = Repository().get_objects_with_cmodel(
            DiskImage.DISKIMAGE_CONTENT_MODEL, type=DiskImage)

        for image in disk_images:
            # the premis object format is what identifies AD1 images
            format_name = None
            if image.provenance.exists:
                premis = image.provenance.content
                if premis.object and premis.object.format:
                    format_name = premis.object.format.name

            if format_name == 'AD1':
                print('%s %s' % (image.pid, image.content.label))
            elif format_name is None and verbosity >= self.v_normal:
                self.stderr.write('Warning: %s has no premis object format' % image.pid)
开发者ID:emory-libraries,项目名称:TheKeep,代码行数:20,代码来源:find_ad1.py

示例7: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **options):
        """Prepare articles for OAI: collection membership, itemID, DC cleanup.

        Processes the pids given as positional arguments, or every object
        with the article content model when none are given.  Each existing
        article is added to the 'oe-collection' object, gets an OAI itemID
        if it is published, has ``_prep_dc_for_oai()`` applied, and is saved
        unless ``--noact`` was given.  A summary of counters is written at
        the end.

        Raises CommandError when no username is supplied, when the pid list
        cannot be built, or when the results cannot be paginated.
        """
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #counters
        counts = defaultdict(int)

        # check required options
        if not options['username']:
            raise CommandError('Username is required')
        # a missing or empty password triggers an interactive prompt
        if not options['password']:
            options['password'] = getpass()

        #connection to repository
        repo = Repository(username=options['username'], password=options['password'])

        coll = repo.get_object(pid=settings.PID_ALIASES['oe-collection'])

        try:
            if args:
                #if pids specified, use that list
                pid_set = [repo.get_object(pid=p, type=Article) for p in args]
            else:
                #search for Articles.
                pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)
        except Exception as e:
            raise CommandError('Error getting pid list (%s)' % e.message)

        try:
            articles = Paginator(pid_set, 20)
            counts['total'] = articles.count
        except Exception as e:
            # Previously this was only logged and the loop below then failed
            # with a NameError on ``articles``; fail cleanly instead.
            raise CommandError("Error paginating items: : %s " % (e.message))

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e.message))
                counts['errors'] += 1
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(1, "Skipping %s because pid does not exist" % article.pid)
                        counts['skipped'] += 1
                        continue

                    self.output(0, "Processing %s" % article.pid)

                    # Add to collection
                    article.collection = coll
                    self.output(1, "Adding %s to collection %s" % (article.pid, coll.pid))
                    counts['collection'] += 1

                    # Add itemID for OAI
                    if article.is_published:
                        article.oai_itemID = "oai:ark:/25593/%s" % article.noid
                        self.output(1, "Adding itemID to %s" % article.pid)
                        counts['itemId'] += 1

                    # Modify DC NS
                    article._prep_dc_for_oai()
                    self.output(1, "Modified DC namespaces for %s" % (article.pid))
                    counts['DC'] += 1

                    # save article
                    if not options['noact']:
                        article.save()
                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e.message))
                    counts['errors'] += 1

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Total number selected: %s\n" % counts['total'])
        self.stdout.write("Added to collection: %s\n" % counts['collection'])
        self.stdout.write("Added itemID: %s\n" % counts['itemId'])
        self.stdout.write("Modified DC NS: %s\n" % counts['DC'])
        self.stdout.write("Skipped: %s\n" % counts['skipped'])
        self.stdout.write("Errors: %s\n" % counts['errors'])
开发者ID:mprefer,项目名称:OpenEmory,代码行数:91,代码来源:add_to_oai.py

示例8: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **options):
        """Set the content mimetype of Video objects to 'video/quicktime'.

        Pids are read one per line from the file named by the ``file``
        option; if the file is empty, every object with the video content
        model is processed instead.  Each existing object has
        ``content.mimetype`` set and is saved unless ``--noact`` was given.
        A summary of counters is written at the end.

        Raises CommandError when the username or file option is missing,
        when the pid list cannot be built, or when the results cannot be
        paginated.
        """
        #counters
        counts = defaultdict(int)

        # check required options
        if not options['username']:
            raise CommandError('Username is required')
        # a missing or empty password triggers an interactive prompt
        if not options['password']:
            options['password'] = getpass()

        if not options['file']:
            raise CommandError('File is required')

        with open(options['file'], 'r') as myfile:
            data = myfile.read().splitlines()

        #connection to repository
        repo = Repository(username=options['username'], password=options['password'])

        try:
            if data:
                #if pids specified, use that list
                pid_set = [repo.get_object(pid=p, type=Video) for p in data]
            else:
                #otherwise search for all Video objects
                pid_set = repo.get_objects_with_cmodel(Video.VIDEO_CONTENT_MODEL, Video)
        except Exception as e:
            raise CommandError('Error gettings pids (%s)' % e.message)

        try:
            objects = Paginator(pid_set, 20)
            counts['total'] = objects.count
        except Exception as e:
            # Previously this was only logged and the loop below then failed
            # with a NameError on ``objects``; fail cleanly instead.
            raise CommandError("Error paginating items: : %s " % (e.message))

        #process all Objects
        for p in objects.page_range:
            try:
                objs = objects.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output("Error getting page: %s : %s " % (p, e.message))
                counts['errors'] += 1
                continue
            for a in objs:
                try:
                    if not a.exists:
                        self.output("Skipping %s because pid does not exist" % a.pid)
                        counts['skipped'] += 1
                        continue

                    self.output("Processing %s" % a.pid)
                    a.content.mimetype = 'video/quicktime'

                    # save object
                    if not options['noact']:
                        a.save("cleanup mimetype")
                        self.output("SAVED %s" % a.pid)
                        counts['saved'] += 1
                    counts['processed'] += 1
                except Exception as e:
                    self.output("Error processing pid: %s : %s " % (a.pid, e.message))
                    counts['errors'] += 1

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Total number selected: %s\n" % counts['total'])
        self.stdout.write("Total number processed: %s\n" % counts['processed'])
        self.stdout.write("Total number saved: %s\n" % counts['saved'])
        self.stdout.write("Skipped: %s\n" % counts['skipped'])
        self.stdout.write("Errors: %s\n" % counts['errors'])
开发者ID:emory-libraries,项目名称:TheKeep,代码行数:80,代码来源:mimetype_cleanup.py

示例9: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **options):
        """Generate division / author / lead CSV reports over all articles.

        At least one of the ``div``, ``author`` or ``lead`` options must be
        set.  Every existing article is passed to ``self.division``,
        ``self.author`` and/or ``self.lead`` according to those options, and
        the tallies they keep are then written to ``division_report.csv``,
        ``author_report.csv`` and ``lead_report.csv`` respectively.

        Reads ``self.counts``, ``self.div_counts``, ``self.author_counts``
        and ``self.lead_counts``, which are presumably initialised elsewhere
        in the class -- not visible from this method.

        Raises CommandError when no report option or no username is given,
        or when the results cannot be paginated.
        """
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        # check required options
        if (not options['div']) and (not options['author']) and (not options['lead']):
            raise CommandError('At least one of the options div, author or lead is required')
        if not options['username']:
            raise CommandError('Username is required')
        # a missing or empty password triggers an interactive prompt
        if not options['password']:
            options['password'] = getpass()

        #connection to repository
        repo = Repository(username=options['username'], password=options['password'])
        pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)

        try:
            articles = Paginator(pid_set, 100)
            self.counts['total'] = articles.count
        except Exception as e:
            # Previously this was only logged and the loop below then failed
            # with a NameError on ``articles``; fail cleanly instead.
            raise CommandError("Error paginating items: : %s " % (e.message))

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e.message))
                self.counts['errors'] += 1
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        self.counts['skipped'] += 1
                        continue

                    self.output(2, "Processing %s" % article.pid)
                    if options['div']:
                        self.division(article)
                    if options['author']:
                        self.author(article)
                    if options['lead']:
                        self.lead(article)
                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e.message))
                    self.counts['errors'] += 1

        # write files; each report is opened in a ``with`` block so its rows
        # are flushed and the handle is closed as soon as it is complete
        if options['div']:
            with open("division_report.csv", 'w') as report:
                writer = csv.writer(report)
                writer.writerow(['Division', 'Count'])
                for key, count in self.div_counts.items():
                    writer.writerow([key, count])

        if options['author']:
            with open("author_report.csv", 'w') as report:
                writer = csv.writer(report)
                writer.writerow(['Author', 'Division', 'Department', 'Count'])
                for netid, count in self.author_counts.items():
                    try:
                        person = User.objects.get(username=netid).get_profile().esd_data()
                        writer.writerow([person.directory_name, person.division_name, person.department_shortname, count])
                    except (User.DoesNotExist, UserProfile.DoesNotExist, EsdPerson.DoesNotExist):
                        self.output(0, "At least one part (User, Profile, ESD) for netid  %s could not be found" % netid)

        if options['lead']:
            with open("lead_report.csv", 'w') as report:
                writer = csv.writer(report)
                writer.writerow(['Division', 'Count'])
                for key, count in self.lead_counts.items():
                    writer.writerow([key, count])

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Total number selected: %s\n" % self.counts['total'])
        self.stdout.write("Skipped: %s\n" % self.counts['skipped'])
        self.stdout.write("Errors: %s\n" % self.counts['errors'])
开发者ID:mprefer,项目名称:OpenEmory,代码行数:82,代码来源:article_report.py

示例10: main

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
def main(argv):
    """Rewrite figure links inside every Philologic object's OBJ datastream.

    For each object with the ``niu-objects:cmodel`` content model:

    * existing ``<a class="ARTFL-figure">`` links that start with
      ``/fedora/repository/`` are re-pointed at ``/islandora/object/``;
    * each ``<span class="ARTFL-figure-missing">`` is replaced by a link to
      the object whose ``dc:title`` equals the span's ``sysid`` (minus file
      extension), when the resource index returns one;
    * the modified markup is saved back to OBJ and a GSearch index update
      is requested for the object.

    Spans that could not be resolved are logged per document in
    ``phil_doc_dev.csv`` and tallied per image in ``phil_image_dev.csv``.
    ``argv`` is accepted but not used.
    """

    # Connect to repository
    repo = Repository(root='%s/fedora/' % HOST, username='%s' % fedoraUser, password='%s' % fedoraPass)
    # Get philologic pids using content model
    philologic_pids = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    # Logging
    image_ids = []
    d = defaultdict(int)

    old_prefix = '/fedora/repository/'

    # The context manager guarantees the per-document log is flushed and
    # closed even if processing one of the objects raises.
    with open('phil_doc_dev.csv', 'w') as phil_doc:

        for pid in philologic_pids:

            # Logging
            images = []
            image_count = 0

            # Get the OBJ's content as string
            philologic = pid.getDatastreamObject('OBJ').content
            # Take the opportunity to replace deprecated HTML entity reference
            # NOTE(review): the pattern below is a literal character, not an
            # entity reference -- confirm it matches the intended text.
            philologic = re.sub("˙", ".", philologic)
            # Load OBJ content into soup. Must specify html5lib parser, b/c lxml causes fatal exception (memory leak)
            soup = BeautifulSoup(philologic, "html5lib")
            # Find all ARTFL spans and <a>'s
            spans = soup.find_all("span", "ARTFL-figure-missing")
            links = soup.find_all("a", "ARTFL-figure")

            # Replace /fedora/repository with /islandora/object in existing links
            for a in links:

                href = a['href']
                if href.startswith(old_prefix):
                    a['href'] = '/islandora/object/%s' % href[len(old_prefix):]

            for span in spans:

                # Retrieve the sysid and strip the file format.
                title = span['sysid'].split('.')[0]
                # Use sysid as title to send RI query for pid
                results = repo.risearch.sparql_query('select ?pid where {?pid <dc:title> "%s"}' % title)
                try:
                    # sparql_query returns CSV object; next will retrieve first row.
                    # If no results, throw exception and log that image
                    p = next(results)['pid'].replace('info:fedora/', '')
                    # Create <a> tag with @href pointing to object
                    new_tag = soup.new_tag("a", href="/islandora/object/%s/datastream/OBJ/view" % p)
                    # B/c it's a reserved word, we have to add @class separately
                    new_tag['class'] = "ARTFL-figure"
                    # Grab and add the <span> string
                    new_tag.string = span.string
                    # Replace <span> with <a>
                    span.replace_with(new_tag)
                    print("Successfully changed %s in %s" % (title, pid))
                except Exception:
                    # Still a best-effort catch-all for lookup failures, but no
                    # longer swallows KeyboardInterrupt / SystemExit.
                    print("Failed to locate %s in %s" % (title, pid))
                    # Logging
                    images.append(title)
                    image_count += 1

            # Retrieve entire OBJ datastream
            obj = pid.getDatastreamObject('OBJ')
            # Replace OBJ content with soup. Encoding as html to maintain entity references.
            obj.content = soup.encode(formatter="html")
            # Save and we're done.
            obj.save()

            # Because GSearch isn't listening, we have to index the update
            url = '%s/fedoragsearch/rest?operation=updateIndex&action=fromPid&value=%s' % (HOST, pid)
            gsearchOpener.open(url)

            # Rest is all logging not founds and errors
            image_ids.extend(images)
            images_string = ';'.join(images)

            phil_doc.write('%s,%s,%s\n' % (pid, image_count, images_string))

    for i in image_ids:
        d[i] += 1

    with open('phil_image_dev.csv', 'w') as outfile:

        phil_image = csv.writer(outfile)

        for key, value in d.items():
            phil_image.writerow([key, value])
开发者ID:mshort,项目名称:niu_islandora_migration,代码行数:91,代码来源:philologic_missing_link_fix.py

示例11: Command

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
class Command(BaseCommand):
    '''Fetches `~openemory.publication.models.Article` objects from Fedora and fixes the DC and MODS checksumes:
     1. Replaces '\r' with '' in abstract field.
     2. Save object. Note: this will make a new version of the mods and copy some fields to the DC
     If PIDs are provided in the arguments, that list of pids will be used instead of searching Fedora.
    '''
    # NOTE: the docstring above is also the command's help text
    # (``help = __doc__``), so its wording is runtime output and is left as is.
    args = "[pid pid ...]"
    help = __doc__

    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Reports the pid and total number of Articles that would be processed but does not really do anything.'),
        make_option('--username',
                    action='store',
                    help='Username of fedora user to connect as'),
        make_option('--password',
                    action='store',
                    help='Password for fedora user,  password=  will prompt for password'),
        make_option('--host',
                    action='store',
                    default=settings.FEDORA_ROOT,
                    help='Hostname of fedora instance.  Defaults to localsettings:FEDORA_ROOT.'),
        )

    def handle(self, *args, **options):
        '''Strip carriage returns from abstracts whose DC checksum is invalid.

        Works on the pids given as positional arguments (restricted to
        articles by get_pids) or on every object with the article content
        model.  An article is fixed when its abstract text contains a
        carriage return and its DC datastream fails checksum validation; it
        is saved unless ``--noact`` was given.  Everything else is counted
        as skipped.

        Raises CommandError when no username is supplied.
        '''

        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #counters
        counts = defaultdict(int)

        # check required options
        if not options['username']:
            raise CommandError('Username is required')
        # a missing or empty password triggers an interactive prompt
        if not options['password']:
            options['password'] = getpass()

        #connection to repository
        self.repo = Repository(options['host'], username=options['username'],
                               password=options['password'])

        if args:
            #if pids specified, use that list
            article_set = self.get_pids(args)
        else:
            #search for Articles in Fedora.
            article_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)

        #process all Articles
        for a in article_set:
            try:
                self.output(1, "Processing %s" % a.pid)

                if (a.descMetadata.content.abstract is not None) \
                    and (a.descMetadata.content.abstract.text) \
                    and ('\r' in a.descMetadata.content.abstract.text) \
                    and (not a.dc.validate_checksum()):
                    a.descMetadata.content.abstract.text = a.descMetadata.content.abstract.text.replace('\r', '')
                    # save article
                    try:
                        if not options['noact']:
                            a.save("Removing backslash-r to fix checksums")
                    except Exception as e:
                        self.output(0, "Error processing pid: %s : %s " % (a.pid, e.message))
                        counts['errors'] += 1
                    else:
                        # count as fixed only when the save did not fail
                        # (under --noact this is the "would be fixed" count)
                        counts['fixed'] += 1
                else:
                    self.output(1, "Skipping %s" % a.pid)
                    counts['skip'] += 1
            except Exception as e:
                self.output(1, "Error on %s: %s" % (a.pid, e.message))
                # include inspection failures in the error total
                counts['errors'] += 1

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Fixed: %s\n" % counts['fixed'])
        self.stdout.write("Skipped: %s\n" % counts['skip'])
        self.stdout.write("Errors: %s\n" % counts['errors'])

    def get_pids(self, pids):
        '''Yield a Publication for each pid whose first content model is the article model.'''
        for p in pids:
            obj = self.repo.get_object(pid=p, type=Publication)
            if str(obj.get_models()[0]) == Publication.ARTICLE_CONTENT_MODEL:
                yield obj

#.........这里部分代码省略.........
开发者ID:emory-libraries,项目名称:OpenEmory,代码行数:103,代码来源:fix_checksums.py

示例12: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **options):
        """Rebuild each article's DC identifier list and clear its DC relations.

        Processes the pids given as positional arguments, or every object
        with the article content model when none are given.  For each
        existing article the DC identifiers are replaced with: the PMC id
        and PMC access URL (when a contentMetadata datastream exists), the
        MODS ARK URI (when set) and the pid.  ``dc.relation`` is emptied and
        the article is saved unless ``--noact`` was given.

        Raises CommandError when no username is supplied, when the pid list
        cannot be built, or when the results cannot be paginated.
        """
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #counters
        counts = defaultdict(int)

        # check required options
        if not options['username']:
            raise CommandError('Username is required')
        # a missing or empty password triggers an interactive prompt
        if not options['password']:
            options['password'] = getpass()

        #connection to repository
        repo = Repository(username=options['username'], password=options['password'])

        try:
            if args:
                #if pids specified, use that list
                pid_set = [repo.get_object(pid=p, type=Publication) for p in args]
            else:
                #search for Articles
                # NOTE(review): this branch types results as Article while the
                # branch above uses Publication -- confirm which is intended.
                pid_set = repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, Article)
        except Exception as e:
            raise CommandError('Error gettings pids (%s)' % e.message)

        try:
            articles = Paginator(pid_set, 20)
            counts['total'] = articles.count
        except Exception as e:
            # Previously this was only logged and the loop below then failed
            # with a NameError on ``articles``; fail cleanly instead.
            raise CommandError("Error paginating items: : %s " % (e.message))

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e.message))
                counts['errors'] += 1
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(1, "Skipping %s because pid does not exist" % article.pid)
                        counts['skipped'] += 1
                        continue

                    self.output(0, "Processing %s" % article.pid)

                    mods = article.descMetadata.content
                    nlm = article.contentMetadata.content if article.contentMetadata.exists else None
                    identifiers = []

                    #PMC info
                    if nlm:
                        pmc = nlm.docid
                        pmc_id = 'PMC%s' % pmc
                        access_url = pmc_access_url(pmc)
                        identifiers.extend([pmc_id, access_url])

                    if mods.ark_uri:
                        identifiers.append(mods.ark_uri)

                    identifiers.append(article.pid)

                    article.dc.content.identifier_list = identifiers

                    # REMOVE dc.relation
                    article.dc.content.relation_list = []

                    # save article
                    if not options['noact']:
                        article.save()
                        self.output(1, "SAVED")
                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e.message))
                    counts['errors'] += 1

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Total number selected: %s\n" % counts['total'])
        self.stdout.write("Skipped: %s\n" % counts['skipped'])
        self.stdout.write("Errors: %s\n" % counts['errors'])
开发者ID:emory-libraries,项目名称:OpenEmory,代码行数:95,代码来源:add_dc_ident.py

示例13: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *pids, **options):
        """Update pid manager ARK targets for volume objects.

        Processes the objects for the pids given on the command line, or
        every object with the ``Volume.VOLUME_CONTENT_MODEL`` content model
        when no pids are given.  For each existing object whose DC
        identifier is an ARK, the unqualified ARK target is set to
        ``self.volume_url(obj)`` and the ``PDF`` qualified target is set to
        ``self.pdf_url(obj)``.  A one-line summary is written at the end.

        :param pids: optional pids of the objects to process
        :param options: command options; ``dry_run`` (default ``False``)
            suppresses the pid manager updates, ``verbosity`` (default
            ``self.v_normal``) controls how much is written to stdout
        :raises CommandError: if the pid manager client cannot be
            initialized
        """
        dry_run = options.get('dry_run', False)
        verbosity = int(options.get('verbosity', self.v_normal))

        repo = Repository()
        try:
            pidman = DjangoPidmanRestClient()
        except Exception as err:
            # error if pid manager config options not in localsettings
            raise CommandError(err)

        # if pids are specified on command line, only process those objects
        if pids:
            objs = [repo.get_object(pid, type=Volume) for pid in pids]

        # otherwise, look for all volume objects in fedora
        else:
            objs = repo.get_objects_with_cmodel(Volume.VOLUME_CONTENT_MODEL,
                                                type=Volume)

        stats = defaultdict(int)
        for obj in objs:
            if not obj.exists:
                if verbosity >= self.v_normal:
                    self.stdout.write('%s does not exist or is not accessible' % obj.pid)
                stats['skipped'] += 1
                continue

            stats['objs'] += 1

            # objects without an ARK identifier are counted as processed
            # but are neither skipped nor updated
            identifier = obj.dc.content.identifier
            if not is_ark(identifier):
                continue

            noid = parse_ark(identifier)['noid']
            try:
                # lookup is only used to confirm that the pid manager
                # knows this ARK; the returned info is not needed
                pidman.get_ark(noid)
            except Exception as err:
                # requested ARK is not in the configured pid manager
                # (this should ONLY happen in dev/QA)
                if verbosity >= self.v_normal:
                    if '404: NOT FOUND' in str(err):
                        self.stdout.write('Error retriving ARK information for %s: Not Found' % obj.pid)
                    else:
                        self.stdout.write('Error retriving ARK information for %s' % obj.pid)
                stats['skipped'] += 1
                continue

            # update unqualified ark to resolve to readux volume landing page
            if not dry_run:
                pidman.update_ark_target(noid,
                    target_uri=self.volume_url(obj),
                    active=True)

            # we expected a qualified ARK target for the PDF; update whether
            # it currently exists or not
            qual = 'PDF'
            stats['updated'] += 1   # count as updated in dry run mode (would be updated)
            if not dry_run:
                pidman.update_ark_target(noid, qual,
                    target_uri=self.pdf_url(obj),
                    active=True)
                # FIXME: catch possible exceptions here?

        # output summary
        if verbosity >= self.v_normal:
            # two-pass formatting: the first pass fills in the counts and
            # turns each escaped %%s into %s for the second pass
            msg = 'Processed %(objs)d object%%s; skipped %(skipped)d,%%s updated %(updated)d' % stats
            msg = msg % ('s' if stats['objs'] != 1 else '', ' would have' if dry_run else '')
            self.stdout.write(msg)
开发者ID:WSULib,项目名称:readux,代码行数:70,代码来源:update_arks.py

示例14: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *args, **options):
        """Check repository Articles against Symplectic-Elements publications.

        Processes the pids given as positional arguments, or every object
        with ``Article.ARTICLE_CONTENT_MODEL`` when none are given, in pages
        of 20.  Articles that do not exist, have no title, or are not
        published are skipped.  Unless ``options['force']`` is set, each
        remaining article is looked up in Symplectic-Elements first by PMC
        id and then by title.

        NOTE(review): the remainder of this method is elided in this
        excerpt, so what happens after the title comparison is not
        documented here.

        :param args: optional pids of the Articles to process
        :param options: command options; ``verbosity``, ``force`` and
            ``rel`` are read in the visible part of the method
        :raises CommandError: if building the list of objects fails
        """
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #counters
        self.counts = defaultdict(int)

        #connection to repository
        repo = Repository(username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_MANAGEMENT_PASSWORD)

        #Symplectic-Elements setup
        self.session = requests.Session()
        self.session.auth = (settings.SYMPLECTIC_USER, settings.SYMPLECTIC_PASSWORD)
        # NOTE(review): verify=False turns off TLS certificate verification
        # for every request made through this session -- confirm intended
        self.session.verify=False
        self.session.stream=True
        self.session.headers.update({'Content-Type': 'text/xml'})

        self.pub_query_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "publications")
        self.pub_create_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "publication/records/manual")
        self.relation_create_url = "%s/%s" % (settings.SYMPLECTIC_BASE_URL, "relationships")


        #if pids specified, use that list
        try:
            if len(args) != 0:
                pids = list(args)
                pid_set = [repo.get_object(pid=p,type=Article) for p in pids]


            else:
                #search for Articles.
                pid_set = repo.get_objects_with_cmodel(Article.ARTICLE_CONTENT_MODEL, Article)

        except Exception as e:
            raise CommandError('Error getting pid list (%s)' % e.message)

        # NOTE(review): if Paginator raises here the error is only logged and
        # `articles` stays unbound, so the loop below would fail with a
        # NameError
        try:
            articles = Paginator(pid_set, 20)
            self.counts['total'] = articles.count
        except Exception as e:
            self.output(0, "Error paginating items: : %s " % (e.message))

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0,"Error getting page: %s : %s " % (p, e.message))
                self.counts['errors'] +=1
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(1, "Skipping %s because pid does not exist" % article.pid)
                        self.counts['skipped'] +=1
                        continue
                    # title is None when title_info or its title is missing/empty
                    title = article.descMetadata.content.title_info.title if (article.descMetadata.content.title_info and article.descMetadata.content.title_info.title) else None
                    if title is None or title == '':
                        self.output(1, "Skipping %s because OE Title does not exist" % (article.pid))
                        self.counts['skipped'] +=1
                        continue

                    if not article.is_published:
                        self.output(1, "Skipping %s because pid is not published" % article.pid)
                        self.counts['skipped'] +=1
                        continue

                    # try to detect article by PMC
                    if article.pmcid and not options['force']:
                        response = self.session.get(self.pub_query_url, params = {'query' : 'external-identifiers.pmc="PMC%s"' % article.pmcid, 'detail': 'full'})
                        # the session is in stream mode, so the body is read
                        # from the raw response and parsed as XML
                        entries = load_xmlobject_from_string(response.raw.read(), OESympImportArticle).entries
                        self.output(2, "Query for PMC Match: GET %s %s" % (response.url, response.status_code))
                        if response.status_code == 200:
                            if len(entries) >= 1:
                                self.output(1, "Skipping %s because PMC PMC%s already exists" % (article.pid, article.pmcid))
                                self.counts['skipped'] +=1

                                # with the 'rel' option, relations are still
                                # processed for the already-existing record
                                if options['rel']:
                                    symp_pub, relations = article.as_symp(source=entries[0].source, source_id=entries[0].source_id)
                                    self.process_relations(entries[0].source_id, relations, options)
                                    sleep(1)
                                continue
                            # a 200 response with no entries falls through to
                            # the title check below
                        else:
                            # NOTE(review): entries[0] is read on a failed
                            # request; if entries is empty this raises an
                            # IndexError -- confirm it is handled by the
                            # enclosing try (its except clause is not
                            # visible in this excerpt)
                            self.output(1, "Skipping %s because trouble with request %s %s" % (article.pid, response.status_code, entries[0].title))
                            self.counts['skipped'] +=1
                            continue

                    # try to detect article by Title if it does not have PMC
                    if not options['force']:
                        response = self.session.get(self.pub_query_url, params = {'query' : 'title~"%s"' % title, 'detail': 'full'})
                        entries = load_xmlobject_from_string(response.raw.read(), OESympImportArticle).entries
                        # Account for multiple results
                        titles = [e.title for e in entries]
                        self.output(2, "Query for Title Match: GET %s %s" % (response.url, response.status_code))
                        if response.status_code == 200:
                            found = False
                            for t in titles:
                                # percent_match is defined elsewhere;
                                # presumably 90 is the required match
                                # percentage -- TODO confirm
                                success, percent = percent_match(title, t, 90)
                                self.output(1, "Percent Title Match '%s' '%s' %s " % (title, t, percent))
#.........这里部分代码省略.........
开发者ID:mprefer,项目名称:OpenEmory,代码行数:103,代码来源:import_to_symplectic.py

示例15: handle

# 需要导入模块: from eulfedora.server import Repository [as 别名]
# 或者: from eulfedora.server.Repository import get_objects_with_cmodel [as 别名]
    def handle(self, *pids, **options):
        """Log current and rewritten pid manager target URIs for scanned volumes.

        Looks up every object with the ScannedVolume content model, fetches
        its ARK from the pid manager, derives a rewritten target URI by a
        series of string replacements, and writes one line per volume to a
        timestamped ``.csv`` file under ``ecds/``.

        NOTE(review): the remainder of this method is elided in this
        excerpt; whether the rewritten URI is posted back to the pid
        manager and whether the log file is closed cannot be seen here.

        :param pids: positional arguments; not used in the visible part
        :param options: command options; not used in the visible part
        """
        # testPid
        # settings.PIDMAN_HOST = 'https://testpid.library.emory.edu/'  # the web root where we'll ask for pids
        # settings.PIDMAN_USER = ''
        # settings.PIDMAN_PASSWORD = ''
        # settings.PIDMAN_DOMAIN = 'https://testpid.library.emory.edu/domains/18/'  # default domain (e.g. when minting pids)

        # prodPid
        # PIDMAN_HOST = 'https://pidqas.library.emory.edu/'

        # get a pidman client
        client = DjangoPidmanRestClient()

        # testFedora
        repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_MANAGEMENT_PASSWORD)

        # prodFedora
        #repo = Repository('https://fedora.library.emory.edu:8443/fedora/', username='ppppppp', password='pppppp')

        # constants
        REPOMGMT = Namespace(rdflib.URIRef('info:fedora/fedora-system:def/relations-external#'))
        vol_list = repo.get_objects_with_cmodel('info:fedora/emory-control:ScannedVolume-1.0')

        print "Found " + str(len(vol_list)) + " books."

        # Get a file logger
        # NOTE(review): %I is the 12-hour clock hour and no AM/PM marker is
        # included, so runs twelve hours apart can produce the same filename
        filename = "ecds/" + str(datetime.datetime.now().strftime("%I-%M-%S %B-%d-%Y")) + ".csv"
        f = open(filename, 'w+')

        # report all books
        f.write("Found " + str(len(vol_list)) + " books.")
        f.write("\n")

        # report titles
        f.write("TYPE,")
        f.write("PID,")
        f.write("NOID,")
        f.write("O_URI,")
        f.write("N_URI,")
        f.write("PAGE,")
        f.write("POST_URI,")
        # f.write("POST_PDF_URI,")
        f.write("\n")



        # go over all books
        for vol in vol_list:
            # re-fetch the object typed as ScannedVolume
            volDobj = repo.get_object(vol.pid.rstrip(), type=ScannedVolume)

            # get attributes
            pid = volDobj.pid
            # noid is the part of the pid after the first colon
            noid = pid.split(":")[1]
            try:
                pidmanObj = client.get_pid("ark", noid)
            except Exception as e:
                # log the pid and the error, then move on
                f.write(str(pid))
                f.write("\n")
                f.write(str(e))
                continue # continue to the next item
            # only the first target of the ARK is considered
            oriTargetUri = pidmanObj["targets"][0]["target_uri"]
            newTargetUri = oriTargetUri

            # the replacements below are applied in sequence, so each one
            # operates on the result of the previous ones

            # if it has emory%3A
            if newTargetUri.find("emory%3A") != -1:
                newTargetUri = newTargetUri.replace("emory%3A", "emory:")

            # if it has readux%3A
            if newTargetUri.find("readux%3A") != -1:
                newTargetUri = newTargetUri.replace("readux%3A", "emory:")

            # if it has readux:
            if newTargetUri.find("readux:") != -1:
                newTargetUri = newTargetUri.replace("readux:", "emory:")

            # if it has webprd001.library.emory.edu/readux
            if newTargetUri.find("webprd001.library.emory.edu/readux") != -1:
                newTargetUri = newTargetUri.replace("webprd001.library.emory.edu/readux", "testreadux.ecds.emory.edu")

            # if it has webprd001.library.emory.edu
            if newTargetUri.find("webprd001.library.emory.edu/") != -1:
                newTargetUri = newTargetUri.replace("webprd001.library.emory.edu/", "testreadux.ecds.emory.edu/")

            # if it has /readux/
            if newTargetUri.find("/readux/") != -1:
                newTargetUri = newTargetUri.replace("/readux/", "/")


            newTargetUri = unicode(newTargetUri)

            # log attributes
            # NOTE(review): values are joined with ", " and not escaped, so a
            # URI containing a comma would shift the columns
            f.write("BOOK" + ", ")
            f.write(str(pid) + ", ")
            f.write(str(noid) + ", ")
            f.write(str(oriTargetUri) + ", ")
            f.write(str(newTargetUri) + ", ")
            f.write(str(len(volDobj.pageDObjs)) + ", ")
            f.write("\n")

            # report attributes
#.........这里部分代码省略.........
开发者ID:emory-libraries,项目名称:readux,代码行数:103,代码来源:update_pid_ecds.py


注:本文中的eulfedora.server.Repository.get_objects_with_cmodel方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。