当前位置: 首页>>代码示例>>Python>>正文


Python GraphUtils.loadObjectProperties方法代码示例

本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.loadObjectProperties方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.loadObjectProperties方法的具体用法?Python GraphUtils.loadObjectProperties怎么用?Python GraphUtils.loadObjectProperties使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.utils.GraphUtils.GraphUtils的用法示例。


在下文中一共展示了GraphUtils.loadObjectProperties方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
    def parse(self, limit=None):
        if limit is not None:
            logger.info("Only parsing first %s rows of each file", limit)

        if self.version_num is None:
            import os
            logger.info("Figuring out version num for files")
            # probe the raw directory for the WSnumber on
            # the "letter.WS###" file.
            # this is the only one that we keep the version number on
            files = os.listdir(self.rawdir)
            letter_file = next(f for f in files if re.match(r'letter', f))
            vernum = re.search(r'(WS\d+)', letter_file)
            self.update_wsnum_in_files(vernum.group(1))

        logger.info("Parsing files...")

        if self.testOnly:
            self.testMode = True

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        self.nobnodes = True  # FIXME
        # to hold any label for a given id
        self.id_label_map = {}
        # to hold the mappings between genotype and background
        self.genotype_backgrounds = {}
        self.extrinsic_id_to_enviro_id_hash = {}
        # to hold the genes variant due to a seq alt
        self.variant_loci_genes = {}
        # to hold the parts of an environment
        self.environment_hash = {}
        self.wildtype_genotypes = []
        # stores the rnai_reagent to gene targets
        self.rnai_gene_map = {}

        self.process_gene_ids(limit)
        # self.process_gene_desc(limit)   #TEC imput file is mia 2016-Mar-03
        self.process_allele_phenotype(limit)
        self.process_rnai_phenotypes(limit)
        self.process_pub_xrefs(limit)
        self.process_feature_loc(limit)
        self.process_disease_association(limit)
        # TODO add this when when complete
        # self.process_gene_interaction(limit)

        logger.info("Finished parsing.")

        self.load_bindings()
        gu = GraphUtils(curie_map.get())
        gu.loadAllProperties(g)
        gu.loadObjectProperties(g, Genotype.object_properties)

        logger.info("Found %d nodes in graph", len(self.graph))
        logger.info("Found %d nodes in testgraph", len(self.testgraph))

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:62,代码来源:WormBase.py

示例2: parse

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
    def parse(self, limit=None):
        if limit is not None:
            logger.info("Only parsing first %s rows of each file", limit)
        logger.info("Parsing files...")

        if self.testOnly:
            self.testMode = True

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        self.nobnodes = True  # FIXME

        # build the id map for mapping uniprot ids to genes
        uniprot_entrez_id_map = self.get_uniprot_entrez_id_map()

        for s in self.files:

            if s in ['go-references', 'id-map']:
                continue

            if not self.testMode and int(s) not in self.tax_ids:
                continue

            file = '/'.join((self.rawdir, self.files.get(s)['file']))
            self.process_gaf(file, limit, uniprot_entrez_id_map)

        logger.info("Finished parsing.")

        self.load_bindings()
        gu = GraphUtils(curie_map.get())
        gu.loadAllProperties(g)
        gu.loadObjectProperties(g, Genotype.object_properties)

        logger.info("Found %d nodes in graph", len(self.graph))
        logger.info("Found %d nodes in testgraph", len(self.testgraph))

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:42,代码来源:GeneOntology.py

示例3: Monochrom

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]

#.........这里部分代码省略.........

        self.load_core_bindings()
        self.load_bindings()

        # using the full graph as the test here
        self.testgraph = self.graph
        logger.info("Found %d nodes", len(self.graph))
        logger.info("Done parsing files.")

        return

    def _get_chrbands(self, limit, taxon):
        """
        For the given taxon, it will fetch the chr band file.
        We will not deal with the coordinate information with this parser.
        Here, we only are concerned with building the partonomy.
        :param limit:
        :return:

        """
        line_counter = 0
        myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
        logger.info("Processing Chr bands from FILE: %s", myfile)
        geno = Genotype(self.graph)

        # build the organism's genome from the taxon
        genome_label = self.files[taxon]['genome_label']
        taxon_id = 'NCBITaxon:'+taxon

        # add the taxon as a class.  adding the class label elsewhere
        self.gu.addClassToGraph(self.graph, taxon_id, None)
        self.gu.addSynonym(self.graph, taxon_id, genome_label)

        self.gu.loadObjectProperties(self.graph, Feature.object_properties)

        genome_id = geno.makeGenomeID(taxon_id)
        geno.addGenome(taxon_id, genome_label)
        self.gu.addOWLPropertyClassRestriction(
            self.graph, genome_id, Genotype.object_properties['in_taxon'],
            taxon_id)

        with gzip.open(myfile, 'rb') as f:
            for line in f:
                # skip comments
                line = line.decode().strip()
                if re.match(r'^#', line):
                    continue

                # chr13	4500000	10000000	p12	stalk
                (chrom, start, stop, band, rtype) = line.split('\t')
                line_counter += 1

                # NOTE
                # some less-finished genomes have placed and unplaced scaffolds
                # * Placed scaffolds:
                #    Scaffold has an oriented location within a chromosome.
                # * Unlocalized scaffolds:
                #     scaffold 's chromosome  is known,
                #     scaffold's position, orientation or both is not known.
                # *Unplaced scaffolds:
                #   it is not known which chromosome the scaffold belongs to.

                # find out if the thing is a full on chromosome, or a scaffold:
                # ex: unlocalized scaffold: chr10_KL568008v1_random
                # ex: unplaced scaffold: chrUn_AABR07022428v1
                placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
开发者ID:JervenBolleman,项目名称:dipper,代码行数:70,代码来源:Monochrom.py

示例4: _process_data

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
    def _process_data(self, raw, limit=None):
        logger.info("Processing Data from %s", raw)
        gu = GraphUtils(curie_map.get())

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        geno = Genotype(g)
        line_counter = 0
        gu.loadAllProperties(g)
        gu.loadObjectProperties(g, geno.object_properties)

        # Add the taxon as a class
        taxon_id = 'NCBITaxon:10090'  # map to Mus musculus
        gu.addClassToGraph(g, taxon_id, None)

        # with open(raw, 'r', encoding="utf8") as csvfile:
        with gzip.open(raw, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            next(filereader, None)  # skip the header row
            for row in filereader:
                line_counter += 1

                (marker_accession_id, marker_symbol, phenotyping_center,
                 colony, sex, zygosity, allele_accession_id, allele_symbol,
                 allele_name, strain_accession_id, strain_name, project_name,
                 project_fullname, pipeline_name, pipeline_stable_id,
                 procedure_stable_id, procedure_name, parameter_stable_id,
                 parameter_name, top_level_mp_term_id, top_level_mp_term_name,
                 mp_term_id, mp_term_name, p_value, percentage_change,
                 effect_size, statistical_method, resource_name) = row

                if self.testMode and marker_accession_id not in self.test_ids:
                    continue

                # ##### cleanup some of the identifiers ######
                zygosity_id = self._map_zygosity(zygosity)

                # colony ids sometimes have <> in them, spaces,
                # or other non-alphanumerics and break our system;
                # replace these with underscores
                colony_id = '_'+re.sub(r'\W+', '_', colony)
                if self.nobnodes:
                    colony_id = ':'+colony_id

                if not re.match(r'MGI', allele_accession_id):
                    allele_accession_id = \
                        '_IMPC-'+re.sub(r':', '', allele_accession_id)
                    if self.nobnodes:
                        allele_accession_id = ':'+allele_accession_id
                if re.search(r'EUROCURATE', strain_accession_id):
                    # the eurocurate links don't resolve at IMPC
                    strain_accession_id = '_'+strain_accession_id
                    if self.nobnodes:
                        strain_accession_id = ':'+strain_accession_id
                elif not re.match(r'MGI', strain_accession_id):
                    logger.info(
                        "Found a strange strain accession...%s",
                        strain_accession_id)
                    strain_accession_id = 'IMPC:'+strain_accession_id

                ######################
                # first, add the marker and variant to the graph as with MGI,
                # the allele is the variant locus.  IF the marker is not known,
                # we will call it a sequence alteration.  otherwise,
                # we will create a BNode for the sequence alteration.
                sequence_alteration_id = variant_locus_id = None
                variant_locus_name = sequence_alteration_name = None

                # extract out what's within the <> to get the symbol
                if re.match(r'.*<.*>', allele_symbol):
                    sequence_alteration_name = \
                        re.match(r'.*<(.*)>', allele_symbol).group(1)
                else:
                    sequence_alteration_name = allele_symbol

                if marker_accession_id is not None and \
                        marker_accession_id == '':
                    logger.warning(
                        "Marker unspecified on row %d", line_counter)
                    marker_accession_id = None

                if marker_accession_id is not None:
                    variant_locus_id = allele_accession_id
                    variant_locus_name = allele_symbol
                    variant_locus_type = geno.genoparts['variant_locus']
                    geno.addGene(marker_accession_id, marker_symbol,
                                 geno.genoparts['gene'])
                    geno.addAllele(variant_locus_id, variant_locus_name,
                                   variant_locus_type, None)
                    geno.addAlleleOfGene(variant_locus_id, marker_accession_id)

                    sequence_alteration_id = \
                        '_seqalt'+re.sub(r':', '', allele_accession_id)
                    if self.nobnodes:
                        sequence_alteration_id = ':'+sequence_alteration_id
                    geno.addSequenceAlterationToVariantLocus(
                        sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........
开发者ID:JervenBolleman,项目名称:dipper,代码行数:103,代码来源:IMPC.py

示例5: UCSCBands

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]

#.........这里部分代码省略.........
        # using the full graph as the test here
        self.testgraph = self.graph
        logger.info("Found %d nodes", len(self.graph))
        logger.info("Done parsing files.")

        return

    def _get_chrbands(self, limit, taxon):
        """
        :param limit:
        :return:

        """

        # TODO PYLINT figure out what limit was for and why it is unused
        line_counter = 0
        myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
        logger.info("Processing Chr bands from FILE: %s", myfile)
        geno = Genotype(self.graph)
        monochrom = Monochrom()

        # used to hold band definitions for a chr
        # in order to compute extent of encompasing bands

        mybands = {}
        # build the organism's genome from the taxon
        genome_label = self.files[taxon]['genome_label']
        taxon_id = 'NCBITaxon:'+taxon

        # add the taxon as a class.  adding the class label elsewhere
        self.gu.addClassToGraph(self.graph, taxon_id, None)
        self.gu.addSynonym(self.graph, taxon_id, genome_label)

        self.gu.loadObjectProperties(self.graph, Feature.object_properties)
        self.gu.loadProperties(self.graph, Feature.data_properties,
                               self.gu.DATAPROP)
        self.gu.loadAllProperties(self.graph)

        geno.addGenome(taxon_id, genome_label)

        # add the build and the taxon it's in
        build_num = self.files[taxon]['build_num']
        build_id = 'UCSC:'+build_num
        geno.addReferenceGenome(build_id, build_num, taxon_id)

        # process the bands
        with gzip.open(myfile, 'rb') as f:
            for line in f:
                # skip comments
                line = line.decode().strip()
                if re.match('^#', line):
                    continue

                # chr13	4500000	10000000	p12	stalk
                (scaffold, start, stop, band_num, rtype) = line.split('\t')
                line_counter += 1

                # NOTE some less-finished genomes have
                # placed and unplaced scaffolds
                # * Placed scaffolds:
                #       the scaffolds have been placed within a chromosome.
                # * Unlocalized scaffolds:
                #       although the chromosome within which the scaffold occurs
                #       is known, the scaffold's position or orientation
                #       is not known.
                # * Unplaced scaffolds:
开发者ID:JervenBolleman,项目名称:dipper,代码行数:70,代码来源:UCSCBands.py


注:本文中的dipper.utils.GraphUtils.GraphUtils.loadObjectProperties方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。