当前位置: 首页>>代码示例>>Python>>正文


Python GraphUtils.addDefinition方法代码示例

本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addDefinition方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addDefinition方法的具体用法?Python GraphUtils.addDefinition怎么用?Python GraphUtils.addDefinition使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.utils.GraphUtils.GraphUtils的用法示例。


在下文中一共展示了GraphUtils.addDefinition方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_gene_desc

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
    def process_gene_desc(self, limit):
        """
        Read the gzipped, tab-delimited gene description file and attach
        the descriptions found there to each WormBase gene.

        The concise description is added as the definition of the gene,
        unless it is 'none available'.  The provisional, automated, detailed
        and gene class descriptions are each added as a description with
        their kind appended in square brackets, unless the text is empty,
        starts with 'none', or is identical to the concise description.

        Lines starting with '#' are ignored and not counted; the first
        counted line is treated as the header and skipped.

        :param limit: when not None (and not in test mode), stop after
            the counted line number exceeds this value
        :return: None
        """
        raw = '/'.join((self.rawdir, self.files['gene_desc']['file']))
        graph = self.testgraph if self.testMode else self.graph
        gu = GraphUtils(curie_map.get())

        logger.info("Processing Gene descriptions")
        rows_seen = 0
        with gzip.open(raw, 'rb') as gzfile:
            reader = csv.reader(
                io.TextIOWrapper(gzfile, newline=""),
                delimiter='\t', quotechar='\"')
            for row in reader:
                # comment lines do not count towards the line number
                if re.match(r'\#', ''.join(row)):
                    continue
                rows_seen += 1
                # the first counted line is the header
                if rows_seen == 1:
                    continue

                (gene_num, public_name, molecular_name, concise_description,
                 provisional_description, detailed_description,
                 automated_description, gene_class_description) = row

                if self.testMode and gene_num not in self.test_ids['gene']:
                    continue

                gene_id = 'WormBase:' + gene_num

                if concise_description != 'none available':
                    gu.addDefinition(graph, gene_id, concise_description)

                # every other description, paired with the tag that gets
                # appended to its text
                tagged = (
                    ('provisional', provisional_description),
                    ('automated', automated_description),
                    ('detailed', detailed_description),
                    ('gene class', gene_class_description),
                )
                for kind, text in tagged:
                    # a repeat of the concise text adds nothing
                    if text == concise_description:
                        continue
                    # neither do placeholders or blanks
                    if re.match(r'none', text) or text == '':
                        continue
                    gu.addDescription(
                        graph, gene_id, ' '.join((text, '[' + kind + ']')))

                # the limit only applies outside of test mode
                if self.testMode or limit is None:
                    continue
                if rows_seen > limit:
                    break

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:59,代码来源:WormBase.py

示例2: _process_nlx_157874_1_view

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        Add the Elements of Morphology terms from a tab-delimited file
        to self.graph.

        This table contains the Elements of Morphology data that has been
        screen-scraped into DISCO.
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string
        (objective definition first, then subjective definition).

        Triples:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                OIO:hasExactSynonym Literal(each ';'-delimited synonym)
                OIO:hasRelatedSynonym Literal(each ';'-delimited
                                              'replaces' value)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)

        Each data row must have exactly 18 columns, otherwise the tuple
        unpacking raises ValueError.

        :param raw: path of the tab-delimited file; its first line is
            treated as a header and skipped
        :param limit: NOTE(review): presumably a cap on the number of rows
            to process -- confirm how it is applied
        :return:
        """

        gu = GraphUtils(curie_map.get())
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid,
                 v_last_modified) = line

                # note:
                # e_uid v_uuid v_last_modified terminology_category_url
                # subcategory v_uid morphology_term_num
                # terminology_category_label hp_label notes
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                gu.addClassToGraph(self.graph, morphology_term_id,
                                   morphology_term_label)

                # Assemble the description text

                # NOTE(review): the regex is tested against the unstripped
                # text, so a definition ending in '.' followed by trailing
                # whitespace does not match and ends up with a doubled
                # period after strip() + '.'.  Confirm whether the input
                # can carry trailing whitespace.
                if subjective_definition != '' and not (
                        re.match(r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (
                        re.match(r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                # join with two spaces; the outer strip() removes the
                # separator when either side is empty
                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                # NOTE(review): this is called even when both definitions
                # are empty, i.e. with definition == ''.
                gu.addDefinition(self.graph, morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    gu.addDepiction(self.graph, morphology_term_id,
                                    small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    gu.addDepiction(self.graph, morphology_term_id,
                                    large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    gu.addComment(self.graph, morphology_term_id,
                                  comments.strip())

                # morphology_term_id hasExactSynonym synonyms (; delimited)
                if synonyms != '':
                    for s in synonyms.split(';'):
                        gu.addSynonym(
                            self.graph, morphology_term_id, s.strip(),
                            gu.properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                # skipped when the replaces text just repeats the synonyms
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        gu.addSynonym(
                            self.graph, morphology_term_id, s.strip(),
                            gu.properties['hasRelatedSynonym'])
#.........这里部分代码省略.........
开发者ID:JervenBolleman,项目名称:dipper,代码行数:103,代码来源:EOM.py

示例3: GeneReviews

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]

#.........这里部分代码省略.........
            # figure out if the book is there; if so, process, otherwise skip
            book_dir = '/'.join((self.rawdir, 'books'))
            book_files = os.listdir(book_dir)
            if ''.join((nbk, '.html')) not in book_files:
                # logger.warning("No book found locally for %s; skipping", nbk)
                books_not_found.add(nbk)
                continue
            logger.info("Processing %s", nbk)

            page = open(url)
            soup = BeautifulSoup(page.read())

            # sec0 == clinical description
            clin_summary = \
                soup.find(
                    'div', id=re.compile(".*Summary.sec0"))
            if clin_summary is not None:
                p = clin_summary.find('p')
                ptext = p.text
                ptext = re.sub(r'\s+', ' ', ptext)

                ul = clin_summary.find('ul')
                if ul is not None:
                    item_text = list()
                    for li in ul.find_all('li'):
                        item_text.append(re.sub(r'\s+', ' ', li.text))
                    ptext += ' '.join(item_text)

                # add in the copyright and citation info to description
                ptext = \
                    ' '.join(
                        (ptext,
                         '[GeneReviews:NBK1116, GeneReviews:NBK138602, ' +
                         nbk_id+']'))

                self.gu.addDefinition(self.graph, nbk_id, ptext.strip())

            # get the pubs
            pmid_set = set()
            pub_div = soup.find('div', id=re.compile(r".*Literature_Cited"))
            if pub_div is not None:
                ref_list = pub_div.find_all('div', attrs={'class': "bk_ref"})
                for r in ref_list:
                    for a in r.find_all(
                            'a', attrs={'href': re.compile(r"pubmed")}):
                        if re.match(r'PubMed:', a.text):
                            pmnum = re.sub(r'PubMed:\s*', '', a.text)
                        else:
                            pmnum = \
                                re.search(
                                    r'\/pubmed\/(\d+)$', a['href']).group(1)
                        if pmnum is not None:
                            pmid = 'PMID:'+str(pmnum)
                            self.gu.addTriple(
                                self.graph, pmid,
                                self.gu.object_properties['is_about'],
                                nbk_id)
                            pmid_set.add(pmnum)
                            r = Reference(
                                pmid, Reference.ref_types['journal_article'])
                            r.addRefToGraph(self.graph)

            # TODO add author history, copyright, license to dataset

            # TODO get PMID-NBKID equivalence (near foot of page),
            # and make it "is about" link
            # self.gu.addTriple(
            #   self.graph, pmid,
            #   self.gu.object_properties['is_about'], nbk_id)
            # for example: NBK1191 PMID:20301370

            # add the book to the dataset
            self.dataset.setFileAccessUrl(book_item['url'])

            if limit is not None and c > limit:
                break

            # finish looping through books

        l = len(books_not_found)
        if len(books_not_found) > 0:
            if l > 100:
                logger.warning("There were %d books not found.", l)
            else:
                logger.warning(
                    "The following %d books were not found locally: %s",
                    l, str(books_not_found))
        logger.info(
            "Finished processing %d books for clinical descriptions", c-l)

        return

    def getTestSuite(self):
        """
        Build the unittest suite for this source.

        The imports are done lazily, inside the method.

        :return: the suite loaded from GeneReviewsTestCase
        """
        import unittest
        from tests.test_genereviews import GeneReviewsTestCase

        loader = unittest.TestLoader()
        return loader.loadTestsFromTestCase(GeneReviewsTestCase)
开发者ID:JervenBolleman,项目名称:dipper,代码行数:104,代码来源:GeneReviews.py

示例4: _process_genes

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
    def _process_genes(self, limit=None):
        """
        This method processes the KEGG gene IDs.

        Each row of the tab-delimited input holds a gene id and a
        semicolon-delimited string (symbol list; name; gene class), where
        the symbol list is itself comma-delimited, like:
        DST, BP240, BPA, MACF2; dystonin; K10382 dystonin

        The label for the gene is pulled as the first symbol in the list of
        gene symbols; the rest are added as synonyms.  The long-form of the
        gene name (second semicolon field, when present) is added as a
        definition.
        This is hardcoded to just process human genes.

        The first symbol of every gene is also stored in self.label_hash
        (unless the id is already present), including genes that are then
        skipped in test mode.

        Triples created:
        <gene_id> is a SO:gene
        <gene_id> rdfs:label <gene_name>

        :param limit: when not None (and not in test mode), stop once the
            number of rows read exceeds this value
        :return: None
        :raises ValueError: if a row does not have exactly two columns
        """

        logger.info("Processing genes")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        gu = GraphUtils(curie_map.get())
        geno = Genotype(g)
        raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (gene_id, gene_name) = row

                gene_id = 'KEGG-'+gene_id.strip()

                # split "symbols; name; gene class" and then the
                # comma-delimited symbols; the first symbol is the label
                gene_stuff = gene_name.split(';')
                symbollist = gene_stuff[0].split(',')
                first_symbol = symbollist[0].strip()

                if gene_id not in self.label_hash:
                    self.label_hash[gene_id] = first_symbol

                if self.testMode and gene_id not in self.test_ids['genes']:
                    continue

                # Add the gene as a class.
                geno.addGene(gene_id, first_symbol)

                # add the long name as the description
                if len(gene_stuff) > 1:
                    description = gene_stuff[1].strip()
                    gu.addDefinition(g, gene_id, description)

                # add the rest of the symbols as synonyms; the first one
                # is already the label, so it is skipped here
                for symbol in symbollist[1:]:
                    gu.addSynonym(g, gene_id, symbol.strip())

                # TODO add the KO here?

                if (not self.testMode) and (
                        limit is not None and line_counter > limit):
                    break

        logger.info("Done with genes")
        return
开发者ID:d3borah,项目名称:dipper,代码行数:71,代码来源:KEGG.py

示例5: _process_all

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]

#.........这里部分代码省略.........
                    other_labels += self._get_alt_labels(titles['alternativeTitles'])
                if 'includedTitles' in titles:
                    other_labels += self._get_alt_labels(titles['includedTitles'])

                # add synonyms of alternate labels
                # preferredTitle": "PFEIFFER SYNDROME",
                # "alternativeTitles": "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
                # "includedTitles": "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"

                # remove the abbreviation (comes after the ;) from the preferredTitle, and add it as a synonym
                abbrev = None
                if len(re.split(';', label)) > 1:
                    abbrev = (re.split(';', label)[1].strip())
                newlabel = self._cleanup_label(label)

                description = self._get_description(e['entry'])
                omimid = 'OMIM:'+str(omimnum)

                if e['entry']['status'] == 'removed':
                    gu.addDeprecatedClass(g, omimid)
                else:
                    omimtype = self._get_omimtype(e['entry'])
                    # this uses our cleaned-up label
                    gu.addClassToGraph(g, omimid, newlabel, omimtype)

                    # add the original OMIM label as a synonym
                    gu.addSynonym(g, omimid, label)

                    # add the alternate labels and includes as synonyms
                    for l in other_labels:
                        gu.addSynonym(g, omimid, l)

                    # for OMIM, we're adding the description as a definition
                    gu.addDefinition(g, omimid, description)
                    if abbrev is not None:
                        gu.addSynonym(g, omimid, abbrev)

                    # if this is a genetic locus (but not sequenced) then add the chrom loc info
                    if omimtype == Genotype.genoparts['biological_region']:
                        if 'geneMapExists' in e['entry'] and e['entry']['geneMapExists']:
                            genemap = e['entry']['geneMap']
                            if 'cytoLocation' in genemap:
                                cytoloc = genemap['cytoLocation']
                                # parse the cytoloc.  add this omim thing as a subsequence of the cytofeature
                                # 18p11.3-p11.2
                                # for now, just take the first one
                                # FIXME add the other end of the range, but not sure how to do that
                                # not sure if saying subsequence of feature is the right relationship
                                cytoloc = cytoloc.split('-')[0]
                                f = Feature(omimid, None, None)
                                if 'chromosome' in genemap:
                                    chrom = makeChromID(str(genemap['chromosome']), tax_num, 'CHR')
                                    geno.addChromosomeClass(str(genemap['chromosome']), tax_id, tax_label)
                                    loc = makeChromID(cytoloc, tax_num, 'CHR')
                                    gu.addClassToGraph(g, loc, cytoloc)   # this is the chr band
                                    f.addSubsequenceOfFeature(g, loc)
                                    f.addFeatureToGraph(g)
                                pass

                    # check if moved, if so, make it deprecated and replaced/consider class to the other thing(s)
                    # some entries have been moved to multiple other entries and use the joining raw word "and"
                    # 612479 is movedto:  "603075 and 603029"  OR
                    # others use a comma-delimited list, like:
                    # 610402 is movedto: "609122,300870"
                    if e['entry']['status'] == 'moved':
                        if re.search('and', str(e['entry']['movedTo'])):
开发者ID:d3borah,项目名称:dipper,代码行数:70,代码来源:OMIM.py


注:本文中的dipper.utils.GraphUtils.GraphUtils.addDefinition方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。