本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addDefinition方法的典型用法代码示例。如果您正苦于以下问题：Python GraphUtils.addDefinition方法的具体用法？Python GraphUtils.addDefinition怎么用？Python GraphUtils.addDefinition使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addDefinition方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_gene_desc
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
def process_gene_desc(self, limit):
    """
    Read the gzipped, tab-delimited gene description file and attach
    its descriptions to WormBase gene identifiers.

    The concise description becomes the gene's definition unless it is
    'none available'.  The provisional, automated, detailed and gene
    class descriptions are each added as a description, suffixed with
    their kind in square brackets, provided they are non-empty, do not
    start with 'none' and differ from the concise description.

    In test mode the triples go to ``self.testgraph`` and only genes
    listed in ``self.test_ids['gene']`` are processed; otherwise they go
    to ``self.graph``.

    :param limit: when not None (and not in test mode), stop once the
        running row counter exceeds this value
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_desc']['file']))
    target_graph = self.testgraph if self.testMode else self.graph
    gu = GraphUtils(curie_map.get())

    logger.info("Processing Gene descriptions")
    line_counter = 0
    with gzip.open(raw, 'rb') as gz_handle:
        text_handle = io.TextIOWrapper(gz_handle, newline="")
        reader = csv.reader(text_handle, delimiter='\t', quotechar='\"')
        for row in reader:
            # comment rows start with '#' and are not counted
            if re.match(r'\#', ''.join(row)):
                continue
            line_counter += 1
            # the first counted row is the column header
            if line_counter == 1:
                continue

            (gene_num, public_name, molecular_name, concise_description,
             provisional_description, detailed_description,
             automated_description, gene_class_description) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            gene_id = 'WormBase:'+gene_num
            if concise_description != 'none available':
                gu.addDefinition(target_graph, gene_id, concise_description)

            other_descriptions = {
                'provisional': provisional_description,
                'automated': automated_description,
                'detailed': detailed_description,
                'gene class': gene_class_description
            }
            for kind, text in other_descriptions.items():
                # skip anything that merely repeats the concise text,
                # is empty, or is a "none..." placeholder
                if text == concise_description or text == '' \
                        or re.match(r'none', text):
                    continue
                tagged = ' '.join((text, '['+kind+']'))
                gu.addDescription(target_graph, gene_id, tagged)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
    return
示例2: _process_nlx_157874_1_view
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    This table contains the Elements of Morphology data that has been
    screen-scraped into DISCO.  Each row's morphology term is added to
    ``self.graph``.

    Note that foaf:depiction is inverse of foaf:depicts relationship.

    Since it is bad form to have two definitions, the objective and
    subjective definitions are concatenated into one string.  Each part
    is stripped first and given a trailing period only if it does not
    already end with one; empty parts are left out, and no definition
    is added when both parts are empty.

    Added by the visible portion of this method, per term:
        a class with the term label      (gu.addClassToGraph)
        the combined definition          (gu.addDefinition)
        small and large figure URLs      (gu.addDepiction)
        the comment text                 (gu.addComment)
        each ';'-separated synonym       (gu.addSynonym, hasExactSynonym)
        each ';'-separated replaced term (gu.addSynonym, hasRelatedSynonym)

    :param raw: path of the tab-delimited file; its first line is a
        header row and is skipped
    :param limit: not referenced in the visible part of this method
        (the tail of the method is elided in this excerpt)
    :return:
    """
    gu = GraphUtils(curie_map.get())
    line_counter = 0
    with open(raw, 'r') as f1:
        f1.readline()  # read the header row; skip
        filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
        for line in filereader:
            line_counter += 1
            (morphology_term_id, morphology_term_num,
             morphology_term_label, morphology_term_url,
             terminology_category_label, terminology_category_url,
             subcategory, objective_definition, subjective_definition,
             comments, synonyms, replaces, small_figure_url,
             large_figure_url, e_uid, v_uid, v_uuid,
             v_last_modified) = line

            # note:
            # e_uid v_uuid v_last_modified terminology_category_url
            # subcategory v_uid morphology_term_num
            # terminology_category_label hp_label notes
            # are currently unused.

            # Add morphology term to graph as a class
            # with label, type, and description.
            gu.addClassToGraph(self.graph, morphology_term_id,
                               morphology_term_label)

            # Assemble the description text.  Strip BEFORE testing for
            # the final period so that trailing whitespace or embedded
            # newlines cannot cause a doubled period.
            definition_parts = []
            for part in (objective_definition, subjective_definition):
                part = part.strip()
                if part == '':
                    continue
                if not part.endswith('.'):
                    part += '.'
                definition_parts.append(part)
            definition = ' '.join(definition_parts)
            if definition != '':
                gu.addDefinition(
                    self.graph, morphology_term_id, definition)

            # <term id> FOAF:depicted_by literal url
            # <url> type foaf:depiction
            # do we want both images?
            # morphology_term_id has depiction small_figure_url
            if small_figure_url != '':
                gu.addDepiction(self.graph, morphology_term_id,
                                small_figure_url)

            # morphology_term_id has depiction large_figure_url
            if large_figure_url != '':
                gu.addDepiction(self.graph, morphology_term_id,
                                large_figure_url)

            # morphology_term_id has comment comments
            if comments != '':
                gu.addComment(self.graph, morphology_term_id,
                              comments.strip())

            # morphology_term_id hasExactSynonym synonyms (; delimited)
            if synonyms != '':
                for s in synonyms.split(';'):
                    s = s.strip()
                    if s == '':
                        # e.g. a trailing or doubled ';'
                        continue
                    gu.addSynonym(
                        self.graph, morphology_term_id, s,
                        gu.properties['hasExactSynonym'])

            # morphology_term_id hasRelatedSynonym replaces (; delimited)
            if replaces != '' and replaces != synonyms:
                for s in replaces.split(';'):
                    s = s.strip()
                    if s == '':
                        continue
                    gu.addSynonym(
                        self.graph, morphology_term_id, s,
                        gu.properties['hasRelatedSynonym'])

            # ......... the rest of the method is omitted in this excerpt .........
示例3: GeneReviews
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
# ......... part of the code is omitted here .........
# NOTE(review): this excerpt is the interior of a method whose header and
# enclosing loop are not visible, and its indentation has been lost.
# Names such as nbk, nbk_id, url, book_item, books_not_found, c and limit
# are bound outside the visible lines.
# figure out if the book is there; if so, process, otherwise skip
book_dir = '/'.join((self.rawdir, 'books'))
book_files = os.listdir(book_dir)
if ''.join((nbk, '.html')) not in book_files:
# logger.warning("No book found locally for %s; skipping", nbk)
books_not_found.add(nbk)
continue
logger.info("Processing %s", nbk)
# NOTE(review): the file handle is not closed anywhere in the visible
# lines, and BeautifulSoup is called without an explicit parser argument.
page = open(url)
soup = BeautifulSoup(page.read())
# sec0 == clinical description
clin_summary = \
soup.find(
'div', id=re.compile(".*Summary.sec0"))
if clin_summary is not None:
# NOTE(review): if the summary div has no <p>, find() presumably returns
# None and p.text would raise AttributeError -- TODO confirm and guard.
p = clin_summary.find('p')
ptext = p.text
# collapse every run of whitespace into a single space
ptext = re.sub(r'\s+', ' ', ptext)
ul = clin_summary.find('ul')
if ul is not None:
item_text = list()
for li in ul.find_all('li'):
item_text.append(re.sub(r'\s+', ' ', li.text))
# NOTE(review): the joined list text is appended with no separator
# between it and the paragraph text.
ptext += ' '.join(item_text)
# add in the copyright and citation info to description
ptext = \
' '.join(
(ptext,
'[GeneReviews:NBK1116, GeneReviews:NBK138602, ' +
nbk_id+']'))
self.gu.addDefinition(self.graph, nbk_id, ptext.strip())
# get the pubs
pmid_set = set()
pub_div = soup.find('div', id=re.compile(r".*Literature_Cited"))
if pub_div is not None:
ref_list = pub_div.find_all('div', attrs={'class': "bk_ref"})
for r in ref_list:
for a in r.find_all(
'a', attrs={'href': re.compile(r"pubmed")}):
# take the PubMed number from the link text when it starts with
# 'PubMed:', otherwise from the trailing digits of the href
if re.match(r'PubMed:', a.text):
pmnum = re.sub(r'PubMed:\s*', '', a.text)
else:
# NOTE(review): re.search() returns None when the href does not end in
# /pubmed/<digits>, so .group(1) would raise AttributeError; the
# `pmnum is not None` test below cannot catch that case.
pmnum = \
re.search(
r'\/pubmed\/(\d+)$', a['href']).group(1)
if pmnum is not None:
pmid = 'PMID:'+str(pmnum)
self.gu.addTriple(
self.graph, pmid,
self.gu.object_properties['is_about'],
nbk_id)
pmid_set.add(pmnum)
# NOTE(review): `r` is also the loop variable of `for r in ref_list`
# above; if this assignment sits inside that loop it rebinds it.
r = Reference(
pmid, Reference.ref_types['journal_article'])
r.addRefToGraph(self.graph)
# TODO add author history, copyright, license to dataset
# TODO get PMID-NBKID equivalence (near foot of page),
# and make it "is about" link
# self.gu.addTriple(
# self.graph, pmid,
# self.gu.object_properties['is_about'], nbk_id)
# for example: NBK1191 PMID:20301370
# add the book to the dataset
self.dataset.setFileAccessUrl(book_item['url'])
if limit is not None and c > limit:
break
# finish looping through books
# report the books that were skipped: only the count when more than 100
# are missing, otherwise the count plus the identifiers themselves
l = len(books_not_found)
if len(books_not_found) > 0:
if l > 100:
logger.warning("There were %d books not found.", l)
else:
logger.warning(
"The following %d books were not found locally: %s",
l, str(books_not_found))
# the processed count is logged as the counter c minus the missing books
logger.info(
"Finished processing %d books for clinical descriptions", c-l)
return
def getTestSuite(self):
    """
    Build the unittest suite for this source.

    The imports are done inside the method, so the test module is only
    loaded when a suite is actually requested.

    :return: a suite loaded from GeneReviewsTestCase
    """
    import unittest
    from tests.test_genereviews import GeneReviewsTestCase

    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(GeneReviewsTestCase)
示例4: _process_genes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
def _process_genes(self, limit=None):
    """
    This method processes the KEGG gene IDs.

    Each row of the gene listing holds a gene id and a delimited name
    string such as:
        DST, BP240, BPA, BPAG1; dystonin; K10382 dystonin
    The string is semicolon delimited (symbols; name; gene class), and
    the symbols part is itself a comma-delimited list.  The first symbol
    becomes the gene label, the remaining symbols are added as synonyms,
    and the long-form name is added as a definition.

    The label for every gene id is also remembered in
    ``self.label_hash`` (first occurrence wins), even in test mode for
    genes that are not in the test set.

    This is hardcoded to process only human genes.

    Triples created:
    <gene_id> is a SO:gene
    <gene_id> rdfs:label <first symbol>
    <gene_id> definition <long-form name>   (when present)
    <gene_id> synonym <each remaining symbol>

    :param limit: when not None (and not in test mode), stop once the
        running row counter exceeds this value
    :return:
    """
    logger.info("Processing genes")
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (gene_id, gene_name) = row

            gene_id = 'KEGG-'+gene_id.strip()

            # split "symbols; name; gene class", then split the symbols
            gene_stuff = gene_name.split(';')
            symbollist = [s.strip() for s in gene_stuff[0].split(',')]
            first_symbol = symbollist[0]

            if gene_id not in self.label_hash:
                self.label_hash[gene_id] = first_symbol

            if self.testMode and gene_id not in self.test_ids['genes']:
                continue

            # Add the gene as a class.
            geno.addGene(gene_id, first_symbol)

            # add the long name as the description
            if len(gene_stuff) > 1:
                description = gene_stuff[1].strip()
                gu.addDefinition(g, gene_id, description)

            # add the rest of the symbols as synonyms; symbollist[0] is
            # already the label, so it is deliberately left out here
            for symbol in symbollist[1:]:
                if symbol == '':
                    # e.g. a trailing or doubled comma
                    continue
                gu.addSynonym(g, gene_id, symbol)

            # TODO add the KO here?

            if (not self.testMode) and (
                    limit is not None and line_counter > limit):
                break

    logger.info("Done with genes")
    return
示例5: _process_all
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDefinition [as 别名]
# ......... part of the code is omitted here .........
# NOTE(review): this excerpt is the interior of a method whose header and
# enclosing loop are not visible, and its indentation has been lost.
# Names such as titles, other_labels, label, e, omimnum, g, gu, geno,
# tax_num, tax_id and tax_label are bound outside the visible lines.
other_labels += self._get_alt_labels(titles['alternativeTitles'])
if 'includedTitles' in titles:
other_labels += self._get_alt_labels(titles['includedTitles'])
# add synonyms of alternate labels
# preferredTitle": "PFEIFFER SYNDROME",
# "alternativeTitles": "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
# "includedTitles": "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"
# remove the abbreviation (comes after the ;) from the preferredTitle, and add it as a synonym
abbrev = None
# the abbreviation is the second ';'-separated piece of the label, if any
if len(re.split(';', label)) > 1:
abbrev = (re.split(';', label)[1].strip())
newlabel = self._cleanup_label(label)
description = self._get_description(e['entry'])
omimid = 'OMIM:'+str(omimnum)
# removed entries are only marked as deprecated classes
if e['entry']['status'] == 'removed':
gu.addDeprecatedClass(g, omimid)
else:
omimtype = self._get_omimtype(e['entry'])
# this uses our cleaned-up label
gu.addClassToGraph(g, omimid, newlabel, omimtype)
# add the original OMIM label as a synonym
gu.addSynonym(g, omimid, label)
# add the alternate labels and includes as synonyms
for l in other_labels:
gu.addSynonym(g, omimid, l)
# for OMIM, we're adding the description as a definition
gu.addDefinition(g, omimid, description)
if abbrev is not None:
gu.addSynonym(g, omimid, abbrev)
# if this is a genetic locus (but not sequenced) then add the chrom loc info
# NOTE(review): within the visible lines omimtype is assigned only after
# the `else:` above; confirm this test is nested inside that branch,
# otherwise it would see no value (or a stale one) for removed entries.
if omimtype == Genotype.genoparts['biological_region']:
if 'geneMapExists' in e['entry'] and e['entry']['geneMapExists']:
genemap = e['entry']['geneMap']
if 'cytoLocation' in genemap:
cytoloc = genemap['cytoLocation']
# parse the cytoloc. add this omim thing as a subsequence of the cytofeature
# 18p11.3-p11.2
# for now, just take the first one
# FIXME add the other end of the range, but not sure how to do that
# not sure if saying subsequence of feature is the right relationship
cytoloc = cytoloc.split('-')[0]
f = Feature(omimid, None, None)
if 'chromosome' in genemap:
# NOTE(review): `chrom` is assigned here but not read in the visible lines.
chrom = makeChromID(str(genemap['chromosome']), tax_num, 'CHR')
geno.addChromosomeClass(str(genemap['chromosome']), tax_id, tax_label)
loc = makeChromID(cytoloc, tax_num, 'CHR')
gu.addClassToGraph(g, loc, cytoloc) # this is the chr band
f.addSubsequenceOfFeature(g, loc)
f.addFeatureToGraph(g)
# NOTE(review): this `pass` is a no-op statement.
pass
# check if moved, if so, make it deprecated and replaced/consider class to the other thing(s)
# some entries have been moved to multiple other entries and use the joining raw word "and"
# 612479 is movedto: "603075 and 603029" OR
# others use a comma-delimited list, like:
# 610402 is movedto: "609122,300870"
if e['entry']['status'] == 'moved':
# the excerpt ends here; the body of the following test is not visible
if re.search('and', str(e['entry']['movedTo'])):