本文整理汇总了Python中dipper.models.Model.Model.addDefinition方法的典型用法代码示例。如果您正苦于以下问题:Python Model.addDefinition方法的具体用法?Python Model.addDefinition怎么用?Python Model.addDefinition使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.addDefinition方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_gene_desc
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDefinition [as 别名]
def process_gene_desc(self, limit):
    """
    Attach WormBase gene descriptions to the graph.

    Reads the gzipped, tab-delimited file named by
    ``self.files['gene_desc']['file']`` under ``self.rawdir``.  Rows whose
    joined text starts with '#' are ignored, and the first remaining row
    is skipped as well.  For every other row the concise description is
    added as the gene's definition (unless it reads 'none available'),
    and each of the provisional, automated, detailed and gene-class
    descriptions is added as a description suffixed with its kind,
    e.g. ``"... [automated]"``.

    :param limit: when not None (and not in test mode), stop once the
        count of non-comment rows exceeds this number
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_desc']['file']))
    graph = self.testgraph if self.testMode else self.graph
    model = Model(graph)
    logger.info("Processing Gene descriptions")
    line_counter = 0
    with gzip.open(raw, 'rb') as csvfile:
        filereader = csv.reader(
            io.TextIOWrapper(csvfile, newline=""), delimiter='\t',
            quotechar='\"')
        for row in filereader:
            if re.match(r'\#', ''.join(row)):
                continue
            line_counter += 1
            if line_counter == 1:
                continue

            # every data row must carry exactly these eight columns
            (gene_num, _public_name, _molecular_name, concise_description,
             provisional_description, detailed_description,
             automated_description, gene_class_description) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            gene_id = 'WormBase:' + gene_num
            if concise_description != 'none available':
                model.addDefinition(gene_id, concise_description)

            # kind-of-description tag paired with its text, in emit order
            tagged_descriptions = (
                ('provisional', provisional_description),
                ('automated', automated_description),
                ('detailed', detailed_description),
                ('gene class', gene_class_description),
            )
            for tag, text in tagged_descriptions:
                # drop texts that repeat the concise description,
                # begin with "none", or are empty
                unusable = (
                    text == concise_description
                    or re.match(r'none', text)
                    or text == '')
                if unusable:
                    continue
                tagged_text = ' '.join((text, '[' + tag + ']'))
                model.addDescription(gene_id, tagged_text)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
示例2: _transform_entry
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDefinition [as 别名]
def _transform_entry(self, e, graph):
# Transform one OMIM entry into graph statements.
# `e` is indexed as e['entry'][...] (mimNumber, titles, status, and
# optionally geneMapExists / geneMap); `graph` is handed to Model,
# Genotype and G2PAssoc.
# NOTE(review): indentation was lost in this excerpt and the method is cut
# off at the end, so the nesting of the statements below (in particular
# how far the `else:` branch extends) cannot be confirmed from here.
g = graph
model = Model(g)
geno = Genotype(graph)
# NOTE(review): geno, tax_num, tax_id, tax_label and build_id are not used
# in the visible part of this method.
tax_num = '9606'
tax_id = 'NCBITaxon:9606'
tax_label = 'Human'
build_num = "GRCh38"
build_id = "NCBIGenome:"+build_num
# get the numbers, labels, and descriptions
omimnum = e['entry']['mimNumber']
titles = e['entry']['titles']
label = titles['preferredTitle']
# collect alternative and included titles; they are added as related
# synonyms further down
other_labels = []
if 'alternativeTitles' in titles:
other_labels += self._get_alt_labels(titles['alternativeTitles'])
if 'includedTitles' in titles:
other_labels += self._get_alt_labels(titles['includedTitles'])
# add synonyms of alternate labels
# preferredTitle": "PFEIFFER SYNDROME",
# "alternativeTitles":
# "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
# "includedTitles":
# "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"
# remove the abbreviation (comes after the ;) from the preferredTitle,
# and add it as a synonym
abbrev = None
if len(re.split(r';', label)) > 1:
abbrev = (re.split(r';', label)[1].strip())
newlabel = self._cleanup_label(label)
description = self._get_description(e['entry'])
omimid = 'OMIM:'+str(omimnum)
# entries whose status is 'removed' are only recorded as deprecated
if e['entry']['status'] == 'removed':
model.addDeprecatedClass(omimid)
else:
omimtype = self._get_omimtype(e['entry'])
nodelabel = newlabel
# this uses our cleaned-up label
if omimtype == Genotype.genoparts['heritable_phenotypic_marker']:
if abbrev is not None:
nodelabel = abbrev
# in this special case,
# make it a disease by not declaring it as a gene/marker
model.addClassToGraph(omimid, nodelabel, None, newlabel)
elif omimtype == Genotype.genoparts['gene']:
if abbrev is not None:
nodelabel = abbrev
model.addClassToGraph(omimid, nodelabel, omimtype, newlabel)
else:
model.addClassToGraph(omimid, newlabel, omimtype)
# add the original screaming-caps OMIM label as a synonym
model.addSynonym(omimid, label)
# add the alternate labels and includes as synonyms
for l in other_labels:
model.addSynonym(omimid, l, 'OIO:hasRelatedSynonym')
# for OMIM, we're adding the description as a definition
model.addDefinition(omimid, description)
if abbrev is not None:
model.addSynonym(omimid, abbrev, 'OIO:hasRelatedSynonym')
# if this is a genetic locus (but not sequenced)
# then add the chrom loc info
# but add it to the ncbi gene identifier,
# not to the omim id (we reserve the omim id to be the phenotype)
feature_id = None
feature_label = None
if 'geneMapExists' in e['entry'] and e['entry']['geneMapExists']:
genemap = e['entry']['geneMap']
is_gene = False
# NOTE(review): omimtype is only assigned in the `else:` branch above, so
# this comparison presumably also lives inside that branch - confirm
# against the original, indented source.
if omimtype == \
Genotype.genoparts['heritable_phenotypic_marker']:
# get the ncbigene ids
ncbifeature = self._get_mapped_gene_ids(e['entry'], g)
# exactly one mapped gene: link it to the OMIM id via a G2PAssoc
if len(ncbifeature) == 1:
feature_id = 'NCBIGene:'+str(ncbifeature[0])
# add this feature as a cause for the omim disease
# TODO SHOULD I EVEN DO THIS HERE?
assoc = G2PAssoc(g, self.name, feature_id, omimid)
assoc.add_association_to_graph()
elif len(ncbifeature) > 1:
logger.info(
"Its ambiguous when %s maps to >1 gene id: %s",
omimid, str(ncbifeature))
else: # no ncbi feature, make an anonymous one
feature_id = self._make_anonymous_feature(str(omimnum))
feature_label = abbrev
elif omimtype == Genotype.genoparts['gene']:
#......... part of the code is omitted here .........
示例3: _process_nlx_157874_1_view
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDefinition [as 别名]
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    Add the Elements of Morphology terms from a tab-delimited view file.

    This table contains the Elements of Morphology data that has been
    screen-scraped into DISCO.
    Note that foaf:depiction is inverse of foaf:depicts relationship.
    Since it is bad form to have two definitions,
    we concatenate the two into one string.

    Triples:
        <eom id> a owl:Class
            rdf:label Literal(eom label)
            OIO:hasRelatedSynonym Literal(synonym list)
            IAO:definition Literal(objective_def. subjective def)
            foaf:depiction Literal(small_image_url),
                           Literal(large_image_url)
            foaf:page Literal(page_url)
            rdfs:comment Literal(long commented text)

    :param raw: path of the tab-delimited file; its first line is a
        header and every following row must have exactly 20 columns
    :param limit: not used in the part of the method shown here
    :return:
    """
    def _as_sentence(text):
        """Strip *text*; if anything is left, make it end with a period."""
        # Strip BEFORE looking at the last character, so that "Foo. "
        # does not become "Foo.." and a blank cell stays empty.
        text = text.strip()
        if text != '' and not text.endswith('.'):
            text += '.'
        return text

    model = Model(self.graph)
    line_counter = 0
    with open(raw, 'r') as f1:
        f1.readline()  # read the header row; skip
        filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
        for line in filereader:
            line_counter += 1
            (morphology_term_id, morphology_term_num,
             morphology_term_label, morphology_term_url,
             terminology_category_label, terminology_category_url,
             subcategory, objective_definition, subjective_definition,
             comments, synonyms, replaces, small_figure_url,
             large_figure_url, e_uid, v_uid, v_uuid,
             v_last_modified, v_status, v_lastmodified_epoch) = line

            # note:
            # e_uid v_uuid v_last_modified terminology_category_url
            # subcategory v_uid morphology_term_num
            # terminology_category_label hp_label notes
            # are currently unused.

            # Add morphology term to graph as a class with its label.
            model.addClassToGraph(morphology_term_id,
                                  morphology_term_label)

            # Assemble the definition text: objective part first, then
            # the subjective part, each closed with a single period.
            definition = ' '.join((
                _as_sentence(objective_definition),
                _as_sentence(subjective_definition))).strip()
            model.addDefinition(morphology_term_id, definition)

            # <term id> FOAF:depicted_by literal url
            # <url> type foaf:depiction
            # do we want both images?
            # morphology_term_id has depiction small_figure_url
            if small_figure_url != '':
                model.addDepiction(morphology_term_id,
                                   small_figure_url)

            # morphology_term_id has depiction large_figure_url
            if large_figure_url != '':
                model.addDepiction(morphology_term_id,
                                   large_figure_url)

            # morphology_term_id has comment comments
            if comments != '':
                model.addComment(morphology_term_id,
                                 comments.strip())

            # ';'-delimited exact synonyms
            if synonyms != '':
                for s in synonyms.split(';'):
                    model.addSynonym(
                        morphology_term_id, s.strip(),
                        model.annotation_properties['hasExactSynonym'])

            # morphology_term_id hasRelatedSynonym replaces (; delimited)
            if replaces != '' and replaces != synonyms:
                for s in replaces.split(';'):
                    model.addSynonym(
                        morphology_term_id, s.strip(),
                        model.annotation_properties['hasRelatedSynonym'])
            #......... part of the code is omitted here .........
示例4: process_nbk_html
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDefinition [as 别名]
def process_nbk_html(self, limit):
    """
    Pull clinical descriptions and cited PubMed ids out of GeneReviews
    book pages that are available locally.

    Here we process the gene reviews books to fetch
    the clinical descriptions to include in the ontology.
    We only use books that have been acquired manually,
    as NCBI Bookshelf does not permit automated downloads.
    This parser will only process the books that are found in
    the ``raw/genereviews/books`` directory,
    permitting partial completion.

    For each book in ``self.book_ids`` that has an ``<id>.html`` file in
    ``<rawdir>/books``: the first paragraph (plus list items) of the
    summary section becomes the book's definition, and every PubMed link
    in the "Literature Cited" section is added as a journal-article
    reference that ``is_about`` the book.

    :param limit: when not None, stop after the book counter exceeds it
    :return:
    """
    model = Model(self.graph)
    cnt = 0
    books_not_found = set()
    clin_des_regx = re.compile(r".*Summary.sec0")
    lit_cite_regex = re.compile(r".*Literature_Cited")
    # NOTE(review): presumably compiled so that Beautiful Soup matches
    # hrefs that merely contain "pubmed" rather than equal it - confirm.
    pubmed_regex = re.compile(r"pubmed")
    for nbk in self.book_ids:
        cnt += 1
        nbk_id = 'GeneReviews:'+nbk
        book_item = self.all_books.get(nbk)
        url = '/'.join((self.rawdir, book_item['file']))

        # figure out if the book is there; if so, process, otherwise skip
        book_dir = '/'.join((self.rawdir, 'books'))
        book_files = os.listdir(book_dir)
        if ''.join((nbk, '.html')) not in book_files:
            # LOG.warning("No book found locally for %s; skipping", nbk)
            books_not_found.add(nbk)
            continue
        LOG.info("Processing %s", nbk)

        # Read through a context manager so the file handle is released
        # for every book instead of being left open.
        with open(url) as page:
            soup = BeautifulSoup(page.read())

        # sec0 == clinical description
        clin_summary = soup.find('div', id=clin_des_regx)
        if clin_summary is not None:
            # A summary section without any <p> yields an empty lead
            # text rather than an AttributeError.
            first_para = clin_summary.find('p')
            ptext = '' if first_para is None else first_para.text
            ptext = re.sub(r'\s+', ' ', ptext)

            unlst = clin_summary.find('ul')
            if unlst is not None:
                item_text = [
                    re.sub(r'\s+', ' ', lst_itm.text)
                    for lst_itm in unlst.find_all('li')]
                ptext += ' '.join(item_text)

            # add in the copyright and citation info to description
            ptext = ' '.join((
                ptext, '[GeneReviews:NBK1116, GeneReviews:NBK138602, ' +
                nbk_id + ']'))
            model.addDefinition(nbk_id, ptext.strip())

        # get the pubs
        pmid_set = set()
        pub_div = soup.find('div', id=lit_cite_regex)
        if pub_div is not None:
            ref_list = pub_div.find_all('div', attrs={'class': "bk_ref"})
            for ref in ref_list:
                for anchor in ref.find_all(
                        'a', attrs={'href': pubmed_regex}):
                    if re.match(r'PubMed:', anchor.text):
                        pmnum = re.sub(r'PubMed:\s*', '', anchor.text)
                    else:
                        # The href may mention pubmed without ending in
                        # /pubmed/<digits>; leave pmnum as None then so
                        # the check below skips the link.
                        href_match = re.search(
                            r'\/pubmed\/(\d+)$', anchor['href'])
                        pmnum = None
                        if href_match is not None:
                            pmnum = href_match.group(1)
                    if pmnum is not None:
                        pmid = 'PMID:'+str(pmnum)
                        self.graph.addTriple(
                            pmid, self.globaltt['is_about'], nbk_id)
                        pmid_set.add(pmnum)
                        reference = Reference(
                            self.graph, pmid,
                            self.globaltt['journal article'])
                        reference.addRefToGraph()

        # TODO add author history, copyright, license to dataset
        # TODO get PMID-NBKID equivalence (near foot of page),
        # and make it "is about" link
        # self.gu.addTriple(
        #     self.graph, pmid,
        #     self.globaltt['is_about'], nbk_id)
        # for example: NBK1191 PMID:20301370

        # add the book to the dataset
        self.dataset.setFileAccessUrl(book_item['url'])

        if limit is not None and cnt > limit:
            break

    # finish looping through books
    bknfd = len(books_not_found)
    if len(books_not_found) > 0:
        if bknfd > 100:
            LOG.warning("There were %d books not found.", bknfd)
    #......... part of the code is omitted here .........
示例5: _process_genes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDefinition [as 别名]
def _process_genes(self, limit=None):
    """
    This method processes the KEGG gene IDs.

    The label for the gene is pulled as
    the first symbol in the list of gene symbols;
    the rest are added as synonyms.
    The long-form of the gene name is added as a definition.
    When the third ';'-delimited part carries a KO id (``K`` followed by
    digits), the gene is added as a member of that ``KEGG-ko:`` family.
    This is hardcoded to just process human genes.

    Triples created:
        <gene_id> is a SO:gene
        <gene_id> rdfs:label <gene_name>

    :param limit: when not None (and not in test mode), stop once the
        reader's line number exceeds this value
    :return:
    """
    LOG.info("Processing genes")
    if self.test_mode:
        graph = self.testgraph
    else:
        graph = self.graph
    model = Model(graph)
    family = Family(graph)
    geno = Genotype(graph)
    raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            (gene_id, gene_name) = row
            gene_id = 'KEGG-'+gene_id.strip()

            # the gene listing has a bunch of labels
            # that are delimited, as:
            # DST, BP240, BPA, BPAG1, CATX-15, CATX15, D6S1101, DMH, DT,
            # EBSB2, HSAN6, MACF2; dystonin; K10382 dystonin
            # it looks like the list is semicolon delimited
            # (symbol, name, gene_class)
            # where the symbol is a comma-delimited list
            # here, we split them up.
            # we will take the first abbreviation and make it the symbol
            # then take the rest as synonyms

            # Split on ';' (the previous pattern 'r;' had the raw-string
            # prefix inside the quotes and so split on the text "r;").
            gene_stuff = gene_name.split(';')
            symbollist = gene_stuff[0].split(',')
            first_symbol = symbollist[0].strip()
            if gene_id not in self.label_hash:
                self.label_hash[gene_id] = first_symbol

            if self.test_mode and gene_id not in self.test_ids['genes']:
                continue

            # Add the gene as a class.
            geno.addGene(gene_id, first_symbol)

            # add the long name as the description
            if len(gene_stuff) > 1:
                description = gene_stuff[1].strip()
                model.addDefinition(gene_id, description)

            # add the rest of the symbols as synonyms; the first one is
            # already the label, so it is skipped here
            for symbol in symbollist[1:]:
                model.addSynonym(gene_id, symbol.strip())

            if len(gene_stuff) > 2:
                ko_part = gene_stuff[2]
                ko_match = re.search(r'K\d+', ko_part)
                # The pattern has no capture group, so the whole match
                # (group 0) is the KO identifier.
                if ko_match is not None:
                    ko = 'KEGG-ko:'+ko_match.group(0)
                    family.addMemberOf(gene_id, ko)

            if not self.test_mode \
                    and limit is not None and reader.line_num > limit:
                break

    LOG.info("Done with genes")