本文整理汇总了Python中dipper.models.Model.Model.addDeprecatedClass方法的典型用法代码示例。如果您正苦于以下问题：Python Model.addDeprecatedClass方法的具体用法？Python Model.addDeprecatedClass怎么用？Python Model.addDeprecatedClass使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.addDeprecatedClass方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_gene_ids
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def process_gene_ids(self, limit):
    """
    Read the gzipped, comma-separated ``gene_ids`` file and add each gene
    to the graph.

    Every row must hold exactly six fields (taxon number, gene number,
    symbol, synonym, live/dead status, gene type), for example
    ``6239,WBGene00000001,aap-1,Y110A7A.10,Live,protein_coding_gene``.

    For each row kept, the gene is added as a class labelled with its
    symbol (falling back to the synonym, then to no label), flagged as
    deprecated when its status is ``Dead``, linked to its taxon, and given
    its synonym when one is present.

    :param limit: when not None and not in test mode, stop once more than
        ``limit`` rows have been read
    :return: None
    """
    raw_path = '/'.join((self.rawdir, self.files['gene_ids']['file']))
    target_graph = self.testgraph if self.testMode else self.graph

    model = Model(target_graph)
    logger.info("Processing: %s", self.files['gene_ids']['file'])
    geno = Genotype(target_graph)

    with gzip.open(raw_path, 'rb') as gz_handle:
        text_stream = io.TextIOWrapper(gz_handle, newline="")
        records = csv.reader(text_stream, delimiter=',', quotechar='\"')

        # rows are numbered from 1; filtered rows still count toward limit
        for line_counter, record in enumerate(records, 1):
            (taxon_num, gene_num, gene_symbol,
             gene_synonym, live, gene_type) = record

            # in test mode only the whitelisted genes are loaded
            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            taxon_id = 'NCBITaxon:' + taxon_num
            gene_id = 'WormBase:' + gene_num

            # symbol first, then synonym, otherwise leave the gene unlabelled
            gene_label = gene_symbol or gene_synonym or None

            model.addClassToGraph(
                gene_id, gene_label, Genotype.genoparts['gene'])
            if live == 'Dead':
                model.addDeprecatedClass(gene_id)
            geno.addTaxon(taxon_id, gene_id)
            if gene_synonym not in ('', None):
                model.addSynonym(gene_id, gene_synonym)

            over_limit = limit is not None and line_counter > limit
            if over_limit and not self.testMode:
                break
示例2: _process_genes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def _process_genes(self, limit=None):
# Parse the tab-delimited HGNC gene file named by self.files['genes']['file']
# (under self.rawdir) and add its genes to the graph through Model.
#
# :param limit: not referenced in the visible part of this listing
#
# NOTE(review): this listing has lost its indentation and is cut off by the
# elision marker at the end, so the nesting of the statements below cannot be
# determined from the text alone -- confirm against the original source.
#
# Write to the test graph in test mode, otherwise to the main graph.
if self.test_mode:
graph = self.testgraph
else:
graph = self.graph
# geno is not used in the visible part of this listing.
geno = Genotype(graph)
model = Model(graph)
raw = '/'.join((self.rawdir, self.files['genes']['file']))
# Expected column names; each field below is located by its position in col.
col = self.files['genes']['columns']
LOG.info("Processing HGNC genes")
# Both patterns are compiled here but not used in the visible part of this
# listing (presumably applied to `location` further down -- TODO confirm).
chr_pattern = re.compile(r'(\d+|X|Y|Z|W|MT)[pq$]')
band_pattern = re.compile(r'([pq][A-H\d]?\d?(?:\.\d+)?)')
with open(raw, 'r', encoding="utf8") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
# The first row is taken as the header and checked against col;
# the process exits with status -1 when the check returns a falsy value.
row = next(filereader)
if not self.check_fileheader(col, row):
exit(-1)
for row in filereader:
# To generate:
# head -1 hgnc_complete_set.txt.1 | tr '\t' '\n' |
# sed "s/\(.*\)/\1 = row[col.index(\'\1\')]/g"
hgnc_id = row[col.index('hgnc_id')].strip()
symbol = row[col.index('symbol')].strip()
name = row[col.index('name')].strip()
# locus_group = row[col.index('locus_group')]
locus_type = row[col.index('locus_type')].strip()
# status = row[col.index('status')]
location = row[col.index('location')].strip()
# location_sortable = row[col.index('location_sortable')]
# alias_symbol = row[col.index('alias_symbol')]
# alias_name = row[col.index('alias_name')]
# prev_symbol = row[col.index('prev_symbol')]
# prev_name = row[col.index('prev_name')]
# gene_family = row[col.index('gene_family')]
# gene_family_id = row[col.index('gene_family_id')]
# date_approved_reserved = row[col.index('date_approved_reserved')]
# date_symbol_changed = row[col.index('date_symbol_changed')]
# date_name_changed = row[col.index('date_name_changed')]
# date_modified = row[col.index('date_modified')]
entrez_id = row[col.index('entrez_id')].strip()
ensembl_gene_id = row[col.index('ensembl_gene_id')].strip()
# vega_id = row[col.index('vega_id')]
# ucsc_id = row[col.index('ucsc_id')]
# ena = row[col.index('ena')]
# refseq_accession = row[col.index('refseq_accession')]
# ccds_id = row[col.index('ccds_id')]
# uniprot_ids = row[col.index('uniprot_ids')]
pubmed_ids = row[col.index('pubmed_id')].strip() # pipe separated!
# mgd_id = row[col.index('mgd_id')]
# rgd_id = row[col.index('rgd_id')]
# lsdb = row[col.index('lsdb')]
# cosmic = row[col.index('cosmic')]
omim_ids = row[col.index('omim_id')].strip() # pipe separated!
# mirbase = row[col.index('mirbase')]
# homeodb = row[col.index('homeodb')]
# snornabase = row[col.index('snornabase')]
# bioparadigms_slc = row[col.index('bioparadigms_slc')]
# orphanet = row[col.index('orphanet')]
# pseudogene.org = row[col.index('pseudogene.org')]
# horde_id = row[col.index('horde_id')]
# merops = row[col.index('merops')]
# imgt = row[col.index('imgt')]
# iuphar = row[col.index('iuphar')]
# kznf_gene_catalog = row[col.index('kznf_gene_catalog')]
# mamit_trnadb = row[col.index('mamit-trnadb')]
# cd = row[col.index('cd')]
# lncrnadb = row[col.index('lncrnadb')]
# enzyme_id = row[col.index('enzyme_id')]
# intermediate_filament_db = row[col.index('intermediate_filament_db')]
# rna_central_ids = row[col.index('rna_central_ids')]
# lncipedia = row[col.index('lncipedia')]
# gtrnadb = row[col.index('gtrnadb')]
# Test-mode filter: skip rows whose non-empty entrez_id is not listed in
# self.gene_ids. Rows with an empty entrez_id are not filtered out here.
if self.test_mode and entrez_id != '' and \
entrez_id not in self.gene_ids:
continue
# An empty name is passed on as None rather than as ''.
if name == '':
name = None
# Withdrawn loci are only marked as deprecated classes.
if locus_type == 'withdrawn':
model.addDeprecatedClass(hgnc_id)
else:
# NOTE(review): presumably resolve() returns its input unchanged when
# no mapping exists, which the comparison below would detect -- confirm.
gene_type_id = self.resolve(locus_type, False) # withdrawn -> None?
if gene_type_id != locus_type:
model.addClassToGraph(hgnc_id, symbol, gene_type_id, name)
model.makeLeader(hgnc_id)
# Declare the HGNC id equivalent to the NCBI Gene id when one is given.
if entrez_id != '':
model.addEquivalentClass(hgnc_id, 'NCBIGene:' + entrez_id)
if ensembl_gene_id != '':
# ......... part of the code is omitted here .........
示例3: _transform_entry
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def _transform_entry(self, e, graph):
# Add one OMIM entry to the graph: its class (or a deprecation marker when the
# entry was removed), synonyms, definition and, where a gene map exists,
# gene/feature information.
#
# :param e: mapping whose 'entry' item is read for 'mimNumber', 'titles',
#           'status' and, when present, 'geneMapExists' / 'geneMap'
# :param graph: graph handed to Model, Genotype and G2PAssoc
#
# NOTE(review): this listing has lost its indentation and is cut off by the
# elision marker at the end, so the nesting of the statements below cannot be
# determined from the text alone -- confirm against the original source.
g = graph
model = Model(g)
# geno and the taxon/build constants below are not used in the visible part
# of this listing.
geno = Genotype(graph)
tax_num = '9606'
tax_id = 'NCBITaxon:9606'
tax_label = 'Human'
build_num = "GRCh38"
build_id = "NCBIGenome:"+build_num
# get the numbers, labels, and descriptions
omimnum = e['entry']['mimNumber']
titles = e['entry']['titles']
label = titles['preferredTitle']
# Collect alternative and included titles; both are optional keys.
other_labels = []
if 'alternativeTitles' in titles:
other_labels += self._get_alt_labels(titles['alternativeTitles'])
if 'includedTitles' in titles:
other_labels += self._get_alt_labels(titles['includedTitles'])
# add synonyms of alternate labels
# preferredTitle": "PFEIFFER SYNDROME",
# "alternativeTitles":
# "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
# "includedTitles":
# "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"
# remove the abbreviation (comes after the ;) from the preferredTitle,
# and add it as a synonym
# abbrev is the stripped text between the first and second ';' of the
# preferred title, or None when the title contains no ';'.
abbrev = None
if len(re.split(r';', label)) > 1:
abbrev = (re.split(r';', label)[1].strip())
newlabel = self._cleanup_label(label)
description = self._get_description(e['entry'])
omimid = 'OMIM:'+str(omimnum)
# Removed entries are only marked as deprecated classes.
if e['entry']['status'] == 'removed':
model.addDeprecatedClass(omimid)
else:
omimtype = self._get_omimtype(e['entry'])
nodelabel = newlabel
# this uses our cleaned-up label
if omimtype == Genotype.genoparts['heritable_phenotypic_marker']:
if abbrev is not None:
nodelabel = abbrev
# in this special case,
# make it a disease by not declaring it as a gene/marker
model.addClassToGraph(omimid, nodelabel, None, newlabel)
elif omimtype == Genotype.genoparts['gene']:
# Genes use the abbreviation as node label when one is available.
if abbrev is not None:
nodelabel = abbrev
model.addClassToGraph(omimid, nodelabel, omimtype, newlabel)
else:
model.addClassToGraph(omimid, newlabel, omimtype)
# add the original screaming-caps OMIM label as a synonym
model.addSynonym(omimid, label)
# add the alternate labels and includes as synonyms
for l in other_labels:
model.addSynonym(omimid, l, 'OIO:hasRelatedSynonym')
# for OMIM, we're adding the description as a definition
model.addDefinition(omimid, description)
if abbrev is not None:
model.addSynonym(omimid, abbrev, 'OIO:hasRelatedSynonym')
# if this is a genetic locus (but not sequenced)
# then add the chrom loc info
# but add it to the ncbi gene identifier,
# not to the omim id (we reserve the omim id to be the phenotype)
feature_id = None
feature_label = None
if 'geneMapExists' in e['entry'] and e['entry']['geneMapExists']:
# genemap and is_gene are not used in the visible part of this listing.
genemap = e['entry']['geneMap']
is_gene = False
if omimtype == \
Genotype.genoparts['heritable_phenotypic_marker']:
# get the ncbigene ids
ncbifeature = self._get_mapped_gene_ids(e['entry'], g)
# Exactly one mapped gene: use it as the feature and assert a
# gene-to-phenotype association with this OMIM id.
if len(ncbifeature) == 1:
feature_id = 'NCBIGene:'+str(ncbifeature[0])
# add this feature as a cause for the omim disease
# TODO SHOULD I EVEN DO THIS HERE?
assoc = G2PAssoc(g, self.name, feature_id, omimid)
assoc.add_association_to_graph()
# More than one mapped gene: only log it; feature_id is left unset.
elif len(ncbifeature) > 1:
logger.info(
"Its ambiguous when %s maps to >1 gene id: %s",
omimid, str(ncbifeature))
else: # no ncbi feature, make an anonymous one
feature_id = self._make_anonymous_feature(str(omimnum))
feature_label = abbrev
elif omimtype == Genotype.genoparts['gene']:
# ......... part of the code is omitted here .........
示例4: _get_gene_history
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def _get_gene_history(self, limit):
    """
    Walk the gzipped, tab-separated gene_history file and record each
    discontinued gene id as deprecated in favour of its replacement.

    Both the current and the discontinued gene are added to the graph,
    as classes when ``self.class_or_indiv`` marks the current gene ``'C'``
    and as individuals otherwise. The discontinued symbol is also added
    as a synonym of the current gene.

    :param limit: when not None and not in test mode, stop once more than
        ``limit`` rows have been kept
    :return: None
    """
    target_graph = self.testgraph if self.testMode else self.graph
    model = Model(target_graph)
    logger.info("Processing Gene records")
    history_path = '/'.join(
        (self.rawdir, self.files['gene_history']['file']))
    logger.info("FILE: %s", history_path)

    line_counter = 0
    with gzip.open(history_path, 'rb') as handle:
        for raw_line in handle:
            record = raw_line.decode().strip()
            # comment lines start with '#'
            if record.startswith('#'):
                continue

            # exactly five tab-separated fields are expected per row
            (tax_num, gene_num, discontinued_num, discontinued_symbol,
             discontinued_date) = record.split('\t')

            # a '-' means there is no replacement (or no old id) to record
            if '-' in (gene_num, discontinued_num):
                continue

            # test mode filters by gene id, normal mode by taxon id
            if self.testMode:
                if int(gene_num) not in self.gene_ids:
                    continue
            elif int(tax_num) not in self.tax_ids:
                continue

            line_counter += 1
            gene_id = 'NCBIGene:' + gene_num
            discontinued_gene_id = 'NCBIGene:' + discontinued_num

            # pick the class or the individual flavour of the model calls
            if self.class_or_indiv.get(gene_id) == 'C':
                add_node = model.addClassToGraph
                deprecate = model.addDeprecatedClass
            else:
                add_node = model.addIndividualToGraph
                deprecate = model.addDeprecatedIndividual

            add_node(gene_id, None)
            add_node(discontinued_gene_id, discontinued_symbol)
            # the current gene id replaces the discontinued one
            deprecate(discontinued_gene_id, [gene_id])

            # the old symbol stays findable as a synonym of the new gene
            model.addSynonym(gene_id, discontinued_symbol)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
示例5: _process_genes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def _process_genes(self, limit=None):
# Parse the tab-delimited HGNC gene file named by self.files['genes']['file']
# (under self.rawdir) and add its genes to the graph through Model.
#
# :param limit: not referenced in the visible part of this listing
#
# NOTE(review): this listing has lost its indentation and is cut off by the
# elision marker at the end, so the nesting of the statements below cannot be
# determined from the text alone -- confirm against the original source.
#
# Write to the test graph in test mode, otherwise to the main graph.
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
model = Model(g)
raw = '/'.join((self.rawdir, self.files['genes']['file']))
line_counter = 0
logger.info("Processing HGNC genes")
with open(raw, 'r', encoding="utf8") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
# curl -s ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt | head -1 | tr '\t' '\n' | grep -n .
for row in filereader:
# Positional unpacking into 49 names: a row with any other number of
# fields raises ValueError. The header row is unpacked here as well,
# before it is skipped below.
(hgnc_id,
symbol,
name,
locus_group,
locus_type,
status,
location,
location_sortable,
alias_symbol,
alias_name,
prev_symbol,
prev_name,
gene_family,
gene_family_id,
date_approved_reserved,
date_symbol_changed,
date_name_changed,
date_modified,
entrez_id,
ensembl_gene_id,
vega_id,
ucsc_id,
ena,
refseq_accession,
ccds_id,
uniprot_ids,
pubmed_id,
mgd_id,
rgd_id,
lsdb,
cosmic,
omim_id,
mirbase,
homeodb,
snornabase,
bioparadigms_slc,
orphanet,
pseudogene_org,
horde_id,
merops,
imgt,
iuphar,
kznf_gene_catalog,
mamit_trnadb,
cd,
lncrnadb,
enzyme_id,
intermediate_filament_db,
rna_central_ids) = row
line_counter += 1
# skip header
if line_counter <= 1:
continue
# Test-mode filter: skip rows whose non-empty entrez_id, converted to int,
# is not in self.gene_ids. Rows with an empty entrez_id are not filtered.
if self.testMode and entrez_id != '' \
and int(entrez_id) not in self.gene_ids:
continue
# An empty name is passed on as None rather than as ''.
if name == '':
name = None
gene_type_id = self._get_gene_type(locus_type)
# The class is added before the withdrawn check that follows.
model.addClassToGraph(hgnc_id, symbol, gene_type_id, name)
if locus_type == 'withdrawn':
model.addDeprecatedClass(hgnc_id)
else:
model.makeLeader(hgnc_id)
# Declare equivalence to the NCBI Gene and Ensembl ids when given.
if entrez_id != '':
model.addEquivalentClass(
hgnc_id, 'NCBIGene:' + entrez_id)
if ensembl_gene_id != '':
model.addEquivalentClass(
hgnc_id, 'ENSEMBL:' + ensembl_gene_id)
# Only a single OMIM id (no '|' in the field) is considered, and it is made
# equivalent only when DipperUtil.is_omim_disease() returns a falsy value.
if omim_id != '' and "|" not in omim_id:
omim_curie = 'OMIM:' + omim_id
if not DipperUtil.is_omim_disease(omim_curie):
model.addEquivalentClass(hgnc_id, omim_curie)
geno.addTaxon('NCBITaxon:9606', hgnc_id)
# add pubs as "is about"
if pubmed_id != '':
# ......... part of the code is omitted here .........
示例6: _get_gene_history
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDeprecatedClass [as 别名]
def _get_gene_history(self, limit):
    """
    Loops through the gene_history file and adds the old gene ids
    as deprecated classes, where the new gene id is the replacement for it.
    The old gene symbol is added as a synonym to the gene.

    The file is gzipped and tab-separated. Its first line is the header,
    prefixed with a comment character, and is checked against the
    configured column list. A header mismatch is logged but does not stop
    processing. Blank lines and lines starting with '#' are skipped.

    :param limit: when not None and not in test mode, stop once more than
        ``limit`` rows have been kept
    :return: None
    :raises ValueError: if a required column name is missing from the
        configured column list
    """
    src_key = 'gene_history'
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    LOG.info("Processing Gene records")
    line_counter = 0
    myfile = '/'.join((self.rawdir, self.files[src_key]['file']))
    LOG.info("FILE: %s", myfile)
    col = self.files[src_key]['columns']

    # Column positions do not change between rows: look them up once
    # instead of calling col.index() four times per line.
    tax_idx = col.index('tax_id')
    gene_idx = col.index('GeneID')
    disc_id_idx = col.index('Discontinued_GeneID')
    disc_symbol_idx = col.index('Discontinued_Symbol')

    with gzip.open(myfile, 'rb') as tsv:
        row = tsv.readline().decode().strip().split('\t')
        row[0] = row[0][1:]  # strip the leading comment character
        if not self.check_fileheader(col, row):
            # Best effort: keep going, but leave a trace instead of
            # silently ignoring the mismatch.
            LOG.warning(
                "Header of %s does not match the expected columns; "
                "continuing anyway", myfile)

        for line in tsv:
            text = line.decode().strip()
            # Skip blank lines and comments. Indexing the first character
            # of an empty line would raise IndexError.
            if not text or text.startswith('#'):
                continue
            row = text.split('\t')

            tax_num = row[tax_idx].strip()
            gene_num = row[gene_idx].strip()
            discontinued_num = row[disc_id_idx].strip()
            discontinued_symbol = row[disc_symbol_idx].strip()

            # a '-' means there is no replacement (or no old id) to record
            if gene_num == '-' or discontinued_num == '-':
                continue
            # test mode filters by gene id, normal mode by taxon id;
            # both comparisons are made on the raw strings
            if self.test_mode and gene_num not in self.gene_ids:
                continue
            if not self.test_mode and tax_num not in self.tax_ids:
                continue

            line_counter += 1
            gene_id = ':'.join(('NCBIGene', gene_num))
            discontinued_gene_id = ':'.join(('NCBIGene', discontinued_num))

            # add the two genes
            if self.class_or_indiv.get(gene_id) == 'C':
                model.addClassToGraph(gene_id, None)
                model.addClassToGraph(
                    discontinued_gene_id, discontinued_symbol)
                # add the new gene id to replace the old gene id
                model.addDeprecatedClass(discontinued_gene_id, [gene_id])
            else:
                model.addIndividualToGraph(gene_id, None)
                model.addIndividualToGraph(
                    discontinued_gene_id, discontinued_symbol)
                model.addDeprecatedIndividual(
                    discontinued_gene_id, [gene_id])

            # also add the old symbol as a synonym of the new gene
            model.addSynonym(gene_id, discontinued_symbol)

            if not self.test_mode \
                    and limit is not None and line_counter > limit:
                break