本文整理汇总了Python中dipper.models.Model.Model.makeLeader方法的典型用法代码示例。如果您正苦于以下问题:Python Model.makeLeader方法的具体用法?Python Model.makeLeader怎么用?Python Model.makeLeader使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.makeLeader方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def parse(self, limit=None):
    """
    Build gene-to-phenotype associations from the cleaned ZFIN g2p file.

    The ZP mapping file is loaded into a helper ``ZFIN`` parser, then the
    tab-delimited ``g2p_clean`` file is read row by row.  Every gene is
    passed to ``Model.makeLeader``; when the row's superterm/subterm/quality
    sextuple maps to a ZP identifier, a ``G2PAssoc`` carrying evidence
    ``ECO:0000059`` is added to ``self.graph``, with the publication as its
    source when the row names one.

    :param limit: accepted for interface compatibility only; it is not
        consulted here, so the whole file is always read.
    :return: None
    :raises ValueError: if a row does not have exactly 26 columns
    """
    zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
    model = Model(self.graph)
    zp_file = '/'.join((self.rawdir, self.files['zpmap']['file']))
    g2p_file = '/'.join((self.rawdir, self.files['g2p_clean']['file']))
    zfin_parser.zp_map = zfin_parser._load_zp_mappings(zp_file)

    with open(g2p_file, 'r', encoding="utf8") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            # Unpack all 26 columns so a malformed row fails loudly.
            # The second relation column used to be bound to `pc_rel2_id`
            # a second time, silently overwriting the id with the label.
            (internal_id, symbol, gene_id, subterm1_id, subterm1_label,
             pc_rel_id, pc_rel_label, superterm1_id, superterm1_label,
             quality_id, quality_name, modifier, subterm2_id,
             subterm2_label, pc_rel2_id, pc_rel2_label, superterm2_id,
             superterm2_label, fish_id, fish_label, start_stage, end_stage,
             environment, pub_id, figure_id, unknown_field) = row

            zp_id = zfin_parser._map_sextuple_to_phenotype(
                superterm1_id, subterm1_id, quality_id, superterm2_id,
                subterm2_id, modifier)

            gene_curie = "ZFIN:{0}".format(gene_id)
            # the gene is made a clique leader whether or not a phenotype maps
            model.makeLeader(gene_curie)

            if not zp_id:
                continue

            assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
            if pub_id:
                pub_curie = "ZFIN:{0}".format(pub_id)
                reference = Reference(
                    self.graph, pub_curie, Reference.ref_types['document'])
                reference.addRefToGraph()
                assoc.add_source(pub_curie)
            assoc.add_evidence('ECO:0000059')
            assoc.add_association_to_graph()
示例2: _add_deprecated_snp
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _add_deprecated_snp(
        self, snp_id, snp_id_current, merged, chrom_num, chrom_pos):
    """
    Record a SNP either as deprecated (merged into another) or as a leader.

    When ``merged`` is ``'1'`` and ``snp_id_current`` is not blank, the
    current id is turned into a ``dbSNP:rs...`` curie, remembered in
    ``self.id_location_map`` under the SNP's location curie (when one could
    be built), handed to ``Model.addDeprecatedIndividual`` together with
    ``snp_id``, and made the clique leader.  In every other case ``snp_id``
    itself is made the clique leader.

    :param snp_id: curie of the SNP being processed
    :param snp_id_current: id the SNP was merged into, with or without a
        leading ``rs``; may be blank
    :param merged: ``'1'`` when the SNP has been merged into another record
    :param chrom_num: passed to ``self._make_location_curie``
    :param chrom_pos: passed to ``self._make_location_curie``
    :return: None
    """
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    location = self._make_location_curie(chrom_num, chrom_pos)

    # add deprecation information
    if merged == '1' and snp_id_current.strip() != '':
        # get the current rs_id, adding the 'rs' prefix only when missing
        current_rs_id = 'dbSNP:'
        if not snp_id_current.startswith('rs'):
            current_rs_id += 'rs'
        current_rs_id += str(snp_id_current)

        if location is not None:
            # NOTE: `set(current_rs_id)` would split the curie into its
            # individual characters; the whole id must be stored as one
            # element of the set.
            self.id_location_map.setdefault(
                location, set()).add(current_rs_id)

        model.addDeprecatedIndividual(snp_id, current_rs_id)
        # TODO check on this
        # should we add the annotations to the current
        # or orig?
        model.makeLeader(current_rs_id)
    else:
        model.makeLeader(snp_id)
示例3: _add_gene_equivalencies
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _add_gene_equivalencies(self, xrefs, gene_id, taxon):
    """
    Link a gene to the identifiers listed in its pipe-delimited xrefs.

    Each cross-reference is rewritten as ``prefix:local_id`` (the prefix is
    translated through ``self.localtt`` when present) and then attached to
    ``gene_id`` as a gene product (HPRD), an xref (ENSEMBL), an equivalent
    class, or a same-individual link.  The map loaded from
    ``self.resources['clique_leader']`` is consulted by ``taxon`` to decide
    which side of an equivalence is made the clique leader.

    :param xrefs: pipe-delimited cross references, e.g.
        MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479
    :param gene_id: curie of the gene the xrefs belong to
    :param taxon: key looked up in the clique-leader map
    :return: None
    """
    leader_by_taxon = self.open_and_parse_yaml(
        self.resources['clique_leader'])
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    skipped_prefixes = ['Vega', 'IMGT/GENE-DB', 'Araport']

    for raw_xref in xrefs.strip().split('|'):
        # everything before the last ':' is the prefix, the rest the local id
        head, _, local_id = raw_xref.rpartition(':')
        prefix = head.strip()
        if prefix in self.localtt:
            prefix = self.localtt[prefix]
        if prefix == '':
            continue
        dbxref_curie = ':'.join((prefix, local_id))

        if prefix == 'HPRD':
            # proteins are not == genes.
            model.addTriple(
                gene_id, self.globaltt['has gene product'], dbxref_curie)
            continue

        # skip some of these for now based on curie prefix
        if prefix in skipped_prefixes:
            continue

        if prefix == 'ENSEMBL':
            model.addXref(gene_id, dbxref_curie)

        if prefix == 'OMIM':
            if dbxref_curie in self.omim_replaced:
                replacements = self.omim_replaced[dbxref_curie]
                # prefer a replacement that is typed as a gene
                for candidate in replacements:
                    if candidate in self.omim_type and \
                            self.omim_type[candidate] == \
                            self.globaltt['gene']:
                        dbxref_curie = candidate
            # an OMIM entry typed as something other than a gene is dropped
            if dbxref_curie in self.omim_type and \
                    self.omim_type[dbxref_curie] != self.globaltt['gene']:
                continue

        try:
            if self.class_or_indiv.get(gene_id) != 'C':
                model.addSameIndividual(gene_id, dbxref_curie)
            else:
                model.addEquivalentClass(gene_id, dbxref_curie)
                if taxon in leader_by_taxon:
                    leader_prefix = leader_by_taxon[taxon]
                    if leader_prefix == prefix:
                        model.makeLeader(dbxref_curie)
                    elif leader_prefix == gene_id.split(':')[0]:
                        model.makeLeader(gene_id)
        except AssertionError as err:
            LOG.warning("Error parsing %s: %s", gene_id, err)
示例4: _add_gene_equivalencies
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _add_gene_equivalencies(self, xrefs, gene_id, taxon):
    """
    Add equivalentClass and sameAs relationships

    Uses external resource map located in
    /resources/clique_leader.yaml to determine
    if an ID space is a clique leader

    Each pipe-delimited xref is cleaned with ``self._cleanup_id``.  HPRD
    ids are attached as gene products, filtered prefixes and OMIM disease
    ids are skipped, and everything else becomes an equivalent class (when
    ``self.class_or_indiv`` marks the gene as ``'C'``) or a same-individual
    link.

    :param xrefs: pipe-delimited cross references, e.g.
        MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479
    :param gene_id: curie of the gene the xrefs belong to
    :param taxon: taxon number; converted with ``int()`` for the
        clique-leader lookup and with ``str()`` for the per-taxon filters
    :return: None
    :raises ValueError: if ``taxon`` cannot be converted to ``int`` when
        a class equivalence is being added
    """
    clique_map = self.open_and_parse_yaml(self.resources['clique_leader'])
    graph = self.testgraph if self.testMode else self.graph

    filter_out = ['Vega', 'IMGT/GENE-DB', 'Araport']
    taxon_spec_filters = {
        '10090': ['ENSEMBL']
    }
    # The filter table is keyed by string while the clique map is keyed by
    # int; normalise so the filter applies however the caller typed taxon.
    filter_out += taxon_spec_filters.get(str(taxon), [])

    model = Model(graph)
    gene_prefix = gene_id.split(':')[0]

    # deal with the xrefs
    # MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479|Vega:OTTHUMG00000020696
    for ref in xrefs.strip().split('|'):
        xref_curie = self._cleanup_id(ref)
        if xref_curie is None or xref_curie.strip() == '':
            continue

        if xref_curie.startswith('HPRD'):
            # proteins are not == genes.
            model.addTriple(
                gene_id, self.properties['has_gene_product'], xref_curie)
            continue

        xref_prefix = xref_curie.split(':')[0]
        # skip some of these for now
        if xref_prefix in filter_out:
            continue

        if xref_curie.startswith('OMIM') \
                and DipperUtil.is_omim_disease(xref_curie):
            continue

        try:
            if self.class_or_indiv.get(gene_id) == 'C':
                model.addEquivalentClass(gene_id, xref_curie)
                # converted only here so a bad taxon fails exactly where
                # it did before
                taxon_num = int(taxon)
                if taxon_num in clique_map:
                    leader_prefix = clique_map[taxon_num]
                    if leader_prefix == xref_prefix:
                        model.makeLeader(xref_curie)
                    elif leader_prefix == gene_prefix:
                        model.makeLeader(gene_id)
            else:
                model.addSameIndividual(gene_id, xref_curie)
        except AssertionError as e:
            # Logger.warn is deprecated; pass arguments lazily
            logger.warning("Error parsing %s: %s", gene_id, e)
示例5: _get_var_citations
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _get_var_citations(self, limit):
    """
    Attach literature citations to ClinVar variants.

    Reads the tab-delimited ``variant_citations`` file.  For every row with
    a non-blank citation id from PubMed or PubMedCentral, a ``Reference``
    is added to the active graph together with an ``is_about`` triple
    pointing at the ``ClinVarVariant`` curie.  PubMed ids are also made
    clique leaders.

    File layout (generated weekly, the first of the week):
      AlleleID         int value (xpath //Measure/@ID )
      VariationID      ID ClinVar uses to anchor default display.
                       (xpath //MeasureSet/@ID)
      rs               rs identifier from dbSNP
      nsv              nsv identifier from dbVar
      citation_source  The source of the citation, either PubMed,
                       PubMedCentral, or the NCBI Bookshelf
      citation_id      The identifier used by that source

    :param limit: stop after this many data rows when not in test mode;
        ``None`` means no limit
    :return: None
    :raises ValueError: if a data row does not have exactly six columns
    """
    logger.info("Processing Citations for variants")
    line_counter = 0
    myfile = '/'.join(
        (self.rawdir, self.files['variant_citations']['file']))
    g = self.testgraph if self.testMode else self.graph
    model = Model(g)

    with open(myfile, 'r', encoding="utf8") as f:
        filereader = csv.reader(f, delimiter='\t', quotechar='\"')
        for line in filereader:
            # skip blank rows (csv yields []) and comments
            if not line or line[0].startswith('#'):
                continue
            (allele_num, variant_num, rs_num, nsv_num, citation_source,
             citation_id) = line
            line_counter += 1

            if self.testMode and int(variant_num) not in self.variant_ids:
                continue

            if citation_id.strip() == '':
                logger.info(
                    "Skipping blank citation for ClinVarVariant:%s",
                    str(variant_num))
                continue

            # the citation for a variant is made to some kind of
            # combination of the ids here.
            # but i'm not sure which, we don't know what the
            # citation is for exactly, other than the variant.
            # so use mentions
            var_id = 'ClinVarVariant:' + variant_num

            # citation source: PubMed | PubMedCentral | citation_source
            # format the citation id:
            if citation_source == 'PubMed':
                ref_id = 'PMID:' + citation_id.replace(" ", "")
                model.makeLeader(ref_id)
            elif citation_source == 'PubMedCentral':
                ref_id = 'PMCID:' + citation_id
            else:
                ref_id = None

            if ref_id is not None:
                # use the selected graph `g`, not self.graph, so test-mode
                # references land in the test graph with their triples
                r = Reference(
                    g, ref_id, Reference.ref_types['journal_article'])
                r.addRefToGraph()
                g.addTriple(ref_id, self.properties['is_about'], var_id)

            if not self.testMode \
                    and (limit is not None and line_counter > limit):
                break

    logger.info("Finished processing citations for variants")
示例6: _get_variants
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
#.........这里部分代码省略.........
seqalt_id = ':'.join(('ClinVarVariant', variant_num))
gene_id = None
# they use -1 to indicate unknown gene
if str(gene_num) != '-1' and str(gene_num) != 'more than 10':
if re.match(r'^Gene:', gene_num):
gene_num = "NCBI" + gene_num
else:
gene_id = ':'.join(('NCBIGene', str(gene_num)))
# FIXME there are some "variants" that are actually haplotypes
# probably will get taken care of when we switch to processing
# the xml for example, variant_num = 38562
# but there's no way to tell if it's a haplotype
# in the csv data so the dbsnp or dbvar
# should probably be primary,
# and the variant num be the vslc,
# with each of the dbsnps being added to it
# TODO clinical significance needs to be mapped to
# a list of terms
# first, make the variant:
f = Feature(seqalt_id, allele_name, allele_type_id)
if start != '-' and start.strip() != '':
f.addFeatureStartLocation(start, chrinbuild_id)
if stop != '-' and stop.strip() != '':
f.addFeatureEndLocation(stop, chrinbuild_id)
f.addFeatureToGraph()
f.addTaxonToFeature(tax_id)
# make the ClinVarVariant the clique leader
model.makeLeader(seqalt_id)
if bandinbuild_id is not None:
f.addSubsequenceOfFeature(bandinbuild_id)
# CHECK - this makes the assumption that there is
# only one affected chromosome per variant what happens with
# chromosomal rearrangement variants?
# shouldn't both chromosomes be here?
# add the hgvs as synonyms
if hgvs_c != '-' and hgvs_c.strip() != '':
model.addSynonym(seqalt_id, hgvs_c)
if hgvs_p != '-' and hgvs_p.strip() != '':
model.addSynonym(seqalt_id, hgvs_p)
# add the dbsnp and dbvar ids as equivalent
if dbsnp_num != '-' and int(dbsnp_num) != -1:
dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
model.addIndividualToGraph(dbsnp_id, None)
model.addSameIndividual(seqalt_id, dbsnp_id)
if dbvar_num != '-':
dbvar_id = 'dbVar:'+dbvar_num
model.addIndividualToGraph(dbvar_id, None)
model.addSameIndividual(seqalt_id, dbvar_id)
# TODO - not sure if this is right... add as xref?
# the rcv is like the combo of the phenotype with the variant
if rcv_nums != '-':
for rcv_num in re.split(r';', rcv_nums):
rcv_id = 'ClinVar:' + rcv_num
model.addIndividualToGraph(rcv_id, None)
model.addXref(seqalt_id, rcv_id)
示例7: _process_genes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _process_genes(self, limit=None):
if self.test_mode:
graph = self.testgraph
else:
graph = self.graph
geno = Genotype(graph)
model = Model(graph)
raw = '/'.join((self.rawdir, self.files['genes']['file']))
col = self.files['genes']['columns']
LOG.info("Processing HGNC genes")
chr_pattern = re.compile(r'(\d+|X|Y|Z|W|MT)[pq$]')
band_pattern = re.compile(r'([pq][A-H\d]?\d?(?:\.\d+)?)')
with open(raw, 'r', encoding="utf8") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
row = next(filereader)
if not self.check_fileheader(col, row):
exit(-1)
for row in filereader:
# To generate:
# head -1 hgnc_complete_set.txt.1 | tr '\t' '\n' |
# sed "s/\(.*\)/\1 = row[col.index(\'\1\')]/g"
hgnc_id = row[col.index('hgnc_id')].strip()
symbol = row[col.index('symbol')].strip()
name = row[col.index('name')].strip()
# locus_group = row[col.index('locus_group')]
locus_type = row[col.index('locus_type')].strip()
# status = row[col.index('status')]
location = row[col.index('location')].strip()
# location_sortable = row[col.index('location_sortable')]
# alias_symbol = row[col.index('alias_symbol')]
# alias_name = row[col.index('alias_name')]
# prev_symbol = row[col.index('prev_symbol')]
# prev_name = row[col.index('prev_name')]
# gene_family = row[col.index('gene_family')]
# gene_family_id = row[col.index('gene_family_id')]
# date_approved_reserved = row[col.index('date_approved_reserved')]
# date_symbol_changed = row[col.index('date_symbol_changed')]
# date_name_changed = row[col.index('date_name_changed')]
# date_modified = row[col.index('date_modified')]
entrez_id = row[col.index('entrez_id')].strip()
ensembl_gene_id = row[col.index('ensembl_gene_id')].strip()
# vega_id = row[col.index('vega_id')]
# ucsc_id = row[col.index('ucsc_id')]
# ena = row[col.index('ena')]
# refseq_accession = row[col.index('refseq_accession')]
# ccds_id = row[col.index('ccds_id')]
# uniprot_ids = row[col.index('uniprot_ids')]
pubmed_ids = row[col.index('pubmed_id')].strip() # pipe seperated!
# mgd_id = row[col.index('mgd_id')]
# rgd_id = row[col.index('rgd_id')]
# lsdb = row[col.index('lsdb')]
# cosmic = row[col.index('cosmic')]
omim_ids = row[col.index('omim_id')].strip() # pipe seperated!
# mirbase = row[col.index('mirbase')]
# homeodb = row[col.index('homeodb')]
# snornabase = row[col.index('snornabase')]
# bioparadigms_slc = row[col.index('bioparadigms_slc')]
# orphanet = row[col.index('orphanet')]
# pseudogene.org = row[col.index('pseudogene.org')]
# horde_id = row[col.index('horde_id')]
# merops = row[col.index('merops')]
# imgt = row[col.index('imgt')]
# iuphar = row[col.index('iuphar')]
# kznf_gene_catalog = row[col.index('kznf_gene_catalog')]
# mamit_trnadb = row[col.index('mamit-trnadb')]
# cd = row[col.index('cd')]
# lncrnadb = row[col.index('lncrnadb')]
# enzyme_id = row[col.index('enzyme_id')]
# intermediate_filament_db = row[col.index('intermediate_filament_db')]
# rna_central_ids = row[col.index('rna_central_ids')]
# lncipedia = row[col.index('lncipedia')]
# gtrnadb = row[col.index('gtrnadb')]
if self.test_mode and entrez_id != '' and \
entrez_id not in self.gene_ids:
continue
if name == '':
name = None
if locus_type == 'withdrawn':
model.addDeprecatedClass(hgnc_id)
else:
gene_type_id = self.resolve(locus_type, False) # withdrawn -> None?
if gene_type_id != locus_type:
model.addClassToGraph(hgnc_id, symbol, gene_type_id, name)
model.makeLeader(hgnc_id)
if entrez_id != '':
model.addEquivalentClass(hgnc_id, 'NCBIGene:' + entrez_id)
if ensembl_gene_id != '':
#.........这里部分代码省略.........
示例8: _process_genes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
def _process_genes(self, limit=None):
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
model = Model(g)
raw = '/'.join((self.rawdir, self.files['genes']['file']))
line_counter = 0
logger.info("Processing HGNC genes")
with open(raw, 'r', encoding="utf8") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
# curl -s ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt | head -1 | tr '\t' '\n' | grep -n .
for row in filereader:
(hgnc_id,
symbol,
name,
locus_group,
locus_type,
status,
location,
location_sortable,
alias_symbol,
alias_name,
prev_symbol,
prev_name,
gene_family,
gene_family_id,
date_approved_reserved,
date_symbol_changed,
date_name_changed,
date_modified,
entrez_id,
ensembl_gene_id,
vega_id,
ucsc_id,
ena,
refseq_accession,
ccds_id,
uniprot_ids,
pubmed_id,
mgd_id,
rgd_id,
lsdb,
cosmic,
omim_id,
mirbase,
homeodb,
snornabase,
bioparadigms_slc,
orphanet,
pseudogene_org,
horde_id,
merops,
imgt,
iuphar,
kznf_gene_catalog,
mamit_trnadb,
cd,
lncrnadb,
enzyme_id,
intermediate_filament_db,
rna_central_ids) = row
line_counter += 1
# skip header
if line_counter <= 1:
continue
if self.testMode and entrez_id != '' \
and int(entrez_id) not in self.gene_ids:
continue
if name == '':
name = None
gene_type_id = self._get_gene_type(locus_type)
model.addClassToGraph(hgnc_id, symbol, gene_type_id, name)
if locus_type == 'withdrawn':
model.addDeprecatedClass(hgnc_id)
else:
model.makeLeader(hgnc_id)
if entrez_id != '':
model.addEquivalentClass(
hgnc_id, 'NCBIGene:' + entrez_id)
if ensembl_gene_id != '':
model.addEquivalentClass(
hgnc_id, 'ENSEMBL:' + ensembl_gene_id)
if omim_id != '' and "|" not in omim_id:
omim_curie = 'OMIM:' + omim_id
if not DipperUtil.is_omim_disease(omim_curie):
model.addEquivalentClass(hgnc_id, omim_curie)
geno.addTaxon('NCBITaxon:9606', hgnc_id)
# add pubs as "is about"
if pubmed_id != '':
#.........这里部分代码省略.........
示例9: _process_phenotype_data
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
#.........这里部分代码省略.........
# split apart the mp ids
# ataxia [MP:0001393] ,hypoactivity [MP:0001402] ...
# mpt_ids are a comma delimited list
# labels with MP terms following in brackets
phenotype_ids = []
if mpt_ids != '':
for lb_mp in mpt_ids.split(r','):
lb_mp = lb_mp.strip()
if lb_mp[-1:] == ']' and lb_mp[-12:-8] == '[MP:':
phenotype_ids.append(lb_mp[-11:-2])
# pubmed ids are space delimited
pubmed_ids = []
if pubmed_nums != '':
for pm_num in re.split(r'\s+', pubmed_nums):
pmid = 'PMID:' + pm_num.strip()
pubmed_ids.append(pmid)
ref = Reference(graph, pmid, self.globaltt['journal article'])
ref.addRefToGraph()
# https://www.mmrrc.org/catalog/sds.php?mmrrc_id=00001
# is a good example of 4 genotype parts
model.addClassToGraph(mouse_taxon, None)
if research_areas == '':
research_areas = None
else:
research_areas = 'Research Areas: ' + research_areas
strain_type = mouse_taxon
if strain_state == 'ES':
strain_type = stem_cell_class
model.addIndividualToGraph( # an inst of mouse??
strain_id, strain_label, strain_type, research_areas)
model.makeLeader(strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in some ontology
model.addClassToGraph(pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(
graph, self.name, mgi_allele_id, pid,
self.globaltt['has phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph()
else:
LOG.info("Phenotypes and no allele for %s", strain_id)
if not self.test_mode and (
limit is not None and reader.line_num > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for var in variants:
vl_id = var.strip()
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(
示例10: _process_phenotype_data
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import makeLeader [as 别名]
#.........这里部分代码省略.........
phenotype_ids = []
if mp_ids != '':
for i in re.split(r',', mp_ids):
i = i.strip()
mps = re.search(r'\[(.*)\]', i)
if mps is not None:
mp_id = mps.group(1).strip()
phenotype_ids.append(mp_id)
# pubmed ids are space delimited
pubmed_ids = []
if pubmed_nums.strip() != '':
for i in re.split(r'\s+', pubmed_nums):
pmid = 'PMID:'+i.strip()
pubmed_ids.append(pmid)
r = Reference(g, pmid,
Reference.ref_types['journal_article'])
r.addRefToGraph()
# https://www.mmrrc.org/catalog/sds.php?mmrrc_id=00001
# is a good example of 4 genotype parts
model.addClassToGraph(mouse_taxon, None)
if research_areas.strip() == '':
research_areas = None
else:
research_areas = 'Research Areas: '+research_areas
strain_type = mouse_taxon
if strain_state == 'ES':
strain_type = stem_cell_class
model.addIndividualToGraph(
strain_id, strain_label, strain_type,
research_areas) # an inst of mouse??
model.makeLeader(strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in the ontology
model.addClassToGraph(pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(g, self.name, mgi_allele_id, pid,
model.object_properties['has_phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph()
else:
logger.info("Phenotypes and no allele for %s",
strain_id)
if not self.testMode and (
limit is not None and line_counter > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for v in variants:
vl_id = v.strip()
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(vl_id, vl_symbol,