本文整理汇总了Python中dipper.models.Genotype.Genotype.addAllele方法的典型用法代码示例。如果您正苦于以下问题：Python Genotype.addAllele方法的具体用法？Python Genotype.addAllele怎么用？Python Genotype.addAllele使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addAllele方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_process_allelic_variants
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAllele [as 别名]
def _get_process_allelic_variants(self, entry, g):
    """
    Add the allelic variants listed in one OMIM entry to the graph.

    Each item of ``entry['allelicVariantList']`` is handled by its status:

    * ``live``: added as a variant locus (allele) of the entry's gene,
      linked to the publications cited in its text, made equivalent to
      its dbSNP ids, cross-referenced to its ClinVar RCV accessions and
      given an OMIM page.
    * any status containing ``moved`` (both "moved" and "removed"):
      recorded as a deprecated individual, pointing at ``movedTo`` when
      that key is present.
    * anything else: reported through ``logger.error``.

    :param entry: dict for a single OMIM entry, or None (nothing is
        processed in that case)
    :param g: graph the triples are written to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    # NOTE(review): `du` is never used below; the constructor call is kept
    # only because its side effects (if any) are not visible from here.
    du = DipperUtil()

    if entry is None:
        return

    entry_num = entry['mimNumber']
    # process the ref list just to get the pmids
    ref_to_pmid = self._get_pubs(entry, g)

    if 'allelicVariantList' not in entry:
        return

    for al in entry['allelicVariantList']:
        variant = al['allelicVariant']
        al_num = variant['number']
        al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
        status = variant['status']

        if status == 'live':
            al_label = variant.get('mutations')
            al_description = None
            # reference numbers cited by this variant's text, e.g. "{12:"
            cited_refs = set()
            if 'text' in variant:
                al_description = variant['text']
                cited_refs = set(re.findall(r'\{(\d+):', al_description))

            geno.addAllele(
                al_id, al_label, geno.genoparts['variant_locus'],
                al_description)
            geno.addAlleleOfGene(
                al_id, 'OMIM:'+str(entry_num),
                geno.object_properties['is_sequence_variant_instance_of'])

            # look up the pubmed id in the list of references
            for ref_num in cited_refs:
                pmid = ref_to_pmid[int(ref_num)]
                gu.addTriple(
                    g, pmid, gu.object_properties['is_about'], al_id)

            if 'dbSnps' in variant:
                for dnum in variant['dbSnps'].split(','):
                    did = 'dbSNP:'+dnum.strip()
                    gu.addIndividualToGraph(g, did, None)
                    gu.addEquivalentClass(g, al_id, did)

            if 'clinvarAccessions' in variant:
                # accessions are separated by three semicolons,
                # each one looking like RCV000020059;;1
                for accession in variant['clinvarAccessions'].split(';;;'):
                    accession = accession.strip()
                    if not accession:
                        continue
                    rcv = re.match(r'(RCV\d+);;', accession)
                    if rcv is None:
                        # a malformed accession must not abort the entry
                        logger.warning(
                            "Unparseable ClinVar accession '%s' for %s",
                            accession, al_id)
                        continue
                    gu.addXref(g, al_id, 'ClinVar:'+rcv.group(1))

            gu.addPage(
                g, al_id,
                "http://omim.org/entry/"+str(entry_num)+"#" +
                str(al_num).zfill(4))
        elif re.search('moved', status):
            # for both 'moved' and 'removed'
            moved_ids = None
            if 'movedTo' in variant:
                moved_ids = ['OMIM:'+variant['movedTo']]
            gu.addDeprecatedIndividual(g, al_id, moved_ids)
        else:
            logger.error('Uncaught alleleic variant status %s', status)
    # end loop allelicVariantList
    return
示例2: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAllele [as 别名]
#.........这里部分代码省略.........
research_areas) # an inst of mouse??
gu.makeLeader(g, strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in the ontology
gu.addClassToGraph(g, pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(self.name, mgi_allele_id, pid,
gu.object_properties['has_phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph(g)
else:
logger.info("Phenotypes and no allele for %s",
strain_id)
if not self.testMode and (
limit is not None and line_counter > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for v in variants:
vl_id = v
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(vl_id, vl_symbol,
geno.genoparts['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_'+gene+'-VL'
vl_id = re.sub(r':', '', vl_id)
if self.nobnodes:
vl_id = ':'+vl_id
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(vl_id, vl_symbol,
geno.genoparts['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = '_'+re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
if self.nobnodes:
vslc_id = ':' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
示例3: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAllele [as 别名]
def _process_data(self, raw, limit=None):
logger.info("Processing Data from %s", raw)
gu = GraphUtils(curie_map.get())
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
line_counter = 0
gu.loadAllProperties(g)
gu.loadObjectProperties(g, geno.object_properties)
# Add the taxon as a class
taxon_id = 'NCBITaxon:10090' # map to Mus musculus
gu.addClassToGraph(g, taxon_id, None)
# with open(raw, 'r', encoding="utf8") as csvfile:
with gzip.open(raw, 'rt') as csvfile:
filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
next(filereader, None) # skip the header row
for row in filereader:
line_counter += 1
(marker_accession_id, marker_symbol, phenotyping_center,
colony, sex, zygosity, allele_accession_id, allele_symbol,
allele_name, strain_accession_id, strain_name, project_name,
project_fullname, pipeline_name, pipeline_stable_id,
procedure_stable_id, procedure_name, parameter_stable_id,
parameter_name, top_level_mp_term_id, top_level_mp_term_name,
mp_term_id, mp_term_name, p_value, percentage_change,
effect_size, statistical_method, resource_name) = row
if self.testMode and marker_accession_id not in self.test_ids:
continue
# ##### cleanup some of the identifiers ######
zygosity_id = self._map_zygosity(zygosity)
# colony ids sometimes have <> in them, spaces,
# or other non-alphanumerics and break our system;
# replace these with underscores
colony_id = '_'+re.sub(r'\W+', '_', colony)
if self.nobnodes:
colony_id = ':'+colony_id
if not re.match(r'MGI', allele_accession_id):
allele_accession_id = \
'_IMPC-'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
allele_accession_id = ':'+allele_accession_id
if re.search(r'EUROCURATE', strain_accession_id):
# the eurocurate links don't resolve at IMPC
strain_accession_id = '_'+strain_accession_id
if self.nobnodes:
strain_accession_id = ':'+strain_accession_id
elif not re.match(r'MGI', strain_accession_id):
logger.info(
"Found a strange strain accession...%s",
strain_accession_id)
strain_accession_id = 'IMPC:'+strain_accession_id
######################
# first, add the marker and variant to the graph as with MGI,
# the allele is the variant locus. IF the marker is not known,
# we will call it a sequence alteration. otherwise,
# we will create a BNode for the sequence alteration.
sequence_alteration_id = variant_locus_id = None
variant_locus_name = sequence_alteration_name = None
# extract out what's within the <> to get the symbol
if re.match(r'.*<.*>', allele_symbol):
sequence_alteration_name = \
re.match(r'.*<(.*)>', allele_symbol).group(1)
else:
sequence_alteration_name = allele_symbol
if marker_accession_id is not None and \
marker_accession_id == '':
logger.warning(
"Marker unspecified on row %d", line_counter)
marker_accession_id = None
if marker_accession_id is not None:
variant_locus_id = allele_accession_id
variant_locus_name = allele_symbol
variant_locus_type = geno.genoparts['variant_locus']
geno.addGene(marker_accession_id, marker_symbol,
geno.genoparts['gene'])
geno.addAllele(variant_locus_id, variant_locus_name,
variant_locus_type, None)
geno.addAlleleOfGene(variant_locus_id, marker_accession_id)
sequence_alteration_id = \
'_seqalt'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
sequence_alteration_id = ':'+sequence_alteration_id
geno.addSequenceAlterationToVariantLocus(
sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........
示例4: OMIA
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAllele [as 别名]
#.........这里部分代码省略.........
omia_id = 'OMIA:'+row['omia_id']
lidaurl = row['lidaurl']
if self.testMode and omia_id not in self.test_ids['disease']:
return
self.gu.addXref(self.g, omia_id, lidaurl, True)
return
def _process_phene_gene_row(self, row):
gene_id = self.id_hash['gene'].get(row['gene_id'])
phene_id = self.id_hash['phene'].get(row['phene_id'])
omia_id = self._get_omia_id_from_phene_id(phene_id)
if self.testMode and not (
omia_id in self.test_ids['disease'] and
row['gene_id'] in self.test_ids['gene']) or\
gene_id is None or phene_id is None:
return
# occasionally some phenes are missing! (ex: 406)
if phene_id is None:
logger.warning("Phene id %s is missing", str(row['phene_id']))
return
gene_label = self.label_hash[gene_id]
# some variant of gene_id has phenotype d
vl = '_'+re.sub(r'NCBIGene:', '', str(gene_id)) + 'VL'
if self.nobnodes:
vl = ':'+vl
self.geno.addAllele(vl, 'some variant of ' + gene_label)
self.geno.addAlleleOfGene(vl, gene_id)
assoc = G2PAssoc(self.name, vl, phene_id)
assoc.add_association_to_graph(self.g)
# add the gene id to the set of annotated genes
# for later lookup by orthology
self.annotated_genes.add(gene_id)
return
def _process_omia_omim_map(self, row):
"""
Links OMIA groups to OMIM equivalents.
:param row:
:return:
"""
# omia_id, omim_id, added_by
omia_id = 'OMIA:'+row['omia_id']
omim_id = 'OMIM:'+row['omim_id']
# also store this for use when we say that a given animal is
# a model of a disease
if omia_id not in self.omia_omim_map:
self.omia_omim_map[omia_id] = set()
self.omia_omim_map[omia_id].add(omim_id)
if self.testMode and omia_id not in self.test_ids['disease']:
return
self.gu.addXref(self.g, omia_id, omim_id)
示例5: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAllele [as 别名]
#.........这里部分代码省略.........
strain_id, strain_label, strain_type, research_areas)
model.makeLeader(strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in some ontology
model.addClassToGraph(pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(
graph, self.name, mgi_allele_id, pid,
self.globaltt['has phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph()
else:
LOG.info("Phenotypes and no allele for %s", strain_id)
if not self.test_mode and (
limit is not None and reader.line_num > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for var in variants:
vl_id = var.strip()
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_:' + re.sub(r':', '', gene) + '-VL'
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
vslc_id = '_:' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
self.id_label_hash[vslc_id] = vslc_label
vslc_list.append(vslc_id)
geno.addPartsToVSLC(
vslc_id, vl, None, self.globaltt['indeterminate'],