本文整理汇总了Python中dipper.models.Genotype.Genotype类的典型用法代码示例。如果您正苦于以下问题：Python Genotype类的具体用法？Python Genotype怎么用？Python Genotype使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Genotype类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _add_snp_gene_relation
def _add_snp_gene_relation(
        self, snp_id, snp_gene_nums, upstream_gene_num, downstream_gene_num):
    """
    Relate a SNP to the genes it affects and to its flanking genes.

    Triples are written to ``self.testgraph`` when ``self.test_mode`` is
    set, otherwise to ``self.graph``.

    :param snp_id: identifier of the SNP; used as the subject of every
        statement added here
    :param snp_gene_nums: comma-separated gene numbers; each non-empty
        item is prefixed with ``NCBIGene:`` and added as an affected locus.
        May be '' and may contain leading/trailing commas or whitespace
    :param upstream_gene_num: gene number of the flanking gene the SNP is
        downstream of, or '' when none is given
    :param downstream_gene_num: gene number of the flanking gene the SNP
        is upstream of, or '' when none is given
    :return: None
    """
    graph = self.testgraph if self.test_mode else self.graph
    geno = Genotype(graph)

    # add the feature as a sequence alteration affecting various genes
    # note that intronic variations don't necessarily list
    # the genes such as for rs10448080  FIXME
    if snp_gene_nums != '':
        for geneid in snp_gene_nums.split(','):
            geneid = geneid.strip()
            # still have to test for this,
            # because sometimes there's a leading comma
            if geneid != '':
                geno.addAffectedLocus(snp_id, 'NCBIGene:' + geneid)

    # add the up and downstream genes if they are available.
    # Each guard tests the very value it is about to use: previously the
    # guards were crossed, so a missing flank produced a bare 'NCBIGene:'
    # object while the flank that WAS supplied got silently dropped.
    if downstream_gene_num != '':
        # the SNP sits upstream of its downstream gene
        graph.addTriple(
            snp_id,
            self.globaltt['is upstream of sequence of'],
            'NCBIGene:' + downstream_gene_num)

    if upstream_gene_num != '':
        # the SNP sits downstream of its upstream gene
        graph.addTriple(
            snp_id,
            self.globaltt['is downstream of sequence of'],
            'NCBIGene:' + upstream_gene_num)
示例2: _add_snp_gene_relation
def _add_snp_gene_relation(self, snp_id, snp_gene_nums,
                           upstream_gene_num, downstream_gene_num):
    """
    Relate a SNP to the genes it affects and to its flanking genes.

    Triples are written to ``self.testgraph`` when ``self.testMode`` is
    set, otherwise to ``self.graph``.

    :param snp_id: identifier of the SNP; used as the subject of every
        statement added here
    :param snp_gene_nums: comma-separated gene numbers; each non-empty
        item is prefixed with ``NCBIGene:`` and added as an affected locus.
        May be '' and may contain leading/trailing commas or whitespace
    :param upstream_gene_num: gene number of the flanking gene the SNP is
        downstream of, or '' when none is given
    :param downstream_gene_num: gene number of the flanking gene the SNP
        is upstream of, or '' when none is given
    :return: None
    """
    g = self.testgraph if self.testMode else self.graph
    geno = Genotype(g)

    # add the feature as a sequence alteration affecting various genes
    # note that intronic variations don't necessarily list
    # the genes such as for rs10448080  FIXME
    if snp_gene_nums != '':
        for gene_num in snp_gene_nums.split(','):
            gene_num = gene_num.strip()
            # still have to test for this,
            # because sometimes there's a leading comma
            if gene_num != '':
                geno.addAffectedLocus(snp_id, 'NCBIGene:' + gene_num)

    # add the up and downstream genes if they are available.
    # Each guard tests the very value it is about to use: previously the
    # guards were crossed, so a missing flank produced a bare 'NCBIGene:'
    # object while the flank that WAS supplied got silently dropped.
    if downstream_gene_num != '':
        # the SNP sits upstream of its downstream gene
        g.addTriple(
            snp_id,
            Feature.object_properties['upstream_of_sequence_of'],
            'NCBIGene:' + downstream_gene_num)

    if upstream_gene_num != '':
        # the SNP sits downstream of its upstream gene
        g.addTriple(
            snp_id,
            Feature.object_properties['downstream_of_sequence_of'],
            'NCBIGene:' + upstream_gene_num)
示例3: process_disease_association
def process_disease_association(self, limit):
    """
    Turn each usable row of the disease association file into a
    gene-variant "model_of" disease association.

    Rows whose joined text starts with '!' are treated as header lines,
    rows flagged ``NOT`` are skipped, and in test mode only genes listed
    in ``self.test_ids['gene']`` are kept.

    :param limit: accepted but not consulted anywhere in this method
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['disease_assoc']['file']))
    g = self.testgraph if self.testMode else self.graph
    gu = GraphUtils(curie_map.get())

    logger.info("Processing disease models")
    geno = Genotype(g, self.nobnodes)
    line_counter = 0
    worm_taxon = 'NCBITaxon:6239'

    with open(raw, 'r') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t', quotechar='\"'):
            if ''.join(row).startswith('!'):  # header
                continue
            line_counter += 1

            # exactly 17 columns are expected; only a handful are used
            (_db, gene_num, gene_symbol, is_not, disease_id, ref,
             eco_symbol, _with_or_from, _aspect, _gene_name,
             _gene_synonym, _gene_class, _taxon, _date, _assigned_by,
             _blank, _blank2) = row

            outside_test_set = \
                self.testMode and gene_num not in self.test_ids['gene']
            # TODO add NOT phenotypes
            if outside_test_set or is_not == 'NOT':
                continue

            # sample row (tab separated):
            # WB WBGene00000001 aap-1 DOID:2583 PMID:19029536 IEA
            # ENSEMBL:ENSG00000145675|OMIM:615214 D Y110A7A.10 gene
            # taxon:6239 20150612 WB
            gene_id = 'WormBase:' + gene_num

            # stand-in "unspecified" variant of the gene
            variant_id = '_' + gene_num + '-unspecified'
            if self.nobnodes:
                variant_id = ':' + variant_id
            variant_label = 'some variant of ' + gene_symbol

            geno.addAlleleOfGene(variant_id, gene_id)
            animal_id = geno.make_experimental_model_with_genotype(
                g, variant_id, variant_label, worm_taxon, 'worm')

            assoc = G2PAssoc(
                self.name, animal_id,
                disease_id, gu.object_properties['model_of'])

            ref = ref.replace('WB_REF:', 'WormBase:')
            if ref != '':
                assoc.add_source(ref)

            # only the IEA evidence code is mapped
            if eco_symbol == 'IEA':
                assoc.add_evidence('ECO:0000501')  # IEA is this now

            assoc.add_association_to_graph(g)

    return
示例4: _get_process_allelic_variants
def _get_process_allelic_variants(self, entry, g):
    """
    Add the allelic variants listed in one OMIM entry to the graph.

    For every item of ``entry['allelicVariantList']``:

    * status ``live``: the allele is added (label from ``mutations``,
      description from ``text``), tied to the entry's gene, linked to the
      publications cited in its text as ``{<ref number>:``, given dbSNP
      equivalents and ClinVar xrefs when present, and given an OMIM page.
    * status containing ``moved`` (covers 'moved' and 'removed'): the
      allele is deprecated, pointing at ``movedTo`` when that is present.
    * anything else is logged as an error.

    :param entry: one OMIM entry (mapping) or None; None is a no-op
    :param g: graph the statements are added to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    # NOTE(review): never used below; kept in case construction matters
    du = DipperUtil()

    if entry is None:
        return

    entry_num = entry['mimNumber']
    entry_id = 'OMIM:' + str(entry_num)

    # process the ref list just to get the pmids
    # (called even when the entry has no allelic variants, as before)
    ref_to_pmid = self._get_pubs(entry, g)

    if 'allelicVariantList' not in entry:
        return

    # raw strings: the old non-raw literals relied on invalid escapes
    cited_ref_re = re.compile(r'\{(\d+)\:')
    rcv_re = re.compile(r'(RCV\d+)\;\;')

    for al in entry['allelicVariantList']:
        variant = al['allelicVariant']
        al_num = variant['number']
        al_suffix = str(al_num).zfill(4)
        al_id = entry_id + '.' + al_suffix
        status = variant['status']

        if status == 'live':
            al_label = variant.get('mutations')
            al_description = None
            # reference numbers mentioned in this variant's text
            cited_refs = set()
            if 'text' in variant:
                al_description = variant['text']
                cited_refs = set(cited_ref_re.findall(al_description))

            geno.addAllele(
                al_id, al_label, geno.genoparts['variant_locus'],
                al_description)
            geno.addAlleleOfGene(
                al_id, entry_id,
                geno.object_properties['is_sequence_variant_instance_of'])

            # look up the pubmed id in the list of references
            for ref_num in cited_refs:
                pmid = ref_to_pmid[int(ref_num)]
                gu.addTriple(
                    g, pmid, gu.object_properties['is_about'], al_id)

            if 'dbSnps' in variant:
                for dnum in variant['dbSnps'].split(','):
                    did = 'dbSNP:' + dnum.strip()
                    gu.addIndividualToGraph(g, did, None)
                    gu.addEquivalentClass(g, al_id, did)

            if 'clinvarAccessions' in variant:
                # clinvarAccessions triple semicolon delimited,
                # each like RCV000020059;;1
                for chunk in variant['clinvarAccessions'].split(';;;'):
                    rcv_match = rcv_re.match(chunk)
                    if rcv_match is None:
                        # previously this raised AttributeError on None
                        logger.warning(
                            'Unparseable ClinVar accession "%s" for %s',
                            chunk, al_id)
                        continue
                    gu.addXref(g, al_id, 'ClinVar:' + rcv_match.group(1))

            gu.addPage(
                g, al_id,
                "http://omim.org/entry/" + str(entry_num) + "#" + al_suffix)

        elif re.search('moved', status):
            # for both 'moved' and 'removed'
            moved_ids = None
            if 'movedTo' in variant:
                moved_ids = ['OMIM:' + variant['movedTo']]
            gu.addDeprecatedIndividual(g, al_id, moved_ids)

        else:
            logger.error('Uncaught alleleic variant status %s', status)
    # end loop allelicVariantList

    return
示例5: parse
def parse(self, limit=None):
    """
    Process the trait mappings, then the genomic ('_bp') and genetic
    ('_cm') QTL location files of each supported organism.

    When ``self.testOnly`` is set, ``self.testMode`` is switched on and
    the test graph is used for the genome declarations.

    :param limit: passed through to the per-file processors
    :return: None
    """
    if limit is not None:
        logger.info("Only parsing first %s rows fo each file", str(limit))
    logger.info("Parsing files...")

    if self.testOnly:
        self.testMode = True
        graph = self.testgraph
    else:
        graph = self.graph

    trait_map_path = '/'.join(
        (self.rawdir, self.files['trait_mappings']['file']))
    self._process_trait_mappings(trait_map_path, limit)

    geno = Genotype(graph)

    for common_name in (
            'chicken', 'pig', 'horse', 'rainbow_trout', 'sheep', 'cattle'):
        tax_id = self._get_tax_by_common_name(common_name)
        geno.addGenome(tax_id, common_name)

        # genomic (base-pair) locations: need a build parsed from the name
        bp_key = common_name + '_bp'
        if bp_key in self.files:
            bp_file = self.files[bp_key]['file']
            build_match = re.search(r'QTL_([\w\.]+)\.gff.txt.gz', bp_file)
            if build_match is None:
                logger.error("Can't match a gff build")
            else:
                build = build_match.group(1)
                build_id = self._map_build_by_abbrev(build)
                logger.info("Build = %s", build_id)
                geno.addReferenceGenome(build_id, build, tax_id)
                # an unmapped build means the locations cannot be anchored
                if build_id is not None:
                    self._process_QTLs_genomic_location(
                        '/'.join((self.rawdir, bp_file)),
                        tax_id, build_id, build, limit)

        # genetic (centimorgan) locations
        cm_key = common_name + '_cm'
        if cm_key in self.files:
            cm_file = self.files[cm_key]['file']
            self._process_QTLs_genetic_location(
                '/'.join((self.rawdir, cm_file)), tax_id, common_name, limit)

    logger.info("Finished parsing")
    self.load_bindings()
    logger.info("Found %d nodes", len(self.graph))
    return
示例6: _process_genes
def _process_genes(self, taxid, limit=None):
    """
    Load the tab-delimited Ensembl gene file for one taxon into the graph.

    Each row yields a gene class ``ENSEMBL:<id>``, equivalences to its
    ``NCBIGene:`` id and (for human) its HGNC id when present, and a
    taxon statement.

    :param taxid: taxon number as a string (``'9606'`` rows carry an
        extra trailing HGNC id column); also the key into ``self.files``
    :param limit: outside test mode, stop once more than this many rows
        have been kept; None means no limit
    :return: None. A row with too few columns is logged as a data error
        and ends processing immediately (properties are then not loaded)
    """
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph
    geno = Genotype(g)

    raw = '/'.join((self.rawdir, self.files[taxid]['file']))
    line_counter = 0

    # in the case of human genes, we also get the hgnc id,
    # and is the last col
    is_human = taxid == '9606'
    # five columns are unpacked from every row, six are read for human;
    # the old "< 4" guard let short rows through to a ValueError/IndexError
    min_cols = 6 if is_human else 5

    logger.info("Processing Ensembl genes for tax %s", taxid)
    with open(raw, 'r', encoding="utf8") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t')
        for row in filereader:
            if len(row) < min_cols:
                logger.error("Data error for file %s", raw)
                return

            (ensembl_gene_id, external_gene_name, description,
             gene_biotype, entrezgene) = row[0:5]
            hgnc_id = row[5] if is_human else None

            if self.testMode and entrezgene != '' \
                    and int(entrezgene) not in self.gene_ids:
                continue

            line_counter += 1
            gene_id = 'ENSEMBL:' + ensembl_gene_id
            if description == '':
                description = None

            # NOTE(review): the looked-up type is discarded on the next
            # line, so every gene is added without a type -- confirm
            # whether that is intended before removing either statement
            gene_type_id = self._get_gene_type(gene_biotype)
            gene_type_id = None

            gu.addClassToGraph(
                g, gene_id, external_gene_name, gene_type_id, description)

            if entrezgene != '':
                gu.addEquivalentClass(g, gene_id, 'NCBIGene:' + entrezgene)
            if hgnc_id is not None and hgnc_id != '':
                gu.addEquivalentClass(g, gene_id, hgnc_id)
            geno.addTaxon('NCBITaxon:' + taxid, gene_id)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break

    gu.loadProperties(g, Feature.object_properties, gu.OBJPROP)
    gu.loadProperties(g, Feature.data_properties, gu.DATAPROP)
    gu.loadProperties(g, Genotype.object_properties, gu.OBJPROP)
    gu.loadAllProperties(g)

    return
示例7: process_disease_association
def process_disease_association(self, limit):
    """
    Turn each usable row of the disease association file into a
    gene-variant "is model of" disease association.

    Rows whose joined text starts with '!' are treated as header lines,
    rows flagged ``NOT`` are skipped, and in test mode only genes listed
    in ``self.test_ids['gene']`` are kept.

    :param limit: accepted but not consulted anywhere in this method
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['disease_assoc']['file']))
    graph = self.testgraph if self.test_mode else self.graph

    model = Model(graph)
    LOG.info("Processing disease models")
    geno = Genotype(graph)
    line_counter = 0
    worm_taxon = self.globaltt['Caenorhabditis elegans']

    with open(raw, 'r') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t', quotechar='\"'):
            if ''.join(row).startswith('!'):  # header
                continue
            line_counter += 1

            # exactly 17 columns are expected; only a handful are used
            (_db, gene_num, gene_symbol, is_not, disease_id, ref,
             eco_symbol, _with_or_from, _aspect, _gene_name,
             _gene_synonym, _gene_class, _taxon, _date, _assigned_by,
             _blank, _blank2) = row

            outside_test_set = \
                self.test_mode and gene_num not in self.test_ids['gene']
            # TODO add NOT phenotypes
            if outside_test_set or is_not == 'NOT':
                continue

            # sample row (tab separated):
            # WB WBGene00000001 aap-1 DOID:2583 PMID:19029536 IEA
            # ENSEMBL:ENSG00000145675|OMIM:615214 D Y110A7A.10 gene
            # taxon:6239 20150612 WB
            gene_id = 'WormBase:' + gene_num

            # stand-in "unspecified" variant of the gene, as a blank node
            variant_id = '_:' + gene_num + '-unspecified'
            variant_label = 'some variant of ' + gene_symbol
            geno.addAffectedLocus(variant_id, gene_id)
            model.addBlankNodeAnnotation(variant_id)

            animal_id = geno.make_experimental_model_with_genotype(
                variant_id, variant_label, worm_taxon, 'worm')

            assoc = G2PAssoc(
                graph, self.name, animal_id,
                disease_id, self.globaltt['is model of'])

            ref = ref.replace('WB_REF:', 'WormBase:')
            if ref != '':
                assoc.add_source(ref)

            assoc.add_evidence(self.resolve(eco_symbol))
            assoc.add_association_to_graph()

    return
示例8: _make_pheno_assoc
def _make_pheno_assoc(self, g, gene_id, gene_symbol, disorder_num,
                      disorder_label, phene_key):
    """
    Add a gene(-variant)-to-disorder association to the graph.

    The relation is chosen from the first character of ``disorder_label``:
    '[' -> is_marker_for, '{' or '?' -> contributes_to, anything else ->
    has_phenotype. For ``OMIM:`` gene ids an anonymous variant locus is
    created and used as the subject instead of the gene itself.

    :param g: graph the statements are added to
    :param gene_id: gene curie; only ``OMIM:``-prefixed ids get a
        stand-in variant locus
    :param gene_symbol: gene symbol used to label the variant locus;
        None or blank leaves the locus unlabelled
    :param disorder_num: OMIM number of the disorder (string)
    :param disorder_label: disorder label, including any leading marker
    :param phene_key: phene mapping code, translated to an evidence code
    :return: None
    """
    geno = Genotype(g)
    model = Model(g)

    disorder_id = ':'.join(('OMIM', disorder_num))

    rel_id = model.object_properties['has_phenotype']  # default
    rel_label = 'causes'
    if disorder_label.startswith('['):
        rel_id = model.object_properties['is_marker_for']
        rel_label = 'is a marker for'
    elif disorder_label.startswith(('{', '?')):
        # '?' is a questionable mapping! skip?
        rel_id = model.object_properties['contributes_to']
        rel_label = 'contributes to'

    evidence = self._map_phene_mapping_code_to_eco(phene_key)

    # we actually want the association between the gene and the disease
    # to be via an alternate locus not the "wildtype" gene itself.
    # so we make an anonymous alternate locus,
    # and put that in the association.
    # but we only need to do that in the cases when it's not an NCBIGene
    # (as that is a sequence feature itself)
    if gene_id.startswith('OMIM:'):
        alt_locus = \
            '_:' + gene_id.replace(':', '') + '-' + disorder_num + 'VL'

        # check for None BEFORE stripping; the old code stripped first,
        # so a missing symbol raised instead of yielding no label
        symbol = gene_symbol.strip() if gene_symbol is not None else ''
        if symbol != '':
            alt_label = ' '.join(
                ('some variant of', symbol,
                 'that', rel_label, disorder_label))
        else:
            alt_label = None

        model.addIndividualToGraph(
            alt_locus, alt_label, Genotype.genoparts['variant_locus'])
        geno.addAffectedLocus(alt_locus, gene_id)
        model.addBlankNodeAnnotation(alt_locus)
    else:
        # assume it's already been added
        alt_locus = gene_id

    assoc = G2PAssoc(g, self.name, alt_locus, disorder_id, rel_id)
    assoc.add_evidence(evidence)
    assoc.add_association_to_graph()

    return
示例9: process_gene_ids
def process_gene_ids(self, limit):
    """
    Read the gzipped, comma-delimited gene id file and add each gene as
    a class with its taxon, deprecation status and synonym.

    :param limit: outside test mode, stop once the number of rows read
        exceeds this value; None means no limit
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_ids']['file']))
    g = self.testgraph if self.testMode else self.graph
    model = Model(g)

    logger.info("Processing: %s", self.files['gene_ids']['file'])
    geno = Genotype(g)

    with gzip.open(raw, 'rb') as csvfile:
        filereader = csv.reader(
            io.TextIOWrapper(csvfile, newline=""),
            delimiter=',', quotechar='\"')

        # the counter tracks rows read, including ones filtered out below
        for line_counter, row in enumerate(filereader, 1):
            # e.g.
            # 6239,WBGene00000001,aap-1,Y110A7A.10,Live,protein_coding_gene
            (taxon_num, gene_num, gene_symbol,
             gene_synonym, live, gene_type) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            taxon_id = 'NCBITaxon:' + taxon_num
            gene_id = 'WormBase:' + gene_num

            # label: symbol, else the synonym, else nothing at all
            gene_label = gene_symbol or gene_synonym or None

            model.addClassToGraph(
                gene_id, gene_label, Genotype.genoparts['gene'])
            if live == 'Dead':
                model.addDeprecatedClass(gene_id)
            geno.addTaxon(taxon_id, gene_id)
            if gene_synonym:
                model.addSynonym(gene_id, gene_synonym)

            limit_reached = limit is not None and line_counter > limit
            if not self.testMode and limit_reached:
                break

    return
示例10: __init__
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the 'decipher' source: register ingest metadata with the
    parent class, pick the disease test ids, and build the Genotype and
    Model helpers on this source's graph.

    :param graph_type: forwarded to the parent and kept on the instance
    :param are_bnodes_skolemized: forwarded to the parent and kept on
        the instance
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'decipher',
        ingest_title='Development Disorder Genotype Phenotype Database',
        ingest_url='https://decipher.sanger.ac.uk/',
        license_url='https://decipher.sanger.ac.uk/legal',
        data_rights='https://decipher.sanger.ac.uk/datasharing',
        # file_handle=None
    )

    # fall back to an empty list when no disease test ids are configured
    if 'disease' in self.all_test_ids:
        self.test_ids = self.all_test_ids['disease']
    else:
        LOG.warning("not configured with disease test ids.")
        self.test_ids = []

    # self-assignment kept exactly as in the original
    self.graph = self.graph
    self.geno = Genotype(self.graph)
    self.model = Model(self.graph)
    self.graph_type = graph_type
    self.are_bnodes_skolemized = are_bnodes_skolemized
示例11: __init__
def __init__(self):
    """
    Set up the 'ctd' source: dataset description, gene and disease test
    ids taken from the configuration, and graph helper objects.
    """
    Source.__init__(self, 'ctd')
    self.dataset = Dataset(
        'ctd', 'CTD', 'http://ctdbase.org', None,
        'http://ctdbase.org/about/legal.jsp')

    # gene test ids; empty when the configuration does not supply them
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.test_geneids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    # disease test ids; same fallback
    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_diseaseids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.gu = GraphUtils(curie_map.get())
    self.g = self.graph
    self.geno = Genotype(self.g)
示例12: __init__
def __init__(self):
    """
    Set up the 'mpd' source: namespaces, dataset description with its
    citation, empty working lookups, and graph helper objects.
    """
    Source.__init__(self, 'mpd')

    # @N, not sure if this step is required
    self.namespaces.update(curie_map.get())

    self.stdevthreshold = 2
    self.nobnodes = True  # FIXME

    # describe this resource in the dataset object
    # @N: Note that there is no license as far as I can tell
    self.dataset = Dataset(
        'mpd', 'MPD', 'http://phenome.jax.org', None, None)
    # TODO add a citation for mpd dataset as a whole
    self.dataset.set_citation('PMID:15619963')

    # working lookups, all starting out empty
    self.assayhash = dict()
    self.idlabel_hash = dict()
    # to store the mean/zscore of each measure by strain+sex
    self.score_means_by_measure = dict()
    # to store the mean value for each measure by strain+sex
    self.strain_scores_by_measure = dict()

    self.geno = Genotype(self.graph)
    self.gu = GraphUtils(curie_map.get())
示例13: __init__
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the 'ctd' source: dataset description, gene and disease test
    ids taken from the configuration, and Genotype/Pathway helpers.

    :param graph_type: forwarded to the parent initialiser
    :param are_bnodes_skolemized: forwarded to the parent initialiser
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ctd')
    self.dataset = Dataset(
        'ctd', 'CTD', 'http://ctdbase.org', None,
        'http://ctdbase.org/about/legal.jsp')

    # gene test ids; empty when the configuration does not supply them
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.test_geneids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    # disease test ids; same fallback
    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_diseaseids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.g = self.graph
    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)
示例14: _process_all
def _process_all(self, limit):
    """
    Fetch the OMIM identifiers, declare the human genome/taxon, and hand
    every identifier to ``self.process_entries`` with
    ``self._transform_entry`` as the per-entry handler and the 'all'
    include set.

    Per the original notes (the work happens in helpers not shown here):
    the ids come from the omim.txt.Z file and the OMIM API is queried
    iteratively for JSON data, producing OMIM classes with label,
    definition and some synonyms; "removed" entries become deprecated
    classes, "moved" entries are deprecated with consider annotations;
    phenotypicSeries ids are extracted as superclasses and Orphanet/UMLS
    ids as equivalents.

    In testMode the test graph is used instead of the main graph.

    :param limit: passed through to ``self.process_entries``
    :return: None
    """
    omimids = self._get_omim_ids()  # store the set of omim identifiers

    graph = self.testgraph if self.testMode else self.graph
    geno = Genotype(graph)
    model = Model(graph)

    # tax_num = '9606'   # TODO PYLINT unused
    human_taxon = 'NCBITaxon:9606'

    # add genome and taxon
    geno.addGenome(human_taxon, 'Human')  # tax label can get added elsewhere
    model.addClassToGraph(human_taxon, None)  # label added elsewhere

    self.process_entries(
        omimids, self._transform_entry, {'all'}, graph, limit)

    return
示例15: _create_genome_builds
def _create_genome_builds(self):
    """
    Various resources will map variations to either UCSC (hg*)
    or to NCBI assemblies. Here we create the equivalences between them.
    Data taken from:
    https://genome.ucsc.edu/FAQ/FAQreleases.html#release1

    For every species in ``self.species`` and every assembly key listed
    under ``self.files[<taxon number>]['assembly']``, both the key and
    its ``self.localtt`` translation are added as reference genomes and
    declared the same individual. Keys without a ':' or without a local
    translation are logged and skipped.

    :return: None
    """
    # TODO add more species
    graph = self.graph
    geno = Genotype(graph)
    model = Model(graph)

    LOG.info("Adding equivalent assembly identifiers")
    for sp in self.species:
        tax_id = self.globaltt[sp]
        txid_num = tax_id.split(':')[1]

        for key in self.files[txid_num]['assembly']:
            ucsc_id = key
            try:
                ucsc_label = ucsc_id.split(':')[1]
            except IndexError:
                LOG.error('%s Assembly id: "%s" is problematic', sp, key)
                continue

            if key not in self.localtt:
                LOG.error(
                    '%s Assembly id: "%s" is not in local translation table',
                    sp, key)
                # skip: falling through used an unbound mapped_id on the
                # first miss, or the previous assembly's id on later ones
                continue
            mapped_id = self.localtt[key]

            mapped_label = 'NCBI build ' + str(mapped_id.split(':')[1])

            geno.addReferenceGenome(ucsc_id, ucsc_label, tax_id)
            geno.addReferenceGenome(mapped_id, mapped_label, tax_id)
            model.addSameIndividual(ucsc_id, mapped_id)

    return