本文整理汇总了Python中dipper.models.Genotype.Genotype.addGene方法的典型用法代码示例。如果您正苦于以下问题:Python Genotype.addGene方法的具体用法?Python Genotype.addGene怎么用?Python Genotype.addGene使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addGene方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_qtls_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
#.........这里部分代码省略.........
re.match(r'rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
model.addIndividualToGraph(
dbsnp_id, None,
self.globaltt['sequence_alteration'])
model.addXref(qtl_id, dbsnp_id)
gene_id = gene_id.replace('uncharacterized ', '').strip()
if gene_id is not None and gene_id != '' and gene_id != '.'\
and re.fullmatch(r'[^ ]*', gene_id) is not None:
# we assume if no src is provided and gene_id is an integer,
# then it is an NCBI gene ... (okay, lets crank that back a notch)
if gene_id_src == '' and gene_id.isdigit() and \
gene_id in self.gene_info:
# LOG.info(
# 'Warm & Fuzzy saying %s is a NCBI gene for %s',
# gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '' and gene_id.isdigit():
LOG.warning(
'Cold & Prickely saying %s is a NCBI gene for %s',
gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '':
LOG.error(
' "%s" is a NOT NCBI gene for %s', gene_id, common_name)
gene_id_src = None
if gene_id_src == 'NCBIgene':
gene_id = 'NCBIGene:' + gene_id
# we will expect that these will get labels elsewhere
geno.addGene(gene_id, None)
# FIXME what is the right relationship here?
geno.addAffectedLocus(qtl_id, gene_id)
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_:' + re.sub(
r':', '', gene_id) + '-' + peak_mark.strip()
geno.addSequenceAlterationToVariantLocus(
dbsnp_id, vl_id)
geno.addAffectedLocus(vl_id, gene_id)
# add the trait
model.addClassToGraph(trait_id, trait_name)
# Add publication
reference = None
if re.match(r'ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
reference = Reference(graph, pub_id)
elif pubmed_id != '':
pub_id = 'PMID:' + pubmed_id.strip()
reference = Reference(
graph, pub_id, self.globaltt['journal article'])
if reference is not None:
reference.addRefToGraph()
# make the association to the QTL
assoc = G2PAssoc(
graph, self.name, qtl_id, trait_id, self.globaltt['is marker for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
示例2: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
#.........这里部分代码省略.........
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for v in variants:
vl_id = v
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(vl_id, vl_symbol,
geno.genoparts['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_'+gene+'-VL'
vl_id = re.sub(r':', '', vl_id)
if self.nobnodes:
vl_id = ':'+vl_id
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(vl_id, vl_symbol,
geno.genoparts['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = '_'+re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
if self.nobnodes:
vslc_id = ':' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
self.id_label_hash[vslc_id] = vslc_label
vslc_list.append(vslc_id)
geno.addPartsToVSLC(
vslc_id, vl, None, geno.zygosity['indeterminate'],
geno.object_properties['has_alternate_part'], None)
gu.addIndividualToGraph(
g, vslc_id, vslc_label,
geno.genoparts['variant_single_locus_complement'])
if len(vslc_list) > 0:
if len(vslc_list) > 1:
gvc_id = '-'.join(vslc_list)
gvc_id = re.sub(r':', '', gvc_id)
if self.nobnodes:
gvc_id = ':'+gvc_id
gvc_label = \
'; '.join(self.id_label_hash[v] for v in vslc_list)
gu.addIndividualToGraph(
g, gvc_id, gvc_label,
geno.genoparts['genomic_variation_complement'])
示例3: _process_genes
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_genes(self, limit=None):
    """
    Process the KEGG gene listing and add each gene to the graph.

    Each input row is a tab-separated ``(gene_id, gene_name)`` pair, where
    ``gene_name`` looks like::

        DST, BP240, BPA, ..., MACF2; dystonin; K10382 dystonin

    i.e. a semicolon-delimited ``(symbols; name; gene_class)`` record whose
    first field is a comma-delimited list of symbols.  The first symbol
    becomes the gene label, the remaining symbols are added as synonyms and
    the long-form name is added as the definition.  The first symbol is
    also cached in ``self.label_hash`` (for every row, even ones skipped in
    test mode).

    The input file is the one registered under ``self.files['hsa_genes']``.

    Triples created (per the original author's notes):
    <gene_id> is a SO:gene
    <gene_id> rdfs:label <gene_name>

    :param limit: stop after more than ``limit`` rows have been read
        (ignored in test mode); ``None`` means no limit
    :return: None
    """
    logger.info("Processing genes")
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (gene_id, gene_name) = row
            gene_id = 'KEGG-'+gene_id.strip()

            # split "symbols; name; gene_class", then the comma-delimited
            # symbol list; the first abbreviation is used as the label
            gene_stuff = re.split(';', gene_name)
            symbollist = re.split(',', gene_stuff[0])
            first_symbol = symbollist[0].strip()

            # remember the label even for rows we skip below
            if gene_id not in self.label_hash:
                self.label_hash[gene_id] = first_symbol

            if self.testMode and gene_id not in self.test_ids['genes']:
                continue

            # Add the gene as a class.
            geno.addGene(gene_id, first_symbol)

            # add the long name as the description
            if len(gene_stuff) > 1:
                description = gene_stuff[1].strip()
                gu.addDefinition(g, gene_id, description)

            # add the rest of the symbols as synonyms; the first symbol is
            # already the label, so it is skipped here
            for synonym in symbollist[1:]:
                gu.addSynonym(g, gene_id, synonym.strip())

            # TODO add the KO here?

            if (not self.testMode) and (limit is not None and line_counter > limit):
                break

    logger.info("Done with genes")
    return
示例4: _process_omim2gene
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_omim2gene(self, limit=None):
    """
    Map OMIM IDs to KEGG gene IDs, branching on the ``link_type`` column.

    Each input row is a tab-separated ``(kegg_gene_id, omim_id, link_type)``
    triple read from the file registered under ``self.files['omim2gene']``.

    * ``equivalent`` -- the OMIM ID is added as a class, the KEGG ID is
      added as a gene, and the two are declared equivalent classes.
    * ``reverse`` -- a variant locus (looked up via
      ``self._make_variant_locus_id``) is added as an allele of the KEGG
      gene and associated with the OMIM ID using ``is_marker_for``.
    * ``original`` -- currently skipped (logged at INFO).
    * anything else -- logged as a warning.

    Triples created (per the original author's notes):
    <kegg_gene_id> is a Gene
    <omim_gene_id> is a Gene
    <kegg_gene_id> equivalentClass <omim_gene_id>
    <assoc_id> has subject <omim_disease_id>
    <assoc_id> has object <kegg_gene_id>

    :param limit: stop after more than ``limit`` rows have been read
        (ignored in test mode); ``None`` means no limit
    :return: None
    :raises KeyError: if a ``reverse`` row yields a variant locus id that
        is missing from ``self.label_hash``
    """
    logger.info("Processing OMIM to KEGG gene")
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    line_counter = 0
    geno = Genotype(g)
    gu = GraphUtils(curie_map.get())
    raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (kegg_gene_id, omim_id, link_type) = row

            # NOTE(review): this membership test uses the raw, un-prefixed
            # KEGG ID -- confirm test_ids['genes'] stores IDs in that form.
            if self.testMode and kegg_gene_id not in self.test_ids['genes']:
                continue

            kegg_gene_id = 'KEGG-'+kegg_gene_id.strip()
            omim_id = re.sub('omim', 'OMIM', omim_id)
            if link_type == 'equivalent':
                # these are genes! so add them as a class then make equivalence
                gu.addClassToGraph(g, omim_id, None)
                geno.addGene(kegg_gene_id, None)
                gu.addEquivalentClass(g, kegg_gene_id, omim_id)
            elif link_type == 'reverse':
                # make an association between an OMIM ID and the KEGG gene ID
                # we do this with omim ids because they are more atomic than KEGG ids
                alt_locus_id = self._make_variant_locus_id(kegg_gene_id, omim_id)
                alt_label = self.label_hash[alt_locus_id]
                gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts['variant_locus'])
                geno.addAlleleOfGene(alt_locus_id, kegg_gene_id)

                # Add the disease to gene relationship.
                rel = gu.object_properties['is_marker_for']
                assoc = G2PAssoc(self.name, alt_locus_id, omim_id, rel)
                assoc.add_association_to_graph(g)
            elif link_type == 'original':
                # these are sometimes a gene, and sometimes a disease
                logger.info('Unable to handle original link for %s-%s', kegg_gene_id, omim_id)
            else:
                # don't know what these are
                # (Logger.warn is a deprecated alias; use Logger.warning)
                logger.warning('Unhandled link type for %s-%s: %s', kegg_gene_id, omim_id, link_type)

            if (not self.testMode) and (limit is not None and line_counter > limit):
                break

    logger.info("Done with OMIM to KEGG gene")
    # register the association properties on the graph once all rows are in
    gu.loadProperties(g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
    gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
    gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)
    return
示例5: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_data(self, raw, limit=None):
logger.info("Processing Data from %s", raw)
gu = GraphUtils(curie_map.get())
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
line_counter = 0
gu.loadAllProperties(g)
gu.loadObjectProperties(g, geno.object_properties)
# Add the taxon as a class
taxon_id = 'NCBITaxon:10090' # map to Mus musculus
gu.addClassToGraph(g, taxon_id, None)
# with open(raw, 'r', encoding="utf8") as csvfile:
with gzip.open(raw, 'rt') as csvfile:
filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
next(filereader, None) # skip the header row
for row in filereader:
line_counter += 1
(marker_accession_id, marker_symbol, phenotyping_center,
colony, sex, zygosity, allele_accession_id, allele_symbol,
allele_name, strain_accession_id, strain_name, project_name,
project_fullname, pipeline_name, pipeline_stable_id,
procedure_stable_id, procedure_name, parameter_stable_id,
parameter_name, top_level_mp_term_id, top_level_mp_term_name,
mp_term_id, mp_term_name, p_value, percentage_change,
effect_size, statistical_method, resource_name) = row
if self.testMode and marker_accession_id not in self.test_ids:
continue
# ##### cleanup some of the identifiers ######
zygosity_id = self._map_zygosity(zygosity)
# colony ids sometimes have <> in them, spaces,
# or other non-alphanumerics and break our system;
# replace these with underscores
colony_id = '_'+re.sub(r'\W+', '_', colony)
if self.nobnodes:
colony_id = ':'+colony_id
if not re.match(r'MGI', allele_accession_id):
allele_accession_id = \
'_IMPC-'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
allele_accession_id = ':'+allele_accession_id
if re.search(r'EUROCURATE', strain_accession_id):
# the eurocurate links don't resolve at IMPC
strain_accession_id = '_'+strain_accession_id
if self.nobnodes:
strain_accession_id = ':'+strain_accession_id
elif not re.match(r'MGI', strain_accession_id):
logger.info(
"Found a strange strain accession...%s",
strain_accession_id)
strain_accession_id = 'IMPC:'+strain_accession_id
######################
# first, add the marker and variant to the graph as with MGI,
# the allele is the variant locus. IF the marker is not known,
# we will call it a sequence alteration. otherwise,
# we will create a BNode for the sequence alteration.
sequence_alteration_id = variant_locus_id = None
variant_locus_name = sequence_alteration_name = None
# extract out what's within the <> to get the symbol
if re.match(r'.*<.*>', allele_symbol):
sequence_alteration_name = \
re.match(r'.*<(.*)>', allele_symbol).group(1)
else:
sequence_alteration_name = allele_symbol
if marker_accession_id is not None and \
marker_accession_id == '':
logger.warning(
"Marker unspecified on row %d", line_counter)
marker_accession_id = None
if marker_accession_id is not None:
variant_locus_id = allele_accession_id
variant_locus_name = allele_symbol
variant_locus_type = geno.genoparts['variant_locus']
geno.addGene(marker_accession_id, marker_symbol,
geno.genoparts['gene'])
geno.addAllele(variant_locus_id, variant_locus_name,
variant_locus_type, None)
geno.addAlleleOfGene(variant_locus_id, marker_accession_id)
sequence_alteration_id = \
'_seqalt'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
sequence_alteration_id = ':'+sequence_alteration_id
geno.addSequenceAlterationToVariantLocus(
sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........
示例6: _process_omim2gene
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_omim2gene(self, limit=None):
    """
    Link OMIM identifiers to KEGG gene identifiers.

    Rows of the ``omim2gene`` file are tab-separated
    ``(kegg_gene_id, omim_id, link_type)`` triples, handled by link type:

    * ``equivalent``: the OMIM ID is added as a class and the KEGG ID as a
      gene.  If the OMIM ID has replacements in ``self.omim_replaced``, the
      last replacement typed as a gene is used instead.  The equivalence
      itself is only asserted when the (possibly replaced) OMIM ID is typed
      as a gene in ``self.omim_type``.
    * ``reverse``: a variant locus is created, marked as affecting the KEGG
      gene, annotated as a blank node, and associated with the OMIM ID via
      the ``is marker for`` relation.
    * ``original``: skipped, logged at INFO level.
    * otherwise: logged as a warning.

    Triples created (per the original author's notes):
    <kegg_gene_id> is a Gene
    <omim_gene_id> is a Gene
    <kegg_gene_id>> hasXref <omim_gene_id>
    <assoc_id> has subject <omim_disease_id>
    <assoc_id> has object <kegg_gene_id>

    :param limit: outside test mode, stop once the reader's line number
        exceeds this value; ``None`` disables the cut-off
    :return: None
    """

    def is_gene(candidate):
        # True only when the OMIM id is known and typed as a gene
        return candidate in self.omim_type and \
            self.omim_type[candidate] == self.globaltt['gene']

    LOG.info("Processing OMIM to KEGG gene")
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    geno = Genotype(graph)
    source_path = '/'.join([self.rawdir, self.files['omim2gene']['file']])

    with open(source_path, 'r', encoding="iso-8859-1") as handle:
        reader = csv.reader(handle, delimiter='\t', quotechar='\"')
        for record in reader:
            kegg_gene_id, omim_id, link_type = record

            # the test-id filter is applied to the raw id, before prefixing
            if self.test_mode and kegg_gene_id not in self.test_ids['genes']:
                continue

            kegg_gene_id = 'KEGG-' + kegg_gene_id.strip()
            omim_id = re.sub(r'omim', 'OMIM', omim_id)

            if link_type == 'equivalent':
                # both sides are genes: declare them, then try to equate
                model.addClassToGraph(omim_id, None)
                geno.addGene(kegg_gene_id, None)

                # prefer a replacement id when one is typed as a gene;
                # the last qualifying replacement wins
                if omim_id in self.omim_replaced:
                    for replacement in self.omim_replaced[omim_id]:
                        if is_gene(replacement):
                            omim_id = replacement

                if is_gene(omim_id):
                    model.addEquivalentClass(kegg_gene_id, omim_id)

            elif link_type == 'reverse':
                # associate the OMIM id with a variant locus of the KEGG
                # gene; OMIM ids are more atomic than KEGG ids
                alt_locus_id = self._make_variant_locus_id(
                    kegg_gene_id, omim_id)
                alt_label = self.label_hash[alt_locus_id]
                model.addIndividualToGraph(
                    alt_locus_id, alt_label, self.globaltt['variant_locus'])
                geno.addAffectedLocus(alt_locus_id, kegg_gene_id)
                model.addBlankNodeAnnotation(alt_locus_id)

                # the disease-to-gene relationship
                relation = self.globaltt['is marker for']
                association = G2PAssoc(
                    graph, self.name, alt_locus_id, omim_id, relation)
                association.add_association_to_graph()

            elif link_type == 'original':
                # could be either a gene or a disease, so not handled
                LOG.info(
                    'Unable to handle original link for %s-%s',
                    kegg_gene_id, omim_id)

            else:
                # unknown link type
                LOG.warning(
                    'Unhandled link type for %s-%s: %s',
                    kegg_gene_id, omim_id, link_type)

            limit_reached = limit is not None and reader.line_num > limit
            if limit_reached and not self.test_mode:
                break

    LOG.info("Done with OMIM to KEGG gene")
示例7: _process_genes
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_genes(self, limit=None):
    """
    Process the KEGG gene listing and add each gene to the graph.

    Each input row is a tab-separated ``(gene_id, gene_name)`` pair, where
    ``gene_name`` looks like::

        DST, BP240, BPA, BPAG1, CATX-15, CATX15, D6S1101, DMH, DT,
        EBSB2, HSAN6, MACF2; dystonin; K10382 dystonin

    i.e. a semicolon-delimited ``(symbols; name; gene_class)`` record whose
    first field is a comma-delimited list of symbols.

    * The first symbol becomes the gene label and is cached in
      ``self.label_hash`` (for every row, even ones skipped in test mode).
    * The remaining symbols are added as synonyms.
    * The long-form name (second field) is added as the definition.
    * If the third field contains a KO identifier (``K`` followed by
      digits), the gene is added as a member of ``KEGG-ko:<KO id>``.

    The input file is the one registered under ``self.files['hsa_genes']``.

    Triples created (per the original author's notes):
    <gene_id> is a SO:gene
    <gene_id> rdfs:label <gene_name>

    :param limit: outside test mode, stop once the reader's line number
        exceeds this value; ``None`` disables the cut-off
    :return: None
    """
    LOG.info("Processing genes")
    if self.test_mode:
        graph = self.testgraph
    else:
        graph = self.graph
    model = Model(graph)
    family = Family(graph)
    geno = Genotype(graph)
    raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            (gene_id, gene_name) = row
            gene_id = 'KEGG-'+gene_id.strip()

            # Split "symbols; name; gene_class" on the semicolon.  The
            # pattern used to be the plain string 'r;' (a misplaced raw
            # prefix), which split on the two characters "r;" and so never
            # separated the fields correctly.
            gene_stuff = re.split(r';', gene_name)
            symbollist = re.split(r',', gene_stuff[0])
            first_symbol = symbollist[0].strip()

            # remember the label even for rows we skip below
            if gene_id not in self.label_hash:
                self.label_hash[gene_id] = first_symbol

            if self.test_mode and gene_id not in self.test_ids['genes']:
                continue

            # Add the gene as a class.
            geno.addGene(gene_id, first_symbol)

            # add the long name as the description
            if len(gene_stuff) > 1:
                description = gene_stuff[1].strip()
                model.addDefinition(gene_id, description)

            # add the rest of the symbols as synonyms; the first symbol is
            # already the label, so it is skipped here
            for synonym in symbollist[1:]:
                model.addSynonym(gene_id, synonym.strip())

            # The third field carries the KO (KEGG orthology) id.  The
            # pattern has no capture group, so the whole match (group 0)
            # is the identifier.
            if len(gene_stuff) > 2:
                ko_match = re.search(r'K\d+', gene_stuff[2])
                if ko_match is not None:
                    ko = 'KEGG-ko:' + ko_match.group(0)
                    family.addMemberOf(gene_id, ko)

            if not self.test_mode and limit is not None and reader.line_num > limit:
                break

    LOG.info("Done with genes")
示例8: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
#.........这里部分代码省略.........
if not self.test_mode and (
limit is not None and reader.line_num > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for var in variants:
vl_id = var.strip()
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_:' + re.sub(r':', '', gene) + '-VL'
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
vslc_id = '_:' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
self.id_label_hash[vslc_id] = vslc_label
vslc_list.append(vslc_id)
geno.addPartsToVSLC(
vslc_id, vl, None, self.globaltt['indeterminate'],
self.globaltt['has_variant_part'], None)
model.addIndividualToGraph(
vslc_id, vslc_label,
self.globaltt['variant single locus complement'])
if len(vslc_list) > 0:
if len(vslc_list) > 1:
gvc_id = '-'.join(vslc_list)
gvc_id = re.sub(r'_|:', '', gvc_id)
gvc_id = '_:'+gvc_id
gvc_label = '; '.join(self.id_label_hash[v] for v in vslc_list)
model.addIndividualToGraph(
gvc_id, gvc_label,
self.globaltt['genomic_variation_complement'])
for vslc_id in vslc_list:
geno.addVSLCtoParent(vslc_id, gvc_id)
else:
示例9: _process_QTLs_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addGene [as 别名]
def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
logger.info("Processing genetic location for %s", taxon_id)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row
if self.testMode and int(qtl_id) not in self.test_ids:
continue
qtl_id = 'AQTL:'+qtl_id
trait_id = 'AQTLTrait:'+trait_id
# Add QTL to graph
f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
f.addTaxonToFeature(g, taxon_id)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# add a version of the chromosome which is defined as the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_id)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
start = stop = None
if re.search('-', range_cm):
range_parts = re.split('-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
else:
logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
elif position_cm != '':
start = stop = int(float(position_cm))
# FIXME remove converion to int for start/stop when schema can handle floats
# add in the genetic location based on the range
f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureToGraph(g)
# sometimes there's a peak marker, like a rsid. we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
gu.addXref(g, qtl_id, dbsnp_id)
if gene_id is not None and gene_id != '' and gene_id != '.':
if gene_id_src == 'NCBIgene' or gene_id_src == '': # we assume if no src is provided, it's NCBI
gene_id = 'NCBIGene:'+gene_id.strip()
geno.addGene(gene_id, None) # we will expect that these labels provided elsewhere
geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation']) # FIXME what is the right relationship here?
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
if self.nobnodes:
vl_id = ':' + vl_id
geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
# add the trait
gu.addClassToGraph(g, trait_id, trait_name)
# Add publication
r = None
if re.match('ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
r = Reference(pub_id)
elif pubmed_id != '':
pub_id = 'PMID:'+pubmed_id.strip()
r = Reference(pub_id, Reference.ref_types['journal_article'])
if r is not None:
#.........这里部分代码省略.........