本文整理汇总了Python中dipper.models.Genotype.Genotype.addChromosomeInstance方法的典型用法代码示例。如果您正苦于以下问题：Python Genotype.addChromosomeInstance方法的具体用法？Python Genotype.addChromosomeInstance怎么用？Python Genotype.addChromosomeInstance使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addChromosomeInstance方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_feature_loc
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
#.........这里部分代码省略.........
fid = 'WormBase:'+attribute_dict.get('variation')
flabel = attribute_dict.get('public_name')
sub = attribute_dict.get('substitution')
ins = attribute_dict.get('insertion')
# if it's a variation:
# variation=WBVar00604246;public_name=gk320600;strain=VC20384;substitution=C/T
desc = ''
if sub is not None:
desc = 'substitution='+sub
if ins is not None:
desc = 'insertion='+ins
# keep track of the strains with this variation,
# for later processing
strain_list = attribute_dict.get('strain')
if strain_list is not None:
for s in re.split(r',', strain_list):
if s.strip() not in strain_to_variant_map:
strain_to_variant_map[s.strip()] = set()
strain_to_variant_map[s.strip()].add(fid)
# if feature_type_label == 'RNAi_reagent':
# Target=WBRNAi00096030 1 4942
# this will tell us where the RNAi is actually binding
# target = attribute_dict.get('Target') # TODO unused
# rnai_num = re.split(r' ', target)[0] # TODO unused
# it will be the reagent-targeted-gene that has a position,
# (i think)
# TODO finish the RNAi binding location
name = attribute_dict.get('Name')
polymorphism = attribute_dict.get('polymorphism')
if fid is None:
if name is not None and re.match(r'WBsf', name):
fid = 'WormBase:'+name
name = None
else:
continue
if self.testMode \
and re.sub(r'WormBase:', '', fid) \
not in self.test_ids['gene']+self.test_ids['allele']:
continue
# these really aren't that interesting
if polymorphism is not None:
continue
if name is not None and not re.search(name, fid):
if flabel is None:
flabel = name
else:
gu.addSynonym(g, fid, name)
if desc is not None:
gu.addDescription(g, fid, desc)
alias = attribute_dict.get('Alias')
biotype = attribute_dict.get('biotype')
note = attribute_dict.get('Note')
other_name = attribute_dict.get('other_name')
for n in [alias, other_name]:
if n is not None:
gu.addSynonym(g, fid, other_name)
ftype = self.get_feature_type_by_class_and_biotype(
feature_type_label, biotype)
chr_id = makeChromID(chrom, build_id, 'CHR')
geno.addChromosomeInstance(chrom, build_id, build_num)
f = Feature(fid, flabel, ftype)
f.addFeatureStartLocation(start, chr_id, strand)
f.addFeatureEndLocation(start, chr_id, strand)
feature_is_class = False
if feature_type_label == 'gene':
feature_is_class = True
f.addFeatureToGraph(g, True, None, feature_is_class)
if note is not None:
gu.addDescription(g, fid, note)
if not self.testMode \
and limit is not None and line_counter > limit:
break
# RNAi reagents:
# I RNAi_primary RNAi_reagent 4184 10232 . + . Target=WBRNAi00001601 1 6049 +;laboratory=YK;history_name=SA:yk326e10
# I RNAi_primary RNAi_reagent 4223 10147 . + . Target=WBRNAi00033465 1 5925 +;laboratory=SV;history_name=MV_SV:mv_G_YK5052
# I RNAi_primary RNAi_reagent 5693 9391 . + . Target=WBRNAi00066135 1 3699 +;laboratory=CH
# TODO TF binding sites and network:
# I TF_binding_site_region TF_binding_site 1861 2048 . + . Name=WBsf292777;tf_id=WBTranscriptionFactor000025;tf_name=DAF-16
# I TF_binding_site_region TF_binding_site 3403 4072 . + . Name=WBsf331847;tf_id=WBTranscriptionFactor000703;tf_name=DPL-1
return
示例2: _transform_entry
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
#.........这里部分代码省略.........
if 'comments' in genemap:
# add a comment to this feature
comment = genemap['comments']
if comment.strip() != '':
model.addDescription(feature_id, comment)
if 'cytoLocation' in genemap:
cytoloc = genemap['cytoLocation']
# parse the cytoloc.
# add this omim thing as
# a subsequence of the cytofeature
# 18p11.3-p11.2
# FIXME
# add the other end of the range,
# but not sure how to do that
# not sure if saying subsequence of feature
# is the right relationship
f = Feature(g, feature_id, feature_label, omimtype)
if 'chromosomeSymbol' in genemap:
chrom_num = str(genemap['chromosomeSymbol'])
chrom = makeChromID(chrom_num, tax_num, 'CHR')
geno.addChromosomeClass(
chrom_num, tax_id, tax_label)
# add the positional information, if available
fstart = fend = -1
if 'chromosomeLocationStart' in genemap:
fstart = genemap['chromosomeLocationStart']
if 'chromosomeLocationEnd' in genemap:
fend = genemap['chromosomeLocationEnd']
if fstart >= 0:
# make the build-specific chromosome
chrom_in_build = makeChromID(chrom_num,
build_num,
'MONARCH')
# then, add the chromosome instance
# (from the given build)
geno.addChromosomeInstance(
chrom_num, build_id, build_num, chrom)
if omimtype == \
Genotype.genoparts[
'heritable_phenotypic_marker']:
postypes = [Feature.types['FuzzyPosition']]
else:
postypes = None
# NOTE that no strand information
# is available in the API
f.addFeatureStartLocation(
fstart, chrom_in_build, None, postypes)
if fend >= 0:
f.addFeatureEndLocation(
fend, chrom_in_build, None, postypes)
if fstart > fend:
logger.info(
"start>end (%d>%d) for %s",
fstart, fend, omimid)
# add the cytogenic location too
# for now, just take the first one
cytoloc = cytoloc.split('-')[0]
loc = makeChromID(cytoloc, tax_num, 'CHR')
model.addClassToGraph(loc, None)
f.addSubsequenceOfFeature(loc)
f.addFeatureToGraph(True, None, is_gene)
# end adding causative genes/features
# check if moved, if so,
# make it deprecated and
# replaced consider class to the other thing(s)
# some entries have been moved to multiple other entries and
# use the joining raw word "and"
# 612479 is movedto: "603075 and 603029" OR
# others use a comma-delimited list, like:
# 610402 is movedto: "609122,300870"
if e['entry']['status'] == 'moved':
if re.search(r'and', str(e['entry']['movedTo'])):
# split the movedTo entry on 'and'
newids = re.split(r'and', str(e['entry']['movedTo']))
elif len(str(e['entry']['movedTo']).split(',')) > 0:
# split on the comma
newids = str(e['entry']['movedTo']).split(',')
else:
# make a list of one
newids = [str(e['entry']['movedTo'])]
# cleanup whitespace and add OMIM prefix to numeric portion
fixedids = []
for i in newids:
fixedids.append('OMIM:'+i.strip())
model.addDeprecatedClass(omimid, fixedids)
self._get_phenotypicseries_parents(e['entry'], g)
self._get_mappedids(e['entry'], g)
self._get_mapped_gene_ids(e['entry'], g)
self._get_pubs(e['entry'], g)
self._get_process_allelic_variants(e['entry'], g) # temp gag
return
示例3: _get_variants
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
#.........这里部分代码省略.........
pheno_list = re.split(r'[,;]', phenotype_ids)
if self.testMode:
# get intersection of test disease ids
# and these phenotype_ids
intersect = \
list(
set([str(i)
for i in self.disease_ids]) & set(pheno_list))
if int(gene_num) not in self.gene_ids and\
int(variant_num) not in self.variant_ids and\
len(intersect) < 1:
continue
# TODO may need to switch on assembly to create correct
# assembly/build identifiers
build_id = ':'.join(('NCBIGenome', assembly))
# make the reference genome build
geno.addReferenceGenome(build_id, assembly, tax_id)
allele_type_id = self._map_type_of_allele(allele_type)
bandinbuild_id = None
if str(chr) == '':
# check cytogenic location
if str(cytogenetic_loc).strip() != '':
# use cytogenic location to get the apx location
# oddly, they still put an assembly number even when
# there's no numeric location
if not re.search(r'-', str(cytogenetic_loc)):
band_id = makeChromID(
re.split(r'-', str(cytogenetic_loc)),
tax_num, 'CHR')
geno.addChromosomeInstance(
cytogenetic_loc, build_id, assembly, band_id)
bandinbuild_id = makeChromID(
re.split(r'-', str(cytogenetic_loc)),
assembly, 'MONARCH')
else:
# can't deal with ranges yet
pass
else:
# add the human chromosome class to the graph,
# and add the build-specific version of it
chr_id = makeChromID(str(chr), tax_num, 'CHR')
geno.addChromosomeClass(str(chr), tax_id, tax_label)
geno.addChromosomeInstance(
str(chr), build_id, assembly, chr_id)
chrinbuild_id = makeChromID(str(chr), assembly, 'MONARCH')
seqalt_id = ':'.join(('ClinVarVariant', variant_num))
gene_id = None
# they use -1 to indicate unknown gene
if str(gene_num) != '-1' and str(gene_num) != 'more than 10':
if re.match(r'^Gene:', gene_num):
gene_num = "NCBI" + gene_num
else:
gene_id = ':'.join(('NCBIGene', str(gene_num)))
# FIXME there are some "variants" that are actually haplotypes
# probably will get taken care of when we switch to processing
# the xml for example, variant_num = 38562
# but there's no way to tell if it's a haplotype
# in the csv data so the dbsnp or dbvar
# should probably be primary,
示例4: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
#.........这里部分代码省略.........
else:
# skip over anything that isn't a placed_scaffold
# at the class level
logger.info("Found non-placed chromosome %s", scaffold)
chrom_num = None
m_chr_unloc = re.match(unlocalized_scaffold_pattern, scaffold)
m_chr_unplaced = re.match(unplaced_scaffold_pattern, scaffold)
scaffold_num = None
if m:
pass
elif m_chr_unloc is not None and\
len(m_chr_unloc.groups()) == 2:
chrom_num = m_chr_unloc.group(1)
scaffold_num = chrom_num+'_'+m_chr_unloc.group(2)
elif m_chr_unplaced is not None and\
len(m_chr_unplaced.groups()) == 1:
scaffold_num = m_chr_unplaced.group(1)
else:
logger.error(
"There's a chr pattern that we aren't matching: %s",
scaffold)
if chrom_num is not None:
# the chrom class (generic) id
chrom_class_id = makeChromID(chrom_num, taxon, 'CHR')
# first, add the chromosome class (in the taxon)
geno.addChromosomeClass(
chrom_num, taxon_id, self.files[taxon]['genome_label'])
# then, add the chromosome instance (from the given build)
geno.addChromosomeInstance(chrom_num, build_id, build_num,
chrom_class_id)
# add the chr to the hashmap of coordinates for this build
# the chromosome coordinate space is itself
if chrom_num not in mybands.keys():
mybands[chrom_num] = {
'min': 0,
'max': int(stop),
'chr': chrom_num,
'ref': build_id,
'parent': None,
'stain': None,
'type': Feature.types['chromosome']}
if scaffold_num is not None:
# this will put the coordinates of the scaffold
# in the scaffold-space and make sure that the scaffold
# is part of the correct parent.
# if chrom_num is None,
# then it will attach it to the genome,
# just like a reg chrom
mybands[scaffold_num] = {
'min': start,
'max': stop,
'chr': scaffold_num,
'ref': build_id,
'parent': chrom_num,
'stain': None,
'type': Feature.types['assembly_component'],
'synonym': scaffold}
if band_num is not None and band_num.strip() != '':
示例5: _process_qtls_genomic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
#.........这里部分代码省略.........
trait="Spleen percentage";breed="leghorn";"FlankMarkers=ADL0022";VTO_name="spleen mass";
MO_name="spleen weight to body weight ratio";Map_Type="Linkage";Model="Mendelian";
Test_Base="Chromosome-wise";Significance="Significant";P-value="<0.05";F-Stat="5.52";
Variance="2.94";Dominance_Effect="-0.002";Additive_Effect="0.01
'''
str(example)
# make dictionary of attributes
# keys are:
# QTL_ID,Name,Abbrev,PUBMED_ID,trait_ID,trait,FlankMarkers,
# VTO_name,Map_Type,Significance,P-value,Model,
# Test_Base,Variance, Bayes-value,PTO_name,gene_IDsrc,peak_cM,
# CMO_name,gene_ID,F-Stat,LOD-score,Additive_Effect,
# Dominance_Effect,Likelihood_Ratio,LS-means,Breed,
# trait (duplicate with Name),Variance,Bayes-value,
# F-Stat,LOD-score,Additive_Effect,Dominance_Effect,
# Likelihood_Ratio,LS-means
# deal with poorly formed attributes
if re.search(r'"FlankMarkers";', attr):
attr = re.sub(r'FlankMarkers;', '', attr)
attr_items = re.sub(r'"', '', attr).split(";")
bad_attrs = set()
for attributes in attr_items:
if not re.search(r'=', attributes):
# remove this attribute from the list
bad_attrs.add(attributes)
attr_set = set(attr_items) - bad_attrs
attribute_dict = dict(item.split("=") for item in attr_set)
qtl_num = attribute_dict.get('QTL_ID')
if self.test_mode and int(qtl_num) not in self.test_ids:
continue
# make association between QTL and trait based on taxon
qtl_id = common_name + 'QTL:' + str(qtl_num)
model.addIndividualToGraph(qtl_id, None, self.globaltt['QTL'])
geno.addTaxon(taxon_curie, qtl_id)
#
trait_id = 'AQTLTrait:' + attribute_dict.get('trait_ID')
# if pub is in attributes, add it to the association
pub_id = None
if 'PUBMED_ID' in attribute_dict.keys():
pub_id = attribute_dict.get('PUBMED_ID')
if re.match(r'ISU.*', pub_id):
pub_id = 'AQTLPub:' + pub_id.strip()
reference = Reference(graph, pub_id)
else:
pub_id = 'PMID:' + pub_id.strip()
reference = Reference(
graph, pub_id, self.globaltt['journal article'])
reference.addRefToGraph()
# Add QTL to graph
assoc = G2PAssoc(
graph, self.name, qtl_id, trait_id,
self.globaltt['is marker for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
if 'P-value' in attribute_dict.keys():
scr = re.sub(r'<', '', attribute_dict.get('P-value'))
if ',' in scr:
scr = re.sub(r',', '.', scr)
if scr.isnumeric():
score = float(scr)
assoc.set_score(score)
assoc.add_association_to_graph()
# TODO make association to breed
# (which means making QTL feature in Breed background)
# get location of QTL
chromosome = re.sub(r'Chr\.', '', chromosome)
chrom_id = makeChromID(chromosome, taxon_curie, 'CHR')
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(
chromosome, build_id, build_label, chrom_id)
qtl_feature = Feature(graph, qtl_id, None, self.globaltt['QTL'])
if start_bp == '':
start_bp = None
qtl_feature.addFeatureStartLocation(
start_bp, chrom_in_build_id, strand,
[self.globaltt['FuzzyPosition']])
if stop_bp == '':
stop_bp = None
qtl_feature.addFeatureEndLocation(
stop_bp, chrom_in_build_id, strand,
[self.globaltt['FuzzyPosition']])
qtl_feature.addTaxonToFeature(taxon_curie)
qtl_feature.addFeatureToGraph()
if not self.test_mode and limit is not None and line_counter > limit:
break
# LOG.warning("Bad attribute flags in this file") # what does this even mean??
LOG.info("Done with QTL genomic mappings for %s", taxon_curie)
return
示例6: _process_qtls_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
def _process_qtls_genetic_location(
self, raw, txid, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
aql_curie = self.files[common_name + '_cm']['curie']
if self.test_mode:
graph = self.testgraph
else:
graph = self.graph
line_counter = 0
geno = Genotype(graph)
model = Model(graph)
eco_id = self.globaltt['quantitative trait analysis evidence']
taxon_curie = 'NCBITaxon:' + txid
LOG.info("Processing genetic location for %s from %s", taxon_curie, raw)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id,
qtl_symbol,
trait_name,
assotype,
empty,
chromosome,
position_cm,
range_cm,
flankmark_a2,
flankmark_a1,
peak_mark,
flankmark_b1,
flankmark_b2,
exp_id,
model_id,
test_base,
sig_level,
lod_score,
ls_mean,
p_values,
f_statistics,
variance,
bayes_value,
likelihood_ratio,
trait_id, dom_effect,
add_effect,
pubmed_id,
gene_id,
gene_id_src,
gene_id_type,
empty2) = row
if self.test_mode and int(qtl_id) not in self.test_ids:
continue
qtl_id = common_name + 'QTL:' + qtl_id.strip()
trait_id = ':'.join((aql_curie, trait_id.strip()))
# Add QTL to graph
feature = Feature(graph, qtl_id, qtl_symbol, self.globaltt['QTL'])
feature.addTaxonToFeature(taxon_curie)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_curie, 'CHR')
# add a version of the chromosome which is defined as
# the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_curie)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(
chromosome, build_id, build_label, chrom_id)
start = stop = None
# range_cm sometimes ends in "(Mb)" (i.e pig 2016 Nov)
range_mb = re.split(r'\(', range_cm)
if range_mb is not None:
range_cm = range_mb[0]
if re.search(r'[0-9].*-.*[0-9]', range_cm):
range_parts = re.split(r'-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and\
range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [
int(float(x.strip())) for x in re.split(r'-', range_cm)]
else:
LOG.info(
"A cM range we can't handle for QTL %s: %s",
qtl_id, range_cm)
#.........这里部分代码省略.........
示例7: _process_QTLs_genomic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
def _process_QTLs_genomic_location(self, raw, taxon_id, build_id, build_label, limit=None):
"""
This method
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
gu = GraphUtils(curie_map.get())
line_counter = 0
geno = Genotype(g)
genome_id = geno.makeGenomeID(taxon_id) # assume that chrs get added to the genome elsewhere
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
with gzip.open(raw, 'rt', encoding='ISO-8859-1') as tsvfile:
reader = csv.reader(tsvfile, delimiter="\t")
for row in reader:
line_counter += 1
if re.match('^#', ' '.join(row)):
continue
(chromosome, qtl_source, qtl_type, start_bp, stop_bp, frame, strand, score, attr) = row
# Chr.Z Animal QTLdb Production_QTL 33954873 34023581 . . .
# QTL_ID=2242;Name="Spleen percentage";Abbrev="SPLP";PUBMED_ID=17012160;trait_ID=2234;
# trait="Spleen percentage";breed="leghorn";"FlankMarkers=ADL0022";VTO_name="spleen mass";
# CMO_name="spleen weight to body weight ratio";Map_Type="Linkage";Model="Mendelian";
# Test_Base="Chromosome-wise";Significance="Significant";P-value="<0.05";F-Stat="5.52";
# Variance="2.94";Dominance_Effect="-0.002";Additive_Effect="0.01"
# make dictionary of attributes
# keys are:
# QTL_ID,Name,Abbrev,PUBMED_ID,trait_ID,trait,
# FlankMarkers,VTO_name,Map_Type,Significance,P-value,Model,Test_Base,Variance,
# Bayes-value,PTO_name,gene_IDsrc,peak_cM,CMO_name,gene_ID,F-Stat,LOD-score,Additive_Effect,
# Dominance_Effect,Likelihood_Ratio,LS-means,Breed,
# trait (duplicate with Name),Variance,Bayes-value,
# F-Stat,LOD-score,Additive_Effect,Dominance_Effect,Likelihood_Ratio,LS-means
# deal with poorly formed attributes
if re.search('"FlankMarkers";', attr):
attr = re.sub('"FlankMarkers";', '', attr)
attr_items = re.sub('"', '', attr).split(";")
bad_attr_flag = False
for a in attr_items:
if not re.search('=', a):
bad_attr_flag = True
if bad_attr_flag:
logger.error("Poorly formed data on line %d:\n %s", line_counter, '\t'.join(row))
continue
attribute_dict = dict(item.split("=") for item in re.sub('"', '', attr).split(";"))
qtl_num = attribute_dict.get('QTL_ID')
if self.testMode and int(qtl_num) not in self.test_ids:
continue
# make association between QTL and trait
qtl_id = 'AQTL:' + str(qtl_num)
gu.addIndividualToGraph(g, qtl_id, None, geno.genoparts['QTL'])
geno.addTaxon(taxon_id, qtl_id)
trait_id = 'AQTLTrait:'+attribute_dict.get('trait_ID')
# if pub is in attributes, add it to the association
pub_id = None
if 'PUBMED_ID' in attribute_dict.keys():
pub_id = attribute_dict.get('PUBMED_ID')
if re.match('ISU.*', pub_id):
pub_id = 'AQTLPub:' + pub_id.strip()
p = Reference(pub_id)
else:
pub_id = 'PMID:' + pub_id.strip()
p = Reference(pub_id, Reference.ref_types['journal_article'])
p.addRefToGraph(g)
# Add QTL to graph
assoc = G2PAssoc(self.name, qtl_id, trait_id, gu.object_properties['is_marker_for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
if 'P-value' in attribute_dict.keys():
score = float(re.sub('<', '', attribute_dict.get('P-value')))
assoc.set_score(score)
assoc.add_association_to_graph(g)
# TODO make association to breed (which means making QTL feature in Breed background)
# get location of QTL
chromosome = re.sub('Chr\.', '', chromosome)
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
qtl_feature = Feature(qtl_id, None, geno.genoparts['QTL'])
if start_bp == '':
#.........这里部分代码省略.........
示例8: _process_QTLs_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeInstance [as 别名]
def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
logger.info("Processing genetic location for %s", taxon_id)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row
if self.testMode and int(qtl_id) not in self.test_ids:
continue
qtl_id = 'AQTL:'+qtl_id
trait_id = 'AQTLTrait:'+trait_id
# Add QTL to graph
f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
f.addTaxonToFeature(g, taxon_id)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# add a version of the chromosome which is defined as the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_id)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
start = stop = None
if re.search('-', range_cm):
range_parts = re.split('-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
else:
logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
elif position_cm != '':
start = stop = int(float(position_cm))
# FIXME remove conversion to int for start/stop when schema can handle floats
# add in the genetic location based on the range
f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureToGraph(g)
# sometimes there's a peak marker, like a rsid. we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
gu.addXref(g, qtl_id, dbsnp_id)
if gene_id is not None and gene_id != '' and gene_id != '.':
if gene_id_src == 'NCBIgene' or gene_id_src == '': # we assume if no src is provided, it's NCBI
gene_id = 'NCBIGene:'+gene_id.strip()
geno.addGene(gene_id, None) # we will expect that these labels provided elsewhere
geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation']) # FIXME what is the right relationship here?
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
if self.nobnodes:
vl_id = ':' + vl_id
geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
# add the trait
gu.addClassToGraph(g, trait_id, trait_name)
# Add publication
r = None
if re.match('ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
r = Reference(pub_id)
elif pubmed_id != '':
pub_id = 'PMID:'+pubmed_id.strip()
r = Reference(pub_id, Reference.ref_types['journal_article'])
if r is not None:
#.........这里部分代码省略.........