本文整理汇总了 Python 中 dipper.utils.GraphUtils.GraphUtils.makeLeader 方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.makeLeader 方法的具体用法?Python GraphUtils.makeLeader 怎么用?Python GraphUtils.makeLeader 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dipper.utils.GraphUtils.GraphUtils 的用法示例。
在下文中一共展示了GraphUtils.makeLeader方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_phenotype_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import makeLeader [as 别名]
#.........这里部分代码省略.........
phenotype_ids = []
if mp_ids != '':
for i in re.split(r',', mp_ids):
i = i.strip()
mps = re.search(r'\[(.*)\]', i)
if mps is not None:
mp_id = mps.group(1).strip()
phenotype_ids.append(mp_id)
# pubmed ids are space delimited
pubmed_ids = []
if pubmed_nums.strip() != '':
for i in re.split(r'\s+', pubmed_nums):
pmid = 'PMID:'+i.strip()
pubmed_ids.append(pmid)
r = Reference(pmid,
Reference.ref_types['journal_article'])
r.addRefToGraph(g)
# https://www.mmrrc.org/catalog/sds.php?mmrrc_id=00001
# is a good example of 4 genotype parts
gu.addClassToGraph(g, mouse_taxon, None)
if research_areas.strip() == '':
research_areas = None
else:
research_areas = 'Research Areas: '+research_areas
strain_type = mouse_taxon
if strain_state == 'ES':
strain_type = stem_cell_class
gu.addIndividualToGraph(
g, strain_id, strain_label, strain_type,
research_areas) # an inst of mouse??
gu.makeLeader(g, strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in the ontology
gu.addClassToGraph(g, pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(self.name, mgi_allele_id, pid,
gu.object_properties['has_phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph(g)
else:
logger.info("Phenotypes and no allele for %s",
strain_id)
if not self.testMode and (
limit is not None and line_counter > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for v in variants:
vl_id = v
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(vl_id, vl_symbol,
示例2: _get_variants
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import makeLeader [as 别名]
#.........这里部分代码省略.........
geno.addChromosomeInstance(
str(chr), build_id, assembly, chr_id)
chrinbuild_id = makeChromID(str(chr), assembly, 'MONARCH')
seqalt_id = ':'.join(('ClinVarVariant', variant_num))
gene_id = None
# they use -1 to indicate unknown gene
if str(gene_num) != '-1' and str(gene_num) != 'more than 10':
gene_id = ':'.join(('NCBIGene', str(gene_num)))
# FIXME there are some "variants" that are actually haplotypes
# probably will get taken care of when we switch to processing
# the xml for example, variant_num = 38562
# but there's no way to tell if it's a haplotype
# in the csv data so the dbsnp or dbvar
# should probably be primary,
# and the variant num be the vslc,
# with each of the dbsnps being added to it
# TODO clinical significance needs to be mapped to
# a list of terms
# first, make the variant:
f = Feature(seqalt_id, allele_name, allele_type_id)
if start != '-' and start.strip() != '':
f.addFeatureStartLocation(start, chrinbuild_id)
if stop != '-' and stop.strip() != '':
f.addFeatureEndLocation(stop, chrinbuild_id)
f.addFeatureToGraph(g)
f.addTaxonToFeature(g, tax_id)
# make the ClinVarVariant the clique leader
gu.makeLeader(g, seqalt_id)
if bandinbuild_id is not None:
f.addSubsequenceOfFeature(g, bandinbuild_id)
# CHECK - this makes the assumption that there is
# only one affected chromosome per variant what happens with
# chromosomal rearrangement variants?
# shouldn't both chromosomes be here?
# add the hgvs as synonyms
if hgvs_c != '-' and hgvs_c.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_c)
if hgvs_p != '-' and hgvs_p.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_p)
# add the dbsnp and dbvar ids as equivalent
if dbsnp_num != '-' and int(dbsnp_num) != -1:
dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
gu.addIndividualToGraph(g, dbsnp_id, None)
gu.addSameIndividual(g, seqalt_id, dbsnp_id)
if dbvar_num != '-':
dbvar_id = 'dbVar:'+dbvar_num
gu.addIndividualToGraph(g, dbvar_id, None)
gu.addSameIndividual(g, seqalt_id, dbvar_id)
# TODO - not sure if this is right... add as xref?
# the rcv is like the combo of the phenotype with the variant
if rcv_nums != '-':
for rcv_num in re.split(r';', rcv_nums):
rcv_id = 'ClinVar:' + rcv_num
gu.addIndividualToGraph(g, rcv_id, None)
gu.addXref(g, seqalt_id, rcv_id)
示例3: _get_var_citations
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import makeLeader [as 别名]
def _get_var_citations(self, limit):
    """
    Add the citations recorded for ClinVar variants to the graph.

    Reads the tab-delimited ``variant_citations`` file located under
    ``self.rawdir``.  Per the notes kept with this parser, the report is
    generated weekly (the first of the week) and connects citations to
    the AlleleID, the VariationID, and either an rs# from dbSNP or an
    nsv from dbVar.  Expected columns, in order:

    * AlleleID: int value (xpath //Measure/@ID)
    * VariationID: ID ClinVar uses to anchor default display
      (xpath //MeasureSet/@ID)
    * rs: rs identifier from dbSNP
    * nsv: nsv identifier from dbVar
    * citation_source: the source of the citation, either PubMed,
      PubMedCentral, or the NCBI Bookshelf
    * citation_id: the identifier used by that source

    For every PubMed or PubMedCentral row a journal-article
    ``Reference`` is added to the graph together with an ``is_about``
    triple pointing from the reference to the ``ClinVarVariant``.
    PubMed ids are additionally passed to ``gu.makeLeader``
    (presumably marking them as clique leaders -- confirm against
    GraphUtils).  Rows from any other source are read but produce no
    triples.

    In test mode only variants listed in ``self.variant_ids`` are
    processed and triples go to ``self.testgraph``; otherwise they go
    to ``self.graph``.

    :param limit: when not None and not in test mode, reading stops
        once the count of data rows exceeds this value
    :return: None
    :raises ValueError: if a data row does not have exactly six columns
    """
    gu = GraphUtils(curie_map.get())
    logger.info("Processing Citations for variants")
    line_counter = 0
    myfile = '/'.join(
        (self.rawdir, self.files['variant_citations']['file']))
    g = self.testgraph if self.testMode else self.graph

    with open(myfile, 'r', encoding="utf8") as f:
        filereader = csv.reader(f, delimiter='\t', quotechar='\"')
        for line in filereader:
            # csv.reader yields an empty list for a blank line; guard
            # before indexing so such lines are skipped like comments
            # instead of raising IndexError.
            if not line or line[0].startswith('#'):
                continue

            # Strict unpacking on purpose: a row with a different
            # column count means the file layout has changed.
            (allele_num, variant_num, rs_num, nsv_num, citation_source,
             citation_id) = line
            line_counter += 1

            if self.testMode and int(variant_num) not in self.variant_ids:
                continue

            if citation_id.strip() == '':
                logger.info(
                    "Skipping blank citation for ClinVarVariant:%s",
                    variant_num)
                continue

            # The citation could be about any combination of the ids in
            # the row; we only know for certain that it concerns the
            # variant, so the reference is linked to the variant alone.
            var_id = 'ClinVarVariant:' + variant_num

            # citation source: PubMed | PubMedCentral | NCBI Bookshelf
            # Only the first two are turned into reference curies.
            ref_id = None
            if citation_source == 'PubMed':
                ref_id = 'PMID:' + citation_id
                gu.makeLeader(g, ref_id)
            elif citation_source == 'PubMedCentral':
                ref_id = 'PMCID:' + citation_id

            if ref_id is not None:
                r = Reference(
                    ref_id, Reference.ref_types['journal_article'])
                r.addRefToGraph(g)
                gu.addTriple(
                    g, ref_id, self.properties['is_about'], var_id)

            # The limit is ignored in test mode so that every test
            # variant gets a chance to be seen.
            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break

    logger.info("Finished processing citations for variants")
示例4: process_catalog
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import makeLeader [as 别名]
#.........这里部分代码省略.........
' [risk allele frequency]'
f = Feature(
rs_id, strongest_snp_risk_allele.strip(),
Feature.types[r'SNP'], snp_description)
if chrom_num != '' and chrom_pos != '':
f.addFeatureStartLocation(chrom_pos, chrom_id)
f.addFeatureEndLocation(chrom_pos, chrom_id)
f.addFeatureToGraph(g)
f.addTaxonToFeature(g, tax_id)
# TODO consider adding allele frequency as property;
# but would need background info to do that
# also want to add other descriptive info about
# the variant from the context
for c in re.split(r';', context):
cid = self._map_variant_type(c.strip())
if cid is not None:
gu.addType(g, rs_id, cid)
# add deprecation information
if merged == 1 and str(snp_id_current.strip()) != '':
# get the current rs_id
current_rs_id = 'dbSNP:'
if not re.match(r'rs', snp_id_current):
current_rs_id += 'rs'
if loc is not None:
loc_to_id_hash[loc].append(current_rs_id)
current_rs_id += str(snp_id_current)
gu.addDeprecatedIndividual(g, rs_id, current_rs_id)
# TODO check on this
# should we add the annotations to the current
# or orig?
gu.makeLeader(g, current_rs_id)
else:
gu.makeLeader(g, rs_id)
# add the feature as a sequence alteration
# affecting various genes
# note that intronic variations don't necessarily list
# the genes such as for rs10448080 FIXME
if snp_gene_nums != '':
for s in re.split(r',', snp_gene_nums):
s = s.strip()
# still have to test for this,
# because sometimes there's a leading comma
if s != '':
gene_id = 'NCBIGene:'+s
geno.addAlleleOfGene(rs_id, gene_id)
# add the up and downstream genes if they are available
if upstream_gene_num != '':
downstream_gene_id = 'NCBIGene:'+downstream_gene_num
gu.addTriple(
g, rs_id,
Feature.object_properties[
r'upstream_of_sequence_of'],
downstream_gene_id)
if downstream_gene_num != '':
upstream_gene_id = 'NCBIGene:'+upstream_gene_num
gu.addTriple(
g, rs_id,
Feature.object_properties[
'downstream_of_sequence_of'],
upstream_gene_id)