本文整理汇总了Python中oncotator.Annotator.Annotator.addDatasource方法的典型用法代码示例。如果您正苦于以下问题：Python Annotator.addDatasource方法的具体用法？Python Annotator.addDatasource怎么用？Python Annotator.addDatasource使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类oncotator.Annotator.Annotator的用法示例。
在下文中一共展示了Annotator.addDatasource方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testFullIndelVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testFullIndelVcf(self):
    """Run an indel maflite all the way through TCGA VCF creation.

    After rendering, every mutation of the input maflite is looked up among
    the positions found in the VCF: a deletion is expected at its maflite
    start minus one, any other variant at its unchanged start.
    """
    inputFilename = "testdata/maflite/Patient0.indel.maf.txt"
    outputFilename = "out/TCGAVCFTest.indel.vcf"

    callStatsIn = MafliteInputMutationCreator(inputFilename)
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # Check that the deletions have position decremented by one from what is present in the maflite
    # Checking that 1 36643701 in the maflite (a deletion) becomes 1 36643700 in the vcf, but that the others are
    # the same.
    maflite_ic = MafliteInputMutationCreator(inputFilename)
    muts = maflite_ic.createMutations()

    # Read all positions up front inside a context manager so the VCF file
    # handle is closed even if a later assertion fails.  Only membership is
    # tested below, so a set is sufficient.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_pos = {int(rec.POS) for rec in vcf.Reader(vcf_fp)}

    for m in muts:
        # If the variant is a deletion, then the vcf position should be the same as maflite minus one.
        # Otherwise, the same.
        is_variant_deletion = m.alt_allele in ("", "-", ".")
        if is_variant_deletion:
            self.assertIn(int(m.start) - 1, vcf_pos, "Deletion was not correct for " + m.chr + ":" + m.start)
        else:
            self.assertIn(int(m.start), vcf_pos, "Insertion was not correct for " + m.chr + ":" + m.start)
示例2: testTCGAMAFAsInputAndQuickAnnotate
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Read a TCGA MAF through the maflite reader, annotate it, and check the rendered MAF.

    The rendered file must be non-empty, mention a version in its comments,
    expose the two new TJ_Data columns, and contain exactly two more lines
    (data rows plus comment lines) than the input file.
    """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    inputCreator = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
    mafRenderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(mafRenderer)
    annotator.addDatasource(
        DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/",
        )
    )
    annotator.annotate()

    renderedSize = os.path.getsize(outputFilename)
    self.assertTrue(renderedSize > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    tsvReaderIn = GenericTsvReader(inputFilename)
    tsvReader = GenericTsvReader(outputFilename)

    versionIndex = tsvReader.getComments().find('#version')
    self.assertTrue(versionIndex != -1, "First line did not specify a version number")

    for newField in ("i_TJ_Data_Why", "i_TJ_Data_Who"):
        self.assertTrue(newField in tsvReader.getFieldNames(), "New field missing (" + newField + ") from header")

    # Count the data rows of each file, output first, then add the comment lines.
    ctrOut = sum(1 for _ in tsvReader)
    ctrIn = sum(1 for _ in tsvReaderIn)
    ctrIn += len(tsvReaderIn.getCommentsAsList())
    ctrOut += len(tsvReader.getCommentsAsList())

    lineCountMessage = (
        "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): "
        + str(ctrIn) + ", " + str(ctrOut)
    )
    self.assertTrue(ctrOut == (ctrIn + 2), lineCountMessage)
示例3: testCreationAndAnnotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testCreationAndAnnotation(self):
    """Create the datasources, annotate a tiny maflite, and check the natural-variation column.

    Exactly one row is expected in the rendered TSV, and its
    UniProt_NatVar_natural_variations value must hold the two expected
    entries in any order.
    """
    outputFilename = 'out/genericGeneProteinPositionTest.out.tsv'
    transcriptDS = TestUtils.createTranscriptProviderDatasource(self.config)
    natvarDS = DatasourceFactory.createDatasource(
        "testdata/simple_uniprot_natvar/simple_uniprot_natvar.config",
        "testdata/simple_uniprot_natvar/",
    )

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
    annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
    for datasource in (transcriptDS, natvarDS):
        annotator.addDatasource(datasource)
    renderedPath = annotator.annotate()

    # The rendered file has to exist before it can be parsed.
    self.assertTrue(os.path.exists(renderedPath))

    colName = "UniProt_NatVar_natural_variations"
    rowsSeen = 0
    for row in GenericTsvReader(renderedPath):
        observed = sorted(row[colName].split("|"))
        expected = sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|"))
        self.assertTrue(observed == expected, "Annotation value did not match: " + row[colName])
        rowsSeen += 1

    self.assertTrue(rowsSeen == 1, "Number of mutations incorrect (1): " + str(rowsSeen))
示例4: testBasicAnnotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testBasicAnnotation(self):
    """Annotate from a basic tsv of genomic positions (single- and multiple-nucleotide variants).

    The datasource tsv is already installed (i.e. its config file exists).
    In the rendered output every third row must have a blank ORegAnno id and
    all other rows must carry the id OREG0013034.
    """
    outputFilename = 'out/genericGenomePositionTest.out.tsv'
    genomePositionDS = DatasourceFactory.createDatasource(
        "testdata/small_genome_position_tsv_ds/oreganno_trim.config",
        "testdata/small_genome_position_tsv_ds/",
    )

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite.maf.txt'))
    annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
    annotator.addDatasource(genomePositionDS)
    renderedPath = annotator.annotate()

    # The rendered file has to exist before it can be parsed.
    self.assertTrue(os.path.exists(renderedPath))

    idColumn = "ORegAnno_hg19.oreganno.id"
    # Two overlap, one does not. Repeat...
    for lineNumber, row in enumerate(GenericTsvReader(renderedPath), 1):
        label = "Line " + str(lineNumber)
        if lineNumber % 3 != 0:
            self.assertFalse(row[idColumn] == '', label + " should not have had blank value, but did.")
            self.assertTrue(row[idColumn] == 'OREG0013034', label + " did not have correct value: " + row[idColumn])
        else:
            self.assertTrue(row[idColumn] == '', label + " should have had blank value, but did not: " + row[idColumn])
示例5: testVersionHeader
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testVersionHeader(self):
    """Check that building the annotator header string does not raise.

    Only minimal checking of the returned string is done: it must mention
    either a Gaf or a GENCODE version, and the word Oncotator.  No input
    creator or output renderer is set; only a transcript datasource is added.
    """
    annotator = Annotator()
    transcriptDS = TestUtils.createTranscriptProviderDatasource(self.config)
    annotator.addDatasource(transcriptDS)

    header = annotator.createHeaderString()

    hasTranscriptVersion = header.find("Gaf ") >= 0 or header.find("GENCODE") >= 0
    self.assertTrue(hasTranscriptVersion, "Could not find Gaf or GENCODE version in header string.")

    hasToolName = header.find("Oncotator") >= 0
    self.assertTrue(hasToolName, "Could not find the word Oncotator in header string.")
示例6: test_simple_transcript_annotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def test_simple_transcript_annotation(self):
    """Test web api backend call /transcript/ by fetching one transcript by id."""
    # http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/
    db_dir = self._determine_db_dir()
    all_datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in all_datasources:
        annotator.addDatasource(datasource)

    transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")

    # The transcript must be found and must belong to MAPK1.
    self.assertTrue(transcript is not None)
    gene_name = transcript.get_gene()
    self.assertTrue(gene_name == "MAPK1")
示例7: test_querying_transcripts_by_genes
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def test_querying_transcripts_by_genes(self):
    """Check that the transcripts for a given set of genes can be retrieved."""
    db_dir = self._determine_db_dir()
    all_datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in all_datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes_of_interest = ["MAPK1", "PIK3CA"]
    found_transcripts = annotator.retrieve_transcripts_by_genes(genes_of_interest)

    # More than three transcripts are expected across the two genes.
    self.assertTrue(len(found_transcripts) > 3)
示例8: test_simple_genes_by_gene_annotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def test_simple_genes_by_gene_annotation(self):
    """Test web api backend call /gene/ for a single gene name."""
    # http://www.broadinstitute.org/oncotator/gene/MAPK1/
    db_dir = self._determine_db_dir()
    all_datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in all_datasources:
        annotator.addDatasource(datasource)

    gene_transcripts = annotator.retrieve_transcripts_by_genes(["MAPK1"])
    self.assertTranscriptsFound(gene_transcripts)

    # Annotating the transcripts of one gene must produce exactly one entry.
    annotated_genes = annotator.annotate_genes_given_txs(gene_transcripts)
    entry_count = len(annotated_genes.keys())
    self.assertTrue(entry_count == 1)
示例9: test_simple_genes_by_region_annotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def test_simple_genes_by_region_annotation(self):
    """Test web api backend call /genes/ for a genomic region.

    Retrieves the transcripts overlapping chr22:22,112,223-22,312,558,
    annotates the genes they belong to, and reads the annotations a caller
    of the web API would use.  Only a few of the values are asserted on; the
    remaining lookups show which annotation names are read.
    """
    # http://www.broadinstitute.org/oncotator/genes/chr22_22112223_22312558/
    # Two genes: chr22:22,112,223-22,312,558
    datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
    annotator = Annotator()
    for ds in datasource_list:
        annotator.addDatasource(ds)

    # Here is what the API would call....
    txs = annotator.retrieve_transcripts_by_region("22", 22112223, 22312558)
    self.assertTranscriptsFound(txs)

    mut_dict = annotator.annotate_genes_given_txs(txs)

    # Each mut will be for a separate gene
    for gene in mut_dict.keys():
        mut = mut_dict[gene]

        alt_accessions = mut["UniProt_alt_uniprot_accessions"].split("|")
        tcgascape_amp_peaks = mut["TCGAScape_Amplification_Peaks"].split("|")
        tcgascape_del_peaks = mut["TCGAScape_Deletion_Peaks"].split("|")
        tumorscape_amp_peaks = mut["TUMORScape_Amplification_Peaks"].split("|")
        tumorscape_del_peaks = mut["TUMORScape_Deletion_Peaks"].split("|")
        full_name = mut["HGNC_Approved Name"]
        cosmic = {
            "tissue_types_affected": mut["COSMIC_Tissue_tissue_types_affected"],
            # NOTE(review): this entry reads the same annotation as the one above, which looks
            # like a copy-paste slip -- confirm the intended COSMIC annotation name before changing it.
            "total_alterations_in_gene": mut["COSMIC_Tissue_tissue_types_affected"],
        }
        # chain() takes each iterable as a separate argument.  Passing one list that wraps
        # both would yield the two lists themselves instead of the individual aliases.
        alt_aliases = list(
            itertools.chain(mut["HGNC_Previous Symbols"].split(", "), mut["HGNC_Synonyms"].split(", "))
        )
        location = mut["HGNC_Chromosome"]
        uniprot_accession = mut["UniProt_uniprot_accession"]

        transcripts = mut["transcripts"]
        self.assertTrue(transcripts is not None)
        self.assertTrue(len(transcripts) > 0)
        self.assertTrue(transcripts.startswith("ENST"))

        strand = mut["strand"]
        klass = mut["class"]

        uniprot_experimentals = mut["UniProt_AA_experimental_info"].split("|")
        self.assertTrue(uniprot_experimentals is not None)
        uniprot_natural_variations = mut["UniProt_AA_natural_variation"].split("|")
        uniprot_regions = mut["UniProt_AA_region"].split("|")
        uniprot_sites = mut["UniProt_AA_site"].split("|")
        uniprot_go_biological_processes = mut["UniProt_GO_Biological_Process"].split("|")
        uniprot_go_cellular_components = mut["UniProt_GO_Cellular_Component"].split("|")
        self.assertTrue(uniprot_go_cellular_components is not None)
        uniprot_go_molecular_functions = mut["UniProt_GO_Molecular_Function"].split("|")
示例10: testSimpleAnnotationWithExampleVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testSimpleAnnotationWithExampleVcf(self):
    """Annotate the example VCF with the transcript datasource and render it as a TSV.

    Nothing is asserted; the test passes when no exception is raised.
    """
    inputFilename = os.path.join("testdata", "vcf", "example.vcf")
    outputFilename = os.path.join(*["out", "simpleVCF.Gaf.annotated.out.tsv"])

    vcfCreator = VcfInputMutationCreator(inputFilename)
    vcfCreator.createMutations()
    tsvRenderer = SimpleOutputRenderer(outputFilename, [])

    annotator = Annotator()
    annotator.setInputCreator(vcfCreator)
    annotator.setOutputRenderer(tsvRenderer)
    transcriptDS = TestUtils.createTranscriptProviderDatasource(self.config)
    annotator.addDatasource(transcriptDS)
    annotator.annotate()
示例11: testDoubleAnnotationError
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testDoubleAnnotationError(self):
    """Annotate a maf that used to trigger a duplicate annotation exception.

    The run must complete without raising, and the rendered file must exist.
    """
    outputFilename = 'out/genericGenomePositionDoubleAnnotationTest.out.tsv'
    genomePositionDS = DatasourceFactory.createDatasource(
        "testdata/small_genome_position_tsv_ds/oreganno_trim.config",
        "testdata/small_genome_position_tsv_ds/",
    )

    annotator = Annotator()
    inputCreator = MafliteInputMutationCreator('testdata/maflite/testDoubleAnnotate.maf.tsv')
    annotator.setInputCreator(inputCreator)
    tsvRenderer = SimpleOutputRenderer(outputFilename)
    annotator.setOutputRenderer(tsvRenderer)
    annotator.addDatasource(genomePositionDS)

    renderedPath = annotator.annotate()

    # Only the existence of the rendered file is verified.
    self.assertTrue(os.path.exists(renderedPath))
示例12: testFullSnpVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testFullSnpVcf(self):
    """Run SNP call stats (maflite) all the way through TCGA VCF creation.

    Only checks that the output file was created.
    """
    outputFilename = "out/TCGAVCFTest.snp.vcf"
    snpCreator = MafliteInputMutationCreator("testdata/Test.call_stats.trim.txt")
    vcfRenderer = TcgaVcfOutputRenderer(outputFilename)
    testDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(snpCreator)
    annotator.setOutputRenderer(vcfRenderer)
    manualAnnotations = self._createManualAnnotations()
    annotator.setManualAnnotations(manualAnnotations)
    for datasource in testDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    outputWasCreated = os.path.exists(outputFilename)
    self.assertTrue(outputWasCreated)
示例13: testAnotherFullSNP
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testAnotherFullSNP(self):
    """Render another SNP call stats file as a TCGA VCF.

    Mainly guards against exceptions; the output must exist and be non-empty.
    """
    inputFile = "testdata/maflite/Another.call_stats.txt"
    outputFilename = "out/Another.call_stats.out.vcf"

    snpCreator = MafliteInputMutationCreator(inputFile)
    vcfRenderer = TcgaVcfOutputRenderer(outputFilename)
    testDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(snpCreator)
    annotator.setOutputRenderer(vcfRenderer)
    manualAnnotations = self._createManualAnnotations()
    annotator.setManualAnnotations(manualAnnotations)
    for datasource in testDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))
    renderedSize = os.path.getsize(outputFilename)
    self.assertTrue(renderedSize > 0, "Generated VCF file (" + outputFilename + ") is empty.")
示例14: test_querying_transcripts_by_region
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def test_querying_transcripts_by_region(self):
    """Test web api backend call /transcripts/ for a genomic region.

    Retrieves the transcripts overlapping 4:50164411-60164411 and, for each
    one, reads the values needed to populate the json described in
    doc/transcript_json_commented.json.txt.  Most values are only read, not
    validated; the refseq ids must not be None and there must be one
    transcript-space coordinate entry per exon.
    """
    datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
    annotator = Annotator()
    for ds in datasource_list:
        annotator.addDatasource(ds)

    txs = annotator.retrieve_transcripts_by_region("4", 50164411, 60164411)
    self.assertTranscriptsFound(txs)

    ## Here is an example of getting enough data to populate the json in doc/transcript_json_commented.json.txt
    # None of these values are validated.
    for tx in txs:
        transcript_id = tx.get_transcript_id()
        tx_start = tx.determine_transcript_start()
        tx_end = tx.determine_transcript_stop()
        gene = tx.get_gene()
        # Named contig/gene_id rather than chr/id so the builtins are not shadowed.
        contig = tx.get_contig()
        # Fetch the exons once and reuse them for the count and both coordinate lists.
        exons = tx.get_exons()
        n_exons = len(exons)
        strand = tx.get_strand()
        footprint_start, footprint_end = tx.determine_cds_footprint()
        klass = tx.get_gene_type()
        cds_start = tx.determine_cds_start()
        cds_end = tx.determine_cds_stop()
        gene_id = tx.get_gene_id()
        genomic_coords = [[exon[0], exon[1]] for exon in exons]
        transcript_coords = [
            [TranscriptProviderUtils.convert_genomic_space_to_exon_space(exon[0] + 1, exon[1], tx)]
            for exon in exons
        ]
        code_len = int(cds_end) - int(cds_start) + 1

        # If refseq datasources are not available, this will fail.
        # Step 2 annotate the transcript, which produces a dummy mutation with the refseq annotations.
        dummy_mut = annotator.annotate_transcript(tx)
        refseq_mRNA_id = dummy_mut["gencode_xref_refseq_mRNA_id"]
        refseq_prot_id = dummy_mut["gencode_xref_refseq_prot_acc"]

        # Description is unavailable right now
        description = ""

        self.assertTrue(refseq_mRNA_id is not None)
        self.assertTrue(refseq_prot_id is not None)
        self.assertTrue(len(transcript_coords) == n_exons)
示例15: testEmptyInput
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import addDatasource [as 别名]
def testEmptyInput(self):
    """Check that an empty maflite still yields an existing, non-empty VCF file."""
    inputFile = "testdata/maflite/empty.maflite"
    outputFilename = "out/empty.vcf"

    emptyCreator = MafliteInputMutationCreator(inputFile)
    vcfRenderer = TcgaVcfOutputRenderer(outputFilename)
    testDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(emptyCreator)
    annotator.setOutputRenderer(vcfRenderer)
    manualAnnotations = self._createManualAnnotations()
    annotator.setManualAnnotations(manualAnnotations)
    for datasource in testDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))
    renderedSize = os.path.getsize(outputFilename)
    self.assertTrue(renderedSize > 0, "Generated VCF file (" + outputFilename + ") is empty.")