本文整理汇总了Python中oncotator.DatasourceFactory.DatasourceFactory类的典型用法代码示例。如果您正苦于以下问题：Python DatasourceFactory类的具体用法？Python DatasourceFactory怎么用？Python DatasourceFactory使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DatasourceFactory类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testBasicDatasourceSorting
def testBasicDatasourceSorting(self):
    """Check that sorting puts the gene-based datasource after the transcript provider.

    The two datasources are handed to ``DatasourceFactory.sortDatasources``
    in the wrong order (gene TSV first) and the sorted list is inspected.
    """
    transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
    gene_tsv_ds = DatasourceFactory.createDatasource(
        "testdata/small_tsv_ds/small_tsv_ds.config",
        "testdata/small_tsv_ds/",
    )

    # Deliberately out of order: the gene datasource precedes the transcript provider.
    sorted_ds = DatasourceFactory.sortDatasources([gene_tsv_ds, transcript_ds])

    self.assertTrue(sorted_ds[1] == gene_tsv_ds, "Sorting is incorrect.")
    num_sorted = len(sorted_ds)
    self.assertTrue(
        num_sorted == 2,
        "Sorting altered number of datasources (gt: 2): " + str(num_sorted),
    )
示例2: test_overwriting_muts
def test_overwriting_muts(self):
    """Annotate with overwriting allowed and check the rendered "Who" values.

    The maflite input already carries a "Who" annotation that the gene
    datasource also tries to write. With ALLOW_ANNOTATION_OVERWRITING set,
    the run must complete and no output row may have a "TJ_Data_Who" value
    of "Tromokratis".
    """
    ds_dir = "testdata/thaga_janakari_gene_ds/hg19/"
    gene_ds = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config", ds_dir
    )

    annotated_path = "out/who_alt1_vs_alt2.maf.annotated"
    options = {
        OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
        OptionConstants.NO_PREPEND: True,
    }
    run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
        "MAFLITE",
        "TCGAMAF",
        "testdata/maflite/who_alt1_vs_alt2.maflite",
        annotated_path,
        datasource_list=[gene_ds],
        other_opts=options,
    )

    annotator = Annotator()
    annotator.initialize(run_spec)
    annotator.annotate()

    # Every rendered row is inspected; a missing column counts as "".
    for row in GenericTsvReader(annotated_path):
        self.assertTrue(row.get("TJ_Data_Who", "") != "Tromokratis")
示例3: testESPCoverageAnnotationWithSNPAvgMatch
def testESPCoverageAnnotationWithSNPAvgMatch(self):
    """Annotate one chrX position from the small ESP coverage datasource.

    Three annotations produced for X:100075334 are compared against the
    expected ``Annotation`` objects via ``isEqual``.
    """
    self.logger.info("Initializing ESP6500SI-V2 Coverage")
    ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
    esp_ds = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "small_esp_coverage_avg_ds.config"), ds_dir
    )

    mut = MutationData()
    mut.chr = "X"
    mut.start = "100075334"
    mut.end = "100075334"
    annotated = esp_ds.annotate_mutation(mut)

    # (annotation name, expected value, expected data type)
    expectations = (
        ("ESP_AvgAAsampleReadDepth", "75.0", "Float"),
        ("ESP_TotalAAsamplesCovered", "692.0", "Float"),
        ("ESP_Chromosome", "X", "String"),
    )
    for name, value, data_type in expectations:
        observed = annotated.getAnnotation(name)
        reference = Annotation(
            value=value,
            datasourceName="ESP",
            dataType=data_type,
            description="",
            tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
            number=None,
        )
        self.assertTrue(observed.isEqual(reference), "Annotations do not match.")
示例4: test_overlapping_single_transcripts
def test_overlapping_single_transcripts(self):
    """Query the saccer ensembl test datasource at I:500-500.

    Exactly one overlapping transcript is expected, and its gene must be
    'YAL069W'.
    """
    ds_dir = "testdata/ensembl/saccer/"
    ensembl_ds = DatasourceFactory.createDatasource(ds_dir + "ensembl.config", ds_dir)

    overlapping = ensembl_ds.get_overlapping_transcripts("I", "500", "500")

    num_found = len(overlapping)
    self.assertTrue(num_found == 1)
    first_gene = overlapping[0].get_gene()
    self.assertTrue(first_gene == "YAL069W")
示例5: test_no_overwriting_muts
def test_no_overwriting_muts(self):
    """Annotating must fail when overwriting is disallowed and the input already has a value.

    The maflite input carries a "Who" annotation that the gene datasource
    also tries to write. With ALLOW_ANNOTATION_OVERWRITING set to False,
    ``annotate`` is expected to raise ``DuplicateAnnotationException``.
    """
    ds_dir = "testdata/thaga_janakari_gene_ds/hg19/"
    gene_ds = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config", ds_dir
    )

    options = {
        OptionConstants.ALLOW_ANNOTATION_OVERWRITING: False,
        OptionConstants.NO_PREPEND: True,
    }
    run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
        "MAFLITE",
        "TCGAMAF",
        "testdata/maflite/who_alt1_vs_alt2.maflite",
        "out/who_alt1_vs_alt2.maf.annotated",
        datasource_list=[gene_ds],
        other_opts=options,
    )

    annotator = Annotator()
    annotator.initialize(run_spec)
    self.assertRaises(DuplicateAnnotationException, annotator.annotate)
示例6: _create_test_ds
def _create_test_ds(self, input_tsv, dir_name, index_cols):
    """Build a SNP-only LevelDB test datasource from a TSV and load it.

    A fresh ``<dir_name>/test_snp_leveldb`` directory is created (any
    existing one is removed first), the datasource and its config file are
    written there, and the datasource is then instantiated from that config.

    :param input_tsv: path of the TSV to index; lines starting with "%" are
        read as comments.
    :param dir_name: parent directory in which the datasource directory is
        created.
    :param index_cols: column names used as the index; all other TSV
        columns become annotation columns.
    :return: the datasource created by ``DatasourceFactory.createDatasource``.
    """
    base_name = "test_snp_leveldb"
    full_name = dir_name + "/" + base_name
    # Load from the same config the creator writes below. A hard-coded
    # "out/test_simple_annotate_snp_only_leveldb/..." path was used before,
    # which ignored dir_name and could load a stale or missing datasource.
    config_filename = full_name + "/" + base_name + ".config"

    # Start from an empty datasource directory.
    if os.path.exists(full_name):
        shutil.rmtree(full_name)
    os.makedirs(full_name)

    # Annotation columns are the TSV's field names minus the index columns.
    tsv_reader = GenericTsvReader(input_tsv, commentPrepend="%")
    annotation_cols = copy.copy(tsv_reader.getFieldNames())
    for icol in index_cols:
        if icol in annotation_cols:
            annotation_cols.remove(icol)

    ds_creator = SnpOnlyLevelDbDatasourceCreator()
    ds_creator.createDatasource(
        full_name,
        input_tsv,
        ",".join(index_cols),
        config_filename,
        "snp_leveldb",
        base_name,
        "TEST",
        "exact",
        annotation_cols,
        [],
    )

    return DatasourceFactory.createDatasource(
        os.path.abspath(config_filename), os.path.dirname(config_filename)
    )
示例7: testCreationAndAnnotation
def testCreationAndAnnotation(self):
    """Create the UniProt natural-variation test datasource and run a simple annotation.

    The rendered TSV must exist, contain exactly one row, and that row's
    "UniProt_NatVar_natural_variations" column must hold the two expected
    variations (compared order-independently after splitting on "|").
    """
    out_path = 'out/genericGeneProteinPositionTest.out.tsv'
    transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
    natvar_ds = DatasourceFactory.createDatasource(
        "testdata/simple_uniprot_natvar/simple_uniprot_natvar.config",
        "testdata/simple_uniprot_natvar/",
    )

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
    annotator.setOutputRenderer(SimpleOutputRenderer(out_path))
    for datasource in (transcript_ds, natvar_ds):
        annotator.addDatasource(datasource)
    rendered_path = annotator.annotate()

    # Make sure that some values were populated
    self.assertTrue(os.path.exists(rendered_path))

    column = "UniProt_NatVar_natural_variations"
    expected_variations = sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|"))
    num_rows = 0
    for row in GenericTsvReader(rendered_path):
        self.assertTrue(
            sorted(row[column].split("|")) == expected_variations,
            "Annotation value did not match: " + row[column],
        )
        num_rows += 1
    self.assertTrue(num_rows == 1, "Number of mutations incorrect (1): " + str(num_rows))
示例8: testTCGAMAFAsInputAndQuickAnnotate
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Read a TCGA MAF through the MAFLITE input creator, annotate it, and render it as a TCGA MAF.

    Checks that the output is non-empty, carries a '#version' comment and
    the two new i_TJ_Data_* columns, and has exactly two more lines
    (rows plus comments) than the input.
    """
    source_maf = "testdata/maf/Patient0.maf.annotated"
    rendered_maf = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"

    input_creator = MafliteInputMutationCreator(source_maf, 'configs/maflite_input.config')
    renderer = TcgaMafOutputRenderer(rendered_maf, 'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(renderer)
    annotator.addDatasource(
        DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/",
        )
    )
    annotator.annotate()

    rendered_size = os.stat(rendered_maf).st_size
    self.assertTrue(rendered_size > 0, "Generated MAF file (" + rendered_maf + ") is empty.")

    in_reader = GenericTsvReader(source_maf)
    out_reader = GenericTsvReader(rendered_maf)

    self.assertTrue(out_reader.getComments().find('#version') != -1, "First line did not specify a version number")
    for new_field in ("i_TJ_Data_Why", "i_TJ_Data_Who"):
        self.assertTrue(
            new_field in out_reader.getFieldNames(),
            "New field missing (" + new_field + ") from header",
        )

    # Count data rows first, then add the comment lines of each file.
    num_out = sum(1 for _ in out_reader)
    num_in = sum(1 for _ in in_reader)
    num_in += len(in_reader.getCommentsAsList())
    num_out += len(out_reader.getCommentsAsList())

    self.assertTrue(
        num_out == (num_in + 2),
        "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): "
        + str(num_in) + ", " + str(num_out),
    )
示例9: testdbNSFPNoRefAltAnnotationWithExactMatch
def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
    """Annotate position 1:35140 from the exact-match (no ref/alt) dbNSFP test datasource.

    Three dbNSFP annotations are compared against expected pipe-delimited
    string values via ``isEqual``.
    """
    self.logger.info("Initializing dbNSFP")
    ds_dir = os.path.join("testdata", "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds", "hg19")
    dbnsfp_ds = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds.config"),
        ds_dir,
    )

    mut = MutationData()
    mut.chr = "1"
    mut.start = "35140"
    mut.end = "35140"
    annotated = dbnsfp_ds.annotate_mutation(mut)

    # (annotation name, expected value)
    expectations = (
        ("dbNSFP_codonpos", "1|1|1"),
        ("dbNSFP_refcodon", "TAA|TAA|TAA"),
        ("dbNSFP_cds_strand", "-|-|-"),
    )
    for name, value in expectations:
        observed = annotated.getAnnotation(name)
        reference = Annotation(
            value=value,
            datasourceName="dbNSFP",
            dataType="String",
            description="",
            tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
            number=None,
        )
        self.assertTrue(observed.isEqual(reference), "Annotations do not match.")
示例10: testdbNSFPAnnotationWithMissingOverlapMatch
def testdbNSFPAnnotationWithMissingOverlapMatch(self):  # SNPs only
    """Annotate range 1:35136-35137 from the overlap-match dbNSFP test datasource.

    All three checked dbNSFP annotations are expected to be present with
    an empty-string value.
    """
    self.logger.info("Initializing dbNSFP")
    ds_dir = os.path.join("testdata", "dbNSFP_chr1_chr3_100vars_overlap_ds", "hg19")
    dbnsfp_ds = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "dbNSFP_chr1_chr3_100vars_overlap_ds.config"), ds_dir
    )

    mut = MutationDataFactory.default_create()
    mut.chr = "1"
    mut.start = "35136"
    mut.end = "35137"
    annotated = dbnsfp_ds.annotate_mutation(mut)

    for name in ("dbNSFP_codonpos", "dbNSFP_refcodon", "dbNSFP_cds_strand"):
        observed = annotated.getAnnotation(name)
        reference = Annotation(
            value="",
            datasourceName="dbNSFP",
            dataType="String",
            description="",
            tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
            number=None,
        )
        self.assertTrue(observed.isEqual(reference), "Annotations do not match.")
示例11: testBasicAnnotation
def testBasicAnnotation(self):
    """Annotate from an already-installed genomic-position TSV datasource.

    Covers both single- and multiple-nucleotide variants. In the rendered
    output every third row must have a blank ORegAnno id; every other row
    must carry 'OREG0013034'.
    """
    out_path = 'out/genericGenomePositionTest.out.tsv'
    oreganno_ds = DatasourceFactory.createDatasource(
        "testdata/small_genome_position_tsv_ds/oreganno_trim.config",
        "testdata/small_genome_position_tsv_ds/",
    )

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite.maf.txt'))
    annotator.setOutputRenderer(SimpleOutputRenderer(out_path))
    annotator.addDatasource(oreganno_ds)
    rendered_path = annotator.annotate()

    # Make sure that some values were populated
    self.assertTrue(os.path.exists(rendered_path))

    id_column = "ORegAnno_hg19.oreganno.id"
    # Two overlap, one does not. Repeat...
    for line_no, row in enumerate(GenericTsvReader(rendered_path), 1):
        if line_no % 3 != 0:
            self.assertFalse(
                row[id_column] == '',
                "Line " + str(line_no) + " should not have had blank value, but did.",
            )
            self.assertTrue(
                row[id_column] == 'OREG0013034',
                "Line " + str(line_no) + " did not have correct value: " + row[id_column],
            )
        else:
            self.assertTrue(
                row[id_column] == '',
                "Line " + str(line_no) + " should have had blank value, but did not: " + row[id_column],
            )
示例12: testAnnotateListOfMutations
def testAnnotateListOfMutations(self):
    """Test that we can initialize an Annotator, without an input or output and then feed mutations,
    one at a time... using a runspec"""
    # Locate the datasource directory and create a runspec
    dbDir = self.config.get("DEFAULT", "dbDir")
    ds = DatasourceFactory.createDatasources(dbDir)
    runSpec = RunSpecification()
    # No input creator and no output renderer: mutations are fed in directly below.
    runSpec.initialize(None, None, datasources=ds)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(runSpec)

    m = MutationData()
    m.chr = "1"
    m.start = "12941796"
    m.end = "12941796"
    m.alt_allele = "G"
    m.ref_allele = "T"

    muts = annotator.annotate_mutations([m])

    # Use the builtin next() rather than the Python-2-only ``.next()``
    # method so the iterator is advanced the same way on Python 2.6+ and 3.
    m2 = next(muts)
    self.assertTrue(m2.get("gene", None) is not None)
示例13: testExampleVcfDBAnnotationWithSNPExactMatch
def testExampleVcfDBAnnotationWithSNPExactMatch(self):
    """Annotate two chr20 SNPs from the exact-match tabix-indexed VCF test datasource.

    For 20:1110696 A>T the ESP_AF, ESP_AC and ESP_H2 annotations are
    checked; for 20:1230237 T>A the ESP_NS annotation and an empty ESP_AF
    are checked. Each comparison uses ``Annotation.isEqual``.
    """
    ds_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
    vcf_ds = DatasourceFactory.createDatasource(os.path.join(ds_dir, "vcf_db_exact.config"), ds_dir)

    not_split = [TagConstants.INFO, TagConstants.NOT_SPLIT]
    split = [TagConstants.INFO, TagConstants.SPLIT]

    def check(annotated, name, value, data_type, description, tags, number):
        # Fetch the observed annotation first, then build the reference to compare against.
        observed = annotated.getAnnotation(name)
        reference = Annotation(
            value=value,
            datasourceName="ESP",
            dataType=data_type,
            description=description,
            tags=list(tags),
            number=number,
        )
        self.assertTrue(observed.isEqual(reference), "Annotations do not match.")

    # First SNP: 20:1110696 A>T
    first = vcf_ds.annotate_mutation(
        MutUtils.initializeMutFromAttributes("20", "1110696", "1110696", "A", "T", "hg19")
    )
    check(first, "ESP_AF", "0.667", "Float", "Allele Frequency", split, -1)
    check(first, "ESP_AC", "2,4", "Integer", "Allele Count", not_split, None)
    check(first, "ESP_H2", "False", "Flag", "HapMap2 membership", not_split, 0)

    # Second SNP: 20:1230237 T>A
    second = vcf_ds.annotate_mutation(
        MutUtils.initializeMutFromAttributes("20", "1230237", "1230237", "T", "A", "hg19")
    )
    check(second, "ESP_NS", "3", "Integer", "Number of Samples With Data", not_split, 1)
    check(second, "ESP_AF", "", "Float", "Allele Frequency", split, -1)
示例14: testBasicGeneTSVInit
def testBasicGeneTSVInit(self):
    """Make sure that we can initialize a simple tsv data source.

    Creates the small gene-indexed TSV datasource, annotates a default
    mutation whose 'gene' annotation is "ABL1", and checks the resulting
    'CGC_Abridged_Name' value.
    """
    geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
    # Identity check against None: the ``<>`` operator was removed in
    # Python 3, and an equality comparison could be intercepted by a
    # custom __ne__/__eq__ on the datasource.
    self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

    m = MutationDataFactory.default_create()
    m.createAnnotation('gene', "ABL1")
    m = geneDS.annotate_mutation(m)
    self.assertTrue(
        m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1",
        "Test gene TSV datasource did not annotate properly.",
    )
示例15: test_querying_transcripts_by_genes
def test_querying_transcripts_by_genes(self):
    """Check that transcripts can be retrieved for a given set of genes.

    All hg19 datasources from the db directory are added to an Annotator,
    which is then asked for the transcripts of MAPK1 and PIK3CA; more than
    three transcripts are expected.
    """
    db_dir = self._determine_db_dir()
    all_datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in all_datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes = ["MAPK1", "PIK3CA"]
    transcripts = annotator.retrieve_transcripts_by_genes(genes)
    num_transcripts = len(transcripts)
    self.assertTrue(num_transcripts > 3)