当前位置: 首页>>代码示例>>Python>>正文


Python DatasourceFactory.DatasourceFactory类代码示例

本文整理汇总了Python中oncotator.DatasourceFactory.DatasourceFactory的典型用法代码示例。如果您正苦于以下问题:Python DatasourceFactory类的具体用法?Python DatasourceFactory怎么用?Python DatasourceFactory使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DatasourceFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testBasicDatasourceSorting

    def testBasicDatasourceSorting(self):
        """Test that the GAF datasource is sorted before a gene-based datasource"""

        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        
        incorrectSortList = [geneDS, gafDatasource]
        guessSortList =  DatasourceFactory.sortDatasources(incorrectSortList)
        self.assertTrue(guessSortList[1] == geneDS, "Sorting is incorrect.")
        self.assertTrue(len(guessSortList) == 2, "Sorting altered number of datasources (gt: 2): " + str(len(guessSortList)))
开发者ID:Tmacme,项目名称:oncotator,代码行数:10,代码来源:DatasourceFactoryTest.py

示例2: test_overwriting_muts

    def test_overwriting_muts(self):
        """Ensure that (given correct configuration) we can annotate from a datasource, even if the datasource will overwrite an existing mutation."""
        # We will have an input with a "Who" annotation that this datasource will try to write.
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/"
        )
        input_filename = "testdata/maflite/who_alt1_vs_alt2.maflite"
        output_filename = "out/who_alt1_vs_alt2.maf.annotated"
        input_format = "MAFLITE"
        output_format = "TCGAMAF"

        other_opts = {OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True, OptionConstants.NO_PREPEND: True}

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            input_format,
            output_format,
            input_filename,
            output_filename,
            datasource_list=[gene_ds],
            other_opts=other_opts,
        )
        annotator = Annotator()
        annotator.initialize(run_spec)

        annotator.annotate()

        tsv_reader = GenericTsvReader(output_filename)

        for i, line_dict in enumerate(tsv_reader):
            self.assertTrue(line_dict.get("TJ_Data_Who", "") != "Tromokratis")
开发者ID:Yixf-Self,项目名称:oncotator,代码行数:30,代码来源:AnnotatorTest.py

示例3: testESPCoverageAnnotationWithSNPAvgMatch

    def testESPCoverageAnnotationWithSNPAvgMatch(self):
        """
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        tabixIndexedTsvDirName = os.path.join(*["testdata", "small_esp_coverage_avg_ds", "hg19"])
        tabixIndexedTsvDatasource = DatasourceFactory.createDatasource(
            os.path.join(tabixIndexedTsvDirName, "small_esp_coverage_avg_ds.config"), tabixIndexedTsvDirName)

        m1 = MutationData()
        m1.chr = "X"
        m1.start = "100075334"
        m1.end = "100075334"

        m1_annotated = tabixIndexedTsvDatasource.annotate_mutation(m1)
        m1_annotation = m1_annotated.getAnnotation("ESP_AvgAAsampleReadDepth")
        cur_annotation = Annotation(value="75.0", datasourceName="ESP", dataType="Float",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("ESP_TotalAAsamplesCovered")
        cur_annotation = Annotation(value="692.0", datasourceName="ESP", dataType="Float",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("ESP_Chromosome")
        cur_annotation = Annotation(value="X", datasourceName="ESP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")
开发者ID:alexramos,项目名称:oncotator,代码行数:28,代码来源:TabixIndexedTsvDatasourceTest.py

示例4: test_overlapping_single_transcripts

    def test_overlapping_single_transcripts(self):
        base_config_location = "testdata/ensembl/saccer/"

        ensembl_ds = DatasourceFactory.createDatasource(base_config_location + "ensembl.config", base_config_location)
        recs = ensembl_ds.get_overlapping_transcripts("I", "500", "500")
        self.assertTrue(len(recs) == 1)
        self.assertTrue(recs[0].get_gene() == 'YAL069W')
开发者ID:alexramos,项目名称:oncotator,代码行数:7,代码来源:EnsemblTranscriptDatasourceTest.py

示例5: test_no_overwriting_muts

    def test_no_overwriting_muts(self):
        """Ensure that (given configuration that disallows) we cannot annotate from a datasource when a value was specified in the input."""
        # We will have an input with a "Who" annotation that this datasource will try to write.
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/"
        )
        input_filename = "testdata/maflite/who_alt1_vs_alt2.maflite"
        output_filename = "out/who_alt1_vs_alt2.maf.annotated"
        input_format = "MAFLITE"
        output_format = "TCGAMAF"

        other_opts = {OptionConstants.ALLOW_ANNOTATION_OVERWRITING: False, OptionConstants.NO_PREPEND: True}

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            input_format,
            output_format,
            input_filename,
            output_filename,
            datasource_list=[gene_ds],
            other_opts=other_opts,
        )
        annotator = Annotator()
        annotator.initialize(run_spec)

        self.assertRaises(DuplicateAnnotationException, annotator.annotate)
开发者ID:Yixf-Self,项目名称:oncotator,代码行数:25,代码来源:AnnotatorTest.py

示例6: _create_test_ds

    def _create_test_ds(self, input_tsv, dir_name, index_cols):

        base_name = "test_snp_leveldb"

        full_name = dir_name + "/" + base_name

        if os.path.exists(full_name):
            shutil.rmtree(full_name)

        os.makedirs(full_name)

        tsv_reader = GenericTsvReader(input_tsv, commentPrepend="%")
        annotation_cols = copy.copy(tsv_reader.getFieldNames())
        for icol in index_cols:
            if icol in annotation_cols:
                annotation_cols.remove(icol)

        ds_creator = SnpOnlyLevelDbDatasourceCreator()
        ds_creator.createDatasource(full_name, input_tsv, ",".join(index_cols), full_name + "/" + base_name + ".config", "snp_leveldb", base_name, "TEST",
                         "exact", annotation_cols, [])

        config_filename = "out/test_simple_annotate_snp_only_leveldb/test_snp_leveldb/test_snp_leveldb.config"
        ds = DatasourceFactory.createDatasource(os.path.abspath(config_filename), os.path.dirname(config_filename))

        return ds
开发者ID:alexramos,项目名称:oncotator,代码行数:25,代码来源:SnpOnlyLevelDbDatasourceTest.py

示例7: testCreationAndAnnotation

    def testCreationAndAnnotation(self):
        """ Test the datasource creation and then do a simple annotation
        """
        outputFilename = 'out/genericGeneProteinPositionTest.out.tsv'

        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        gppDS = DatasourceFactory.createDatasource("testdata/simple_uniprot_natvar/simple_uniprot_natvar.config", "testdata/simple_uniprot_natvar/")

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDS)
        annotator.addDatasource(gppDS)
        testFilename = annotator.annotate()

        # Make sure that some values were populated
        self.assertTrue(os.path.exists(testFilename))
        tsvReader = GenericTsvReader(testFilename)

        ctr = 0
        for lineDict in tsvReader:
            colName = "UniProt_NatVar_natural_variations"
            self.assertTrue(sorted(lineDict[colName].split("|")) == sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|")), "Annotation value did not match: " + lineDict[colName])
            ctr += 1

        self.assertTrue(ctr == 1, "Number of mutations incorrect (1): " + str(ctr) )
开发者ID:alexramos,项目名称:oncotator,代码行数:26,代码来源:GenericGeneProteinPositionDatasourceTest.py

示例8: testTCGAMAFAsInputAndQuickAnnotate

    def testTCGAMAFAsInputAndQuickAnnotate(self):
        """ Test that we can take in a TCGA MAF (using MAFLITE), do annotating, and still render it properly """
        inputFilename = "testdata/maf/Patient0.maf.annotated"
        tmp = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
        outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
        outputRenderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')
        annotator = Annotator()
        
        annotator.setInputCreator(tmp)
        annotator.setOutputRenderer(outputRenderer)
        ds = DatasourceFactory.createDatasource("testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/")
        annotator.addDatasource(ds)
        annotator.annotate()
        
        statinfo = os.stat(outputFilename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")
        tsvReaderIn = GenericTsvReader(inputFilename)
        tsvReader = GenericTsvReader(outputFilename)
        
        self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")
        self.assertTrue("i_TJ_Data_Why" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Why) from header")
        self.assertTrue("i_TJ_Data_Who" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Who) from header")
        
        ctrOut = 0
        for lineDict in tsvReader:
            ctrOut += 1
        ctrIn = 0
        for lineDict in tsvReaderIn:
            ctrIn += 1
        ctrIn += len(tsvReaderIn.getCommentsAsList())
        ctrOut += len(tsvReader.getCommentsAsList())

        self.assertTrue(ctrOut == (ctrIn + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file.  (In,Out): " + str(ctrIn) + ", " + str(ctrOut))
开发者ID:alexramos,项目名称:oncotator,代码行数:33,代码来源:MafliteInputMutationCreatorTest.py

示例9: testdbNSFPNoRefAltAnnotationWithExactMatch

    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """

        """
        self.logger.info("Initializing dbNSFP")
        tabixIndexedTsvDirName = os.path.join(*["testdata", "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds", "hg19"])
        tabixIndexedTsvDatasource = DatasourceFactory.createDatasource(
            os.path.join(tabixIndexedTsvDirName, "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds.config"),
            tabixIndexedTsvDirName)

        m1 = MutationData()
        m1.chr = "1"
        m1.start = "35140"
        m1.end = "35140"

        m1_annotated = tabixIndexedTsvDatasource.annotate_mutation(m1)
        m1_annotation = m1_annotated.getAnnotation("dbNSFP_codonpos")
        cur_annotation = Annotation(value="1|1|1", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("dbNSFP_refcodon")
        cur_annotation = Annotation(value="TAA|TAA|TAA", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("dbNSFP_cds_strand")
        cur_annotation = Annotation(value="-|-|-", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")
开发者ID:alexramos,项目名称:oncotator,代码行数:30,代码来源:TabixIndexedTsvDatasourceTest.py

示例10: testdbNSFPAnnotationWithMissingOverlapMatch

    def testdbNSFPAnnotationWithMissingOverlapMatch(self):  # SNPs only
        """

        """
        self.logger.info("Initializing dbNSFP")
        tabixIndexedTsvDirName = os.path.join(*["testdata", "dbNSFP_chr1_chr3_100vars_overlap_ds", "hg19"])
        tabixIndexedTsvDatasource = DatasourceFactory.createDatasource(
            os.path.join(tabixIndexedTsvDirName, "dbNSFP_chr1_chr3_100vars_overlap_ds.config"), tabixIndexedTsvDirName)

        m1 = MutationDataFactory.default_create()
        m1.chr = "1"
        m1.start = "35136"
        m1.end = "35137"

        m1_annotated = tabixIndexedTsvDatasource.annotate_mutation(m1)
        m1_annotation = m1_annotated.getAnnotation("dbNSFP_codonpos")
        cur_annotation = Annotation(value="", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("dbNSFP_refcodon")
        cur_annotation = Annotation(value="", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("dbNSFP_cds_strand")
        cur_annotation = Annotation(value="", datasourceName="dbNSFP", dataType="String",
                                    description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")
开发者ID:Tmacme,项目名称:oncotator,代码行数:29,代码来源:TabixIndexedTsvDatasourceTest.py

示例11: testBasicAnnotation

 def testBasicAnnotation(self):
     ''' Annotate from a basic tsv of Genomic positions.  This tests both single- and multiple-nucleotide variants.  The tsv is already installed (i.e. proper config file created).
     '''
     outputFilename = 'out/genericGenomePositionTest.out.tsv'
     
     gpDS = DatasourceFactory.createDatasource("testdata/small_genome_position_tsv_ds/oreganno_trim.config", "testdata/small_genome_position_tsv_ds/")
     
     annotator = Annotator()
     annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite.maf.txt'))
     annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
     annotator.addDatasource(gpDS)
     testFilename = annotator.annotate()
     
     # Make sure that some values were populated
     self.assertTrue(os.path.exists(testFilename))
     tsvReader = GenericTsvReader(testFilename) 
     
     ctr = 1
     # Two overlap, one does not.  Repeat...
     for lineDict in tsvReader:
         if (ctr % 3 == 0):
             self.assertTrue(lineDict["ORegAnno_hg19.oreganno.id"] == '', "Line " + str(ctr) + " should have had blank value, but did not: " + lineDict["ORegAnno_hg19.oreganno.id"])
         else:
             self.assertFalse(lineDict["ORegAnno_hg19.oreganno.id"] == '', "Line " + str(ctr) + " should not have had blank value, but did.")
             self.assertTrue(lineDict["ORegAnno_hg19.oreganno.id"] == 'OREG0013034', "Line " + str(ctr) + " did not have correct value: " + lineDict["ORegAnno_hg19.oreganno.id"])
         ctr = ctr + 1
开发者ID:Tmacme,项目名称:oncotator,代码行数:26,代码来源:GenericGenomicPositionDatasourceTest.py

示例12: testAnnotateListOfMutations

    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec"""

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        m2 = muts.next()
        self.assertTrue(m2.get("gene", None) is not None)
开发者ID:alexramos,项目名称:oncotator,代码行数:26,代码来源:AnnotatorTest.py

示例13: testExampleVcfDBAnnotationWithSNPExactMatch

    def testExampleVcfDBAnnotationWithSNPExactMatch(self):
        """

        """
        tabixIndexedVcfDirName = os.path.join(*["testdata", "vcf_db_exact", "hg19"])
        tabixIndexedVcfDatasource = DatasourceFactory.createDatasource(
            os.path.join(tabixIndexedVcfDirName, "vcf_db_exact.config"), tabixIndexedVcfDirName)

        chrom = "20"
        start = "1110696"
        end = "1110696"
        ref_allele = "A"
        alt_allele = "T"
        build = "hg19"
        m1 = MutUtils.initializeMutFromAttributes(chrom, start, end, ref_allele, alt_allele, build)

        m1_annotated = tabixIndexedVcfDatasource.annotate_mutation(m1)

        m1_annotation = m1_annotated.getAnnotation("ESP_AF")
        cur_annotation = Annotation(value="0.667", datasourceName="ESP", dataType="Float",
                                    description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                                    number=-1)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("ESP_AC")
        cur_annotation = Annotation(value="2,4", datasourceName="ESP", dataType="Integer",
                                    description="Allele Count", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                    number=None)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("ESP_H2")
        cur_annotation = Annotation(value="False", datasourceName="ESP", dataType="Flag",
                                    description="HapMap2 membership", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                    number=0)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        chrom = "20"
        start = "1230237"
        end = "1230237"
        ref_allele = "T"
        alt_allele = "A"
        build = "hg19"
        m1 = MutUtils.initializeMutFromAttributes(chrom, start, end, ref_allele, alt_allele, build)

        m1_annotated = tabixIndexedVcfDatasource.annotate_mutation(m1)

        m1_annotation = m1_annotated.getAnnotation("ESP_NS")
        cur_annotation = Annotation(value="3", datasourceName="ESP", dataType="Integer",
                                    description="Number of Samples With Data",
                                    tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                    number=1)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")

        m1_annotation = m1_annotated.getAnnotation("ESP_AF")
        cur_annotation = Annotation(value="", datasourceName="ESP", dataType="Float",
                                    description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                                    number=-1)
        self.assertTrue(m1_annotation.isEqual(cur_annotation), "Annotations do not match.")
开发者ID:alexramos,项目名称:oncotator,代码行数:58,代码来源:TabixIndexedVcfDatasourceTest.py

示例14: testBasicGeneTSVInit

    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source """

        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        self.assertTrue(geneDS <> None, "gene indexed datasource was None.")
        
        m = MutationDataFactory.default_create()
        m.createAnnotation('gene',"ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
开发者ID:Tmacme,项目名称:oncotator,代码行数:10,代码来源:DatasourceFactoryTest.py

示例15: test_querying_transcripts_by_genes

    def test_querying_transcripts_by_genes(self):
        """Test that we can get all of the transcripts for a given set of genes. """

        datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
        annotator = Annotator()
        for ds in datasource_list:
            annotator.addDatasource(ds)

        # Step 1 get all of the relevant transcripts
        txs = annotator.retrieve_transcripts_by_genes(["MAPK1", "PIK3CA"])
        self.assertTrue(len(txs) > 3)
开发者ID:Yixf-Self,项目名称:oncotator,代码行数:11,代码来源:AnnotatorTest.py


注:本文中的oncotator.DatasourceFactory.DatasourceFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。