當前位置: 首頁>>代碼示例>>Python>>正文


Python GenericTsvReader.getFieldNames方法代碼示例

本文整理匯總了Python中oncotator.utils.GenericTsvReader.GenericTsvReader.getFieldNames方法的典型用法代碼示例。如果您正苦於以下問題:Python GenericTsvReader.getFieldNames方法的具體用法?Python GenericTsvReader.getFieldNames怎麽用?Python GenericTsvReader.getFieldNames使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在oncotator.utils.GenericTsvReader.GenericTsvReader的用法示例。


在下文中一共展示了GenericTsvReader.getFieldNames方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: sortFile

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def sortFile(self, filename, func, length=50000):
        """
        This method sorts the input file and writes out the sorted file to filename.

        :param filename: sorted filename
        :param func: function that converts each row of the input file to an unique, sortable key
        :param length: maximum number of lines in a partition
        """
        reader = GenericTsvReader(filename=self.readfilename, commentPrepend=self.commentPrepend,
                                  delimiter=self.delimiter)
        comments = reader.getComments()

        fieldnames = reader.getFieldNames()
        if fieldnames is None:
            fieldnames = []

        fieldnameIndexes = collections.OrderedDict()
        if fieldnames is not None:
            fieldnameIndexes = collections.OrderedDict([(x, i) for (i, x) in enumerate(fieldnames)])

        iterable = iter(reader.getInputContentFP())
        partitions = self._yieldPartitions(iterable, func, fieldnameIndexes, length)

        with open(name=filename, mode='wb', buffering=64 * 1024) as writer:
            writer.write(comments)
            writer.write(string.join(fieldnames, self.delimiter) + "\n")
            writer.writelines(self._merge(partitions))  # generators are allowed as inputs to writelines function
開發者ID:Tmacme,項目名稱:oncotator,代碼行數:29,代碼來源:TsvFileSorter.py

示例2: testDuplicateAnnotation

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testDuplicateAnnotation(self):
        """
        Tests that the duplicate annotations are parsed correctly.
        """
        inputFilename = os.path.join(*["testdata", "vcf", "example.duplicate_annotation.vcf"])
        outputFilename = os.path.join("out", "example.duplicate_annotation.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)
        fieldnames = tsvReader.getFieldNames()
        self.assertTrue("variant_status" in fieldnames, "variant_status field is missing in the header.")
        self.assertTrue("sample_variant_status" in fieldnames, "sample_variant_status is missing in the header.")

        row = tsvReader.next()
        self.assertTrue("variant_status" in row, "variant_status field is missing in the row.")
        self.assertTrue("sample_variant_status" in row, "sample_variant_status is missing in the row.")

        self.assertEqual("2", row["variant_status"], "Incorrect value of variant_status.")
        self.assertEqual("0", row["sample_variant_status"], "Incorrect value of sample_variant_status")
開發者ID:broadinstitute,項目名稱:oncotator,代碼行數:28,代碼來源:VcfInputMutationCreatorTest.py

示例3: _create_test_ds

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def _create_test_ds(self, input_tsv, dir_name, index_cols):

        base_name = "test_snp_leveldb"

        full_name = dir_name + "/" + base_name

        if os.path.exists(full_name):
            shutil.rmtree(full_name)

        os.makedirs(full_name)

        tsv_reader = GenericTsvReader(input_tsv, commentPrepend="%")
        annotation_cols = copy.copy(tsv_reader.getFieldNames())
        for icol in index_cols:
            if icol in annotation_cols:
                annotation_cols.remove(icol)

        ds_creator = SnpOnlyLevelDbDatasourceCreator()
        ds_creator.createDatasource(full_name, input_tsv, ",".join(index_cols), full_name + "/" + base_name + ".config", "snp_leveldb", base_name, "TEST",
                         "exact", annotation_cols, [])

        config_filename = "out/test_simple_annotate_snp_only_leveldb/test_snp_leveldb/test_snp_leveldb.config"
        ds = DatasourceFactory.createDatasource(os.path.abspath(config_filename), os.path.dirname(config_filename))

        return ds
開發者ID:alexramos,項目名稱:oncotator,代碼行數:27,代碼來源:SnpOnlyLevelDbDatasourceTest.py

示例4: test_full_seg_file_annotations

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def test_full_seg_file_annotations(self):
        """Test that we can read in a seg file, do a proper full annotation, and output as SIMPLE_TSV"""
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_full_seg_file_annotations.tsv"
        db_dir = self.config.get('DEFAULT',"dbDir")
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        run_spec = RunSpecificationFactory.create_run_spec("SEG_FILE", "SIMPLE_TSV", inputFilename, output_filename,
                                                           datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertTrue(rcol in headers)

        for line_dict in output_reader:
            self.assertTrue(line_dict['start'] is not None)
            self.assertTrue(line_dict['start'].strip() != "")
            self.assertTrue(line_dict['end'] is not None)
            self.assertTrue(line_dict['end'].strip() != "")
            self.assertTrue("genes" in line_dict.keys())
            self.assertTrue(len(line_dict["genes"].split(",")) > 0)
開發者ID:alexramos,項目名稱:oncotator,代碼行數:31,代碼來源:MafliteInputMutationCreatorTest.py

示例5: test_basic_rendering

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def test_basic_rendering(self):
        """Test that we can render a basic seg file as a gene list"""
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_basic_rendering.gene_list.tsv"
        db_dir = self.config.get('DEFAULT',"dbDir")
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        run_spec = RunSpecificationFactory.create_run_spec("SEG_FILE", "GENE_LIST", inputFilename, output_filename,
                                                           datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        headers = output_reader.getFieldNames()

        for line_dict in output_reader:
            self.assertTrue(line_dict['segment_start'] is not None)
            self.assertTrue(line_dict['segment_start'].strip() != "")
            self.assertTrue(line_dict['segment_end'] is not None)
            self.assertTrue(line_dict['segment_end'].strip() != "")
            self.assertTrue("gene" in line_dict.keys())
            self.assertTrue(len(line_dict["gene"]) > 0)
            self.assertTrue(float(line_dict["segment_num_probes"]))
            self.assertTrue(line_dict['sample'] == "Patient0")
開發者ID:alexramos,項目名稱:oncotator,代碼行數:30,代碼來源:GeneListOutputRendererTest.py

示例6: test_simple_seg_file_input

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def test_simple_seg_file_input(self):
        """Test that we can read in a seg file, do no annotation, and output as SIMPLE_TSV"""
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_simple_seg_file_input.tsv"
        if os.path.exists(output_filename):
            os.remove(output_filename)
        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
        segs = ic.createMutations()

        i = 1
        for i,seg in enumerate(segs):
            pass

        self.assertTrue((i+1) == 27, "Found %d segments when there should have been 27." % (i+1))

        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
        segs = ic.createMutations()


        outputRenderer = SimpleOutputRenderer(output_filename, '')
        outputRenderer.renderMutations(segs)

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertTrue(rcol in headers)

        for line_dict in output_reader:
            self.assertTrue(line_dict['start'] is not None)
            self.assertTrue(line_dict['start'].strip() != "")
            self.assertTrue(line_dict['end'] is not None)
            self.assertTrue(line_dict['end'].strip() != "")
開發者ID:alexramos,項目名稱:oncotator,代碼行數:37,代碼來源:MafliteInputMutationCreatorTest.py

示例7: testTCGAMAFAsInputAndQuickAnnotate

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testTCGAMAFAsInputAndQuickAnnotate(self):
        """ Test that we can take in a TCGA MAF (using MAFLITE), do annotating, and still render it properly """
        inputFilename = "testdata/maf/Patient0.maf.annotated"
        tmp = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
        outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
        outputRenderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')
        annotator = Annotator()
        
        annotator.setInputCreator(tmp)
        annotator.setOutputRenderer(outputRenderer)
        ds = DatasourceFactory.createDatasource("testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/")
        annotator.addDatasource(ds)
        annotator.annotate()
        
        statinfo = os.stat(outputFilename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")
        tsvReaderIn = GenericTsvReader(inputFilename)
        tsvReader = GenericTsvReader(outputFilename)
        
        self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")
        self.assertTrue("i_TJ_Data_Why" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Why) from header")
        self.assertTrue("i_TJ_Data_Who" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Who) from header")
        
        ctrOut = 0
        for lineDict in tsvReader:
            ctrOut += 1
        ctrIn = 0
        for lineDict in tsvReaderIn:
            ctrIn += 1
        ctrIn += len(tsvReaderIn.getCommentsAsList())
        ctrOut += len(tsvReader.getCommentsAsList())

        self.assertTrue(ctrOut == (ctrIn + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file.  (In,Out): " + str(ctrIn) + ", " + str(ctrOut))
開發者ID:alexramos,項目名稱:oncotator,代碼行數:35,代碼來源:MafliteInputMutationCreatorTest.py

示例8: createDatasource

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def createDatasource(self, destDir, ds_file, index_column_names, configFilename, ds_type, ds_name, ds_version,
                         ds_match_mode, annotation_column_names, indexCols):
        """


        :param destDir:
        :param ds_file:
        :param index_column_names:
        :param configFilename:
        :param ds_type:
        :param ds_name:
        :param ds_version:
        :param ds_match_mode:
        :param annotation_column_names: If blank, assume all in the tsv (minus the index columns)
        :param indexCols: list of the index columns.  Assumed to be five corresponding to chrom, start, end, ref, and alt.
        """
        index_column_names = index_column_names.split(",")

        output_filename = destDir + "/" + ds_name + ".leveldb"
        src_file = os.path.basename(output_filename)
        db = leveldb.LevelDB(output_filename, create_if_missing=True)

        comment_prepend = "#"
        if any([True for icol in index_column_names if icol.startswith("#")]):
            comment_prepend = "%"

        tsv_file = ds_file
        tsv_reader = GenericTsvReader(tsv_file, commentPrepend=comment_prepend)


        if annotation_column_names is None:
            annotation_column_names = copy.copy(tsv_reader.getFieldNames())
            for icol in index_column_names:
                if icol in annotation_column_names:
                    annotation_column_names.remove(icol)

        logging.getLogger(__name__).info("Creating SNP LevelDB for the following index headers: " + str(index_column_names))
        logging.getLogger(__name__).info("Creating SNP LevelDB for the following data headers: " + str(annotation_column_names))

        # Create the config file
        self._createConfigFile(configFilename, src_file, ds_name, ds_version, index_column_names, annotation_columns=annotation_column_names)

        batch = leveldb.WriteBatch()
        for i,line_dict in enumerate(tsv_reader):

            chrom = line_dict[index_column_names[0]]
            start = line_dict[index_column_names[1]]
            end = line_dict[index_column_names[2]]
            ref = line_dict[index_column_names[3]]
            alt = line_dict[index_column_names[4]]

            h = SnpOnlyLevelDbDatasource.generate_hash(chrom, start, end, ref, alt)
            if i % 5000 == 0:
                logging.getLogger(__name__).info("Rendering %d entries" % (i))

            line_list = [line_dict.get(k, "") for k in annotation_column_names]
            db.Put(h, ",".join(line_list))
        db.Write(batch, sync = True)
開發者ID:alexramos,項目名稱:oncotator,代碼行數:60,代碼來源:SnpOnlyLevelDbDatasourceCreator.py

示例9: testExposedColumns

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testExposedColumns(self):
        """Test that columns listed in the config file as exposed do not get the i_ prepend"""
        testOutputFilename = self._annotateTest('testdata/maflite/tiny_maflite.maf.txt', "out/testExposedCols.maf.tsv", self._determine_db_dir())

        # Sanity checks to make sure that the generated maf file is not junk.
        self._validateTcgaMafContents(testOutputFilename)

        # Check the columns, since the input has a couple of exposed columns.
        tsvReader = GenericTsvReader(testOutputFilename)
        headers = tsvReader.getFieldNames()
        headersToCheck = ['t_alt_count', 't_ref_count']
        for h in headersToCheck:
            self.assertFalse(("i_" + h) in headers, "i_ was prepended to " + h)
            self.assertTrue(h in headers, h + " not found.")
開發者ID:Tmacme,項目名稱:oncotator,代碼行數:16,代碼來源:TcgaMafOutputRendererTest.py

示例10: testBasicAnnotation

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testBasicAnnotation(self):
        """ Test annotation from a generic TSV based on a transcript annotation.  Only confirms the proper headers of the output. """
        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
        transcriptDS = DatasourceFactory.createDatasource(
            "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config", "testdata/small_transcript_tsv_ds/"
        )
        outputFilename = "out/genericTranscriptTest.out.tsv"

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(transcriptDS)
        outputFilename = annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)
        headers = tsvReader.getFieldNames()
        self.assertTrue("refseq_test_mRNA_Id" in headers, "refseq_test_mRNA_Id not found in headers: " + str(headers))
        self.assertTrue("refseq_test_prot_Id" in headers, "refseq_test_prot_Id not found in headers: " + str(headers))
開發者ID:jcambry,項目名稱:oncotator,代碼行數:23,代碼來源:GenericTranscriptDatasourceTest.py

示例11: testBasicAnnotation

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testBasicAnnotation(self):
        ''' Annotate from a basic tsv gene file.  Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
        Using trimmed CancerGeneCensus as basis for this test.
        ''' 
        
        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        genesAvailable = ['ABL1','ABL2','ACSL3','AF15Q14','AF1Q','AF3p21','AF5q31','AKAP9','AKT1','AKT2','ALDH2','ALK','ALO17','APC','ARHGEF12','ARHH','ARID1A','ARID2','ARNT','ASPSCR1','ASXL1','ATF1','ATIC','ATM','ATRX','BAP1','BCL10','BCL11A','BCL11B']
        
        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        outputFilename = 'out/genericGeneTest.out.tsv'
        
        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()
        
        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)
        
        fields = tsvReader.getFieldNames()
        self.assertTrue('CGC_Abridged_Other Syndrome/Disease' in fields, "'CGC_Other Syndrome/Disease' was not present in the header")
        self.assertTrue('CGC_Abridged_Mutation Type' in fields, "'CGC_Abridged_Mutation Type' was not present in the header")
        
        ctr = 1
        linesThatShouldBeAnnotated = 0
        for lineDict in tsvReader:
            self.assertTrue('gene' in lineDict.keys())
            if lineDict['gene'] in genesAvailable:
                self.assertTrue(lineDict['CGC_Abridged_GeneID'] <> '', "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: " + str(ctr))
                linesThatShouldBeAnnotated = linesThatShouldBeAnnotated + 1
            ctr = ctr + 1
        self.assertTrue((linesThatShouldBeAnnotated) > 0, "Bad data -- cannot test missed detects.")
開發者ID:dhlbh,項目名稱:oncotator,代碼行數:39,代碼來源:GenericGeneDataSourceTest.py

示例12: testInternalFieldsSkipPrepend

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testInternalFieldsSkipPrepend(self):
        """ Test that no prepending of "i_" is honored."""
        outputFilename = "out/testInternalFields_v2.4.maf.tsv"
        m = MutationDataFactory.default_create()
        m.createAnnotation("TEST", "THIS IS A TEST", "TESTING")

        # The next annotation is real and should not be considered internal.
        m.createAnnotation("gene", "EGFR")

        outputRenderer = TcgaMafOutputRenderer(outputFilename, configFile='configs/tcgaMAF2.4_output.config', other_options={OptionConstants.NO_PREPEND:True})
        outputRenderer.renderMutations(iter([m]), ['No comments'])

        configFile = ConfigUtils.createConfigParser('configs/tcgaMAF2.4_output.config')
        requiredColumns = configFile.get("general", "requiredColumns")
        self.assertTrue("Hugo_Symbol" in requiredColumns, " This test assumes that Hugo_Symbol is a required column in the TCGA MAF.  If not, the test must be modified.")

        statinfo = os.stat(outputFilename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

        tsvReader = GenericTsvReader(outputFilename)
        headers = tsvReader.getFieldNames()
        self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
        self.assertTrue("i_TEST" not in headers, "i_TEST was found in output headers when prepend was disabled.")
        self.assertTrue("TEST" in headers, "TEST was not found in output headers.")
開發者ID:Tmacme,項目名稱:oncotator,代碼行數:26,代碼來源:TcgaMafOutputRendererTest.py

示例13: testInternalFields

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testInternalFields(self):
        """ Test that an annotation that is not listed explicitly in the required or optional columns is rendered with i_ prepended """
        outputFilename = "out/testInternalFields_v2.4.maf.tsv"
        m = MutationData()
        m.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
        
        # The next annotation is real and should not be considered internal.
        m.createAnnotation("gene", "EGFR")
        
        outputRenderer = TcgaMafOutputRenderer(outputFilename, configFile='configs/tcgaMAF2.4_output.config')
        outputRenderer.renderMutations(iter([m]), ['No comments'])
        
        configFile = ConfigUtils.createConfigParser('configs/tcgaMAF2.4_output.config')
        requiredColumns = configFile.get("general", "requiredColumns")
        self.assertTrue("Hugo_Symbol" in requiredColumns, " This test assumes that Hugo_Symbol is a required column in the TCGA MAF.  If not, the test must be modified.")

        statinfo = os.stat(outputFilename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")
        
        tsvReader = GenericTsvReader(outputFilename)
        headers = tsvReader.getFieldNames()
        self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
        self.assertTrue("TEST" not in headers, "TEST was found in output headers when it should have been renamed to i_TEST")
        self.assertTrue("i_TEST" in headers, "i_TEST not found in output headers")
開發者ID:alexramos,項目名稱:oncotator,代碼行數:26,代碼來源:TcgaMafOutputRendererTest.py

示例14: test_rendering_with_exons

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def test_rendering_with_exons(self):
        """Test that we can render a seg file that includes exons at end points"""
        inputFilename = "testdata/seg/Middle_of_exon.seg.txt"
        output_filename = "out/test_exon_seg2.gene_list.tsv"
        db_dir = self.config.get('DEFAULT',"dbDir")
        if os.path.exists(output_filename):
            os.remove(output_filename)

        annotator = Annotator()
        run_spec = RunSpecificationFactory.create_run_spec("SEG_FILE", "GENE_LIST", inputFilename, output_filename,
                                                           datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(run_spec)
        annotator.annotate()

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        headers = output_reader.getFieldNames()

        for line_dict in output_reader:
            self.assertTrue(line_dict['segment_start'] is not None)
            self.assertTrue(line_dict['segment_start'].strip() != "")
            if line_dict['segment_end_gene'] == "MAPK1":
                self.assertTrue(line_dict['segment_end_exon'].strip() == "8+", "Should have been 8+, but saw: %s" % line_dict['segment_end_exon'].strip())
開發者ID:alexramos,項目名稱:oncotator,代碼行數:26,代碼來源:GeneListOutputRendererTest.py

示例15: testBasicAnnotation

# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
    def testBasicAnnotation(self):
        """ Annotate from a basic tsv gene file.  Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
        Using trimmed CancerGeneCensus as basis for this test.
        """

        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        genesAvailable = [
            "ABL1",
            "ABL2",
            "ACSL3",
            "AF15Q14",
            "AF1Q",
            "AF3p21",
            "AF5q31",
            "AKAP9",
            "AKT1",
            "AKT2",
            "ALDH2",
            "ALK",
            "ALO17",
            "APC",
            "ARHGEF12",
            "ARHH",
            "ARID1A",
            "ARID2",
            "ARNT",
            "ASPSCR1",
            "ASXL1",
            "ATF1",
            "ATIC",
            "ATM",
            "ATRX",
            "BAP1",
            "BCL10",
            "BCL11A",
            "BCL11B",
        ]

        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/"
        )
        outputFilename = "out/genericGeneTest.out.tsv"

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()

        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)

        fields = tsvReader.getFieldNames()
        self.assertTrue(
            "CGC_Abridged_Other Syndrome/Disease" in fields,
            "'CGC_Other Syndrome/Disease' was not present in the header",
        )
        self.assertTrue(
            "CGC_Abridged_Mutation Type" in fields, "'CGC_Abridged_Mutation Type' was not present in the header"
        )

        ctr = 1
        linesThatShouldBeAnnotated = 0
        for lineDict in tsvReader:
            self.assertTrue("gene" in lineDict.keys())
            if lineDict["gene"] in genesAvailable:
                self.assertTrue(
                    lineDict["CGC_Abridged_GeneID"] != "",
                    "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: " + str(ctr),
                )
                linesThatShouldBeAnnotated += 1
            ctr += 1
        self.assertTrue((linesThatShouldBeAnnotated) > 0, "Bad data -- cannot test missed detects.")
開發者ID:jcambry,項目名稱:oncotator,代碼行數:79,代碼來源:GenericGeneDataSourceTest.py


注:本文中的oncotator.utils.GenericTsvReader.GenericTsvReader.getFieldNames方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。