當前位置: 首頁>>代碼示例>>Python>>正文


Python GenericTsvReader.GenericTsvReader類代碼示例

本文整理匯總了Python中oncotator.utils.GenericTsvReader.GenericTsvReader的典型用法代碼示例。如果您正苦於以下問題:Python GenericTsvReader類的具體用法?Python GenericTsvReader怎麼用?Python GenericTsvReader使用的例子?那麼, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了GenericTsvReader類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_full_seg_file_annotations

    def test_full_seg_file_annotations(self):
        """Annotate a seg file in segment mode and sanity-check the SIMPLE_TSV output.

        The rendered file is re-read with GenericTsvReader; the expected
        headers must be present and every row must carry non-blank start/end
        values plus a "genes" column.
        """
        seg_input = "testdata/seg/Patient0.seg.txt"
        tsv_output = "out/test_full_seg_file_annotations.tsv"
        datasource_dir = self.config.get('DEFAULT', "dbDir")

        # Remove any output left behind by a previous run.
        if os.path.exists(tsv_output):
            os.remove(tsv_output)

        annotator = Annotator()
        spec = RunSpecificationFactory.create_run_spec(
            "SEG_FILE", "SIMPLE_TSV", seg_input, tsv_output,
            datasourceDir=datasource_dir,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(spec)
        annotator.annotate()

        # Read the rendered file back and inspect it.
        reader = GenericTsvReader(tsv_output)

        header_names = reader.getFieldNames()
        for expected_header in ("Sample", "Num_Probes", "Segment_Mean"):
            self.assertTrue(expected_header in header_names)

        for row in reader:
            for coordinate in ('start', 'end'):
                self.assertTrue(row[coordinate] is not None)
                self.assertTrue(row[coordinate].strip() != "")
            self.assertTrue("genes" in row.keys())
            self.assertTrue(len(row["genes"].split(",")) > 0)
開發者ID:alexramos,項目名稱:oncotator,代碼行數:29,代碼來源:MafliteInputMutationCreatorTest.py

示例2: sortFile

    def sortFile(self, filename, func, length=50000):
        """
        Sort the file at self.readfilename and write the sorted result to filename.

        The comments and the header row of the input are written first, followed by
        the lines produced by merging the partitions from self._yieldPartitions.

        :param filename: sorted filename
        :param func: function that converts each row of the input file to an unique, sortable key
        :param length: maximum number of lines in a partition
        """
        reader = GenericTsvReader(filename=self.readfilename, commentPrepend=self.commentPrepend,
                                  delimiter=self.delimiter)
        try:
            comments = reader.getComments()

            # A reader without a header is treated as having no columns.
            fieldnames = reader.getFieldNames()
            if fieldnames is None:
                fieldnames = []

            # Map every column name to its position so that rows can be addressed by name.
            fieldnameIndexes = collections.OrderedDict((x, i) for (i, x) in enumerate(fieldnames))

            iterable = iter(reader.getInputContentFP())
            partitions = self._yieldPartitions(iterable, func, fieldnameIndexes, length)

            with open(filename, 'wb', 64 * 1024) as writer:
                writer.write(comments)
                writer.write(self.delimiter.join(fieldnames) + "\n")
                writer.writelines(self._merge(partitions))  # generators are allowed as inputs to writelines function
        finally:
            # Release the input file handle even when sorting or writing fails.
            reader.close()

示例3: test_basic_rendering

    def test_basic_rendering(self):
        """Render the Patient0 seg file as a GENE_LIST and validate every output row."""
        seg_input = "testdata/seg/Patient0.seg.txt"
        gene_list_output = "out/test_basic_rendering.gene_list.tsv"
        datasource_dir = self.config.get('DEFAULT', "dbDir")

        # Remove any output left behind by a previous run.
        if os.path.exists(gene_list_output):
            os.remove(gene_list_output)

        annotator = Annotator()
        spec = RunSpecificationFactory.create_run_spec(
            "SEG_FILE", "GENE_LIST", seg_input, gene_list_output,
            datasourceDir=datasource_dir,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
        annotator.initialize(spec)
        annotator.annotate()

        # Read the rendered file back and inspect it.
        reader = GenericTsvReader(gene_list_output)

        # The header is fetched, but no assertion is made on it.
        reader.getFieldNames()

        for row in reader:
            for boundary in ('segment_start', 'segment_end'):
                self.assertTrue(row[boundary] is not None)
                self.assertTrue(row[boundary].strip() != "")
            self.assertTrue("gene" in row.keys())
            self.assertTrue(len(row["gene"]) > 0)
            self.assertTrue(float(row["segment_num_probes"]))
            self.assertTrue(row['sample'] == "Patient0")

示例4: test_simple_seg_file_input

    def test_simple_seg_file_input(self):
        """Test that we can read in a seg file, do no annotation, and output as SIMPLE_TSV"""
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_simple_seg_file_input.tsv"
        if os.path.exists(output_filename):
            os.remove(output_filename)
        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')

        # Count by iterating, so that an empty result is reported as 0 segments
        # rather than a number derived from a pre-seeded loop index.
        num_segs = sum(1 for _ in ic.createMutations())
        self.assertTrue(num_segs == 27, "Found %d segments when there should have been 27." % num_segs)

        # The first result has been iterated over for counting; a fresh creator
        # supplies the segments that are rendered.
        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
        segs = ic.createMutations()

        outputRenderer = SimpleOutputRenderer(output_filename, '')
        outputRenderer.renderMutations(segs)

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertTrue(rcol in headers)

        for line_dict in output_reader:
            self.assertTrue(line_dict['start'] is not None)
            self.assertTrue(line_dict['start'].strip() != "")
            self.assertTrue(line_dict['end'] is not None)
            self.assertTrue(line_dict['end'].strip() != "")

示例5: testDuplicateAnnotation

    def testDuplicateAnnotation(self):
        """
        Tests that the duplicate annotations are parsed correctly.

        The VCF is rendered to a simple TSV; both the header and the first row must
        contain variant_status and sample_variant_status with the expected values.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
        outputFilename = os.path.join("out", "example.duplicate_annotation.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)
        fieldnames = tsvReader.getFieldNames()
        self.assertIn("variant_status", fieldnames, "variant_status field is missing in the header.")
        self.assertIn("sample_variant_status", fieldnames, "sample_variant_status is missing in the header.")

        # Use the builtin rather than calling the iterator method directly.
        row = next(tsvReader)
        self.assertIn("variant_status", row, "variant_status field is missing in the row.")
        self.assertIn("sample_variant_status", row, "sample_variant_status is missing in the row.")

        self.assertEqual("2", row["variant_status"], "Incorrect value of variant_status.")
        self.assertEqual("0", row["sample_variant_status"], "Incorrect value of sample_variant_status")

示例6: _create_test_ds

    def _create_test_ds(self, input_tsv, dir_name, index_cols):
        """Build a SNP-only LevelDB test datasource from a TSV file and load it.

        :param input_tsv: TSV file handed to the datasource creator
        :param dir_name: parent directory; the datasource is (re)created in dir_name/test_snp_leveldb
        :param index_cols: list of index column names; the remaining TSV columns are used as annotation columns
        :return: the datasource created from the generated config file
        """
        base_name = "test_snp_leveldb"
        full_name = dir_name + "/" + base_name

        # Always start from an empty datasource directory.
        if os.path.exists(full_name):
            shutil.rmtree(full_name)
        os.makedirs(full_name)

        tsv_reader = GenericTsvReader(input_tsv, commentPrepend="%")
        annotation_cols = copy.copy(tsv_reader.getFieldNames())
        for icol in index_cols:
            if icol in annotation_cols:
                annotation_cols.remove(icol)

        # Derive the config location from dir_name so that the config that is
        # loaded below is the one that was just written.
        config_filename = full_name + "/" + base_name + ".config"

        ds_creator = SnpOnlyLevelDbDatasourceCreator()
        ds_creator.createDatasource(full_name, input_tsv, ",".join(index_cols), config_filename, "snp_leveldb",
                                    base_name, "TEST", "exact", annotation_cols, [])

        ds = DatasourceFactory.createDatasource(os.path.abspath(config_filename), os.path.dirname(config_filename))

        return ds

示例7: createDatasource

    def createDatasource(self, destDir, ds_file, index_column_names, configFilename, ds_type, ds_name, ds_version,
                         ds_match_mode, annotation_column_names, indexCols):
        """
        Create a SNP LevelDB datasource (database plus config file) from a TSV file.

        Each TSV row is stored under the hash of its five index values; the stored
        value is the comma-joined list of the row's annotation column values.

        :param destDir: directory in which the "<ds_name>.leveldb" database is created
        :param ds_file: input TSV file
        :param index_column_names: comma-separated index column names.  The first five are used, in the order
            chrom, start, end, ref, and alt.
        :param configFilename: config file to create
        :param ds_type: not used by this method
        :param ds_name: datasource name; also determines the database filename
        :param ds_version: datasource version written to the config file
        :param ds_match_mode: not used by this method
        :param annotation_column_names: If None, assume all in the tsv (minus the index columns)
        :param indexCols: not used by this method
        """
        index_column_names = index_column_names.split(",")

        output_filename = destDir + "/" + ds_name + ".leveldb"
        src_file = os.path.basename(output_filename)
        db = leveldb.LevelDB(output_filename, create_if_missing=True)

        # Switch the comment marker when an index header itself starts with "#"
        # (presumably so that the header line is not read as a comment -- confirm).
        comment_prepend = "#"
        if any(icol.startswith("#") for icol in index_column_names):
            comment_prepend = "%"

        tsv_reader = GenericTsvReader(ds_file, commentPrepend=comment_prepend)

        if annotation_column_names is None:
            annotation_column_names = copy.copy(tsv_reader.getFieldNames())
            for icol in index_column_names:
                if icol in annotation_column_names:
                    annotation_column_names.remove(icol)

        logger = logging.getLogger(__name__)
        logger.info("Creating SNP LevelDB for the following index headers: " + str(index_column_names))
        logger.info("Creating SNP LevelDB for the following data headers: " + str(annotation_column_names))

        # Create the config file
        self._createConfigFile(configFilename, src_file, ds_name, ds_version, index_column_names, annotation_columns=annotation_column_names)

        # Rows are queued on the batch and flushed periodically, so that the batch
        # is actually used without holding the whole file in memory.
        batch_size = 5000
        batch = leveldb.WriteBatch()
        for i, line_dict in enumerate(tsv_reader):

            chrom = line_dict[index_column_names[0]]
            start = line_dict[index_column_names[1]]
            end = line_dict[index_column_names[2]]
            ref = line_dict[index_column_names[3]]
            alt = line_dict[index_column_names[4]]

            h = SnpOnlyLevelDbDatasource.generate_hash(chrom, start, end, ref, alt)
            if i % 5000 == 0:
                logger.info("Rendering %d entries", i)

            line_list = [line_dict.get(k, "") for k in annotation_column_names]
            batch.Put(h, ",".join(line_list))

            if (i + 1) % batch_size == 0:
                db.Write(batch)
                batch = leveldb.WriteBatch()

        # Flush whatever is left and force it to disk.
        db.Write(batch, sync=True)

示例8: testExposedColumns

    def testExposedColumns(self):
        """Verify that columns marked as exposed in the config are rendered without the i_ prefix."""
        rendered_maf = self._annotateTest('testdata/maflite/tiny_maflite.maf.txt', "out/testExposedCols.maf.tsv",
                                          self._determine_db_dir())

        # Make sure that the generated maf file is not junk before looking at the header.
        self._validateTcgaMafContents(rendered_maf)

        # The input has a couple of exposed columns; they must appear under their plain names.
        header_names = GenericTsvReader(rendered_maf).getFieldNames()
        for column in ('t_alt_count', 't_ref_count'):
            prefixed = "i_" + column
            self.assertFalse(prefixed in header_names, "i_ was prepended to " + column)
            self.assertTrue(column in header_names, column + " not found.")

示例9: _renderSortedTsv

    def _renderSortedTsv(self, templateFilename, vcfFilename, tsvFilename, sampleNames, dataManager, inferGenotypes):
        """
        Turn a sorted tsv into a VCF

        Rows are accumulated into a RecordBuilder until self._isNewVcfRecordNeeded reports that
        a row starts a new record; the accumulated record is then written out.

        :param templateFilename: basic VCF to model output VCF.
        :param vcfFilename: output VCF filename
        :param tsvFilename: input sorted tsv
        :param sampleNames: sample names that should be used in output
        :param dataManager: dataManager instance used in creating pyvcf records.
        :param inferGenotypes: whether we should try to infer the genotypes, since we may not have add GT explicitly
        on input
        """
        tempVcfReader = vcf.Reader(filename=templateFilename, strict_whitespace=True)
        pointer = open(vcfFilename, "w")
        try:
            tsvReader = GenericTsvReader(tsvFilename, delimiter=self.delimiter)
            try:
                index = 0
                nrecords = 1000  # log progress and flush every nrecords written records
                chrom = None
                pos = None
                refAllele = None
                recordBuilder = None

                vcfWriter = vcf.Writer(pointer, tempVcfReader, self.lineterminator)
                for m in tsvReader:
                    isNewRecord = self._isNewVcfRecordNeeded(chrom, m["chr"], pos, m["start"], refAllele,
                                                             m["ref_allele"])
                    if isNewRecord:
                        # Emit the record accumulated so far before starting the next one.
                        if recordBuilder is not None:
                            record = recordBuilder.createRecord()
                            vcfWriter.write_record(record)
                            index += 1
                            if index % nrecords == 0:
                                self.logger.info("Rendered " + str(index) + " vcf records.")
                                vcfWriter.flush()

                        chrom = m["chr"]
                        pos = m["start"]
                        refAllele = m["ref_allele"]

                        recordBuilder = RecordBuilder(chrom, int(pos), refAllele, sampleNames)

                    recordBuilder = self._parseRecordBuilder(m, recordBuilder, dataManager, inferGenotypes)

                # The last accumulated record has no following row to trigger its write.
                if recordBuilder is not None:
                    record = recordBuilder.createRecord()
                    vcfWriter.write_record(record)
                    index += 1  # count the final record too, so that the total below is accurate

                vcfWriter.close()
            finally:
                tsvReader.close()
        finally:
            # Make sure the output handle is released even when rendering fails.
            pointer.close()
        self.logger.info("Rendered all " + str(index) + " vcf records.")

示例10: _validateTcgaMafContents

    def _validateTcgaMafContents(self, filename):
        """ This is a utility, private method for unit tests to get a semblance that a valid maf file was created.

        Note: This method has nothing to do with the TCGA validator.

        :param filename: path of the rendered MAF file to inspect
        """
        configFile = ConfigUtils.createConfigParser(os.path.join("configs", "tcgaMAF2.4_output.config"))
        statinfo = os.stat(filename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + filename + ") is empty.")

        tsvReader = GenericTsvReader(filename)

        self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")

        # The column lists do not change from row to row, so read them once.
        # NOTE(review): if these values are raw comma-separated strings, the `in`
        # tests below are substring matches rather than exact column matches -- confirm.
        requiredColumns = configFile.get("general", "requiredColumns")
        optionalColumns = configFile.get("general", "optionalColumns")
        exposedColumns = configFile.get("general", "exposedColumns")

        # Accumulated over the whole file; resetting it per row would make the
        # "< 10" check below impossible to fail.
        uniprot_aa_xform_counter = 0

        ctr = 1
        for lineDict in tsvReader:

            # TODO: Re-enable when GENCODE and HGNC datasources are concordant (or Entrez_Gene_ID is in the gencode gtf)
            # if lineDict['Entrez_Gene_Id'] == "0":
            #     self.assertTrue(lineDict['Hugo_Symbol'] == "Unknown", "Entrez_Gene_Id was zero, but Hugo Symbol was not 'Unknown'.  Line: " + str(ctr))

            unknownKeys = []
            self.assertTrue(lineDict["Tumor_Seq_Allele1"] != lineDict["Tumor_Seq_Allele2"], "Reference and alternate were equal in TCGA MAF output on line %d (%s)" % (ctr, lineDict["Tumor_Seq_Allele1"]))
            self.assertTrue(lineDict["Tumor_Seq_Allele1"] == lineDict["Reference_Allele"], "Reference Allele should match Tumor_Seq_Allele1 on line " + str(ctr))
            for k in lineDict.keys():
                if lineDict[k] == "__UNKNOWN__":
                    unknownKeys.append(k)

                self.assertTrue('\r' not in lineDict[k], "Carriage return character found in an annotation value.")

                # Any column that the config does not list must carry the internal prefix.
                if (k not in requiredColumns) and (k not in optionalColumns) and (k not in exposedColumns):
                    self.assertTrue(k.startswith("i_"), "Internal column was not prepended with 'i_'")
            if lineDict['UniProt_AApos'] == "0":
                uniprot_aa_xform_counter += 1

            if lineDict["Variant_Type"] == VariantClassification.VT_DEL:
                self.assertTrue(lineDict["Tumor_Seq_Allele2"] == "-")

            if lineDict["Variant_Type"] == VariantClassification.VT_INS:
                self.assertTrue(lineDict["Reference_Allele"] == "-")

            unknownKeys.sort()
            self.assertTrue(len(unknownKeys) == 0, "__UNKNOWN__ values (" + str(len(unknownKeys)) + ") seen on line " + str(ctr) + ", in fields: " + ", ".join(unknownKeys))
            self.assertTrue(uniprot_aa_xform_counter < 10, "Too many uniprot aa xform values are zero (" + str(uniprot_aa_xform_counter) + ").  This is probably an error.")

            ctr += 1

示例11: _validateTcgaMafContents

    def _validateTcgaMafContents(self, filename):
        """
        This is a utility, private method for unit tests to get a semblance that a valid maf file was created.

        Note: This method has nothing to do with the TCGA validator.

        TODO: This is code duplication from TCGA MAF Output RendererTest.  This should be refactored into a base class
        (to preserve self.assertTrue, etc).

        :param filename: path of the rendered MAF file to inspect
        """
        statinfo = os.stat(filename)
        self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + filename + ") is empty.")

        tsvReader = GenericTsvReader(filename)

        self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")

        # Parse the config once instead of once per column of every row.
        # NOTE(review): if these values are raw comma-separated strings, the `in`
        # tests below are substring matches rather than exact column matches -- confirm.
        configFile = ConfigUtils.createConfigParser('configs/tcgaMAF2.3_output.config')
        requiredColumns = configFile.get("general", "requiredColumns")
        optionalColumns = configFile.get("general", "optionalColumns")

        ctr = 1
        for lineDict in tsvReader:
            if lineDict['Entrez_Gene_Id'] == "0":
                self.assertTrue(lineDict['Hugo_Symbol'] == "Unknown",
                                "Entrez_Gene_Id was zero, but Hugo Symbol was not 'Unknown'.  Line: " + str(ctr))

            unknownKeys = []
            for k in lineDict.keys():
                if lineDict[k] == "__UNKNOWN__":
                    unknownKeys.append(k)

                self.assertTrue('\r' not in lineDict[k], "Carriage return character found in an annotation value.")

                # Any column that the config does not list must carry the internal prefix.
                if (k not in requiredColumns) and (k not in optionalColumns):
                    self.assertTrue(k.startswith("i_"), "Internal column was not prepended with 'i_'")

            unknownKeys.sort()
            self.assertTrue(len(unknownKeys) == 0,
                            "__UNKNOWN__ values (" + str(len(unknownKeys)) + ") seen on line " + str(
                                ctr) + ", in fields: " + ", ".join(unknownKeys))

            ctr += 1

示例12: testBasicAnnotation

    def testBasicAnnotation(self):
        """ Annotate a maflite file with a transcript-keyed generic TSV datasource and confirm the output headers only. """
        # A transcript provider datasource is needed so that the gene gets annotated.
        gene_ds = TestUtils.createTranscriptProviderDatasource(config=self.config)
        transcript_ds = DatasourceFactory.createDatasource(
            "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
            "testdata/small_transcript_tsv_ds/")
        requested_output = "out/genericTranscriptTest.out.tsv"

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
        annotator.setOutputRenderer(SimpleOutputRenderer(requested_output))
        for datasource in (gene_ds, transcript_ds):
            annotator.addDatasource(datasource)

        # The file that is inspected is the one reported by annotate().
        rendered_output = annotator.annotate()

        header_names = GenericTsvReader(rendered_output).getFieldNames()
        for wanted in ("refseq_test_mRNA_Id", "refseq_test_prot_Id"):
            self.assertTrue(wanted in header_names, wanted + " not found in headers: " + str(header_names))

示例13: testTCGAMAFAsInputAndQuickAnnotate

    def testTCGAMAFAsInputAndQuickAnnotate(self):
        """ Read a TCGA MAF through the MAFLITE input creator, annotate it, and check that it is still rendered properly """
        input_path = "testdata/maf/Patient0.maf.annotated"
        input_creator = MafliteInputMutationCreator(input_path, 'configs/maflite_input.config')
        output_path = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
        renderer = TcgaMafOutputRenderer(output_path, 'configs/tcgaMAF2.4_output.config')

        annotator = Annotator()
        annotator.setInputCreator(input_creator)
        annotator.setOutputRenderer(renderer)
        annotator.addDatasource(
            DatasourceFactory.createDatasource("testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
                                               "testdata/thaga_janakari_gene_ds/hg19/"))
        annotator.annotate()

        self.assertTrue(os.stat(output_path).st_size > 0, "Generated MAF file (" + output_path + ") is empty.")
        reader_in = GenericTsvReader(input_path)
        reader_out = GenericTsvReader(output_path)

        self.assertTrue(reader_out.getComments().find('#version') != -1, "First line did not specify a version number")
        for new_field in ("i_TJ_Data_Why", "i_TJ_Data_Who"):
            self.assertTrue(new_field in reader_out.getFieldNames(), "New field missing (" + new_field + ") from header")

        # Line totals are the data rows plus the comment lines of each file.
        lines_out = sum(1 for _ in reader_out)
        lines_in = sum(1 for _ in reader_in)
        lines_in += len(reader_in.getCommentsAsList())
        lines_out += len(reader_out.getCommentsAsList())

        self.assertTrue(lines_out == (lines_in + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file.  (In,Out): " + str(lines_in) + ", " + str(lines_out))

示例14: testAnnotationWithExampleVcf

    def testAnnotationWithExampleVcf(self):
        """
        Tests whether parsed annotations match the actual annotations in a simple TSV.  Missing format fields yield -->""  ".,." --> ","

        The rendered TSV and the expected TSV are loaded with pandas and compared
        column by column; two missing values in the same cell count as equal.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.vcf")
        outputFilename = os.path.join("out", "example.out.tsv")
        expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row of the rendered file follows its comment lines.
        current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; an identity test against an int literal is not reliable.
        self.assertTrue(len(currentColNames.symmetric_difference(expectedColNames)) == 0,
                        "Should have the same columns")
        self.assertTrue(len(current.index) == len(expected.index), "Should have the same number of rows")

        for colName in currentColNames:
            bothMissing = pandas.isnull(current[colName]) & pandas.isnull(expected[colName])
            matches = (current[colName] == expected[colName]) | bothMissing
            self.assertTrue(sum(matches) == len(current.index),
                            "Should have the same values in column " + colName + ": \n" +
                            str(current[colName]) + "\nvs\n" + str(expected[colName]))

示例15: testMissingFilter

    def testMissingFilter(self):
        """
        Tests that the missing FILTER fields are parsed correctly.

        The rendered TSV and the expected TSV are loaded with pandas and compared
        column by column; two missing values in the same cell count as equal.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.missing_filters.vcf")
        outputFilename = os.path.join("out", "example.missing_filters.out.tsv")
        expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.missing_filters.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row of the rendered file follows its comment lines.
        current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; an identity test against an int literal is not reliable.
        self.assertTrue(len(currentColNames.symmetric_difference(expectedColNames)) == 0,
                        "Should have the same columns")
        self.assertTrue(len(current.index) == len(expected.index), "Should have the same number of rows")

        for colName in currentColNames:
            bothMissing = pandas.isnull(current[colName]) & pandas.isnull(expected[colName])
            matches = (current[colName] == expected[colName]) | bothMissing
            self.assertTrue(sum(matches) == len(current.index),
                            "Should have the same values in column " + colName)


注:本文中的oncotator.utils.GenericTsvReader.GenericTsvReader類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。