Python MutUtils.MutUtils类代码示例

本文整理汇总了Python中oncotator.utils.MutUtils.MutUtils类的典型用法代码示例。如果您正苦于以下问题：Python MutUtils类的具体用法？Python MutUtils怎么用？Python MutUtils使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了MutUtils类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_build_ensembl_transcript_index

    def test_build_ensembl_transcript_index(self):
        """Build the gtf portion of the ensembl transcript db

        Builds the index from the trimmed GTF and FASTA test files, reopens it
        through Shove and checks that the "YDR529C" entry has a non-empty
        sequence, a CDS and exons.  MutUtils.removeDir is called on the output
        path at the end.
        """
        # cat ~/oncotator_pycharm/oncotator/test/testdata/Saccharomyces_cerevisiae.EF4.71_trim.gtf | cut -f 9 | cut -f 5 --delimiter=" " | sort | uniq | sed -r "s/;//g" | sed -r "s/\"//g"
        #  snR84, tK(UUU)K, YAL067C, YAL067W-A, YAL068C, YAL068W-A, YAL069W, YBR278W, YBR279W, YBR280C, YBR281C, YDR528W, YDR529C, YKR074W,
        #
        # grep -Pzo  ">(snR84|tK\(UUU\)K|YAL067C|YAL067W-A|YAL068C|YAL068W-A|YAL069W|YBR278W|YBR279W|YBR280C|YBR281C|YDR528W|YDR529C|YKR074W)([A-Za-z_0-9 \:\-\n]+)" Saccharomyces_cerevisiae.EF4.71.cdna.all.fa >Saccharomyces_cerevisiae.EF4.71_trim.cdna.all.fa
        #
        ensembl_input_gtf = "testdata/Saccharomyces_cerevisiae.EF4.71_trim.gtf"
        ensembl_input_fasta = "testdata/Saccharomyces_cerevisiae.EF4.71_trim.cdna.all.fa"

        output_filename = "out/test_ensembl_gtf.db"
        protocol = "file"
        genome_build_factory = GenomeBuildFactory()
        genome_build_factory.build_ensembl_transcript_index([ensembl_input_gtf], [ensembl_input_fasta], output_filename, protocol=protocol)
        self.assertTrue(os.path.exists(output_filename))

        shove = Shove(protocol + "://" + output_filename, "memory://")
        self.assertTrue(len(shove.keys()) > 0)
        self.assertTrue("YDR529C" in shove.keys())
        t = shove["YDR529C"]
        self.assertTrue(t.get_seq() is not None)
        # Compare by value: `is not ""` checks object identity, which does not
        # reliably detect an empty sequence.
        self.assertNotEqual(t.get_seq(), "")
        self.assertTrue(len(t.get_cds()) > 0)
        self.assertTrue(len(t.get_exons()) > 0)
        MutUtils.removeDir(output_filename)

开发者ID:alexramos，项目名称:oncotator，代码行数:26，代码来源:GenomeBuildFactoryTest.py

示例2: testChromosomeConversionHG19

    def testChromosomeConversionHG19(self):
        """Test that an hg19 build with chrom = 23 or 24 gets converted to X or Y

        Chromosomes "2" and "4" are checked as well and must come back unchanged.
        """
        def to_mut_format(raw_chrom):
            return MutUtils.convertChromosomeStringToMutationDataFormat(raw_chrom, build="hg19")

        # (input chromosome, expected result, prefix of the failure message)
        cases = [
            ("23", "X", "chrom of 23 did not produce X: "),
            ("24", "Y", "chrom of 24 did not produce Y: "),
            ("2", "2", "chrom of 2 yielded different value: "),
            ("4", "4", "chrom of 4 yielded different value: "),
        ]
        for raw_chrom, expected, prefix in cases:
            # The conversion is evaluated a second time to build the failure message.
            self.assertEqual(to_mut_format(raw_chrom), expected, prefix + to_mut_format(raw_chrom))

开发者ID:Tmacme，项目名称:oncotator，代码行数:8，代码来源:MutUtilsTest.py

示例3: testSimpleRead

 def testSimpleRead(self):
     """ Read a good maflite file and make sure that each mutation validates """
     creator = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt",
                                           'configs/maflite_input.config')

     # The test passes as long as validation raises nothing for any mutation.
     for mutation in creator.createMutations():
         MutUtils.validateMutation(mutation)

开发者ID:alexramos，项目名称:oncotator，代码行数:8，代码来源:MafliteInputMutationCreatorTest.py

示例4: testExampleVcfDBAnnotationWithSNPExactMatch

    def testExampleVcfDBAnnotationWithSNPExactMatch(self):
        """Annotate two SNPs with the vcf_db_exact datasource and compare the ESP annotations.

        The first mutation (20:1110696 A>T) is checked for ESP_AF, ESP_AC and
        ESP_H2; the second (20:1230237 T>A) for ESP_NS and an empty ESP_AF.
        """
        ds_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
        datasource = DatasourceFactory.createDatasource(os.path.join(ds_dir, "vcf_db_exact.config"), ds_dir)

        # First mutation: 20:1110696 A>T
        mut = MutUtils.initializeMutFromAttributes("20", "1110696", "1110696", "A", "T", "hg19")
        annotated = datasource.annotate_mutation(mut)

        observed = annotated.getAnnotation("ESP_AF")
        expected = Annotation(value="0.667", datasourceName="ESP", dataType="Float",
                              description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                              number=-1)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

        observed = annotated.getAnnotation("ESP_AC")
        expected = Annotation(value="2,4", datasourceName="ESP", dataType="Integer",
                              description="Allele Count", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                              number=None)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

        observed = annotated.getAnnotation("ESP_H2")
        expected = Annotation(value="False", datasourceName="ESP", dataType="Flag",
                              description="HapMap2 membership", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                              number=0)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

        # Second mutation: 20:1230237 T>A
        mut = MutUtils.initializeMutFromAttributes("20", "1230237", "1230237", "T", "A", "hg19")
        annotated = datasource.annotate_mutation(mut)

        observed = annotated.getAnnotation("ESP_NS")
        expected = Annotation(value="3", datasourceName="ESP", dataType="Integer",
                              description="Number of Samples With Data",
                              tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                              number=1)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

        # For this mutation ESP_AF is expected to carry an empty value.
        observed = annotated.getAnnotation("ESP_AF")
        expected = Annotation(value="", datasourceName="ESP", dataType="Float",
                              description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                              number=-1)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

开发者ID:alexramos，项目名称:oncotator，代码行数:58，代码来源:TabixIndexedVcfDatasourceTest.py

示例5: testNoUnknownAnnotations

 def testNoUnknownAnnotations(self):
     """ Make sure that the gaf 3.0 datasource does not annotate anything with source set to Unknown """
     creator = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     for mutation in creator.createMutations():
         annotated = transcript_ds.annotate_mutation(mutation)
         MutUtils.validateMutation(annotated)
         # Every annotated mutation must come back with no "unknown" annotations.
         unknown = MutUtils.getUnknownAnnotations(annotated)
         self.assertTrue(len(unknown) == 0,
                         "Unknown annotations exist in mutation: " + str(unknown))

开发者ID:Tmacme，项目名称:oncotator，代码行数:10，代码来源:GafDatasourceTest.py

示例6: testCreateGPTsvDatasource

    def testCreateGPTsvDatasource(self):
        """Install a gp_tsv datasource into a temp dir and check the generated config and md5 file.

        The temporary directory is removed in a ``finally`` clause so that a
        failing assertion does not leave it behind.
        """
        datasourceFilename = "testdata/small_genome_position_tsv_ds/oreganno_trim.hg19.txt"
        datasourceType = "gp_tsv"
        datasourceName = "ORegAnno"
        datasourceFoldername = "ORegAnno"
        datasourceVersion = "UCSC Track"
        genomeBuild = "hg19"
        genomicPositionColumnNames = "hg19.oreganno.chrom,hg19.oreganno.chromStart,hg19.oreganno.chromEnd"

        tmpDir = tempfile.mkdtemp()
        try:
            destDir = os.path.join(tmpDir, datasourceFoldername, genomeBuild)
            os.makedirs(destDir)

            DatasourceInstallUtils.create_datasource(destDir, datasourceFilename, datasourceFoldername,
                                                     datasourceName, datasourceType, datasourceVersion,
                                                     genomicPositionColumnNames)

            configParser = ConfigUtils.createConfigParser(os.path.join(destDir, "ORegAnno.config"))
            self.assertTrue(configParser.has_section("general"), "general section is missing.")

            # (option in the "general" section, value it is expected to hold).
            # src_file is compared against the bare file name, not the input path.
            expectedOptions = [
                ("type", datasourceType),
                ("src_file", "oreganno_trim.hg19.txt"),
                ("title", datasourceName),
                ("version", datasourceVersion),
                ("genomic_position_cols", genomicPositionColumnNames),
            ]

            # First make sure every option exists, then compare the values.
            for option, _ in expectedOptions:
                self.assertTrue(configParser.has_option("general", option),
                                "%s option is missing in general section." % option)
            for option, expected in expectedOptions:
                actual = configParser.get("general", option)
                self.assertEqual(actual, expected,
                                 "Expected data source %s is %s but was %s." % (option, expected, actual))

            self.assertTrue(os.path.exists(os.path.join(tmpDir, datasourceFoldername, genomeBuild + ".md5")),
                            "No md5 file was generated.")
        finally:
            MutUtils.removeDir(tmpDir)

开发者ID:Tmacme，项目名称:oncotator，代码行数:52，代码来源:DatasourceInstallUtilsTest.py

示例7: testChrGLs

 def testChrGLs(self):
     """ Test that mutations on unaligned transcripts can be annotated properly.  I.e. when chromosome = GL....."""
     creator = MafliteInputMutationCreator('testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     for m in creator.createMutations():
         try:
             m = transcript_ds.annotate_mutation(m)
             MutUtils.validateMutation(m)
         except Exception as e:
             # Any exception during annotation or validation fails the test,
             # with the traceback included in the failure message.
             details = str(e) + "\n" + traceback.format_exc()
             self.assertTrue(False, "Erroneous exception was thrown: " + details)
         # The annotated mutation must carry a non-empty gene.
         self.assertTrue(m['gene'] != '')

开发者ID:Tmacme，项目名称:oncotator，代码行数:13，代码来源:GafDatasourceTest.py

示例8: _handleMissingAnnotations

 def _handleMissingAnnotations(self, m):
     """Check mutation ``m`` for the annotations needed to render a TCGA VCF 1.1.

     Missing header annotations are logged as an error and raised; missing
     mutation annotations are only logged as a warning.

     :param m: mutation to check
     :raises MissingAnnotationException: if any entry of
         TcgaVcfOutputRenderer.requiredHeaderAnnotations is missing from ``m``
     """
     missingHeaderAnnotations = MutUtils.retrieveMissingAnnotations(m,
                                                                    TcgaVcfOutputRenderer.requiredHeaderAnnotations)
     missingMutAnnotations = MutUtils.retrieveMissingAnnotations(m, TcgaVcfOutputRenderer.requiredMutAnnotations)
     if len(missingHeaderAnnotations) > 0:
         sError = "The following annotations are required for rendering a TCGA VCF 1.1, but were not found: " + str(
             missingHeaderAnnotations)
         self.logger.error(sError)
         raise MissingAnnotationException(sError)
     if len(missingMutAnnotations) > 0:
         sError = "The following annotations important for rendering a TCGA VCF 1.1.  Proceeding... : " + str(
             missingMutAnnotations)
         # Logger.warn is a deprecated alias; warning() is the supported name.
         # NOTE(review): assumes self.logger is a standard logging.Logger -- confirm.
         self.logger.warning(sError)

开发者ID:alexramos，项目名称:oncotator，代码行数:13，代码来源:TcgaVcfOutputRenderer.py

示例9: _determine_matching_alt_indices

    def _determine_matching_alt_indices(self, mut, record, build):
        """Return the indices of the alternate alleles in ``record`` that match ``mut``.

        :param mut: mutation being annotated; its chr, start, end, ref_allele and alt_allele are read
        :param record: VCF record; CHROM, POS, REF, ALT and is_monomorphic are read
        :param build: genome build passed on when a datasource mutation is created for each alternate
        :return: list of indices into ``record.ALT``; ``[-1]`` when a monomorphic
            record matches; an empty list when nothing matches
        """
        indices = []
        # The chromosome is the same for every alternate of the record, so convert it once.
        chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        startPos = record.POS
        endPos = record.POS

        if record.is_monomorphic:
            # No alternate allele to compare against; a match is reported as index -1.
            if self.match_mode == "exact":
                if mut.chr == chrom and mut.ref_allele == record.REF:
                    indices = [-1]
            elif mut.chr == chrom and int(mut.start) <= startPos and int(mut.end) >= endPos:
                indices = [-1]
            return indices

        ref = str(record.REF)
        # Iterate over all alternates in the record
        for index, alt_allele in enumerate(record.ALT):
            ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, str(alt_allele), build)

            if self.match_mode == "exact":
                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    indices.append(index)
            elif mut.chr == ds_mut.chr:  # cases where the match mode isn't exact
                mut_start = int(mut.start)
                ds_start = int(ds_mut.start)
                mut_end = int(mut.end)
                ds_end = int(ds_mut.end)

                same_interval = mut_start == ds_start and mut_end == ds_end
                starts_inside_ds = ds_start <= mut_start <= ds_end and mut_end >= ds_end
                covers_ds = mut_start <= ds_start and mut_end >= ds_end
                ends_inside_ds = mut_start <= ds_start and ds_start <= mut_end <= ds_end
                # NOTE(review): a mutation lying strictly inside the datasource
                # interval (mut_start > ds_start and mut_end < ds_end) satisfies
                # none of these conditions -- confirm that this is intended.
                if same_interval or starts_inside_ds or covers_ds or ends_inside_ds:
                    indices.append(index)

        return indices

开发者ID:Tmacme，项目名称:oncotator，代码行数:51，代码来源:TabixIndexedVcfDatasource.py

示例10: testCreateIndexedVcfDatasource

    def testCreateIndexedVcfDatasource(self):
        """Install an indexed_vcf datasource into a temp dir and check config, md5 file and index.

        The temporary directory is removed in a ``finally`` clause so that a
        failing assertion does not leave it behind.
        """
        datasourceFilename = "testdata/vcf/example.vcf"
        datasourceFoldername = "1000Genomes"
        datasourceName = "1000Genomes"
        datasourceType = "indexed_vcf"
        datasourceVersion = "V4.1"
        genomeBuild = "hg19"

        tmpDir = tempfile.mkdtemp()
        try:
            destDir = os.path.join(tmpDir, datasourceFoldername, genomeBuild)
            os.makedirs(destDir)

            DatasourceInstallUtils.create_datasource(destDir, datasourceFilename, datasourceFoldername,
                                                     datasourceName, datasourceType, datasourceVersion)

            configParser = ConfigUtils.createConfigParser(os.path.join(destDir, "1000Genomes.config"))
            self.assertTrue(configParser.has_section("general"), "general section is missing.")

            # (option in the "general" section, value it is expected to hold).
            # src_file is expected to name the tabix-indexed file, not the input VCF.
            expectedOptions = [
                ("type", datasourceType),
                ("src_file", "example.tabix_indexed.vcf.gz"),
                ("title", datasourceName),
                ("version", datasourceVersion),
            ]

            # First make sure every option exists, then compare the values.
            for option, _ in expectedOptions:
                self.assertTrue(configParser.has_option("general", option),
                                "%s option is missing in general section." % option)
            for option, expected in expectedOptions:
                actual = configParser.get("general", option)
                self.assertEqual(actual, expected,
                                 "Expected data source %s is %s but was %s." % (option, expected, actual))

            self.assertTrue(os.path.exists(os.path.join(tmpDir, datasourceFoldername, genomeBuild + ".md5")),
                            "No md5 file was generated.")

            # Data source was created correctly
            tabixIndexedFilename = os.path.join(destDir, "example.tabix_indexed.vcf.gz")
            self.assertTrue(os.path.exists(tabixIndexedFilename), "No index file was generated.")

            vcfReader = vcf.Reader(filename=tabixIndexedFilename, compressed=True, strict_whitespace=True)
            vcfRecords = vcfReader.fetch(chrom=20, start=1230237, end=1230237)
            # NOTE(review): if fetch yields no records this loop checks nothing -- confirm
            # whether at least one record should be required.
            for vcfRecord in vcfRecords:
                self.assertEqual(vcfRecord.INFO["NS"], 3, "Expected %s but got %s." % (3, vcfRecord.INFO["NS"]))
                self.assertEqual(vcfRecord.INFO["DP"], 13, "Expected %s but got %s." % (13, vcfRecord.INFO["DP"]))
        finally:
            MutUtils.removeDir(tmpDir)

开发者ID:Tmacme，项目名称:oncotator，代码行数:51，代码来源:DatasourceInstallUtilsTest.py

示例11: testNoLostMutations

    def testNoLostMutations(self):
        """ Does a simple gaf datasource annotation run and makes sure that no mutations were lost """
        inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
        inputCreator = MafliteInputMutationCreator(inputFilename, "configs/maflite_input.config")
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # Count the input lines with a context manager so the handle is closed;
        # one line is subtracted (presumably the header row).
        with open(inputFilename, 'r') as inputFile:
            numMutsInput = sum(1 for _ in inputFile) - 1

        mutations = inputCreator.createMutations()
        ctr = 0
        for m in mutations:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
            ctr += 1
        self.assertEqual(ctr, numMutsInput, "Gaf data source altered mutation count.")

开发者ID:Tmacme，项目名称:oncotator，代码行数:14，代码来源:GafDatasourceTest.py

示例12: testChrom2HashCodeTable

    def testChrom2HashCodeTable(self):
        """Check the hash codes MutUtils.createChrom2HashCodeTable assigns to two chromosome lists."""
        message = "For chrom %s, hash code should be %s but it was %s."

        # Each scenario: (chromosomes handed to the factory, ordered (chrom, expected code) checks)
        scenarios = [
            (["1", "X", "3", "contig1", "Y", "25", "mt"],
             [("1", 1), ("3", 3), ("25", 25), ("X", 26), ("Y", 27), ("mt", 28), ("contig1", 29)]),
            (["contig1", "mt"],
             [("mt", 3), ("contig1", 4)]),
        ]
        for chroms, expectations in scenarios:
            h = MutUtils.createChrom2HashCodeTable(chroms)
            for chrom, code in expectations:
                self.assertTrue(h[chrom] == code, message % (chrom, code, h[chrom]))

开发者ID:Tmacme，项目名称:oncotator，代码行数:15，代码来源:MutUtilsTest.py

示例13: _is_matching

    def _is_matching(self, mut, tsv_record):
        """Decide whether ``tsv_record`` matches the mutation ``mut``.

        In "exact" match mode the chromosome, start and end must agree, and the
        ref/alt alleles too when the tsv index has "ref" and "alt" columns.
        In any other mode the result of TranscriptProviderUtils.test_overlap on
        the two start/end pairs is returned.

        :param mut: mutation to compare against
        :param tsv_record: sequence of column values, addressed through ``self.tsv_index``
        :return: True on an exact match, False otherwise; the overlap test's result in non-exact mode
        """
        columns = self.tsv_index
        rec_chrom = tsv_record[columns["chrom"]]
        rec_start = tsv_record[columns["start"]]
        rec_end = tsv_record[columns["end"]]
        build = "hg19"

        if self.match_mode != "exact":
            return TranscriptProviderUtils.test_overlap(int(mut.start), int(mut.end), int(rec_start), int(rec_end))

        if not ("ref" in columns and "alt" in columns):
            # do not use ref and alt information
            if mut.chr == rec_chrom and int(mut.start) == int(rec_start) and int(mut.end) == int(rec_end):
                return True
            return False

        # ref and alt information is present
        ref = tsv_record[columns["ref"]]
        alt = tsv_record[columns["alt"]]
        if ref == "-" or alt == "-":
            # addresses Mutation Annotation Format based tsv records
            # TODO: This looks risky to be calling the MutationData constructor directly
            ds_mut = MutationData(rec_chrom, rec_start, rec_end, ref, alt, build)
        else:
            # addresses tsv records where the input isn't a Mutation Annotation Format file
            ds_mut = MutUtils.initializeMutFromAttributes(rec_chrom, rec_start, rec_end, ref, alt, build)

        if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
            and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
            and int(mut.end) == int(ds_mut.end):
            return True
        return False

开发者ID:Tmacme，项目名称:oncotator，代码行数:28，代码来源:TabixIndexedTsvDatasource.py

示例14: retrieve_cached_annotations

 def retrieve_cached_annotations(self, m):
     """
     Look up the annotations cached for a mutation.

     :param m: mutation
     :return: list of Annotations, or None, if cache miss.
     """
     # The cache key is built from the mutation and this manager's db dir key.
     db_dir_key = self.get_db_dir_key()
     variant_key = MutUtils.create_variant_key_by_mutation(m, db_dir_key)
     cache = self.get_cache()
     return cache.retrieve_from_cache(variant_key)

开发者ID:Tmacme，项目名称:oncotator，代码行数:7，代码来源:CacheManager.py

示例15: retrieveExons

    def retrieveExons(self, gene, padding=10, isCodingOnly=False):
        """Return the padded exon intervals of every transcript of ``gene``.

        :param gene: gene name; looked up in ``self.gene_id_idx`` and compared
            with the ``"gene"`` entry of each transcript
        :param padding: amount subtracted from each interval start and added to each end
        :param isCodingOnly: when true, transcripts for which
            ``gaf_annotation.is_non_coding_transcript`` is true are skipped and the
            coordinates come from ``self.getCodingTranscriptCoords``; otherwise the
            transcript's ``"genomic_coords"`` are used
        :return: set of ``(gene, chr, start, end)`` tuples with start and end as
            strings; an empty set when the gene is not in ``self.gene_id_idx``
        """
        result = set()
        geneTuple = self.gene_id_idx.get(gene, None)
        if geneTuple is None:
            return result

        contig = MutUtils.convertChromosomeStringToMutationDataFormat(geneTuple[0])
        for b in self.Transcripts.get(contig, []):
            for i in self.Transcripts[contig][b]:
                if i["gene"] != gene:
                    continue

                if isCodingOnly:
                    if gaf_annotation.is_non_coding_transcript(i, self):
                        continue
                    genomic_coords = self.getCodingTranscriptCoords(i)
                else:
                    genomic_coords = i["genomic_coords"]

                for coord in genomic_coords:
                    # A coordinate pair may be stored in either order; normalise before padding.
                    start = min(coord[0], coord[1])
                    end = max(coord[0], coord[1])
                    result.add((gene, i["chr"], str(start - padding), str(end + padding)))
        return result

开发者ID:jcambry，项目名称:oncotator，代码行数:25，代码来源:Gaf.py

注：本文中的oncotator.utils.MutUtils.MutUtils类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。