当前位置: 首页>>代码示例>>Python>>正文

Python proteins.ProteinGrouper类代码示例

本文整理汇总了Python中dark.proteins.ProteinGrouper的典型用法代码示例。如果您正苦于以下问题:Python ProteinGrouper类的具体用法?Python ProteinGrouper怎么用?Python ProteinGrouper使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


示例1: testOneLineInOneFile

 def testOneLineInOneFile(self):
     If a protein grouper is given one file with one line, its virusTitles
     dict must be as expected.
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
     pg = ProteinGrouper()
     pg.addFile('sample-filename', fp)
             'Lausannevirus': {
                 'sample-filename': [
                         'bestScore': 48.1,
                         'bluePlotFilename': 'out/0.png',
                         'coverage': 0.77,
                         'fastaFilename': 'out/0.fasta',
                         'hspCount': 6,
                         'index': 0,
                         'medianScore': 46.6,
                         'outDir': 'out',
                         'proteinLength': 74,
                         'proteinTitle': 'gi|327|X|I44.6 ubiquitin',
                         'proteinURL': (
                         'readCount': 5,

示例2: testNoFilesToStr

 def testNoFilesToStr(self):
     """
     If no files have been given to a protein grouper, its text string
     format must be as expected.
     """
     pg = ProteinGrouper()
     self.assertEqual('0 viruses found in 0 samples\n', pg.toStr())

示例3: testOneLineInOneFileFASTQ

 def testOneLineInOneFileFASTQ(self):
     If a protein grouper is given one file with one line, its pathogenNames
     dict must be as expected, including for a FASTQ file.
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
     pg = ProteinGrouper(format_='fastq')
     pg.addFile('sample-filename', fp)
             'Lausannevirus': {
                 'sample-filename': {
                     'proteins': {
                         'gi|327|X|I44.6 ubiquitin': {
                             'bestScore': 48.1,
                             'bluePlotFilename': 'out/0.png',
                             'coverage': 0.77,
                             'readsFilename': 'out/0.fastq',
                             'hspCount': 6,
                             'index': 0,
                             'medianScore': 46.6,
                             'outDir': 'out',
                             'proteinLength': 74,
                             'proteinName': 'gi|327|X|I44.6 ubiquitin',
                             'proteinURL': (
                             'readCount': 5,
                     'uniqueReadCount': None,

示例4: testOneLineInOneFileWithDifferentAssetDir

 def testOneLineInOneFileWithDifferentAssetDir(self):
     If a protein grouper is given a different assetDir name, 
     the outDir needs to have that same name, as expected.
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
     pg = ProteinGrouper(assetDir='differentname')
     pg.addFile('sample-filename', fp)
             'Lausannevirus': {
                 'sample-filename': {
                     'proteins': {
                         'gi|327|X|I44.6 ubiquitin': {
                             'bestScore': 48.1,
                             'bluePlotFilename': 'differentname/0.png',
                             'coverage': 0.77,
                             'readsFilename': 'differentname/0.fasta',
                             'hspCount': 6,
                             'index': 0,
                             'medianScore': 46.6,
                             'outDir': 'differentname',
                             'proteinLength': 74,
                             'proteinName': 'gi|327|X|I44.6 ubiquitin',
                             'proteinURL': (
                             'readCount': 5,
                     'uniqueReadCount': None,

示例5: testOneLineInEachOfTwoFilesSamePathogen

 def testOneLineInEachOfTwoFilesSamePathogen(self):
     If a protein grouper is given two files, each with one line from the
     same pathogen, its pathogenNames dict must be as expected.
     fp1 = StringIO(
         '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
     fp2 = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n'
     pg = ProteinGrouper()
     pg.addFile('sample-filename-1', fp1)
     pg.addFile('sample-filename-2', fp2)
             'Lausannevirus': {
                 'sample-filename-1': {
                     'proteins': {
                         'gi|327410| protein 77': {
                             'bestScore': 44.2,
                             'bluePlotFilename': 'out/0.png',
                             'coverage': 0.63,
                             'readsFilename': 'out/0.fasta',
                             'hspCount': 9,
                             'index': 0,
                             'medianScore': 41.3,
                             'outDir': 'out',
                             'proteinLength': 12,
                             'proteinName': 'gi|327410| protein 77',
                             'proteinURL': None,
                             'readCount': 9,
                     'uniqueReadCount': None,
                 'sample-filename-2': {
                     'proteins': {
                         'gi|327409| ubiquitin': {
                             'bestScore': 48.1,
                             'bluePlotFilename': 'out/0.png',
                             'coverage': 0.77,
                             'readsFilename': 'out/0.fasta',
                             'hspCount': 6,
                             'index': 0,
                             'medianScore': 46.6,
                             'outDir': 'out',
                             'proteinLength': 74,
                             'proteinName': 'gi|327409| ubiquitin',
                             'proteinURL': None,
                             'readCount': 5,
                     'uniqueReadCount': None,

示例6: testNoFilesToStr

 def testNoFilesToStr(self):
     """
     If no files have been given to a protein grouper, its text string
     format must be as expected.
     """
     pg = ProteinGrouper()
     # NOTE(review): the assertEqual call and its second argument were
     # missing from this excerpt; pg.toStr() is presumed from the test
     # name and docstring - confirm against the upstream test.
     self.assertEqual(
         'Overall, proteins from 0 pathogens were found in 0 samples.\n',
         pg.toStr())

示例7: testTwoLinesInOneFileDifferentPathogens

 def testTwoLinesInOneFileDifferentPathogens(self):
     If a protein grouper is given one file with two lines from different
     pathogens, its pathogenNames dict must be as expected.
     fp = StringIO(
         '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
         '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Hepatitis B virus]\n'
     pg = ProteinGrouper()
     pg.addFile('sample-filename', fp)
             'Lausannevirus': {
                 'sample-filename': {
                     'proteins': {
                         'gi|327410| protein 77': {
                             'bestScore': 44.2,
                             'bluePlotFilename': 'out/0.png',
                             'coverage': 0.63,
                             'readsFilename': 'out/0.fasta',
                             'hspCount': 9,
                             'index': 0,
                             'medianScore': 41.3,
                             'outDir': 'out',
                             'proteinLength': 12,
                             'proteinName': 'gi|327410| protein 77',
                             'proteinURL': None,
                             'readCount': 9,
                     'uniqueReadCount': None,
             'Hepatitis B virus': {
                 'sample-filename': {
                     'proteins': {
                         'gi|327409| ubiquitin': {
                             'bestScore': 48.1,
                             'bluePlotFilename': 'out/1.png',
                             'coverage': 0.77,
                             'readsFilename': 'out/1.fasta',
                             'hspCount': 6,
                             'index': 1,
                             'medianScore': 46.6,
                             'outDir': 'out',
                             'proteinLength': 74,
                             'proteinName': 'gi|327409| ubiquitin',
                             'proteinURL': None,
                             'readCount': 5,
                     'uniqueReadCount': None,

示例8: testOneLineInOneFileTitle

 def testOneLineInOneFileTitle(self):
     """
     If a protein grouper is given one file with one line, its _title method
     must return the expected string.
     """
     # One whitespace-separated result line; the bracketed suffix names the
     # virus the protein belongs to.
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
     pg = ProteinGrouper()
     pg.addFile('sample-filename', fp)
     self.assertEqual('1 virus found in 1 sample', pg._title())

示例9: testNoFilesToHTML

 def testNoFilesToHTML(self):
     If no files have been given to a protein grouper, its HTML string
     format must be as expected.
     pg = ProteinGrouper()
             '0 viruses found in 0 samples',
             '            body {',
             '                margin-left: 2%;',
             '                margin-right: 2%;',
             '            }',
             '            .sample {',
             '                margin-bottom: 2px;',
             '            }',
             '            .sample-name {',
             '                color: red;',
             '            }',
             '            .index {',
             '                font-size: small;',
             '            }',
             '            .protein-title {',
             '                font-family: "Courier New", Courier, '
             '            }',
             '            .stats {',
             '                font-family: "Courier New", Courier, '
             '                white-space: pre;',
             '            }',
             '            .protein-list {',
             '                margin-top: 2px;',
             '            }',
             '<h1>0 viruses found in 0 samples</h1>',
             '<h2>Virus index</h2>',
             '<h2>Sample index</h2>',
             '<h1>Viruses by sample</h1>',
             '<h1>Samples by virus</h1>',

示例10: testOneLineInEachOfTwoFilesDifferentViruses

 def testOneLineInEachOfTwoFilesDifferentViruses(self):
     If a protein grouper is given two files in two different directories,
     each with one line from the different viruses, its virusTitles dict
     must be as expected.
     fp1 = StringIO(
         '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
     fp2 = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Hepatitis B virus]\n'
     pg = ProteinGrouper()
     pg.addFile('dir-1/sample-filename-1', fp1)
     pg.addFile('dir-2/sample-filename-2', fp2)
             'Lausannevirus': {
                 'dir-1/sample-filename-1': [
                         'bestScore': 44.2,
                         'bluePlotFilename': 'dir-1/out/0.png',
                         'coverage': 0.63,
                         'fastaFilename': 'dir-1/out/0.fasta',
                         'hspCount': 9,
                         'index': 0,
                         'medianScore': 41.3,
                         'outDir': 'dir-1/out',
                         'proteinLength': 12,
                         'proteinTitle': 'gi|327410| protein 77',
                         'proteinURL': None,
                         'readCount': 9,
             'Hepatitis B virus': {
                 'dir-2/sample-filename-2': [
                         'bestScore': 48.1,
                         'bluePlotFilename': 'dir-2/out/0.png',
                         'coverage': 0.77,
                         'fastaFilename': 'dir-2/out/0.fasta',
                         'hspCount': 6,
                         'index': 0,
                         'medianScore': 46.6,
                         'outDir': 'dir-2/out',
                         'proteinLength': 74,
                         'proteinTitle': 'gi|327409| ubiquitin',
                         'proteinURL': None,
                         'readCount': 5,

示例11: testOpenNotCalledOnRepeatedCall

    def testOpenNotCalledOnRepeatedCall(self):
        If a repeated call to pathogenSampleFiles.add is made with the same
        arguments, no file should be read because the original result value is
        cached.
        class Open(object):
            def __init__(self, test, manager):
                self.test = test
                self.manager = manager
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('out/0.fasta', filename)
                    self.count += 1
                    return File(['>id1\n', 'ACTG\n'])
                elif self.count == 1:
                    self.count += 1
                    return self.manager
                        'We are only supposed to be called twice. '
                        'Filename: %r, Args: %r, Keyword args: %r.' %
                        (filename, args, kwargs))

        fp = StringIO(
            '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
        fastaIO = StringIO()

        def manager():
            yield fastaIO

        pg = ProteinGrouper()
        pg.addFile('filename-1', fp)
        pathogenSampleFiles = PathogenSampleFiles(pg)

        sideEffect = Open(self, manager()).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            filename = pathogenSampleFiles.add('Lausannevirus', 'filename-1')
            self.assertEqual('out/pathogen-0-sample-0.fasta', filename)
            self.assertEqual('>id1\nACTG\n', fastaIO.getvalue())

            # Repeated call. The side effect open will fail if open is
            # called at this point.
            filename = pathogenSampleFiles.add('Lausannevirus', 'filename-1')
            self.assertEqual('out/pathogen-0-sample-0.fasta', filename)

示例12: testDuplicatePathogenProteinSample

 def testDuplicatePathogenProteinSample(self):
     """
     If a protein grouper is given duplicate information for a
     pathogen/protein/sample combination it must raise a ValueError.
     """
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
     pg = ProteinGrouper()
     pg.addFile('sample', fp)
     # Rewind before reusing the buffer: a StringIO that has already been
     # read sits at EOF and would yield no lines, so the duplicate would
     # never be presented (assumes addFile consumes fp - TODO confirm).
     fp.seek(0)
     # Raw strings keep the regex escapes (\| and \.) intact without relying
     # on Python passing unrecognised escape sequences through unchanged.
     error = (r"^Protein 'gi\|327\|X\|I44.6 ubiquitin' already seen for "
              r"pathogen 'Lausannevirus' sample 'sample'\.$")
     assertRaisesRegex(self, ValueError, error, pg.addFile, 'sample', fp)

示例13: testTwoLinesInOneFileTitle

 def testTwoLinesInOneFileTitle(self):
     """
     If a protein grouper is given one file with two protein lines, each
     from a different virus, its _title method must return the expected
     string.
     """
     # Same protein on both lines; only the bracketed virus name differs.
     fp = StringIO(
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n'
         '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [X Virus]\n'
     )
     pg = ProteinGrouper()
     pg.addFile('sample-filename', fp)
     self.assertEqual('2 viruses found in 1 sample', pg._title())

示例14: testIdenticalReadsRemoved

    def testIdenticalReadsRemoved(self):
        If two proteins in the same pathogen are matched by the same read, the
        de-duplicated FASTA for the pathogen must have only one copy of the
        duplicated read.
        class Open(object):
            def __init__(self, test, manager):
                self.test = test
                self.manager = manager
                self.expectedFilenames = {'out/0.fasta', 'out/1.fasta',

            def sideEffect(self, filename, *args, **kwargs):
                except KeyError:
                        'Open called with unexpected filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                    if filename == 'out/0.fasta':
                        return File(['>id1\n', 'ACTG\n'])
                    elif filename == 'out/1.fasta':
                        return File(['>id1\n', 'ACTG\n', '>id2\n', 'CAGT\n'])
                        return self.manager

        fp = StringIO(
            '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
            '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n'
        fastaIO = StringIO()

        def manager():
            yield fastaIO

        pg = ProteinGrouper()
        pg.addFile('filename-1', fp)
        pathogenSampleFiles = PathogenSampleFiles(pg)

        opener = Open(self, manager())
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = opener.sideEffect
            filename = pathogenSampleFiles.add('Lausannevirus', 'filename-1')

        self.assertEqual('out/pathogen-0-sample-0.fasta', filename)
        self.assertEqual('>id1\nACTG\n>id2\nCAGT\n', fastaIO.getvalue())
        # Make sure all expected filenames were seen by the mocked open.
        self.assertEqual(set(), opener.expectedFilenames)

示例15: testReadLengthsAdded

    def testReadLengthsAdded(self):
        If saveReadLengths is True for a ProteinGrouper, read lengths must be
        saved for each protein.
        class Open(object):
            def __init__(self, test, manager):
                self.test = test
                self.manager = manager
                self.expectedFilenames = {'out/0.fasta', 'out/1.fasta',

            def sideEffect(self, filename, *args, **kwargs):
                if filename in self.expectedFilenames:
                    if filename == 'out/0.fasta':
                        return File(['>id1\n', 'ACTG\n'])
                    elif filename == 'out/1.fasta':
                        return File(['>id2\n', 'AC\n', '>id3\n', 'CAGTTTT\n'])
                        return self.manager
                        'Open called with unexpected filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        fp = StringIO(
            '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
            '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n'
        fastaIO = StringIO()

        def manager():
            yield fastaIO

        opener = Open(self, manager())
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = opener.sideEffect
            pg = ProteinGrouper(saveReadLengths=True)
            pg.addFile('filename-1', fp)
            pathogenSampleFiles = PathogenSampleFiles(pg)
            pathogenSampleFiles.add('Lausannevirus', 'filename-1')

        # Read lengths must be saved correctly.
        proteins = pg.pathogenNames['Lausannevirus']['filename-1']['proteins']
                         proteins['gi|327410| protein 77']['readLengths'])
        self.assertEqual((2, 7),
                         proteins['gi|327409| ubiquitin']['readLengths'])
