本文整理汇总了 Python 中 dark.proteins.ProteinGrouper 类的典型用法代码示例。如果您正苦于以下问题：Python ProteinGrouper 类的具体用法？Python ProteinGrouper 怎么用？Python ProteinGrouper 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 ProteinGrouper 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: testOneLineInOneFile
def testOneLineInOneFile(self):
    """
    After adding a single one-line file, the grouper's virusTitles dict
    must map the virus name to the sample name to a one-element list
    holding the protein's details.
    """
    sampleLines = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
    grouper = ProteinGrouper()
    grouper.addFile('sample-filename', sampleLines)

    # The single protein record expected for the one input line.
    proteinRecord = {
        'bestScore': 48.1,
        'bluePlotFilename': 'out/0.png',
        'coverage': 0.77,
        'fastaFilename': 'out/0.fasta',
        'hspCount': 6,
        'index': 0,
        'medianScore': 46.6,
        'outDir': 'out',
        'proteinLength': 74,
        'proteinTitle': 'gi|327|X|I44.6 ubiquitin',
        'proteinURL': 'http://www.ncbi.nlm.nih.gov/nuccore/I44',
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'sample-filename': [proteinRecord],
        },
    }
    self.assertEqual(expected, grouper.virusTitles)
示例2: testNoFilesToStr
def testNoFilesToStr(self):
    """
    A protein grouper that has been given no files must produce the
    expected text summary from toStr.
    """
    grouper = ProteinGrouper()
    summary = grouper.toStr()
    self.assertEqual('0 viruses found in 0 samples\n', summary)
示例3: testOneLineInOneFileFASTQ
def testOneLineInOneFileFASTQ(self):
    """
    After adding a single one-line file to a grouper created with
    format_='fastq', the pathogenNames dict must be as expected, with the
    reads filename carrying a .fastq suffix.
    """
    sampleLines = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
    grouper = ProteinGrouper(format_='fastq')
    grouper.addFile('sample-filename', sampleLines)

    proteinName = 'gi|327|X|I44.6 ubiquitin'
    proteinRecord = {
        'bestScore': 48.1,
        'bluePlotFilename': 'out/0.png',
        'coverage': 0.77,
        'readsFilename': 'out/0.fastq',
        'hspCount': 6,
        'index': 0,
        'medianScore': 46.6,
        'outDir': 'out',
        'proteinLength': 74,
        'proteinName': proteinName,
        'proteinURL': 'http://www.ncbi.nlm.nih.gov/nuccore/I44',
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'sample-filename': {
                'proteins': {proteinName: proteinRecord},
                'uniqueReadCount': None,
            },
        },
    }
    self.assertEqual(expected, grouper.pathogenNames)
示例4: testOneLineInOneFileWithDifferentAssetDir
def testOneLineInOneFileWithDifferentAssetDir(self):
    """
    When a protein grouper is created with a non-default assetDir, that
    name must appear as the outDir and as the directory part of the plot
    and reads filenames.
    """
    assetDir = 'differentname'
    sampleLines = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
    grouper = ProteinGrouper(assetDir=assetDir)
    grouper.addFile('sample-filename', sampleLines)

    proteinName = 'gi|327|X|I44.6 ubiquitin'
    proteinRecord = {
        'bestScore': 48.1,
        'bluePlotFilename': assetDir + '/0.png',
        'coverage': 0.77,
        'readsFilename': assetDir + '/0.fasta',
        'hspCount': 6,
        'index': 0,
        'medianScore': 46.6,
        'outDir': assetDir,
        'proteinLength': 74,
        'proteinName': proteinName,
        'proteinURL': 'http://www.ncbi.nlm.nih.gov/nuccore/I44',
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'sample-filename': {
                'proteins': {proteinName: proteinRecord},
                'uniqueReadCount': None,
            },
        },
    }
    self.assertEqual(expected, grouper.pathogenNames)
示例5: testOneLineInEachOfTwoFilesSamePathogen
def testOneLineInEachOfTwoFilesSamePathogen(self):
    """
    After adding two files that each hold one line for the same pathogen,
    the pathogenNames dict must list both samples under that pathogen.
    """
    firstSample = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n')
    secondSample = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n')

    grouper = ProteinGrouper()
    grouper.addFile('sample-filename-1', firstSample)
    grouper.addFile('sample-filename-2', secondSample)

    # Both records are expected to have index 0 and the same out/0.*
    # filenames.
    firstProtein = {
        'bestScore': 44.2,
        'bluePlotFilename': 'out/0.png',
        'coverage': 0.63,
        'readsFilename': 'out/0.fasta',
        'hspCount': 9,
        'index': 0,
        'medianScore': 41.3,
        'outDir': 'out',
        'proteinLength': 12,
        'proteinName': 'gi|327410| protein 77',
        'proteinURL': None,
        'readCount': 9,
    }
    secondProtein = {
        'bestScore': 48.1,
        'bluePlotFilename': 'out/0.png',
        'coverage': 0.77,
        'readsFilename': 'out/0.fasta',
        'hspCount': 6,
        'index': 0,
        'medianScore': 46.6,
        'outDir': 'out',
        'proteinLength': 74,
        'proteinName': 'gi|327409| ubiquitin',
        'proteinURL': None,
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'sample-filename-1': {
                'proteins': {'gi|327410| protein 77': firstProtein},
                'uniqueReadCount': None,
            },
            'sample-filename-2': {
                'proteins': {'gi|327409| ubiquitin': secondProtein},
                'uniqueReadCount': None,
            },
        },
    }
    self.assertEqual(expected, grouper.pathogenNames)
示例6: testNoFilesToStr
def testNoFilesToStr(self):
    """
    A protein grouper that has been given no files must produce the
    expected text summary from toStr.
    """
    expected = 'Overall, proteins from 0 pathogens were found in 0 samples.\n'
    grouper = ProteinGrouper()
    self.assertEqual(expected, grouper.toStr())
示例7: testTwoLinesInOneFileDifferentPathogens
def testTwoLinesInOneFileDifferentPathogens(self):
    """
    After adding one file whose two lines name different pathogens, the
    pathogenNames dict must hold one entry per pathogen, each under the
    same sample name.
    """
    sampleLines = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
        '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Hepatitis B virus]\n'
    )
    grouper = ProteinGrouper()
    grouper.addFile('sample-filename', sampleLines)

    # Expected record for the first input line (index 0).
    lausanneProtein = {
        'bestScore': 44.2,
        'bluePlotFilename': 'out/0.png',
        'coverage': 0.63,
        'readsFilename': 'out/0.fasta',
        'hspCount': 9,
        'index': 0,
        'medianScore': 41.3,
        'outDir': 'out',
        'proteinLength': 12,
        'proteinName': 'gi|327410| protein 77',
        'proteinURL': None,
        'readCount': 9,
    }
    # Expected record for the second input line (index 1).
    hepatitisProtein = {
        'bestScore': 48.1,
        'bluePlotFilename': 'out/1.png',
        'coverage': 0.77,
        'readsFilename': 'out/1.fasta',
        'hspCount': 6,
        'index': 1,
        'medianScore': 46.6,
        'outDir': 'out',
        'proteinLength': 74,
        'proteinName': 'gi|327409| ubiquitin',
        'proteinURL': None,
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'sample-filename': {
                'proteins': {'gi|327410| protein 77': lausanneProtein},
                'uniqueReadCount': None,
            },
        },
        'Hepatitis B virus': {
            'sample-filename': {
                'proteins': {'gi|327409| ubiquitin': hepatitisProtein},
                'uniqueReadCount': None,
            },
        },
    }
    self.assertEqual(expected, grouper.pathogenNames)
示例8: testOneLineInOneFileTitle
def testOneLineInOneFileTitle(self):
    """
    After adding a single one-line file, the grouper's _title method must
    return the singular virus/sample summary string.
    """
    sampleLines = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
    grouper = ProteinGrouper()
    grouper.addFile('sample-filename', sampleLines)
    title = grouper._title()
    self.assertEqual('1 virus found in 1 sample', title)
示例9: testNoFilesToHTML
def testNoFilesToHTML(self):
    """
    A protein grouper that has been given no files must produce the
    expected HTML document from toHTML.
    """
    title = '0 viruses found in 0 samples'
    # The font stack shared by the two monospace CSS rules below.
    monospace = ' font-family: "Courier New", Courier, monospace;'

    expectedLines = [
        '<html>',
        '<head>',
        '<title>',
        title,
        '</title>',
        '</head>',
        '<body>',
        '<style>',
        ' body {',
        ' margin-left: 2%;',
        ' margin-right: 2%;',
        ' }',
        ' .sample {',
        ' margin-bottom: 2px;',
        ' }',
        ' .sample-name {',
        ' color: red;',
        ' }',
        ' .index {',
        ' font-size: small;',
        ' }',
        ' .protein-title {',
        monospace,
        ' }',
        ' .stats {',
        monospace,
        ' white-space: pre;',
        ' }',
        ' .protein-list {',
        ' margin-top: 2px;',
        ' }',
        '</style>',
        '</head>',
        '<body>',
        '<h1>%s</h1>' % title,
        '<h2>Virus index</h2>',
        '</p>',
        '<h2>Sample index</h2>',
        '</p>',
        '<h1>Viruses by sample</h1>',
        '<h1>Samples by virus</h1>',
        '</body>',
        '</html>',
    ]
    grouper = ProteinGrouper()
    self.assertEqual('\n'.join(expectedLines), grouper.toHTML())
示例10: testOneLineInEachOfTwoFilesDifferentViruses
def testOneLineInEachOfTwoFilesDifferentViruses(self):
    """
    After adding two files that live in different directories, each with
    one line for a different virus, the virusTitles dict must hold one
    entry per virus, with output paths under the sample's own directory.
    """
    firstSample = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n')
    secondSample = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Hepatitis B virus]\n')

    grouper = ProteinGrouper()
    grouper.addFile('dir-1/sample-filename-1', firstSample)
    grouper.addFile('dir-2/sample-filename-2', secondSample)

    lausanneRecord = {
        'bestScore': 44.2,
        'bluePlotFilename': 'dir-1/out/0.png',
        'coverage': 0.63,
        'fastaFilename': 'dir-1/out/0.fasta',
        'hspCount': 9,
        'index': 0,
        'medianScore': 41.3,
        'outDir': 'dir-1/out',
        'proteinLength': 12,
        'proteinTitle': 'gi|327410| protein 77',
        'proteinURL': None,
        'readCount': 9,
    }
    hepatitisRecord = {
        'bestScore': 48.1,
        'bluePlotFilename': 'dir-2/out/0.png',
        'coverage': 0.77,
        'fastaFilename': 'dir-2/out/0.fasta',
        'hspCount': 6,
        'index': 0,
        'medianScore': 46.6,
        'outDir': 'dir-2/out',
        'proteinLength': 74,
        'proteinTitle': 'gi|327409| ubiquitin',
        'proteinURL': None,
        'readCount': 5,
    }
    expected = {
        'Lausannevirus': {
            'dir-1/sample-filename-1': [lausanneRecord],
        },
        'Hepatitis B virus': {
            'dir-2/sample-filename-2': [hepatitisRecord],
        },
    }
    self.assertEqual(expected, grouper.virusTitles)
示例11: testOpenNotCalledOnRepeatedCall
def testOpenNotCalledOnRepeatedCall(self):
    """
    A second call to pathogenSampleFiles.add with the same arguments must
    not open any file, because the result of the first call is cached.
    """
    class Open(object):
        """
        Replacement for the builtin open that tolerates exactly two calls
        and fails the test on any further call.
        """
        def __init__(self, test, manager):
            self.test = test
            self.manager = manager
            self.count = 0

        def sideEffect(self, filename, *args, **kwargs):
            if self.count not in (0, 1):
                return self.test.fail(
                    'We are only supposed to be called twice. '
                    'Filename: %r, Args: %r, Keyword args: %r.' %
                    (filename, args, kwargs))

            if self.count == 0:
                # First call: the reads file for the protein.
                self.test.assertEqual('out/0.fasta', filename)
                self.count += 1
                return File(['>id1\n', 'ACTG\n'])

            # Second call: the combined pathogen/sample output file.
            self.test.assertEqual('out/pathogen-0-sample-0.fasta', filename)
            self.count += 1
            return self.manager

    fastaIO = StringIO()

    @contextmanager
    def manager():
        yield fastaIO

    sampleLines = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n')
    grouper = ProteinGrouper()
    grouper.addFile('filename-1', sampleLines)
    pathogenSampleFiles = PathogenSampleFiles(grouper)

    fakeOpen = Open(self, manager()).sideEffect
    expectedFilename = 'out/pathogen-0-sample-0.fasta'

    with patch.object(builtins, 'open') as mockMethod:
        mockMethod.side_effect = fakeOpen

        first = pathogenSampleFiles.add('Lausannevirus', 'filename-1')
        self.assertEqual(expectedFilename, first)
        self.assertEqual('>id1\nACTG\n', fastaIO.getvalue())

        # Repeated call, made while open is still patched: the side
        # effect fails the test if open is called a third time.
        second = pathogenSampleFiles.add('Lausannevirus', 'filename-1')
        self.assertEqual(expectedFilename, second)
示例12: testDuplicatePathogenProteinSample
def testDuplicatePathogenProteinSample(self):
    """
    If a protein grouper is given duplicate information for a
    pathogen/protein/sample combination it must raise a ValueError.
    """
    fp = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n')
    pg = ProteinGrouper()
    pg.addFile('sample', fp)
    # Rewind so the second addFile call reads the same line again.
    fp.seek(0)
    # Raw strings are needed here: in an ordinary string literal '\|' and
    # '\.' are invalid escape sequences (a warning today, and slated to
    # become an error). The dot in the protein name is escaped too, so
    # that it only matches a literal '.'.
    error = (r"^Protein 'gi\|327\|X\|I44\.6 ubiquitin' already seen for "
             r"pathogen 'Lausannevirus' sample 'sample'\.$")
    assertRaisesRegex(self, ValueError, error, pg.addFile, 'sample', fp)
示例13: testTwoLinesInOneFileTitle
def testTwoLinesInOneFileTitle(self):
    """
    After adding one file whose two protein lines name different viruses,
    the grouper's _title method must return the plural-virus,
    singular-sample summary string.
    """
    sampleLines = StringIO(
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [Lausannevirus]\n'
        '0.77 46.6 48.1 5 6 74 gi|327|X|I44.6 ubiquitin [X Virus]\n'
    )
    grouper = ProteinGrouper()
    grouper.addFile('sample-filename', sampleLines)
    title = grouper._title()
    self.assertEqual('2 viruses found in 1 sample', title)
示例14: testIdenticalReadsRemoved
def testIdenticalReadsRemoved(self):
    """
    When the same read matches two proteins of one pathogen, the FASTA
    written for that pathogen and sample must contain the read only once.
    """
    class Open(object):
        """
        Replacement for the builtin open that accepts each expected
        filename once and fails the test on any other filename.
        """
        def __init__(self, test, manager):
            self.test = test
            self.manager = manager
            self.expectedFilenames = {'out/0.fasta', 'out/1.fasta',
                                      'out/pathogen-0-sample-0.fasta'}

        def sideEffect(self, filename, *args, **kwargs):
            # Contents served for the two per-protein reads files. Read
            # id1 appears in both.
            readsByFilename = {
                'out/0.fasta': ['>id1\n', 'ACTG\n'],
                'out/1.fasta': ['>id1\n', 'ACTG\n', '>id2\n', 'CAGT\n'],
            }
            try:
                self.expectedFilenames.remove(filename)
            except KeyError:
                self.test.fail(
                    'Open called with unexpected filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))
            else:
                lines = readsByFilename.get(filename)
                if lines is None:
                    # The remaining expected name is the output file.
                    return self.manager
                return File(lines)

    fastaIO = StringIO()

    @contextmanager
    def manager():
        yield fastaIO

    sampleLines = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
        '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n'
    )
    grouper = ProteinGrouper()
    grouper.addFile('filename-1', sampleLines)
    pathogenSampleFiles = PathogenSampleFiles(grouper)

    opener = Open(self, manager())
    with patch.object(builtins, 'open') as mockMethod:
        mockMethod.side_effect = opener.sideEffect
        written = pathogenSampleFiles.add('Lausannevirus', 'filename-1')
        self.assertEqual('out/pathogen-0-sample-0.fasta', written)
        self.assertEqual('>id1\nACTG\n>id2\nCAGT\n', fastaIO.getvalue())

    # Every expected filename must have been passed to the mocked open.
    self.assertEqual(set(), opener.expectedFilenames)
示例15: testReadLengthsAdded
def testReadLengthsAdded(self):
    """
    When a ProteinGrouper is created with saveReadLengths=True, each
    protein's record must carry the lengths of its reads.
    """
    class Open(object):
        """
        Replacement for the builtin open that serves the expected
        filenames and fails the test on any other filename.
        """
        def __init__(self, test, manager):
            self.test = test
            self.manager = manager
            self.expectedFilenames = {'out/0.fasta', 'out/1.fasta',
                                      'out/pathogen-0-sample-0.fasta'}

        def sideEffect(self, filename, *args, **kwargs):
            if filename not in self.expectedFilenames:
                return self.test.fail(
                    'Open called with unexpected filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

            if filename == 'out/0.fasta':
                return File(['>id1\n', 'ACTG\n'])
            if filename == 'out/1.fasta':
                return File(['>id2\n', 'AC\n', '>id3\n', 'CAGTTTT\n'])
            return self.manager

    fastaIO = StringIO()

    @contextmanager
    def manager():
        yield fastaIO

    sampleLines = StringIO(
        '0.63 41.3 44.2 9 9 12 gi|327410| protein 77 [Lausannevirus]\n'
        '0.77 46.6 48.1 5 6 74 gi|327409| ubiquitin [Lausannevirus]\n'
    )

    opener = Open(self, manager())
    with patch.object(builtins, 'open') as mockMethod:
        mockMethod.side_effect = opener.sideEffect
        grouper = ProteinGrouper(saveReadLengths=True)
        grouper.addFile('filename-1', sampleLines)
        pathogenSampleFiles = PathogenSampleFiles(grouper)
        pathogenSampleFiles.add('Lausannevirus', 'filename-1')

    # Read lengths must be saved correctly: one read of length 4 for the
    # first protein, reads of length 2 and 7 for the second.
    sample = grouper.pathogenNames['Lausannevirus']['filename-1']
    proteins = sample['proteins']
    self.assertEqual((4,),
                     proteins['gi|327410| protein 77']['readLengths'])
    self.assertEqual((2, 7),
                     proteins['gi|327409| ubiquitin']['readLengths'])