当前位置: 首页>>代码示例>>Python>>正文


Python SeqIO.to_dict方法代码示例

本文整理汇总了Python中Bio.SeqIO.to_dict方法的典型用法代码示例。如果您正苦于以下问题：Python SeqIO.to_dict方法的具体用法？Python SeqIO.to_dict怎么用？Python SeqIO.to_dict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.SeqIO的用法示例。


在下文中一共展示了SeqIO.to_dict方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_prepare_with_alignment_with_ref_name

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def test_prepare_with_alignment_with_ref_name(self, test_file, test_seqs, existing_with_ref, existing_aln, ref_seq, out_file):
        """Check ``align.prepare`` when the reference is identified by name only.

        ``ref_seq.id`` is passed as the reference name and no reference file is
        given. The returned alignment path must be the existing alignment
        itself, every record in it (reference included) must be intact, and the
        written test sequences must match ``test_seqs`` exactly.
        """
        alignment_path, sequences_path, _ignored = align.prepare(
            [test_file], existing_with_ref, out_file, ref_seq.id, None)

        # The alignment must still be on disk, at the very path that was passed in.
        assert os.path.isfile(alignment_path), "Didn't write existing alignment where it said"
        assert alignment_path == existing_with_ref, "Rewrote the alignment file unexpectedly"

        # Every record of the alignment, reference included, must be untouched.
        aligned = SeqIO.to_dict(SeqIO.parse(alignment_path, "fasta"))
        assert aligned[ref_seq.id].seq == ref_seq.seq, "Reference sequence dropped from alignment"
        for name in existing_aln:
            assert name in aligned, "Some existing alignment sequences dropped unexpectedly"
            assert aligned[name].seq == existing_aln[name].seq, "Some existing alignment sequences changed unexpectedly"

        # The sequences to be aligned must be written out unmodified.
        assert os.path.isfile(sequences_path), "Didn't write test sequences where it said"
        written = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
        for name in test_seqs:
            assert name in written, "Some test sequences unexpectedly dropped"
            assert written[name].seq == test_seqs[name].seq, "Some test sequences changed unexpectedly"
        assert written.keys() == test_seqs.keys()
开发者ID:nextstrain,项目名称:augur,代码行数:20,代码来源:test_align.py

示例2: test_prepare_with_alignment_with_ref_seq

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def test_prepare_with_alignment_with_ref_seq(self, test_file, test_seqs, existing_file, existing_aln, ref_seq, ref_file, out_file):
        """Test that, given a set of test sequences, an existing alignment, and a reference sequence, the reference
        is added to the existing alignment and no other changes are made.

        The reference is supplied as a file (``ref_file``) with no reference
        name. The name returned by ``align.prepare`` must equal ``ref_seq.id``,
        and the returned alignment path must differ from the input alignment
        path so that the original file is not overwritten.
        """
        aln_outfile, seqs_outfile, ref_name = align.prepare([test_file], existing_file, out_file, None, ref_file)
        assert ref_name == ref_seq.id, "Didn't return strain name from refrence file"
        assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
        # Compare against the input *path*. ``existing_aln`` is the parsed record
        # mapping (it is indexed by sequence name below), so comparing the output
        # path with it could never fail and the check was vacuous.
        assert aln_outfile != existing_file, "Unexpectedly overwrote existing alignment"
        # Alignment file should have the reference added
        aln_output = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
        assert aln_output[ref_seq.id].seq == ref_seq.seq, "Reference sequence not added to alignment"
        for seq in existing_aln:
            assert seq in aln_output, "Some existing alignment sequences dropped unexpectedly"
            assert aln_output[seq].seq == existing_aln[seq].seq, "Some existing alignment sequences changed unexpectedly"
        # test sequences should be unchanged
        assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
        seq_output = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
        for seq in test_seqs:
            assert seq in seq_output, "Some test sequences unexpectedly dropped"
            assert seq_output[seq].seq == test_seqs[seq].seq, "Some test sequences changed unexpectedly"
        assert seq_output.keys() == test_seqs.keys()
开发者ID:nextstrain,项目名称:augur,代码行数:22,代码来源:test_align.py

示例3: extract_paralogs

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def extract_paralogs(gene,prefix):
    """Write the putative paralog sequences of one gene/sample to a FASTA file.

    Reads the sequence IDs listed in ``<gene>/<prefix>/paralog_warning.txt``
    (second whitespace-separated field of every line), looks them up in
    ``<gene>/<prefix>/exonerate_results.fasta`` and writes them to
    ``<gene>/<prefix>/paralogs/<gene>_paralogs.fasta``. The record whose ID
    equals the first line of ``exonerate_stats.csv`` is renamed
    ``<prefix>.main``; every other record is renamed ``<prefix>.<index>``.

    Args:
        gene: Gene directory name; also used in the output file name.
        prefix: Sample sub-directory name; also used as the ID prefix.

    Returns:
        The number of sequences written, or 0 when ``exonerate_stats.csv``
        cannot be read.

    Raises:
        IOError: If ``paralog_warning.txt`` cannot be opened.
        KeyError: If a listed ID is absent from the exonerate results.
    """
    sample_dir = os.path.join(gene, prefix)

    # ``with`` closes the handles deterministically instead of leaking them.
    with open(os.path.join(sample_dir, "paralog_warning.txt")) as warning_file:
        # De-duplicate IDs; ``split()`` already strips surrounding whitespace.
        putative_paralog_ids = list({line.split()[1] for line in warning_file})

    try:
        with open(os.path.join(sample_dir, "exonerate_stats.csv")) as stats_file:
            chosen_paralog = stats_file.readline().rstrip()
    except IOError:
        # No stats file: nothing to extract for this gene.
        return 0

    exonerate_dict = SeqIO.to_dict(
        SeqIO.parse(os.path.join(sample_dir, "exonerate_results.fasta"), 'fasta'))

    paralog_dir = os.path.join(sample_dir, "paralogs")
    if not os.path.isdir(paralog_dir):
        os.mkdir(paralog_dir)

    seqs_to_write = [exonerate_dict[seq_id] for seq_id in putative_paralog_ids]
    for index, record in enumerate(seqs_to_write):
        suffix = "main" if record.id == chosen_paralog else index
        record.id = "{}.{}".format(prefix, suffix)

    SeqIO.write(seqs_to_write, os.path.join(paralog_dir, '{}_paralogs.fasta'.format(gene)), 'fasta')

    return len(seqs_to_write)
开发者ID:mossmatters,项目名称:HybPiper,代码行数:26,代码来源:paralog_investigator.py

示例4: initial_exonerate

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def initial_exonerate(proteinfilename, assemblyfilename,prefix):
    """Conduct exonerate search, returns a dictionary of results.
    Using the ryo option in exonerate, the header should contain all the useful information.

    The search is run without a shell: the arguments are passed as a list and
    the output is redirected by handing the open file to the subprocess, so
    file names containing spaces or shell metacharacters are passed through
    verbatim.

    Args:
        proteinfilename: Path of the protein query file passed to exonerate.
        assemblyfilename: Path of the assembly (target) file passed to exonerate.
        prefix: Directory in which ``exonerate_results.fasta`` is written.

    Returns:
        The dictionary built by ``SeqIO.to_dict`` from the FASTA output,
        keyed by record ID; empty when exonerate wrote no records.
    """
    logger = logging.getLogger("pipeline")

    outputfilename = "%s/exonerate_results.fasta" %prefix
    # One FASTA record per hit. The "\n" sequences are literal backslash-n
    # pairs that exonerate expands itself; no shell quoting is needed here.
    exonerate_ryo = '>%ti,%qi,%qab,%qae,%pi,(%tS),%tab,%tae\\n%tcs\\n'
    exonerate_command = [
        'exonerate', '-m', 'protein2genome',
        '--showalignment', 'no', '--showvulgar', 'no', '-V', '0',
        '--ryo', exonerate_ryo, proteinfilename, assemblyfilename,
    ]

    logger.debug("%s >%s", " ".join(exonerate_command), outputfilename)
    with open(outputfilename, 'w') as outputfile:
        try:
            returncode = subprocess.call(exonerate_command, stdout=outputfile)
        except OSError as err:
            # With a shell, a missing executable only left an empty output
            # file; keep that outcome but make the failure visible.
            logger.error("Could not run exonerate: %s", err)
            returncode = None
    if returncode:
        logger.warning("exonerate exited with status %s", returncode)

    return SeqIO.to_dict(SeqIO.parse(outputfilename,'fasta'))
开发者ID:mossmatters,项目名称:HybPiper,代码行数:21,代码来源:exonerate_hits.py

示例5: make_intron_supercontig

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def make_intron_supercontig(contig_info,gene,prefix,add_N = False):
    """Concatenate the contigs listed in ``contig_info`` into one supercontig.

    Contigs are read from ``../<gene>_contigs.fasta``. For every entry the
    contig named by element 0 is appended, reverse-complemented when element 5
    is ``"(-)"``. The result is written to
    ``sequences/intron/<gene>_supercontig.fasta`` with ID ``<prefix>-<gene>``
    and an empty description.

    Args:
        contig_info: Sequence of indexable hit entries; element 0 is the
            contig ID and element 5 the strand, ``"(+)"`` or ``"(-)"``.
        gene: Gene name used in the input and output file names.
        prefix: Sample prefix used in the record ID.
        add_N: When true, ten ``N`` characters are inserted between
            consecutive contigs.

    Exits the process with status 1 if an entry has an unrecognised strand.
    """
    cap3contigs = SeqIO.to_dict(SeqIO.parse("../{}_contigs.fasta".format(gene),'fasta'))
    intron_supercontig = SeqRecord(Seq(''))
    last_index = len(contig_info) - 1
    for index, hit in enumerate(contig_info):
        strand = hit[5]
        if strand == "(+)":
            intron_supercontig += cap3contigs[hit[0]]
        elif strand == "(-)":
            intron_supercontig += cap3contigs[hit[0]].reverse_complement()
        else:
            sys.stderr.write("Strandedness not found!")
            sys.exit(1)
        # Compare positions, not values: an earlier entry that happens to equal
        # the last one must still be followed by a spacer.
        if add_N and index != last_index:
            intron_supercontig += "NNNNNNNNNN"
    intron_supercontig.id = '{}-{}'.format(prefix,gene)
    intron_supercontig.description = ''
    SeqIO.write(intron_supercontig,'sequences/intron/{}_supercontig.fasta'.format(gene),'fasta')
开发者ID:mossmatters,项目名称:HybPiper,代码行数:18,代码来源:intronerate.py

示例6: testResfinderBetaLactam2MutationsSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactam2MutationsSuccess(self):
        # Runs AMR detection on the blaIMP-42 "mut-2" input, then checks the
        # single ResFinder result row and the hit FASTA file found in the
        # output directory.
        seq_id = 'blaIMP-42_1_AB753456'
        input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-mut-2.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

        resfinder_results = self.amr_detection.get_resfinder_results()
        self.assertEqual(len(resfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected gene before inspecting individual columns.
        gene_rows = resfinder_results[resfinder_results['Gene'] == 'blaIMP-42']
        self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.73, places=2, msg='Wrong pid')
        self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
        self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')
        self.assertEqual(gene_rows['Predicted Phenotype'].iloc[0],
                         'ampicillin, amoxicillin/clavulanic acid, cefoxitin, ceftriaxone, meropenem',
                         'Wrong phenotype')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-mut-2.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        # The hit sequence must equal the sequence in the input file.
        reference_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
        self.assertEqual(reference_records[seq_id].seq, hit_records[seq_id].seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:27,代码来源:test_AMRDetection.py

示例7: testResfinderBetaLactamDelStartSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamDelStartSuccess(self):
        # Runs AMR detection on the blaIMP-42 "del-start" input, then checks
        # the single ResFinder result row and the hit FASTA file found in the
        # output directory.
        seq_id = 'blaIMP-42_1_AB753456'
        input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-del-start.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

        resfinder_results = self.amr_detection.get_resfinder_results()
        self.assertEqual(len(resfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected gene before inspecting individual columns.
        gene_rows = resfinder_results[resfinder_results['Gene'] == 'blaIMP-42']
        self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 100.00, places=2, msg='Wrong pid')
        self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 91.90, places=2, msg='Wrong overlap')
        self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '681/741', msg='Wrong lengths')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-del-start.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        # The hit sequence must equal the sequence in the input file.
        reference_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
        self.assertEqual(reference_records[seq_id].seq, hit_records[seq_id].seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:24,代码来源:test_AMRDetection.py

示例8: testResfinderBetaLactamInsStartSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamInsStartSuccess(self):
        # Runs AMR detection on the blaIMP-42 "ins-start" input, then checks
        # the single ResFinder result row and the hit FASTA file. The expected
        # sequence is read from the "mut-2" test data file.
        seq_id = 'blaIMP-42_1_AB753456'
        input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-ins-start.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

        resfinder_results = self.amr_detection.get_resfinder_results()
        self.assertEqual(len(resfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected gene before inspecting individual columns.
        gene_rows = resfinder_results[resfinder_results['Gene'] == 'blaIMP-42']
        self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.73, places=2, msg='Wrong pid')
        self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
        self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-ins-start.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        reference_path = path.join(self.test_data_dir, 'beta-lactam-blaIMP-42-mut-2.fsa')
        reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))
        expected_seq = reference_records[seq_id].seq
        actual_seq = hit_records[seq_id].seq
        logger.debug("expected_seq=%s", expected_seq)
        logger.debug("actual_seq=%s", actual_seq)
        self.assertEqual(expected_seq, actual_seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:27,代码来源:test_AMRDetection.py

示例9: testResfinderBetaLactamDelMiddleSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamDelMiddleSuccess(self):
        # Runs AMR detection on the blaIMP-42 "del-middle" input, then checks
        # the single ResFinder result row and the hit FASTA file. The expected
        # sequence is read from a file of the same name in the test data
        # directory.
        seq_id = 'blaIMP-42_1_AB753456'
        input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-del-middle.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

        resfinder_results = self.amr_detection.get_resfinder_results()
        self.assertEqual(len(resfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected gene before inspecting individual columns.
        gene_rows = resfinder_results[resfinder_results['Gene'] == 'blaIMP-42']
        self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.33, places=2, msg='Wrong pid')
        self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
        self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-del-middle.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        reference_path = path.join(self.test_data_dir, 'resfinder_beta-lactam-blaIMP-42-del-middle.fsa')
        reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))
        expected_seq = reference_records[seq_id].seq
        actual_seq = hit_records[seq_id].seq
        logger.debug("expected_seq=%s", expected_seq)
        logger.debug("actual_seq=%s", actual_seq)
        self.assertEqual(expected_seq, actual_seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:27,代码来源:test_AMRDetection.py

示例10: testResfinderBetaLactamInsMiddleSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamInsMiddleSuccess(self):
        # Runs AMR detection on the blaIMP-42 "ins-middle" input, then checks
        # the single ResFinder result row and the hit FASTA file. The expected
        # sequence is the input sequence, upper-cased before comparison.
        seq_id = 'blaIMP-42_1_AB753456'
        input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-ins-middle.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 97, 99, 99, 90, 0, 0, 0, 0, 0)

        resfinder_results = self.amr_detection.get_resfinder_results()
        self.assertEqual(len(resfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected gene before inspecting individual columns.
        gene_rows = resfinder_results[resfinder_results['Gene'] == 'blaIMP-42']
        self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 98.14, places=2, msg='Wrong pid')
        self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 101.62, places=2, msg='Wrong overlap')
        self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '753/741', msg='Wrong lengths')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-ins-middle.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        reference_path = path.join(self.test_data_dir, 'beta-lactam-blaIMP-42-ins-middle.fsa')
        reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))
        expected_seq = reference_records[seq_id].seq
        actual_seq = hit_records[seq_id].seq
        logger.debug("expected_seq=%s", expected_seq)
        logger.debug("actual_seq=%s", actual_seq)
        # Only the expected side is upper-cased, exactly as logged above it is not.
        self.assertEqual(expected_seq.upper(), actual_seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:27,代码来源:test_AMRDetection.py

示例11: testResfinderExcludeNonMatches

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderExcludeNonMatches(self):
        # Builds a detector with include_negative_results=False, runs it on one
        # matching and one non-matching input, and expects a single summary row
        # plus a hit file for the matching input only.
        seq_id = 'blaIMP-42_1_AB753456'
        detector = AMRDetectionResistance(self.resfinder_database, self.resfinder_drug_table, self.blast_handler,
                                          self.pointfinder_drug_table, self.pointfinder_database,
                                          include_negative_results=False, output_dir=self.outdir.name)
        matching_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-mut-2.fsa")
        non_matching_fasta = path.join(self.test_data_dir, "non-match.fsa")
        detector.run_amr_detection([matching_fasta, non_matching_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

        summary = detector.get_summary_results()
        self.assertEqual(len(summary.index), 1, 'Wrong number of rows in result')

        hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-mut-2.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        # The hit sequence must equal the sequence in the matching input file.
        reference_records = SeqIO.to_dict(SeqIO.parse(matching_fasta, 'fasta'))
        self.assertEqual(reference_records[seq_id].seq, hit_records[seq_id].seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:22,代码来源:test_AMRDetection.py

示例12: testPlasmidfinderNameSuccess

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testPlasmidfinderNameSuccess(self):
        # Runs AMR detection on the plasmid test input, then checks the single
        # PlasmidFinder result row (IncW) and the hit FASTA file found in the
        # output directory.
        seq_id = 'IncW_1__EF633507'
        input_fasta = path.join(self.test_data_dir, "test-plasmids-seq.fsa")
        self.amr_detection.run_amr_detection([input_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

        plasmidfinder_results = self.amr_detection.get_plasmidfinder_results()
        self.assertEqual(len(plasmidfinder_results.index), 1, 'Wrong number of rows in result')

        # Restrict to the expected plasmid before inspecting individual columns.
        plasmid_rows = plasmidfinder_results[plasmidfinder_results['Plasmid'] == "IncW"]

        self.assertEqual(len(plasmid_rows.index), 1, 'Wrong number of results detected')
        self.assertAlmostEqual(plasmid_rows['%Identity'].iloc[0], 100.00, places=2, msg='Wrong pid')
        self.assertAlmostEqual(plasmid_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
        self.assertEqual(plasmid_rows['Accession'].iloc[0], 'EF633507', msg='Wrong accession')
        self.assertEqual(plasmid_rows['HSP Length/Total Length'].iloc[0], '243/243', msg='Wrong lengths')

        hit_path = path.join(self.outdir.name, 'plasmidfinder_test-plasmids-seq.fsa')
        hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
        self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

        # The hit sequence must equal the sequence in the input file.
        reference_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
        self.assertEqual(reference_records[seq_id].seq, hit_records[seq_id].seq,
                         "records don't match")
开发者ID:phac-nml,项目名称:staramr,代码行数:26,代码来源:test_AMRDetectionPlasmid.py

示例13: checkPsvs

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def checkPsvs(df):
	"""Verify that every PSV in ``df`` matches the base found in the assembly.

	The FASTA file named by the module-level ``args.check`` is loaded and the
	rows of ``df`` are grouped by their ``ccid`` column. Groups whose name has
	no FASTA record are skipped. For the remaining rows, the base at position
	``qpos`` of the record (upper-cased) must equal ``truealt``.

	Args:
		df: pandas DataFrame with ``ccid``, ``qpos`` and ``truealt`` columns.

	Raises:
		AssertionError: On the first row whose ``truealt`` differs from the
			base in the sequence. Raised explicitly so the check still runs
			when Python is started with ``-O``.
	"""
	print("Checking if PSVs appear in assemblies", file=sys.stderr)
	from Bio import SeqIO
	recs = SeqIO.to_dict(SeqIO.parse(args.check, "fasta"))
	for name, group in df.groupby(by ="ccid"):
		# skip if we cannot find fasta entry
		if name not in recs:
			continue
		rec = recs[name]
		for _, row in group.iterrows():
			pos = row["qpos"]
			alt = row["truealt"]
			recalt = rec.seq[pos].upper()
			if alt != recalt:
				raise AssertionError("PSV called inccorectly at {}:{}, {} instead of {}".format(name,pos, alt, recalt))
开发者ID:mrvollger,项目名称:SDA,代码行数:18,代码来源:PSVLocations.py

示例14: get_ctxnum

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def get_ctxnum(reffile):
    """
    Count CG, CHG and CHH contexts in a reference genome FASTA file.

    Every sequence is upper-cased and scanned with a pair of zero-width
    patterns per context: a look-ahead for the motif itself and a look-behind
    using the complementary base classes (this appears to count the same
    context on the opposite strand). Zero-width matching means overlapping
    occurrences are all counted.

    Returns a tuple ``(num_cg, num_chg, num_chh)`` summed over all sequences.
    """
    with open(reffile) as handle:
        records = SeqIO.to_dict(SeqIO.parse(handle, 'fasta'))

    # One (look-ahead, look-behind) pair per context, in return order.
    context_patterns = (
        (re.compile(r'(?=(CG))'), re.compile(r'(?<=(CG))')),
        (re.compile(r'(?=(C[ACT]G))'), re.compile(r'(?<=(C[AGT]G))')),
        (re.compile(r'(?=(C[ACT][ACT]))'), re.compile(r'(?<=([AGT][AGT]G))')),
    )

    totals = [0, 0, 0]
    for record in records.values():
        sequence = str(record.seq).upper()
        for slot, pattern_pair in enumerate(context_patterns):
            for pattern in pattern_pair:
                totals[slot] += sum(1 for _ in pattern.finditer(sequence))
    return tuple(totals)
开发者ID:paoyangchen-laboratory,项目名称:methgo,代码行数:21,代码来源:cov.py

示例15: __init__

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def __init__(self, seq_table, records, max_dist, min_fold, threshold_pval, log=None):
        '''
        Store the inputs and start with no OTUs.

        seq_table: pandas.DataFrame
          Samples on the columns; sequences on the rows
        records: index of Bio.Seq
          Indexed, unaligned input sequences. This could come from BioPython's
          SeqIO.to_dict or SeqIO.index.
        max_dist: float
          genetic distance cutoff above which a sequence will not be merged into an OTU
        min_fold: float
          Multiply the sequence's abundance by this fold to get the minimum abundance
          of an OTU for merging
        threshold_pval: float
          P-value below which a sequence will not be merged into an OTU
        log: filehandle
          Log file reporting the abundance, genetic, and distribution checks.

        Raises RuntimeError when a row label of seq_table is not found in records.
        '''
        self.seq_table = seq_table
        self.records = records
        self.max_dist = max_dist
        self.min_fold = min_fold
        self.threshold_pval = threshold_pval
        self.log = log

        # total abundance of every sequence across samples, most abundant first
        row_totals = seq_table.sum(axis=1)
        self.seq_abunds = row_totals.sort_values(ascending=False)

        # every sequence ID in the table must also be present in the fasta records
        absent = [seq_id for seq_id in self.seq_abunds.index if seq_id not in records]
        if absent:
            raise RuntimeError("{} sequence IDs found in the sequence table but not in the fasta: {}".format(len(absent), absent))

        # no OTUs yet, and no sequence has been assigned to one
        self.membership = {}
        self.otus = []
开发者ID:thomasgurry,项目名称:amplicon_sequencing_pipeline,代码行数:37,代码来源:dbotu.py


注:本文中的Bio.SeqIO.to_dict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。