当前位置: 首页>>代码示例>>Python>>正文


Python SearchIO.parse方法代码示例

本文整理汇总了Python中Bio.SearchIO.parse方法的典型用法代码示例。如果您正苦于以下问题:Python SearchIO.parse方法的具体用法?Python SearchIO.parse怎么用?Python SearchIO.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.SearchIO的用法示例。


在下文中一共展示了SearchIO.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_write_and_compare

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
 def parse_write_and_compare(self, source_file, source_format, out_file, out_format, **kwargs):
     """Compares parsed QueryResults after they have been written to a file."""
     source_qresults = list(SearchIO.parse(source_file, source_format, **kwargs))
     SearchIO.write(source_qresults, out_file, out_format, **kwargs)
     out_qresults = list(SearchIO.parse(out_file, out_format, **kwargs))
     for source, out in zip(source_qresults, out_qresults):
         self.assertTrue(compare_search_obj(source, out))
开发者ID:janusz005,项目名称:biopython,代码行数:9,代码来源:test_SearchIO_write.py

示例2: parseBlastOutFile

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def parseBlastOutFile(filename):
	if filename[-3:] == "xml":
		qResultGen = SearchIO.parse(filename, 'blast-xml')
	elif filename[-3:] == "txt":
		qResultGen = SearchIO.parse(filename, 'blast-tab')
	else:
		print("Unrecognized filetype.")
		assert False

	parsed = {qRes.id : qRes for qRes in qResultGen}
	print("Parsed "+filename)

	return parsed
开发者ID:Rinoahu,项目名称:MICA,代码行数:15,代码来源:compare-blast-results.py

示例3: blastparse

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def blastparse(stdout, output, tname, ntname):
    global recorddict, minLength
    handle = open(output, 'w')  # open the target fasta file for writing
    blast_handle = cStringIO.StringIO(stdout)  # Convert string to IO object for use in SearchIO using StringIO
    try:  # Necessary to avoid bad genomes
        for qresult in SearchIO.parse(blast_handle, 'blast-tab'):  # Parse the blast output sting as if it were a file
            for hit in qresult:  # Hit object
                for hsp in hit:  # Hsp object
                    begin = hsp.query_range[0]  # Start of hsp
                    finish = hsp.query_range[1]  # End of hsp
                    if hsp.query_id in recorddict:
                        # For the Contig name in the target fasta dictionary mask using coordinates
                        if finish > begin:
                            recorddict[hsp.query_id].seq = \
                                recorddict[hsp.query_id].seq[:begin] + 'N' * (finish - begin + 1) \
                                + recorddict[hsp.query_id].seq[finish:]
                        else:
                            recorddict[hsp.query_id].seq \
                                = recorddict[hsp.query_id].seq[:finish] + 'N' * (begin - finish + 1) \
                                + recorddict[hsp.query_id].seq[begin:]
        recorddict_bak = deepcopy(recorddict)  # Copy the dictionary so we may iterate and modify the result
        for idline in recorddict_bak:
            # pattern = r'[^N]{'+ re.escape(str(minLength))+r'}' #  Find a sequence of at least the target length
            pattern = r'[ATCG]{100,}N{200,900}[ATCG]{100,}|[^N]{' + re.escape(str(minLength))+r'}'
            if re.match(pattern, str(recorddict[idline].seq)) is not None:
                SeqIO.write(recorddict[idline], handle, "fasta")
            else:
                # print 'Contig \'%s\' not written to file' % id
                recorddict.pop(idline)
    except ValueError:
        print 'Value Error: There was an error removing %s genome from %s' % (ntname, tname)
开发者ID:OLC-LOC-Bioinformatics,项目名称:SigSeekr,代码行数:33,代码来源:USSPpip.py

示例4: process_blast_output

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def process_blast_output(file, simple, argparser):
    qresults = SearchIO.parse(file, 'blast-xml')
    if simple:
        for qresult in qresults:
            for hit in qresult:
                for hsp in hit:
                    if ((hsp.aln_span == argparser.cont and (hsp.gap_num == 0) and
                             (hsp.aln_span == hsp.ident_num)) or (hsp.aln_span > argparser.cont)):
                        yield ([str(hsp), "\n\n"], None, hsp.aln_span)

                for hsp in hit:
                    if (hsp.aln_span >= argparser.cont and (hsp.gap_num == 0) and
                            (hsp.aln_span == hsp.ident_num)):
                        yield (None, [str(hsp), "\n\n"], hsp.aln_span)
    else:
        for qresult in qresults:
            for hit in qresult:
                for hsp in hit:
                    for v, c, p in encode(simstr(hsp.aln)):
                        if v == "1" and c >= argparser.cont:
                            yield (format_alignment(hsp, p, c), None, c)
                for hsp in hit:
                    for t0, t1, t2 in thrids(encode(simstr(hsp.aln))):
                        if t0[0] == "1":
                            assert (t0[2] < t1[2] < t2[2])
                            assert t2[0] == "1"
                            assert t1[0] == "0"
                            if t0[1] >= argparser.leftmin and t2[1] >= argparser.rightmin and \
                               (t0[1] + t2[1]) >= argparser.summin and \
                                t1[1] <= argparser.gapmax:
                                if not (argparser.S and
                                        (t0[1] >= argparser.cont or t2[1] >= argparser.cont)):
                                    yield (None, format_alignment(hsp, t0[2], t0[1], t2[2], t2[1]),
                                           t0[1]+t2[1]-t1[1])
开发者ID:aurbn,项目名称:peptblast,代码行数:36,代码来源:peptblast.py

示例5: parse_n_fill_run_data_searchio

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def parse_n_fill_run_data_searchio(run_path, run_data, querydb):
    run_id = get_run_id(run_path)
    run_format = get_run_format(run_path)
    for query in SearchIO.parse(run_path, run_format):
        for hit in query.hits:
            for hsp in hit.hsps:
                exons = [x.hit_range for x in hsp.fragments]
                coverage = 'N/A'

                if querydb is not None:
                    total_matched = sum(x.query_span for x in hsp.fragments)
                    coverage = '{:.2f}%'.format(100 * total_matched / len(querydb[query.id]))

                if hasattr(hsp, 'score'):
                    score = hsp.score
                elif hasattr(hsp, 'bitscore'):
                    score = hsp.bitscore
                else:
                    score = 'N/A'

                if hasattr(hsp, 'ident_num') and hasattr(query, 'seq_len'):
                    matched = '{:.2f}%'.format(100 * hsp.ident_num / query.seq_len)
                else:
                    matched = 'N/A'

                alignment = AlignmentData(run_id, score, matched, coverage, hsp.hit_range, exons)
                run_data[query.id][hit.id].append(alignment)
开发者ID:bioinf,项目名称:bi2014-neurospora-crassa,代码行数:29,代码来源:compare.py

示例6: check_index

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
    def check_index(self, filename, format, **kwargs):
        # check if Python3 installation has sqlite3
        try:
            import sqlite3
        except ImportError:
            sqlite3 = None

        parsed = list(SearchIO.parse(filename, format, **kwargs))
        # compare values by index
        indexed = SearchIO.index(filename, format, **kwargs)
        self.assertEqual(len(parsed), len(indexed.keys()))
        # compare values by index_db, only if sqlite3 is present
        if sqlite3 is not None:
            db_indexed = SearchIO.index_db(':memory:', [filename], format, **kwargs)
            self.assertEqual(len(parsed), len(db_indexed.keys()))

        for qres in parsed:
            idx_qres = indexed[qres.id]
            # parsed and indexed qresult are different objects!
            self.assertNotEqual(id(qres), id(idx_qres))
            # but they should have the same attribute values
            self.assertTrue(compare_search_obj(qres, idx_qres))
            # sqlite3 comparison, only if it's present
            if sqlite3 is not None:
                dbidx_qres = db_indexed[qres.id]
                self.assertNotEqual(id(qres), id(dbidx_qres))
                self.assertTrue(compare_search_obj(qres, dbidx_qres))

        indexed._proxy._handle.close()  # TODO - Better solution
        if sqlite3 is not None:
            db_indexed.close()
            db_indexed._con.close()
开发者ID:rwbarrette,项目名称:biopython,代码行数:34,代码来源:search_tests_common.py

示例7: parse_hmmscan_tab

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def parse_hmmscan_tab(infile, print_header=True):
    '''Parse hmmscan output in --tblout format'''
    if print_header:
        yield "query","top hit","evalue","certainty","num sig hits"
    records = SearchIO.parse(infile,'hmmer3-tab')
    for rec in records:
        query = rec.id
        if len(rec) > 1:
            hit1,hit2 = rec.hits[0],rec.hits[1]
            eval1,eval2 = hit1.evalue,hit2.evalue
            if eval1 != 0: # convert to -ln evalue
                eval1 = -np.log(eval1)
            if eval2 != 0:
                eval2 = -np.log(eval2)
            if eval1 == 0 and eval2 != 0: # this may be a hack, I don't care
                certainty = 1
            elif eval1 == 0 and eval2 == 0:
                certainty = 0
            else: # calculate certainty with info theoretic calc.
                total = eval1 + eval2
                p1,p2 = eval1/total, eval2/total
                certainty = 1 + (p1 * np.log2(p1)) + (p2 * np.log2(p2))
        else:
            certainty = 1
        yield query, rec.hits[0].id, rec.hits[0].evalue, certainty, len(rec)
开发者ID:bliebeskind,项目名称:PhyloPreprocessing,代码行数:27,代码来源:hmmscan_results.py

示例8: first_exonerate_parse

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def first_exonerate_parse(dir, newdir, prefix):
	cwd = os.getcwd()
	if not os.path.exists(cwd + newdir):
		os.makedirs(cwd + newdir)
	if not os.path.exists(cwd + '/merged_exons/'):
		os.makedirs(cwd + '/merged_exons/')
	for file in slistdir(cwd + dir):
		if 'DS_Store' not in file:
			result = SearchIO.parse(cwd + dir + file, 'exonerate-text')
			for h in result:
				for hh in h:
					for hhh in hh:
						hitcounter = 1
						for hhhh in hhh:
							hitseq =  hhhh.query
							rootname = file.split('.fasta')
							orthoname = file.split("_")
							orthosubdir = cwd + newdir + '/' + orthoname[0]
							if not os.path.exists(orthosubdir):
								os.makedirs(orthosubdir)
							newseqstr = str(hitseq.seq.ungap("-"))
							newid = prefix + str(hitcounter) + '_' + rootname[0]
							record = SeqRecord(Seq(newseqstr, generic_dna), id =  newid, description = '')
							fastaname = prefix + str(hitcounter) + '_' + rootname[0] + '.fasta'
							SeqIO.write(record, orthosubdir + '/' + fastaname, "fasta")
							hitcounter += 1
开发者ID:JMPflug,项目名称:Exon_machine,代码行数:28,代码来源:Exons_0.014.py

示例9: generate_blast_graph

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
    def generate_blast_graph(self):
        evalue_filter = lambda hsp: hsp.evalue < self.evalue
        file_name = "{}/blast_graph.txt".format(self.blast_output_path)
        for blast_file in glob.glob(self.blast_data_path):
            print("working on " + blast_file)
            # Parse the Blast file
            qresults = SearchIO.parse(blast_file, 'blast-tab', comments=True)
            for qresult in qresults:
                write_line = ""
                write_line += qresult.id + ":"
                # Go to the Hit section of query
                for hit in qresult[:]:
                    if not self.blast_graph.has_node(qresult.id):
                        self.blast_graph.add_node(qresult.id)
                    # Check if Hit has min value
                    filtered_hit = hit.filter(evalue_filter)
                    if filtered_hit is not None:
                        if not self.blast_graph.has_node(filtered_hit.id):
                            self.blast_graph.add_node(filtered_hit.id)
                        # Add Edge between graph nodes
                        self.blast_graph.add_edge(qresult.id, filtered_hit.id)
                        write_line += filtered_hit.id + ","
                if write_line != "":
                    with open(file_name, "a") as f_handle:
                        f_handle.write(write_line + '\n')

        # Write GML files
        if self.generate_gml_files:
            file_name = "{}/blast_graph.gml".format(self.blast_output_path)
            with open(file_name, "a") as f_handle:
                nx.write_gml(self.blast_graph, f_handle)
开发者ID:Falgunithakor,项目名称:BioInfo,代码行数:33,代码来源:FileManager.py

示例10: _call_hmmer

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def _call_hmmer(hmm, inputproteins):
	inputproteins = list(inputproteins)
	scores = {}
	for ip in inputproteins:
		scores[ip.id] = 0

	with ntf(prefix="/dev/shm/") as inputfasta:
		with ntf(prefix="/dev/shm/") as hmmoutput:
			SeqIO.write(inputproteins, inputfasta.name, 'fasta')
			hmmfile = os.path.join(hmm_location, hmm + '.hmm')
			sp.call(['hmmsearch', '-o', hmmoutput.name, hmmfile, inputfasta.name])
			hmmoutput.flush()
			hmmoutput.seek(0)
			QRS = SearchIO.parse(hmmoutput, format="hmmer3-text")
			for qr in QRS:
				# there's *always* a QR, even though it's usually empty.
				# qr.sort()
				# I'm kind of hoping this sorts by hit strength.
				# worth checking. I guess it doesn't matter anyway.

				for hit in qr:
					scores[hit.id] = max(scores[hit.id], hit.bitscore)
					for hsp in hit.hsps:
						def appropriate_hyphens(m):
							return '-' * len(m.group(0))

						if len(hsp.hit.seq) > 100:
							hitseq = re.sub('PPPPP+', appropriate_hyphens, str(hsp.hit.seq))
							hitseq = hitseq.translate(None,'-*').upper()
							yield hit.id, hsp.bitscore, hitseq
开发者ID:kemball,项目名称:karkadann,代码行数:32,代码来源:hmm.py

示例11: get_adenylation_domains

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def get_adenylation_domains(fasta, known=None, lagging_strand=False):
    adenylation_domains = []

    fasta_seqs = []
    for fs in SeqIO.parse(fasta, 'fasta'):
        revcom=False
        seq = str(fs.seq)
        pepseq, rf = get_pepseq(seq)
        if rf < 0 == lagging_strand:
            revcom=True
            seq = utils.reverse_complement(seq)
        fasta_seqs.append({'id': fs.id, 'seq': seq, 'pepseq': pepseq, 'rf': rf})
    for fs in fasta_seqs:
        utils.run_cmd([hmmsearch, '--domtblout', 'dump', os.path.abspath('lib/AMP-binding.hmm'), '-'],
                  '>header\n' + pepseq)
        with open('dump') as f:
            out = f.read()
        res_stream = StringIO(out)
        os.remove('dump')
        results = list(SearchIO.parse(res_stream, 'hmmsearch3-domtab'))

        for result in results:
            for i, hsp in enumerate(result.hsps, 1):
                s = hsp.hit_start
                e = hsp.hit_end

                adenylation_domains.append((AdenylationDomain(fs['seq'][s*3:e*3], known, '{}_{}'.format(fs['id'], i), revcom), s, e))

    return adenylation_domains
开发者ID:dkmva,项目名称:nrps-oligo-designer,代码行数:31,代码来源:nrpslib.py

示例12: get_hit_seq

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
def get_hit_seq(fastafile, filename):
	yamlfile = yaml_load_file(fastafile)
	blout = SearchIO.parse(filename, 'blast-text')
	for query in blout:
		seqid = query.id.split("\n")[0]
		#print(seqid)
		fh = open("multi_" + seqid + ".fasta", 'a')
		yamlfile[seqid]['hits'] = {}
		for hit in query.hits:
			gi = re.match(r"gi\|(.*)\|ref", hit.id).group(1)
			yamlfile[seqid]['hits'][gi] = {}
			#print(yamlfile[seqid]['hits'])
			for hsp in hit.hsps:
				#print(hsp.hit)
				#print(hsp.hit_strand)
				#print(hsp.hit_start)
				#print(hsp.hit_end)
				hitstart = hsp.hit_start + 1 - HIT_SEQUENCE_BPS
				hitstart = 1 if hitstart < 0 else hitstart
				hitend = hsp.hit_end + 1 + HIT_SEQUENCE_BPS
				hitstrand = "plus" if (hsp.hit_strand == 1) else "minus"
				#print(hsp.hit_end)
				out = os.popen(BLAST_BINARY + "/blastdbcmd -db " + BLAST_DATABASE + " -dbtype nucl -entry " + str(gi) + " -range " + str(hitstart) + "-" + str(hitend) + " -strand " + str(hitstrand)).read()
				fh.write(out)
				#print(hsp.hit.seq)
				#print(hsp.query.seq)
				#print("-----")
		fh.close()
		#break
	yaml_dump_file(fastafile, yamlfile)
开发者ID:kashmatic,项目名称:BLCA,代码行数:32,代码来源:msa_setup.py

示例13: get_scores_for_curated_via_hmm

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
 def get_scores_for_curated_via_hmm(self):
     """
     For every curated variant we want to generate a set of scores against HMMs.
     This is needed to supply the same type of information for curated as well as for automatic seqs.
     """
     #Construct the one big file from all cureated seqs.
     with open(self.curated_all_fasta, "w") as f:
         for hist_type, seed in self.get_seeds():
             seed_aln_file = os.path.join(self.seed_directory, hist_type, seed)
             for s in SeqIO.parse(seed_aln_file, "fasta"):
                 s.seq = s.seq.ungap("-")
                 SeqIO.write(s, f, "fasta")
     #Search it by our HMMs
     self.search(hmms_db=self.combined_hmm_file, out=self.curated_search_results_file,sequences=self.curated_all_fasta)
     ##We need to parse this results file;
     ##we take here a snippet from load_hmmsearch.py, and tune it to work for our curated seq header format
     for variant_query in SearchIO.parse(self.curated_search_results_file, "hmmer3-text"):
         print "Loading hmmsearch for variant:", variant_query.id
         variant_model=Variant.objects.get(id=variant_query.id)
         for hit in variant_query:
             gi = hit.id.split("|")[1]
             seq = Sequence.objects.get(id=gi)
             # print hit
             try: #sometimes we get this :    [No individual domains that satisfy reporting thresholds (although complete target did)]
                 best_hsp = max(hit, key=lambda hsp: hsp.bitscore)
                 add_score(seq, variant_model, best_hsp, seq.variant==variant_model)
             except:
                 pass
开发者ID:edraizen,项目名称:HistoneDB,代码行数:30,代码来源:buildvariants.py

示例14: retrieve_blast_data

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
 def retrieve_blast_data(self):
     for blast_file in glob.glob(self.blast_data_path):
         print(blast_file)
         print self.network_data
         qresults = SearchIO.parse(blast_file, 'blast-tab', comments=True)
         for qresult in qresults:
             if(qresult.id in self.network_data):
                 print qresult.id
开发者ID:Falgunithakor,项目名称:BioInfo,代码行数:10,代码来源:SilixNetworkChart.py

示例15: check_index

# 需要导入模块: from Bio import SearchIO [as 别名]
# 或者: from Bio.SearchIO import parse [as 别名]
    def check_index(self, filename, format, **kwargs):
        # check if Python3 installation has sqlite3
        try:
            import sqlite3
        except ImportError:
            sqlite3 = None

        if filename.endswith(".bgz"):
            handle = gzip.open(filename)
            parsed = list(SearchIO.parse(handle, format, **kwargs))
            handle.close()
        else:
            parsed = list(SearchIO.parse(filename, format, **kwargs))
        # compare values by index
        indexed = SearchIO.index(filename, format, **kwargs)
        self.assertEqual(len(parsed), len(indexed),
                         "Should be %i records in %s, index says %i"
                         % (len(parsed), filename, len(indexed)))
        # compare values by index_db, only if sqlite3 is present
        if sqlite3 is not None:
            db_indexed = SearchIO.index_db(':memory:', [filename], format, **kwargs)
            self.assertEqual(len(parsed), len(db_indexed),
                             "Should be %i records in %s, index_db says %i"
                             % (len(parsed), filename, len(db_indexed)))

        for qres in parsed:
            idx_qres = indexed[qres.id]
            # parsed and indexed qresult are different objects!
            self.assertNotEqual(id(qres), id(idx_qres))
            # but they should have the same attribute values
            self.assertTrue(compare_search_obj(qres, idx_qres))
            # sqlite3 comparison, only if it's present
            if sqlite3 is not None:
                dbidx_qres = db_indexed[qres.id]
                self.assertNotEqual(id(qres), id(dbidx_qres))
                self.assertTrue(compare_search_obj(qres, dbidx_qres))

        indexed.close()
        if sqlite3 is not None:
            db_indexed.close()
            db_indexed._con.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print("[BONUS %s.bgz]" % filename)
            self.check_index(filename + ".bgz", format, **kwargs)
开发者ID:dzhang4,项目名称:biopython,代码行数:48,代码来源:search_tests_common.py


注:本文中的Bio.SearchIO.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。