This article collects typical usage examples of the Python class Bio.SearchIO. If you have been wondering what exactly the SearchIO class does, how to use it, or where to find usage examples, the hand-picked class code examples below may help.
The following shows 15 code examples of the SearchIO class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
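Before diving into the examples, here is a minimal sketch of the core Bio.SearchIO entry points that most of them build on (the file names below are placeholders, not files from any particular example):

from Bio import SearchIO

# Iterate over every query in a BLAST XML report (lazy generator).
for qresult in SearchIO.parse("report.xml", "blast-xml"):
    print(qresult.id, len(qresult.hits))

# read() is for files that contain exactly one query.
qresult = SearchIO.read("single_query.xml", "blast-xml")

# Convert between supported formats in one call.
SearchIO.convert("report.xml", "blast-xml", "report.tab", "blast-tab")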
Example 1: check_raw
def check_raw(self, filename, id, raw, **kwargs):
    """Index filename using keyword arguments, check get_raw(id)==raw."""
    idx = SearchIO.index(filename, self.fmt, **kwargs)
    raw = _as_bytes(raw)
    # Anticipate cases where the raw string and/or file uses different
    # newline characters ~ we set everything to \n.
    new = idx.get_raw(id)
    self.assertTrue(isinstance(new, bytes),
                    "Didn't get bytes from %s get_raw" % self.fmt)
    self.assertEqual(raw.replace(b'\r\n', b'\n'),
                     new.replace(b'\r\n', b'\n'))
    idx.close()

    # Now again, but using the SQLite backend
    if sqlite3:
        idx = SearchIO.index_db(":memory:", filename, self.fmt, **kwargs)
        new = idx.get_raw(id)
        self.assertTrue(isinstance(new, bytes),
                        "Didn't get bytes from %s get_raw" % self.fmt)
        self.assertEqual(raw.replace(b'\r\n', b'\n'),
                         new.replace(b'\r\n', b'\n'))
        idx.close()

    if os.path.isfile(filename + ".bgz"):
        # Do the tests again with the BGZF compressed file
        print("[BONUS %s.bgz]" % filename)
        self.check_raw(filename + ".bgz", id, raw, **kwargs)
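Example 1 exercises SearchIO's indexing API from inside a test harness; in ordinary code the same calls look roughly like this (the file name and query id are placeholders):

from Bio import SearchIO

idx = SearchIO.index("report.xml", "blast-xml")   # dict-like, parsed lazily
qresult = idx["query_1"]                          # parse a single record on demand
raw = idx.get_raw("query_1")                      # raw bytes for that record
idx.close()

# index_db() keeps the file offsets in an SQLite database instead of memory;
# ":memory:" is transient, a real path would make the index persistent.
db_idx = SearchIO.index_db(":memory:", "report.xml", "blast-xml")
db_idx.close()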
Example 2: parse_write_and_compare
def parse_write_and_compare(self, source_file, source_format, out_file,
                            out_format, **kwargs):
    """Compares parsed QueryResults after they have been written to a file."""
    source_qresults = list(SearchIO.parse(source_file, source_format, **kwargs))
    SearchIO.write(source_qresults, out_file, out_format, **kwargs)
    out_qresults = list(SearchIO.parse(out_file, out_format, **kwargs))
    for source, out in zip(source_qresults, out_qresults):
        self.assertTrue(compare_search_obj(source, out))
Example 3: main
import sys
import textwrap

from Bio import SearchIO

def main():
    # 'args' (parsed command-line options) and io_check() are defined
    # elsewhere in the original script.
    extensions = {'blast-tab': ['tsv', 'csv', 'blast', 'm8', 'blastm8'],
                  'blast-text': ['txt', 'bls', 'blast'], 'blast-xml': ['xml'],
                  'blat-psl': ['psl'], 'hmmer3-tab': ['tsv', 'csv'],
                  'hmmer3-text': ['txt'], 'hmmer2-text': ['txt'],
                  'exonerate-text': ['txt']}
    kwargs = args.keywords
    infile = args.infile
    in_type = args.in_type
    in_ext = infile.split('.')[-1]
    proper_ext = extensions[in_type][0]
    if in_ext not in extensions[in_type]:
        print(textwrap.fill("error: invalid input file extension \"{}\". An "
                            "appropriate extension for this input type is {}"
                            .format(in_ext, proper_ext), 79))
        sys.exit(1)
    out_type = args.out_type
    if args.output:
        outfile = io_check(args.output, 'w')
    else:
        out_ext = extensions[out_type][0]
        outfile = io_check("{}.{}".format('.'.join(infile.split('.')[:-1]), out_ext), 'w')
    print("output will be in {} and formatted as {}".format(outfile, out_type))
    SearchIO.convert(infile, in_type, outfile, out_type, out_kwargs=kwargs)
Example 4: check_index
def check_index(self, filename, format, **kwargs):
    # check whether this Python installation has sqlite3
    try:
        import sqlite3
    except ImportError:
        sqlite3 = None
    parsed = list(SearchIO.parse(filename, format, **kwargs))
    # compare values by index
    indexed = SearchIO.index(filename, format, **kwargs)
    self.assertEqual(len(parsed), len(indexed.keys()))
    # compare values by index_db, only if sqlite3 is present
    if sqlite3 is not None:
        db_indexed = SearchIO.index_db(':memory:', [filename], format, **kwargs)
        self.assertEqual(len(parsed), len(db_indexed.keys()))
    for qres in parsed:
        idx_qres = indexed[qres.id]
        # parsed and indexed qresults are different objects!
        self.assertNotEqual(id(qres), id(idx_qres))
        # but they should have the same attribute values
        self.assertTrue(compare_search_obj(qres, idx_qres))
        # sqlite3 comparison, only if it's present
        if sqlite3 is not None:
            dbidx_qres = db_indexed[qres.id]
            self.assertNotEqual(id(qres), id(dbidx_qres))
            self.assertTrue(compare_search_obj(qres, dbidx_qres))
    indexed._proxy._handle.close()  # TODO - Better solution
    if sqlite3 is not None:
        db_indexed.close()
        db_indexed._con.close()
Example 5: read_write_and_compare
def read_write_and_compare(self, source_file, source_format, out_file,
                           out_format, **kwargs):
    """Compares a read QueryResult after it has been written to a file."""
    source_qresult = SearchIO.read(source_file, source_format, **kwargs)
    SearchIO.write(source_qresult, out_file, out_format, **kwargs)
    out_qresult = SearchIO.read(out_file, out_format, **kwargs)
    self.assertTrue(compare_search_obj(source_qresult, out_qresult))
Example 6: parseBlastOutFile
from Bio import SearchIO

def parseBlastOutFile(filename):
    if filename.endswith("xml"):
        qResultGen = SearchIO.parse(filename, 'blast-xml')
    elif filename.endswith("txt"):
        qResultGen = SearchIO.parse(filename, 'blast-tab')
    else:
        raise ValueError("Unrecognized file type: " + filename)
    parsed = {qRes.id: qRes for qRes in qResultGen}
    print("Parsed " + filename)
    return parsed
Example 7: main
from Bio import SearchIO

def main(args):
    if len(args) == 2:
        filenameRoot = args[1].split(".")[0]
        filenameXML = filenameRoot + ".xml"
        SearchIO.convert(args[1], 'blast-tab', filenameXML, 'blast-xml')
    elif len(args) == 3:
        filenameRoot = args[1].split(".")[0]
        filenameXML = args[2]
        SearchIO.convert(args[1], 'blast-tab', filenameXML, 'blast-xml')
    else:
        print("Usage: path/to/blast/tabular/file [optional path/for/new/blast/xml/file]")
Example 8: start_queryResult_generator
from Bio import SearchIO

def start_queryResult_generator(inFile, fDic, work_sheet):
    """Invoke SearchIO.parse to get a generator that steps through the
    records one QueryResult object at a time via next(generator), which
    saves memory. Casting the generator to a list also works for the
    current task, but it is not needed for parsing and sorting the records.

    See http://biopython.org/DIST/docs/api/Bio.SearchIO.BlastIO-module.html
    """
    # write_qr_to_ws(), outputFileName() and build_ws_header() are helpers
    # defined elsewhere in the original script.
    qGenerator = SearchIO.parse(inFile, 'blast-xml')
    max_hits = 0
    query_count = 1
    # Step through all the records in the lump XML data file and write out
    # each separate hit to file. Also write the summary information to the
    # work sheet.
    for query_result in qGenerator:
        print('Processing Query BLAST return ' + str(query_count))
        number_hits = len(query_result.hits)
        # Extend the header out to the right if there is a new max_hits
        if number_hits > max_hits:
            max_hits = number_hits
        if number_hits == 0:
            # Construct the path plus file name for a no-hit query
            filename = str(fDic['topDir'] + fDic['noHit'] + 'Query_'
                           + str(query_count) + '_H_none.xml')
            # Write out any queries that had no hits to a no-hit subfolder
            SearchIO.write(query_result, filename, 'blast-xml')
            write_qr_to_ws(query_count, query_result, work_sheet)
        else:
            # Set up a counter of hits in the QueryResult so each hit
            # can be sliced away into its own record cleanly.
            hit_count = 0
            for hit in query_result.hits:
                total_hsps = len(hit.hsps)
                lowest_eval = hit.hsps[0].evalue
                best_hsp = hit.hsps[0]
                for hsp in hit.hsps:
                    if hsp.evalue < lowest_eval:
                        lowest_eval = hsp.evalue
                        best_hsp = hsp
                filename = str(fDic['topDir'] + outputFileName(query_count, hit, best_hsp))
                SearchIO.write(query_result[hit_count:(hit_count + 1)], filename, 'blast-xml')
                hit_count += 1
            # Write out query_result to the worksheet
            write_qr_to_ws(query_count, query_result, work_sheet)
        query_count += 1
        # break is debugging code
        # if query_count == 20:
        #     break
    build_ws_header(work_sheet, max_hits)
    return qGenerator
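A minimal sketch of the next()-stepping pattern described in the docstring above (the file name is a placeholder):

from Bio import SearchIO

qgen = SearchIO.parse("big_report.xml", "blast-xml")  # nothing is parsed yet
first = next(qgen)                 # pull one QueryResult, keeping memory flat
print(first.id, len(first.hits))
# list(qgen) would materialize every remaining record at once.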
Example 9: get_hit_seq
import os
import re

from Bio import SearchIO

def get_hit_seq(fastafile, filename):
    # HIT_SEQUENCE_BPS, BLAST_BINARY, BLAST_DATABASE and the yaml_* helpers
    # are defined elsewhere in the original script.
    yamlfile = yaml_load_file(fastafile)
    blout = SearchIO.parse(filename, 'blast-text')
    for query in blout:
        seqid = query.id.split("\n")[0]
        fh = open("multi_" + seqid + ".fasta", 'a')
        yamlfile[seqid]['hits'] = {}
        for hit in query.hits:
            gi = re.match(r"gi\|(.*)\|ref", hit.id).group(1)
            yamlfile[seqid]['hits'][gi] = {}
            for hsp in hit.hsps:
                # Pad the hit coordinates by HIT_SEQUENCE_BPS on both sides;
                # blastdbcmd ranges are 1-based, so clamp the start at 1.
                hitstart = hsp.hit_start + 1 - HIT_SEQUENCE_BPS
                hitstart = 1 if hitstart < 1 else hitstart
                hitend = hsp.hit_end + 1 + HIT_SEQUENCE_BPS
                hitstrand = "plus" if (hsp.hit_strand == 1) else "minus"
                out = os.popen(BLAST_BINARY + "/blastdbcmd -db " + BLAST_DATABASE
                               + " -dbtype nucl -entry " + str(gi)
                               + " -range " + str(hitstart) + "-" + str(hitend)
                               + " -strand " + str(hitstrand)).read()
                fh.write(out)
        fh.close()
    yaml_dump_file(fastafile, yamlfile)
Example 10: reciprocal_hmm_search
import os
import re

from Bio import SearchIO

def reciprocal_hmm_search(modelname, modelname_regex, filename, organism,
                          rev_inc_bitscore_percentage, out_filename=None):
    """Performs a reciprocal hmmer search against the proteome of the given organism."""
    # proteomes_dir and manual_mode are module-level settings in the
    # original script.
    print("# Reciprocal search...")
    is_found = False
    if out_filename is None:
        out_filename = modelname + ".rechits_" + organism
    reciprocal_search_command = ("phmmer --noali --tblout " + out_filename + " "
                                 + filename + " " + proteomes_dir + organism
                                 + ".fasta > hmmer_res")
    os.system(reciprocal_search_command)
    try:
        hits = SearchIO.read(out_filename, "hmmer3-tab")
        max_bitscore = hits[0].bitscore
    except (ValueError, IndexError):  # no hits in the table
        hits = []
        max_bitscore = 0
    if len(hits) > 0:
        if re.search(modelname_regex, hits[0].description):
            is_found = True
    # for h in hits:
    #     if h.bitscore > rev_inc_bitscore_percentage * max_bitscore:
    #         if manual_mode:
    #             print(modelname_regex, h.description, re.search(modelname_regex, h.description))
    #             input("...")
    #         if re.search(modelname_regex, h.description):
    #             is_found = True
    if manual_mode:
        print(filename, is_found)
        input("Check reciprocal search results...")
    return is_found
Example 11: get_scores_for_curated_via_hmm
def get_scores_for_curated_via_hmm(self):
    """
    For every curated variant we want to generate a set of scores against HMMs.
    This is needed to supply the same type of information for curated as well
    as for automatic seqs.
    """
    # Construct one big file from all curated seqs.
    with open(self.curated_all_fasta, "w") as f:
        for hist_type, seed in self.get_seeds():
            seed_aln_file = os.path.join(self.seed_directory, hist_type, seed)
            for s in SeqIO.parse(seed_aln_file, "fasta"):
                s.seq = s.seq.ungap("-")
                SeqIO.write(s, f, "fasta")
    # Search it with our HMMs.
    self.search(hmms_db=self.combined_hmm_file,
                out=self.curated_search_results_file,
                sequences=self.curated_all_fasta)
    # We need to parse this results file; we take a snippet from
    # load_hmmsearch.py and tune it to work with our curated seq header format.
    for variant_query in SearchIO.parse(self.curated_search_results_file, "hmmer3-text"):
        print("Loading hmmsearch for variant:", variant_query.id)
        variant_model = Variant.objects.get(id=variant_query.id)
        for hit in variant_query:
            gi = hit.id.split("|")[1]
            seq = Sequence.objects.get(id=gi)
            try:
                # Sometimes no individual domain satisfies the reporting
                # thresholds (although the complete target did), in which
                # case the hit has no HSPs and max() raises ValueError.
                best_hsp = max(hit, key=lambda hsp: hsp.bitscore)
                add_score(seq, variant_model, best_hsp, seq.variant == variant_model)
            except ValueError:
                pass
Example 12: __init__
def __init__(self, Maxicircle, out_file):
    file_out = open(out_file, 'w')
    writer = csv.writer(file_out, delimiter="\t")
    writer.writerow(["##gff-version", "3"])
    rows = []
    for protein in glob.glob("/Users/Said/Github/Maxicircle/DB/AA/*.faa"):
        output_file = protein.split("/")[-1] + ".xml"
        blastx_cline = NcbiblastxCommandline(query=Maxicircle, db=protein,
                                             outfmt=5, out=output_file)
        blastx_cline()
        blast_qresult = SearchIO.read(output_file, 'blast-xml')
        if len(blast_qresult) > 0:
            best = blast_qresult[0][0]
            query_range = [x for x in best.query_range]
            if best.query_strand > 0:
                query_strand = "+"
            else:
                query_strand = "-"
            chromosome = best.query_id
            rows.append([chromosome, ".", "exon", query_range[0], query_range[1],
                         ".", query_strand, ".",
                         "ID=" + protein.split("/")[-1].split(".faa")[0]])
    print(str(len(rows)) + " exons found")
    rows = iter(rows)
    writer.writerows(rows)
Example 13: generate_blast_graph
def generate_blast_graph(self):
    evalue_filter = lambda hsp: hsp.evalue < self.evalue
    file_name = "{}/blast_graph.txt".format(self.blast_output_path)
    for blast_file in glob.glob(self.blast_data_path):
        print("working on " + blast_file)
        # Parse the BLAST file
        qresults = SearchIO.parse(blast_file, 'blast-tab', comments=True)
        for qresult in qresults:
            write_line = ""
            write_line += qresult.id + ":"
            # Go through the hit section of the query
            for hit in qresult[:]:
                if not self.blast_graph.has_node(qresult.id):
                    self.blast_graph.add_node(qresult.id)
                # Keep only the HSPs that pass the e-value filter
                filtered_hit = hit.filter(evalue_filter)
                if filtered_hit is not None:
                    if not self.blast_graph.has_node(filtered_hit.id):
                        self.blast_graph.add_node(filtered_hit.id)
                    # Add an edge between the graph nodes
                    self.blast_graph.add_edge(qresult.id, filtered_hit.id)
                    write_line += filtered_hit.id + ","
            if write_line != "":
                with open(file_name, "a") as f_handle:
                    f_handle.write(write_line + '\n')
    # Write GML files
    if self.generate_gml_files:
        file_name = "{}/blast_graph.gml".format(self.blast_output_path)
        with open(file_name, "a") as f_handle:
            nx.write_gml(self.blast_graph, f_handle)
Example 14: parse_hmmscan_tab
import numpy as np

from Bio import SearchIO

def parse_hmmscan_tab(infile, print_header=True):
    '''Parse hmmscan output in --tblout format.'''
    if print_header:
        yield "query", "top hit", "evalue", "certainty", "num sig hits"
    records = SearchIO.parse(infile, 'hmmer3-tab')
    for rec in records:
        query = rec.id
        if len(rec) > 1:
            hit1, hit2 = rec.hits[0], rec.hits[1]
            eval1, eval2 = hit1.evalue, hit2.evalue
            if eval1 != 0:  # convert to -ln evalue
                eval1 = -np.log(eval1)
            if eval2 != 0:
                eval2 = -np.log(eval2)
            if eval1 == 0 and eval2 != 0:  # this may be a hack, I don't care
                certainty = 1
            elif eval1 == 0 and eval2 == 0:
                certainty = 0
            else:
                # Information-theoretic certainty: normalize the two scores
                # and take 1 minus their binary entropy, so it is 1 when one
                # hit dominates and 0 when the top two hits are equally good.
                total = eval1 + eval2
                p1, p2 = eval1 / total, eval2 / total
                certainty = 1 + (p1 * np.log2(p1)) + (p2 * np.log2(p2))
        else:
            certainty = 1
        yield query, rec.hits[0].id, rec.hits[0].evalue, certainty, len(rec)
Example 15: process_blast_output
from Bio import SearchIO

def process_blast_output(file, simple, argparser):
    # encode(), simstr(), thrids() and format_alignment() are helper
    # functions defined elsewhere in the original module.
    qresults = SearchIO.parse(file, 'blast-xml')
    if simple:
        for qresult in qresults:
            for hit in qresult:
                for hsp in hit:
                    if ((hsp.aln_span == argparser.cont and (hsp.gap_num == 0) and
                         (hsp.aln_span == hsp.ident_num)) or (hsp.aln_span > argparser.cont)):
                        yield ([str(hsp), "\n\n"], None, hsp.aln_span)
                for hsp in hit:
                    if (hsp.aln_span >= argparser.cont and (hsp.gap_num == 0) and
                            (hsp.aln_span == hsp.ident_num)):
                        yield (None, [str(hsp), "\n\n"], hsp.aln_span)
    else:
        for qresult in qresults:
            for hit in qresult:
                for hsp in hit:
                    for v, c, p in encode(simstr(hsp.aln)):
                        if v == "1" and c >= argparser.cont:
                            yield (format_alignment(hsp, p, c), None, c)
                for hsp in hit:
                    for t0, t1, t2 in thrids(encode(simstr(hsp.aln))):
                        if t0[0] == "1":
                            assert (t0[2] < t1[2] < t2[2])
                            assert t2[0] == "1"
                            assert t1[0] == "0"
                            if (t0[1] >= argparser.leftmin and t2[1] >= argparser.rightmin and
                                    (t0[1] + t2[1]) >= argparser.summin and
                                    t1[1] <= argparser.gapmax):
                                if not (argparser.S and
                                        (t0[1] >= argparser.cont or t2[1] >= argparser.cont)):
                                    yield (None,
                                           format_alignment(hsp, t0[2], t0[1], t2[2], t2[1]),
                                           t0[1] + t2[1] - t1[1])