本文整理汇总了 Python 中 bitstring.Bits.tobytes 方法的典型用法代码示例。如果您正苦于以下问题：Python Bits.tobytes 方法的具体用法？Python Bits.tobytes 怎么用？Python Bits.tobytes 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 bitstring.Bits 的用法示例。
在下文中一共展示了 Bits.tobytes 方法的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: uncompress_golomb_coding
# 需要导入模块: from bitstring import Bits [as 别名]
# 或者: from bitstring.Bits import tobytes [as 别名]
def uncompress_golomb_coding(coded_bytes, hash_length, M):
    """Decode a Golomb-coded byte string back into a list of hash values.

    The stream starts with one full hash of ``hash_length`` bytes.  Every
    following entry is the difference to the previous hash, stored as a
    unary quotient (a run of 1 bits closed by a single 0 bit) followed by a
    remainder that occupies ``int(log2(M))`` bits.

    Args:
        coded_bytes: The encoded data as a byte string.
        hash_length: Size of each hash value in bytes.
        M: Golomb divisor used by the encoder.  Presumably a power of two,
            since the remainder is read with ``int(log2(M))`` bits -- confirm
            against the encoder.

    Returns:
        A list of byte strings, each ``hash_length`` bytes long, in the
        order they were decoded from the stream.
    """
    hash_len_bits = hash_length * 8
    m_bits = int(math.log(M, 2))
    instream = BitStream(bytes=coded_bytes, length=len(coded_bytes) * 8)

    # First item is a full hash value.
    prev = instream.read("bits:%d" % hash_len_bits)
    ret_list = [prev.tobytes()]

    # The shortest possible codeword is the terminating 0 bit of the unary
    # part plus m_bits of remainder.  Anything shorter left at the end of the
    # stream cannot be a codeword (presumably byte-alignment padding), so stop
    # there instead of reading past the end of the data.
    while instream.bitpos + m_bits + 1 <= instream.length:
        # Unary-encoded quotient: count 1 bits up to the closing 0 bit.
        quotient = 0
        while instream.read("uint:1") == 1:
            quotient += 1
        # Fixed-width remainder.
        remainder = instream.read("uint:%d" % m_bits)
        # Values are delta-encoded against the previously decoded hash.
        curr_value = Bits(uint=prev.uint + quotient * M + remainder,
                          length=hash_len_bits)
        ret_list.append(curr_value.tobytes())
        prev = curr_value
    return ret_list
示例2: uncompress_golomb_coding
# 需要导入模块: from bitstring import Bits [as 别名]
# 或者: from bitstring.Bits import tobytes [as 别名]
def uncompress_golomb_coding(coded_bytes, hash_length, M):
    """Expand a Golomb-coded byte string into the list of hashes it encodes.

    Layout of the input, as read by this function: one complete hash of
    ``hash_length`` bytes, then a sequence of codewords.  Each codeword is a
    unary quotient (1 bits terminated by a 0 bit) followed by a remainder of
    ``int(log2(M))`` bits, and encodes the difference to the previous hash.

    Args:
        coded_bytes: The encoded data as a byte string.
        hash_length: Size of each hash value in bytes.
        M: Golomb divisor used by the encoder.  Presumably a power of two,
            since the remainder is read with ``int(log2(M))`` bits -- confirm
            against the encoder.

    Returns:
        A list of byte strings, each ``hash_length`` bytes long, in the
        order they were decoded from the stream.
    """
    # The stream length is measured in bits: 8 per input byte, independent
    # of the hash size.
    total_bits = len(coded_bytes) * 8
    instream = BitStream(bytes=coded_bytes, length=total_bits)
    hash_len_bits = hash_length * 8
    m_bits = int(math.log(M, 2))

    # The first entry is stored verbatim.
    prev = instream.read("bits:%d" % hash_len_bits)
    ret_list = [prev.tobytes()]

    # Only start decoding when a minimal codeword still fits (one closing
    # 0 bit plus the remainder); shorter leftovers are presumably padding
    # and would otherwise make the reads run off the end of the stream.
    min_codeword_bits = m_bits + 1
    while instream.length - instream.bitpos >= min_codeword_bits:
        # Unary part: number of 1 bits before the first 0 bit.
        read_prefix = 0
        while instream.read("uint:1") == 1:
            read_prefix += 1
        # Binary part: fixed-width remainder.
        r = instream.read("uint:%d" % m_bits)
        curr_value_int = prev.uint + read_prefix * M + r
        curr_value = Bits(uint=curr_value_int, length=hash_len_bits)
        ret_list.append(curr_value.tobytes())
        prev = curr_value
    return ret_list
示例3: multiple_exon_alnmt
# 需要导入模块: from bitstring import Bits [as 别名]
# 或者: from bitstring.Bits import tobytes [as 别名]
#.........这里部分代码省略.........
for map in maps:
switch_to_db (cursor, ensembl_db_name[map.species_2])
if map.similarity < min_similarity: continue
exon = map2exon(cursor, ensembl_db_name, map)
pepseq = get_exon_pepseq (cursor,exon)
if (not pepseq):
continue
if map.source == 'sw_sharp':
exon_known_code = 2
hassw = True
elif map.source == 'usearch':
exon_known_code = 3
hassw = True
else:
exon_known_code = map.exon_known_2
seqname = "{0}:{1}:{2}".format(map.species_2, map.exon_id_2, exon_known_code)
headers.append(seqname)
sequences[seqname] = pepseq
# for split exon concatenation (see below)
if not map.species_2 in exons_per_species.keys():
exons_per_species[map.species_2] = []
exons_per_species[map.species_2].append ([ map.exon_id_2, exon_known_code]);
if (len(headers) <=1 ):
if verbose: print "single species in the alignment"
no_orthologues += 1
continue
# concatenate exons from the same gene - the alignment program might go wrong otherwise
concatenated = concatenate_exons (cursor, ensembl_db_name, sequences, exons_per_species)
fasta_fnm = "{0}/{1}.fa".format( cfg.dir_path['scratch'], human_exon.exon_id)
output_fasta (fasta_fnm, sequences.keys(), sequences)
# align
afa_fnm = "{0}/{1}.afa".format( cfg.dir_path['scratch'], human_exon.exon_id)
mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm)
ret = commands.getoutput(mafftcmd)
if (verbose): print 'almt to', afa_fnm
# read in the alignment
inf = erropen(afa_fnm, "r")
aligned_seqs = {}
for record in SeqIO.parse(inf, "fasta"):
aligned_seqs[record.id] = str(record.seq)
inf.close()
# split back the concatenated exons
if concatenated: split_concatenated_exons (aligned_seqs, concatenated)
human_seq_seen = False
for seq_name, sequence in aligned_seqs.iteritems():
# if this is one of the concatenated seqs, split them back to two
### store the alignment as bitstring
# Generate the bitmap
bs = Bits(bin='0b' + re.sub("[^0]","1", sequence.replace('-','0')))
# The returned value of tobytes() will be padded at the end
# with between zero and seven 0 bits to make it byte aligned.
# I will end up with something that looks like extra alignment gaps, that I'll have to return
msa_bitmap = bs.tobytes()
# Retrieve information on the cognate
cognate_species, cognate_exon_id, cognate_exon_known = seq_name.split(':')
if cognate_exon_known == '2':
source = 'sw_sharp'
elif cognate_exon_known == '3':
source = 'usearch'
else:
source = 'ensembl'
if (cognate_species == 'homo_sapiens'):
human_seq_seen = True
cognate_genome_db_id = species2genome_db_id(cursor, cognate_species) # moves the cursor
switch_to_db(cursor, ensembl_db_name['homo_sapiens']) # so move it back to homo sapiens
# Write the bitmap to the database
#if (cognate_species == 'homo_sapiens'):
if verbose: # and (source=='sw_sharp' or source=='usearch'):
print "storing"
print human_exon.exon_id, human_exon.is_known
print cognate_species, cognate_genome_db_id, cognate_exon_id, cognate_exon_known, source
print sequence
if not msa_bitmap:
print "no msa_bitmap"
continue
store_or_update(cursor, "exon_map", {"cognate_genome_db_id":cognate_genome_db_id,
"cognate_exon_id":cognate_exon_id ,"cognate_exon_known" :cognate_exon_known,
"source": source, "exon_id" :human_exon.exon_id, "exon_known":human_exon.is_known},
{"msa_bitstring":MySQLdb.escape_string(msa_bitmap)})
ok += 1
commands.getoutput("rm "+afa_fnm+" "+fasta_fnm)
if verbose: print " time: %8.3f\n" % (time()-start);
print "tot: ", tot, "ok: ", ok
print "no maps ", no_pepseq
print "no pepseq ", no_pepseq
print "no orthologues ", no_orthologues
print
示例4: multiple_exon_alnmt
# 需要导入模块: from bitstring import Bits [as 别名]
# 或者: from bitstring.Bits import tobytes [as 别名]
#.........这里部分代码省略.........
# get the paralogues - only the representative for the family will have this
paralogues = get_paras (cursor, gene_id)
if not paralogues:
if verbose: print "\t not a template or no paralogues"
continue
if verbose: print "paralogues: ", paralogues
# get _all_ exons
template_exons = gene2exon_list(cursor, gene_id)
if (not template_exons):
if verbose: print 'no exons for ', gene_id
continue
# find all template exons we are tracking in the database
for template_exon in template_exons:
if verbose: print template_exon.exon_id
maps = get_maps(cursor, ensembl_db_name, template_exon.exon_id,
template_exon.is_known, species=species, table='para_exon_map')
if not maps:
no_maps += 1
continue
# output to fasta:
seqname = "{0}:{1}:{2}".format('template', template_exon.exon_id, template_exon.is_known)
exon_seqs_info = get_exon_seqs (cursor, template_exon.exon_id, template_exon.is_known)
if not exon_seqs_info: continue
[exon_seq_id, pepseq, pepseq_transl_start, pepseq_transl_end,
left_flank, right_flank, dna_seq] = exon_seqs_info
if (not pepseq):
if ( template_exon.is_coding and template_exon.covering_exon <0): # this should be a master exon
print "no pep seq for", template_exon.exon_id, "coding ", template_exon.is_coding,
print "canonical: ", template_exon.is_canonical
print "length of dna ", len(dna_seq)
no_pepseq += 1
continue
tot += 1
sequences = {seqname:pepseq}
headers = [seqname]
for map in maps:
exon = map2exon(cursor, ensembl_db_name, map, paralogue=True)
pepseq = get_exon_pepseq (cursor,exon)
if (not pepseq):
continue
seqname = "{0}:{1}:{2}".format('para', map.exon_id_2, map.exon_known_2)
headers.append(seqname)
sequences[seqname] = pepseq
fasta_fnm = "{0}/{1}_{2}_{3}.fa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known)
output_fasta (fasta_fnm, headers, sequences)
if (len(headers) <=1 ):
print "single species in the alignment (?)"
no_paralogues += 1
continue
# align
afa_fnm = "{0}/{1}_{2}_{3}.afa".format( cfg.dir_path['scratch'], species, template_exon.exon_id, template_exon.is_known)
mafftcmd = acg.generate_mafft_command (fasta_fnm, afa_fnm)
ret = commands.getoutput(mafftcmd)
# read in the alignment
inf = erropen(afa_fnm, "r")
if not inf:
print gene_id
continue
template_seq_seen = False
for record in SeqIO.parse(inf, "fasta"):
### store the alignment as bitstring
# Generate the bitmap
bs = Bits(bin='0b' + re.sub("[^0]","1", str(record.seq).replace('-','0')))
msa_bitmap = bs.tobytes()
# Retrieve information on the cognate
label, cognate_exon_id, cognate_exon_known = record.id.split(':')
if (label == 'template'):
template_seq_seen = True
# Write the bitmap to the database
#print "updating: ", template_exon.exon_id
store_or_update(cursor, "para_exon_map", {"cognate_exon_id" :cognate_exon_id,
"cognate_exon_known" :cognate_exon_known,
"exon_id" :template_exon.exon_id,
"exon_known" :template_exon.is_known},
{"msa_bitstring":MySQLdb.escape_string(msa_bitmap)})
inf.close()
ok += 1
commands.getoutput("rm "+afa_fnm+" "+fasta_fnm)
if verbose: print " time: %8.3f\n" % (time()-start);
outstr = species + " done \n"
outstr += "tot: %d ok: %d \n" % (tot, ok)
outstr += "no maps %d \n" % no_pepseq
outstr += "no pepseq %d \n" % no_pepseq
outstr += "no paralogues %d \n" % no_paralogues
outstr += "\n"
print outstr