本文整理匯總了 Python 中 pysam.FastxFile 類的典型用法代碼示例。如果您正苦於以下問題:Python pysam.FastxFile 的具體用法?Python pysam.FastxFile 怎麽用?Python pysam.FastxFile 使用的例子?那麽,這裏精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊 pysam 的用法示例。
在下文中一共展示了pysam.FastxFile方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: create_reference
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import FastxFile [as 別名]
def create_reference(mapping, reference_file, fasta_path):
    """Write a combined reference FASTA from the files matching a glob.

    Every file matched by ``fasta_path`` is keyed by its basename with the
    extension removed. Files whose key is not present in ``mapping`` are
    skipped without being opened. For each record of a mapped file a header
    ``>{mapping[key][0]} {record name}[ {record comment}]`` is written,
    followed by the record's sequence on the next line.

    :param mapping: dict from file key (basename without extension) to an
        indexable value whose element 0 becomes the output record name.
    :param reference_file: path of the FASTA file to write (overwritten).
    :param fasta_path: glob pattern selecting the input fasta/q files.
    :returns: the number of records written.
    :raises RuntimeError: if the number of records written differs from the
        number of entries in ``mapping``.
    """
    ref_count = 0
    with open(reference_file, "w") as ref:
        for path in glob.glob(fasta_path):
            key = os.path.splitext(os.path.basename(path))[0]
            # The key is constant per file, so decide once instead of
            # re-checking it for every record (and skip parsing entirely).
            if key not in mapping:
                continue
            with pysam.FastxFile(path) as fh:
                for entry in fh:
                    ref_count += 1
                    header = ">" + str(mapping[key][0]) + " " + entry.name
                    # Records without a comment report None; do not emit
                    # the literal text "None" into the header.
                    if entry.comment is not None:
                        header += " " + entry.comment
                    print(header, file=ref)
                    print(entry.sequence, file=ref)
    if ref_count != len(mapping):
        raise RuntimeError(
            "Couldn't find all references, please check mappings!")
    return ref_count
示例2: split_fastx
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import FastxFile [as 別名]
def split_fastx(fname, output, chunksize=10000):
    """Break every record of a fasta/q file into pieces of bounded length.

    Records without qualities are written in FASTA format, records with
    qualities in FASTQ format. Each piece is named ``<name>_chunk<i>`` and
    carries the original comment (if any) plus ``chunk_length=<chunksize>``.

    :param fname: input filename.
    :param output: output filename.
    :param chunksize: (maximum) length of output records.
    """
    with open(output, 'w') as fout, \
            pysam.FastxFile(fname, persist=False) as fin:
        for record in fin:
            # persist=False: pull the fields out of the record up front.
            read_name, bases, quals = (
                record.name, record.sequence, record.quality)
            original_comment = record.comment
            if original_comment is not None:
                description = '{} chunk_length={}'.format(
                    original_comment, chunksize)
            else:
                description = 'chunk_length={}'.format(chunksize)

            if quals is not None:
                # FASTQ: split sequence and qualities in lockstep.
                paired = zip(chunks(bases, chunksize),
                             chunks(quals, chunksize))
                for index, (seq_part, qual_part) in enumerate(paired):
                    piece_name = '{}_chunk{}'.format(read_name, index)
                    fout.write('@{} {}\n{}\n+\n{}\n'.format(
                        piece_name, description,
                        ''.join(seq_part), ''.join(qual_part)))
                continue

            # FASTA: sequence only.
            for index, seq_part in enumerate(chunks(bases, chunksize)):
                piece_name = '{}_chunk{}'.format(read_name, index)
                fout.write(">{} {}\n{}\n".format(
                    piece_name, description, ''.join(seq_part)))
示例3: get_seq_lens
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import FastxFile [as 別名]
def get_seq_lens(fastx):
    """Get sequence lengths from fastx file.

    :param fastx: path to a fasta/q file.
    :returns: list with the length of each record's sequence, in file order.
    """
    # Open via the context manager so the underlying file handle is closed
    # deterministically rather than whenever the object is collected.
    with pysam.FastxFile(fastx) as fh:
        return [len(rec.sequence) for rec in fh]
示例4: multi_from_fastx
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import FastxFile [as 別名]
def multi_from_fastx(cls, fastx,
                     take_all=False, read_id=None, depth_filter=1,
                     length_filter=0):
    """Create multiple `Read` s from a fasta/q file.

    It is assumed that subreads are grouped by read and named with
    <read_id>_<subread_id>. This is a generator: one `cls` instance is
    yielded per group of subreads that passes both filters. Records with an
    empty sequence are ignored.

    :param fastx: input file path.
    :param take_all: skip check on subread_ids, take all subreads in one
        `Read`.
    :param read_id: name of `Read`. Only used for `take_all == True`. If
        not given the basename of the input file is used.
    :param depth_filter: require reads to have at least this many subreads
        (values below 1 are treated as 1).
    :param length_filter: require reads to have a median subread length
        above this value.

    :yields: `cls(read_id, subreads)` for each accepted group.
    """
    depth_filter = max(1, depth_filter)
    if take_all:
        # Honour a caller-supplied name; only fall back to the file's
        # basename when none was given.
        if read_id is None:
            read_id = os.path.splitext(os.path.basename(fastx))[0]
    else:
        # Names are derived from the records themselves; start with no
        # current read so the first record opens a new group.
        read_id = None
    subreads = []
    with pysam.FastxFile(fastx) as fh:
        for entry in fh:
            if not take_all:
                cur_read_id = entry.name.split("_")[0]
                if cur_read_id != read_id:
                    # Read boundary: emit the finished group if it passes
                    # the depth and median-length filters, then reset.
                    if len(subreads) >= depth_filter:
                        med_length = np.median(
                            [len(x.seq) for x in subreads])
                        if med_length > length_filter:
                            yield cls(read_id, subreads)
                    read_id = cur_read_id
                    subreads = []
            if len(entry.sequence) > 0:
                subreads.append(Subread(entry.name, entry.sequence))
        # Emit the trailing group (or the single group when take_all).
        if len(subreads) >= depth_filter:
            med_length = np.median([len(x.seq) for x in subreads])
            if med_length > length_filter:
                yield cls(read_id, subreads)