本文整理汇总了Python中pysam.index方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.index方法的具体用法?Python pysam.index怎么用?Python pysam.index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysam
的用法示例。
在下文中一共展示了pysam.index方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parseArgs
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def parseArgs(self):
description = "{} to {} conversion tool".format(
self.inputFileFormat, self.outputFileFormat)
parser = argparse.ArgumentParser(
description=description)
inputHelpText = "the name of the {} file to read".format(
self.inputFileFormat)
parser.add_argument(
"inputFile", help=inputHelpText)
outputHelpText = "the name of the {} file to write".format(
self.outputFileFormat)
defaultOutputFilePath = "out.{}".format(
self.outputFileFormat.lower())
parser.add_argument(
"--outputFile", "-o", default=defaultOutputFilePath,
help=outputHelpText)
parser.add_argument(
"--numLines", "-n", default=10,
help="the number of lines to write")
parser.add_argument(
"--skipIndexing", default=False, action='store_true',
help="don't create an index file")
args = parser.parse_args()
self.args = args
示例2: createBamHeader
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def createBamHeader(self, baseHeader):
"""
Creates a new bam header based on the specified header from the
parent BAM file.
"""
header = dict(baseHeader)
newSequences = []
for index, referenceInfo in enumerate(header['SQ']):
if index < self.numChromosomes:
referenceName = referenceInfo['SN']
# The sequence dictionary in the BAM file has to match up
# with the sequence ids in the data, so we must be sure
# that these still match up.
assert referenceName == self.chromosomes[index]
newReferenceInfo = {
'AS': self.referenceSetName,
'SN': referenceName,
'LN': 0, # FIXME
'UR': 'http://example.com',
'M5': 'dbb6e8ece0b5de29da56601613007c2a', # FIXME
'SP': 'Human'
}
newSequences.append(newReferenceInfo)
header['SQ'] = newSequences
return header
示例3: get_aligned_reads
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def get_aligned_reads(samfile, collapsed=None, readcounts=None):
"""Get all aligned reads from a sam file into a pandas dataframe"""
sam = HTSeq.SAM_Reader(str(samfile))
f=[]
for a in sam:
if a.aligned == True:
seq = a.read.seq.decode()
f.append((seq,a.read.name,a.iv.chrom,a.iv.start,a.iv.end,a.iv.strand))
#else:
# f.append((seq,a.read.name,'_unmapped'))
counts = pd.DataFrame(f, columns=['seq','read','name','start','end','strand'])
counts['length'] = counts.seq.str.len()
counts = counts.drop(['read'],1)
if collapsed is not None:
readcounts = read_collapsed_file(collapsed)
if readcounts is not None:
counts = counts.merge(readcounts, on='seq')
counts['align_id'] = counts.index
return counts
示例4: sam_to_bam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def sam_to_bam(self, sam_path, bam_path_prefix):
if self._sam_file_is_empty(sam_path) is True:
# pysam will generate an error if an emtpy SAM file will
# be converted. Due to this an empty bam file with the
# same header information will be generated from scratch
self._generate_empty_bam_file(sam_path, bam_path_prefix)
# Remove SAM file
os.remove(sam_path)
return
temp_unsorted_bam_path = self._temp_unsorted_bam_path(
bam_path_prefix)
# Generate unsorted BAM file
pysam.samtools.view(
"-b", "-o{}".format(temp_unsorted_bam_path), sam_path,
catch_stdout=False)
# Generate sorted BAM file
pysam.sort(temp_unsorted_bam_path, "-o", bam_path_prefix + ".bam")
# Generate index for BAM file
pysam.index("%s.bam" % bam_path_prefix)
# Remove unsorted BAM file
os.remove(temp_unsorted_bam_path)
# Remove SAM file
os.remove(sam_path)
示例5: rlebam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def rlebam(args):
"""Entry point for merging run length information for fast5s to bam."""
logger = medaka.common.get_named_logger('BAMDecor')
read_index = medaka.common.read_key_value_tsv(args.read_index)
logger.info("Found {} read in index\n".format(len(read_index)))
def _ingress():
for line in sys.stdin:
if line[0] == '@':
yield line.rstrip(), None, None, None
else:
read_id, flag, _ = line.split('\t', 2)
is_rev = bool(int(flag) & 16)
fname = read_index[read_id]
yield line.rstrip(), read_id, is_rev, fname
with concurrent.futures.ProcessPoolExecutor(
max_workers=args.workers) as executor:
for results in executor.map(
_rle_bam_hdf_worker, _ingress(), chunksize=10):
print(results)
示例6: write_bam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def write_bam(fname, alignments, header, bam=True):
"""Write a `.bam` file for a set of alignments.
:param fname: output filename.
:param alignments: a list of `Alignment` tuples.
:param header: bam header
:param bam: write bam, else sam
"""
mode = 'wb' if bam else 'w'
with pysam.AlignmentFile(fname, mode, header=header) as fh:
for ref_id, subreads in enumerate(alignments):
for aln in sorted(subreads, key=lambda x: x.rstart):
a = pysam.AlignedSegment()
a.reference_id = ref_id
a.query_name = aln.qname
a.query_sequence = aln.seq
a.reference_start = aln.rstart
a.cigarstring = aln.cigar
a.flag = aln.flag
a.mapping_quality = 60
fh.write(a)
if mode == 'wb':
pysam.index(fname)
示例7: exportIndexes
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def exportIndexes(input_dir):
import unique
bam_dirs = unique.read_directory(input_dir)
print 'Building BAM index files',
for file in bam_dirs:
if string.lower(file[-4:]) == '.bam':
bam_dir = input_dir+'/'+file
bamf = pysam.AlignmentFile(bam_dir, "rb" )
### Is there an indexed .bai for the BAM? Check.
try:
for entry in bamf.fetch():
codes = map(lambda x: x[0],entry.cigar)
break
except Exception:
### Make BAM Indexv lciv9df8scivx
print '.',
bam_dir = str(bam_dir)
#On Windows, this indexing step will fail if the __init__ pysam file line 51 is not set to - catch_stdout = False
pysam.index(bam_dir)
示例8: exportIndexes
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def exportIndexes(input_dir):
import unique
bam_dirs = unique.read_directory(input_dir)
print 'Building BAM index files',
for file in bam_dirs:
if string.lower(file[-4:]) == '.bam':
bam_dir = input_dir+'/'+file
bamf = pysam.Samfile(bam_dir, "rb" )
### Is there an indexed .bai for the BAM? Check.
try:
for entry in bamf.fetch():
codes = map(lambda x: x[0],entry.cigar)
break
except Exception:
### Make BAM Indexv lciv9df8scivx
print '.',
bam_dir = str(bam_dir)
#On Windows, this indexing step will fail if the __init__ pysam file line 51 is not set to - catch_stdout = False
pysam.index(bam_dir)
bamf = pysam.Samfile(bam_dir, "rb" )
示例9: CigarIndex
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def CigarIndex(cigar, pos) :
position = 0
k = 0 # cigar index
addpos = pos # extra positions
for cigartype, cigarlength in cigar :
position += cigarlength
if position < pos :
k += 1
addpos -= cigarlength
else : return k, addpos
# Converts string of cigar symbol characters into the format used by Pysam
示例10: getMacsPeakShiftEstimate
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def getMacsPeakShiftEstimate(infile):
'''
Parses the peak shift estimate file from the estimateInsertSize
function and returns the fragment size, which is used in the macs2
postprocessing steps as the "offset" for bed2table
Parameters
----------
infile: str
path to input file
'''
with IOTools.openFile(infile, "r") as inf:
header = inf.readline().strip().split("\t")
values = inf.readline().strip().split("\t")
fragment_size_mean_ix = header.index("fragmentsize_mean")
fragment_size = int(float(values[fragment_size_mean_ix]))
return fragment_size
示例11: makeBamLink
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def makeBamLink(currentname, newname):
'''
Makes soft links to an existing bam file and its index - used instead
of copying files.
Generates and runs a command line statement.
Parameters:
currentname: str
path to original file
newname: str
path to link location
'''
cwd = os.getcwd()
os.system("""
ln -s %(cwd)s/%(currentname)s %(cwd)s/%(newname)s;
ln -s %(cwd)s/%(currentname)s.bai %(cwd)s/%(newname)s.bai;
""" % locals())
示例12: mergeBams
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def mergeBams(infile_list, outfile):
infile_list = " ".join(infile_list)
job_memory = "5G"
statement = ("samtools merge - %(infile_list)s"
" | samtools sort - -o %(outfile)s"
" 2>%(outfile)s.log;"
" checkpoint;"
" samtools index %(outfile)s"
" 2>%(outfile)s.bai.log")
P.run()
##########################################################################
##########################################################################
##########################################################################
# Run Peak Calling For IDR
##########################################################################
示例13: linkBamToWorkingDirs
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def linkBamToWorkingDirs(infiles, outfile):
'''
symlink the bam file and index to the working directories
for execution of the transcript building pipeline
'''
bamfile = P.snip(infiles[0], ".bai")
indexfile = infiles[0]
directories = [P.snip(logfile, ".log") for logfile in infiles[1]]
for directory in directories:
os.symlink(os.path.abspath(bamfile), os.path.join(directory, bamfile))
os.symlink(
os.path.abspath(indexfile), os.path.join(directory, indexfile))
updateFile(outfile)
###################################################################
###################################################################
###################################################################
示例14: saveReads
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def saveReads(dataHub, nameExtra=None):
if dataHub.args.save_reads:
logging.info("* Saving relevant reads *")
for i, sample in enumerate(dataHub):
outbam_path = dataHub.args.save_reads
if not outbam_path.endswith(".bam"):
outbam_path += ".bam"
if len(dataHub.samples) > 1:
logging.debug("Using i = {}".format(i))
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))
if nameExtra is not None:
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))
logging.info(" Outpath: {}".format(outbam_path))
# print out just the reads we're interested for use later
bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
for read in sample.reads:
bam_small.write(read)
for read in sample.readStatistics.reads:
bam_small.write(read)
bam_small.close()
sorted_path = outbam_path.replace(".bam", ".sorted")
pysam.sort(outbam_path, sorted_path)
pysam.index(sorted_path+".bam")
示例15: convert
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import index [as 别名]
def convert(self):
# set flags
if self.inputFileFormat == AlignmentFileConstants.SAM:
inputFlags = "r"
elif self.inputFileFormat == AlignmentFileConstants.BAM:
inputFlags = "rb"
if self.outputFileFormat == AlignmentFileConstants.SAM:
outputFlags = "wh"
elif self.outputFileFormat == AlignmentFileConstants.BAM:
outputFlags = "wb"
# open files
inputFile = pysam.AlignmentFile(
self.args.inputFile, inputFlags)
outputFile = pysam.AlignmentFile(
self.args.outputFile, outputFlags, header=inputFile.header)
outputFilePath = outputFile.filename
log("Creating alignment file '{}'".format(outputFilePath))
# write new file
for _ in range(self.args.numLines):
alignedSegment = inputFile.next()
outputFile.write(alignedSegment)
# clean up
inputFile.close()
outputFile.close()
# create index file
if (not self.args.skipIndexing and
self.outputFileFormat == AlignmentFileConstants.BAM):
indexFilePath = "{}.{}".format(
outputFilePath, AlignmentFileConstants.BAI.lower())
log("Creating index file '{}'".format(indexFilePath))
pysam.index(outputFilePath)