当前位置: 首页>>代码示例>>Python>>正文


Python pysam.AlignmentFile方法代码示例

本文整理汇总了Python中pysam.AlignmentFile方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.AlignmentFile方法的具体用法?Python pysam.AlignmentFile怎么用?Python pysam.AlignmentFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysam的用法示例。


在下文中一共展示了pysam.AlignmentFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: learn_shift_pair

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def learn_shift_pair(self, bam_file):
    """ Learn the optimal fragment shift from paired end fragments. """

    t0 = time.time()
    print('Learning shift from paired-end sequences.', end='', flush=True)

    # read proper pair template lengths
    template_lengths = []
    for align in pysam.AlignmentFile(bam_file):
      if align.is_proper_pair and align.is_read1:
        template_lengths.append(abs(align.template_length))

    # compute mean
    self.shift_forward = int(np.round(np.mean(template_lengths) / 2))

    print(' Done in %ds.' % (time.time() - t0))
    print('Shift: %d' % self.shift_forward, flush=True) 
开发者ID:calico,项目名称:basenji,代码行数:19,代码来源:bam_cov.py

示例2: retrieve_loc_reads

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def retrieve_loc_reads(sv, bam, max_dep, threshold):
    bp_dtype = [('chrom', '<U20'), ('start', int), ('end', int), ('dir', '<U1')]

    sv_id, chr1, pos1, dir1, chr2, pos2, dir2, \
        sv_class, orig_id, orig_pos1, orig_pos2 = [h[0] for h in dtypes.sv_dtype]
    bp1 = np.array((sv[chr1], sv[pos1]-(threshold*2), \
                    sv[pos1]+(threshold*2), sv[dir1]), dtype=bp_dtype)
    bp2 = np.array((sv[chr2], sv[pos2]-(threshold*2), \
                    sv[pos2]+(threshold*2), sv[dir2]), dtype=bp_dtype)

    bamf = pysam.AlignmentFile(bam, "rb")
    loc1_reads, err_code1 = count.get_loc_reads(bp1, bamf, max_dep)
    loc2_reads, err_code2 = count.get_loc_reads(bp2, bamf, max_dep)
    bamf.close()

    sv_class = str(sv['classification'])
    if err_code1 == 1 or err_code2 == 1:
        sv['classification'] = 'HIDEP' if sv_class == '' else sv_class+';HIDEP'
    elif err_code1 == 2 or err_code2 == 2:
        sv['classification'] = 'READ_FETCH_FAILED' if sv_class == '' \
                                                    else sv_class+';READ_FETCH_FAILED'

    return sv, loc1_reads, loc2_reads, err_code1, err_code2 
开发者ID:mcmero,项目名称:SVclone,代码行数:25,代码来源:annotate.py

示例3: isPaired

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def isPaired(bamfile, alignments=1000):
    '''check if a *bamfile* contains paired end data

    The method reads at most the first *alignments* and returns
    True if any of the alignments are paired.
    '''

    samfile = pysam.AlignmentFile(bamfile)
    n = 0
    for read in samfile:
        if read.is_paired:
            break
        n += 1
        if n == alignments:
            break

    samfile.close()

    return n != alignments 
开发者ID:mcmero,项目名称:SVclone,代码行数:21,代码来源:bamtools.py

示例4: estimateInsertSizeDistribution

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def estimateInsertSizeDistribution(bamfile,
                                   alignments=50000):
    '''estimate insert size from first alignments in bam file.

    returns mean and stddev of insert sizes.
    '''

    assert isPaired(bamfile), \
        'can only estimate insert size from' \
        'paired bam files'

    samfile = pysam.AlignmentFile(bamfile)
    # only get positive to avoid double counting
    inserts = numpy.array(
        [read.tlen for read in samfile.head(alignments)
         if read.is_proper_pair and read.tlen > 0])
    insert_mean, insert_std = numpy.mean(inserts), numpy.std(inserts)
    print('Insert mean of %f, with standard deviation of %f inferred' % 
            (insert_mean, insert_std))
    if insert_mean > 10000 or insert_std > 1000 or \
        insert_mean < 1 or insert_std < 1:
            print('''WARNING: anomalous insert sizes detected. Please 
              double check or consider setting values manually.''')
    return insert_mean, insert_std 
开发者ID:mcmero,项目名称:SVclone,代码行数:26,代码来源:bamtools.py

示例5: estimateTagSize

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def estimateTagSize(bamfile,
                    alignments=10,
                    multiple="error"):
    '''estimate tag size from first alignments in file.'''
    samfile = pysam.AlignmentFile(bamfile)
    sizes = [read.rlen for read in samfile.head(alignments)]
    mi, ma = min(sizes), max(sizes)

    if mi == 0 and ma == 0:
        sizes = [read.inferred_length for read in samfile.head(alignments)]
        # remove 0 sizes (unaligned reads?)
        sizes = [x for x in sizes if x > 0]
        mi, ma = min(sizes), max(sizes)

    if mi != ma:
        if multiple == "error":
            raise ValueError('multiple tag sizes in %s: %s' % (bamfile, sizes))
        elif multiple == "mean":
            mi = int(sum(sizes) / len(sizes))

    return mi 
开发者ID:mcmero,项目名称:SVclone,代码行数:23,代码来源:bamtools.py

示例6: write_anomalous_read_to_bam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def write_anomalous_read_to_bam(bam,split_reads,span_reads,anom_reads,out):
    print('Writing anom reads to file')
    split_reads = np.unique(split_reads['query_name'])
    span_reads = np.unique(span_reads['query_name'])
    anom_reads = np.unique(anom_reads['query_name'])

    # need to filter out any reads that were at any point marked as valid supporting reads
    anom_reads = np.array([x for x in anom_reads if x not in split_reads])
    anom_reads = np.array([x for x in anom_reads if x not in span_reads])

    bamf = pysam.AlignmentFile(bam, "rb")
    index = pysam.IndexedReads(bamf)
    index.build()
    anom_bam = pysam.AlignmentFile("%s_anom_reads.bam" % out, "wb", template=bamf)
    for read_name in anom_reads:
        for read in index.find(read_name):
            anom_bam.write(read)
    anom_bam.close() 
开发者ID:mcmero,项目名称:SVclone,代码行数:20,代码来源:count.py

示例7: pysam_open

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def pysam_open(alignment_file, in_format='BAM'):
    """Open SAM/BAM file using pysam.

    :param alignment_file: Input file.
    :param in_format: Format (SAM or BAM).
    :returns: pysam.AlignmentFile
    :rtype: pysam.AlignmentFile
    """
    if in_format == 'BAM':
        mode = "rb"
    elif in_format == 'SAM':
        mode = "r"
    else:
        raise Exception("Invalid format: {}".format(in_format))

    aln_iter = pysam.AlignmentFile(alignment_file, mode)
    return aln_iter 
开发者ID:nanoporetech,项目名称:wub,代码行数:19,代码来源:common.py

示例8: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def __init__(self, *args):
        """A class that is essentially pysam.AlignmentFile, with some added bonuses

        This class inherits pysam.AlignmentFile and adds a little flair. Init such an object the
        way you would an AlignmentFile, i.e. bam = bamops.BAMFileObject(path_to_bam)
        """

        self.input_bam_path = args[0]
        filesnpaths.is_file_exists(self.input_bam_path)

        try:
            pysam.AlignmentFile.__init__(self)
        except ValueError as e:
            raise ConfigError('Are you sure "%s" is a BAM file? Because samtools is not happy with it: """%s"""' % (self.input_bam_path, e))

        try:
            self.mapped
        except ValueError:
            raise ConfigError("It seems the BAM file is not indexed. See 'anvi-init-bam' script.") 
开发者ID:merenlab,项目名称:anvio,代码行数:21,代码来源:bamops.py

示例9: write_reads_to_file

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def write_reads_to_file(read_groups, intervals, header_template, tmp_dir = "talon_tmp/"):
    """ For each read group, iterate over the reads and write them to a file
        named for the interval they belong to. This step is necessary because
        Pysam objects cannot be pickled. """
 
    tmp_dir = tmp_dir + "interval_files/"
    os.system("mkdir -p %s " % (tmp_dir))
 
    outbam_names = []
    with pysam.AlignmentFile(header_template, "rb") as template:
        for group, interval in zip(read_groups, intervals):
            fname = tmp_dir + "_".join([str(x) for x in interval]) + ".bam"
            outbam_names.append(fname)
            with pysam.AlignmentFile(fname, "wb", template = template) as f:
                for read in group:
                    f.write(read)
        
    return outbam_names 
开发者ID:mortazavilab,项目名称:TALON,代码行数:20,代码来源:process_sams.py

示例10: test_read_labels

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_read_labels(self):
        """ Given two sam files and two corresponding dataset labels, ensure 
            that the merged BAM file preserves the RB tags assigned by 
            pysam.merge """

        sams = ["input_files/preprocess_sam/read1.sam", 
               "input_files/preprocess_sam/read2.sam" ]
        datasets = ["dataset1", "dataset2"]
        tmp_dir = "scratch/test_read_labels/test1/"

        merged_bam = procsam.preprocess_sam(sams, datasets, tmp_dir = tmp_dir)

        with pysam.AlignmentFile(merged_bam) as bam:
            for entry in bam:
                if entry.query_name == "read_1":
                    assert entry.get_tag("RG") == "dataset1"
                elif entry.query_name == "read_2":
                    assert entry.get_tag("RG") == "dataset1"
                elif entry.query_name == "read_3":
                    assert entry.get_tag("RG") == "dataset2"
                else:
                    pytest.fail("Unexpected read encountered") 
开发者ID:mortazavilab,项目名称:TALON,代码行数:24,代码来源:test_preprocess_sam_dataset_labeling.py

示例11: test_parse_custom_SAM_tags

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_parse_custom_SAM_tags():
    """ Test that custom SAM tags are handled as expected """
   
    sam_file = "input_files/test_parse_custom_SAM_tags/toy_reads.sam"
    with pysam.AlignmentFile(sam_file, "rb") as sam: 
        for sam_record in sam:
            fraction_As, custom_label, allelic_label, \
            start_support, end_support = talon.parse_custom_SAM_tags(sam_record)
            if sam_record.query_name == "read_1":
                assert round(fraction_As,1) == 0.2
                assert custom_label == "yes"
                assert allelic_label == "paternal"
                assert start_support == "yes"
                assert end_support == "no"
            elif sam_record.query_name == "read_4":
                assert fraction_As == custom_label == allelic_label == None
                assert start_support == end_support == None
            else:
                pytest.fail("Did not recognize read name") 
开发者ID:mortazavilab,项目名称:TALON,代码行数:21,代码来源:test_parse_custom_SAM_tags.py

示例12: get_bam_coverages

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def get_bam_coverages(options, sample, dataset):
    window_skip = int(1e6)
    window_size = 1000
    skip_at_ends = int(1e6)

    bam = pysam.AlignmentFile(dataset.bam)

    print "getting coverages"
    coverages = []
    count = 0
    for chrom, start, end in get_search_regions(options.reference, 
                                                window_skip, window_size, skip_at_ends):
        coverages.append(bam.count(chrom, start, end))

        if count % 1000 == 0:
            print count
        count += 1
    return coverages 
开发者ID:grocsvs,项目名称:grocsvs,代码行数:20,代码来源:sample_info.py

示例13: count_ref_reads

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def count_ref_reads(bampath, reference, chrom, start, end):
    ref_counts = numpy.zeros(end-start)
    non_ref_counts = numpy.zeros(end-start)

    bam = pysam.AlignmentFile(bampath)

    # stepper = "all" skips dupe, unmapped, secondary, and qcfail reads
    start = max(0, start)
    
    for col in bam.pileup(chrom, start, end, truncate=True, stepper="all"):
        refnuc = reference.fasta[chrom][col.reference_pos].upper()
        nuc_counts = collections.Counter()
        
        for read in col.pileups:
            if read.query_position is None:
                # nuc_counts["indel"] += 1
                pass
            else:
                nuc_counts[read.alignment.query_sequence[read.query_position]] += 1
    
        ref_counts[col.reference_pos-start] = nuc_counts[refnuc]
        non_ref_counts[col.reference_pos-start] = sum(nuc_counts.values()) - nuc_counts[refnuc]

    return ref_counts, non_ref_counts 
开发者ID:grocsvs,项目名称:grocsvs,代码行数:26,代码来源:genotyping.py

示例14: test_quick_consensus

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_quick_consensus():
    bam = pysam.AlignmentFile("data/quick_consensus_test.bam")

    consensus = MismatchCounts("4", 96549060, 96567077)

    consensus.tally_reads(bam) 
开发者ID:nspies,项目名称:genomeview,代码行数:8,代码来源:test_quick_consensus.py

示例15: test_compare_quick_consensus

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_compare_quick_consensus():
    from genomeview import quickconsensus
    from genomeview import _quickconsensus

    bam = pysam.AlignmentFile("data/quick_consensus_test.bam")

    cython_consensus = _quickconsensus.MismatchCounts("4", 96549060, 96549060+1000)
    python_consensus = quickconsensus.MismatchCounts("4", 96549060, 96549060+1000)

    python_consensus.tally_reads(bam)
    cython_consensus.tally_reads(bam)

    assert (cython_consensus.counts == python_consensus.counts).all()
    assert (cython_consensus.insertions == python_consensus.insertions).all() 
开发者ID:nspies,项目名称:genomeview,代码行数:16,代码来源:test_quick_consensus.py


注:本文中的pysam.AlignmentFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。