

Python BedTool.slop Method Code Examples

This article collects typical usage examples of the Python method pybedtools.BedTool.slop. If you are wondering what exactly BedTool.slop does, how it is called, or where to find examples of it in use, the curated code samples below may help. You can also explore further usage examples of the containing class, pybedtools.BedTool.


A total of 8 code examples of BedTool.slop are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
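
Before the collected examples, a minimal sketch of the basic call pattern may help. It assumes pybedtools is installed and that a local file named variants.bed exists; the file name and the 50 bp flank are placeholders only.

# Minimal usage sketch (file name and flank size are placeholders):
import pybedtools
from pybedtools import BedTool

v = BedTool('variants.bed')                              # hypothetical input intervals
flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=50)   # extend each interval by 50 bp on both sides
flanks.saveas('variants_slop50.bed')                     # write the extended intervals to a new BED file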

Example 1: gc_content

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def gc_content(vf, fa, flank=50):
    print "inside gc_content"
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    nc = flanks.nucleotide_content(fi=fa)
    results = dict([ (r.name, float(r[5])) for r in nc ])
    print "exiting gc_content"
    return Series(results, name="GC") 
Author: amanchahar, Project: ComplexVariants, Lines: 10, Source: gwava_annotate.py

Example 2: seq_context

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def seq_context(vf, fa):
    print "inside seq_context"
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=1)
    nc = flanks.nucleotide_content(fi=fa, seq=True, pattern="CG", C=True)
    cpg_context = Series(dict([ (r.name, float(r[14])) for r in nc ]))
    nucleotide = Series(dict([ (r.name, r[13][1].upper()) for r in nc ]))
    results = {}
    for b in 'ACGT':
        results['seq_'+b] = (nucleotide == b).apply(float)

    results['in_cpg'] = cpg_context
    print "exiting seq_context"
    return DataFrame(results) 
Author: amanchahar, Project: ComplexVariants, Lines: 16, Source: gwava_annotate.py

Example 3: snp_stats

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def snp_stats(vf, af, stat='avg_het', flank=500):
    v = BedTool(vf)
    feats = BedTool(af)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    intersection = feats.intersect(flanks, wb=True)
    results = {}
    if len(intersection) > 0:
        sort_cmd = 'sort -k6,6 -k7,7n -k8,8n -k9,9 %s -o %s' % (intersection.fn, intersection.fn)
        call(sort_cmd, shell=True)
        annots = intersection.groupby(g=[6,7,8,9], c=5, ops='collapse')

        for entry in annots:
            rates = entry[4].split(',')
            tot = reduce(lambda x, y: x + float(y), rates, 0.)
            rate = tot / (flank * 2)
            results[entry.name] = rate
        
    return Series(results, name=stat)
Author: tianxiahuihui, Project: bioinformatics, Lines: 20, Source: anno_info2.py

Example 4: slop_bed

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
    def slop_bed(self, bed_in, flank_length=50):
        """Add flanking sequences to bed file

        Parameters
        ----------
        bed_in: string
            Path to input bed file
        flank_length: int
            the bed region is expanded in both directions by flank_length bases

        Returns
        -------
        filename: string
            Path to the slopped (flank-extended) bed file, written next to the input file
        """
        inbed = BedTool(bed_in)
        slopped_bed = inbed.slop(g=self.genome_table, b=flank_length)
        filename, extension = os.path.splitext(bed_in)
        filename += '_slop_{}'.format(flank_length) + extension
        slopped_bed.saveas(filename)
        return filename
Author: saketkc, Project: moca, Lines: 23, Source: model.py
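
A hypothetical call of the slop_bed method above might look as follows; the object name and input path are assumptions for illustration, not taken from the moca project.

# Hypothetical usage of the slop_bed method shown above; `pipeline` stands in for whatever
# object carries a genome_table attribute (a chromosome-size file such as hg19.chrom.sizes).
slopped_path = pipeline.slop_bed('peaks.bed', flank_length=50)
print(slopped_path)   # e.g. 'peaks_slop_50.bed'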

Example 5: average_gerp

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def average_gerp(vf, af, flank=50):
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    return gerp(flanks.fn, af, name="avg_gerp")
Author: tianxiahuihui, Project: bioinformatics, Lines: 6, Source: anno_info2.py

Example 6: __init__

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]

#......... part of the code omitted here .........
                if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
                    debug('Annotating target BED file and collecting overlapping genome features')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
                else:
                    debug('Overlapping with genomic features:')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, only_canonical=True)
                debug('Saved to ' + ann_target_bed_fpath)
                verify_file(ann_target_bed_fpath, is_critical=True)
        else:
            ann_target_bed_fpath = sort_target_bed_fpath

        final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
        if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
            bed = BedTool(ann_target_bed_fpath).remove_invalid()
            with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
                bed.saveas(tx)
                pass
            verify_file(final_clean_target_bed_fpath, is_critical=True)

        self.bed_fpath = final_clean_target_bed_fpath
        self.bed = BedTool(self.bed_fpath)
        
        self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
        if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
            with file_transaction(work_dir, self.capture_bed_fpath) as tx:
                self.get_capture_bed().saveas(tx)

        gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
        self.gene_keys_set = gene_key_set
        self.gene_keys_list = gene_key_list
        self.regions_num = self.get_capture_bed().count()

        self._make_qualimap_bed(work_dir)
        if padding:
            self._make_padded_bed(work_dir, fai_fpath, padding)

    def _make_padded_bed(self, work_dir, fai_fpath, padding):
        if self.is_wgs:
            return None

        self.padded_bed_fpath = intermediate_fname(work_dir, self.capture_bed_fpath, 'padded')
        if can_reuse(self.padded_bed_fpath, self.capture_bed_fpath):
            return BedTool(self.padded_bed_fpath)

        padded_bed = self.bed.slop(b=padding, g=fai_fpath).sort().merge()
        with file_transaction(work_dir, self.padded_bed_fpath) as tx:
            padded_bed.saveas(tx)
        verify_file(self.padded_bed_fpath, is_critical=True)
        return BedTool(self.padded_bed_fpath)

    def _make_qualimap_bed(self, work_dir):
        if self.is_wgs:
            return None

        self.qualimap_bed_fpath = intermediate_fname(work_dir, self.capture_bed_fpath, 'qualimap_ready')
        if can_reuse(self.qualimap_bed_fpath, self.capture_bed_fpath):
            return self.qualimap_bed_fpath

        debug('Merging and saving BED into required bed6 format for Qualimap')
        bed = self.bed.sort().merge()
        with file_transaction(work_dir, self.qualimap_bed_fpath) as tx:
            with open(tx, 'w') as out:
                for i, region in enumerate(x for x in bed):
                    region = [x for x in list(region) if x]
                    fillers = [str(i), "1.0", "+"]
                    full = region + fillers[:6 - len(region)]
                    out.write("\t".join(full) + "\n")
        verify_file(self.qualimap_bed_fpath, is_critical=True)
        return self.qualimap_bed_fpath

    def _make_wgs_regions_file(self, work_dir, genome=None):
        self.wgs_bed_fpath = join(work_dir, 'targqc_features_to_report.bed')
        if can_reuse(self.wgs_bed_fpath, ebl.ensembl_gtf_fpath(genome)):
            return self.wgs_bed_fpath

        chr_order = reference_data.get_chrom_order(genome or cfg.genome)

        r_by_tx_by_gene = OrderedDefaultDict(lambda: defaultdict(list))
        all_features = ebl.get_all_features(genome or cfg.genome, high_confidence=True)

        debug('Select best transcript to report')
        for r in all_features:
            if r[ebl.BedCols.FEATURE] != 'gene':
                gene = r[ebl.BedCols.GENE]
                tx = r[ebl.BedCols.ENSEMBL_ID]
                r_by_tx_by_gene[gene][tx].append(r.fields)

        with file_transaction(work_dir, self.wgs_bed_fpath) as tx:
            with open(tx, 'w') as out:
                for gname, r_by_tx in r_by_tx_by_gene.items():
                    all_tx = (x for xx in r_by_tx.values() for x in xx if x[ebl.BedCols.FEATURE] == 'transcript')
                    tx_sorted_list = [x[ebl.BedCols.ENSEMBL_ID] for x in sorted(all_tx, key=tx_priority_sort_key)]
                    if not tx_sorted_list:
                        continue
                    tx_id = tx_sorted_list[0]
                    for r in sorted(r_by_tx[tx_id], key=get_sort_key(chr_order)):
                        out.write('\t'.join(str(f) for f in r) + '\n')
        return self.wgs_bed_fpath
Author: vladsaveliev, Project: TargQC, Lines: 104, Source: Target.py
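
The padding step in _make_padded_bed above reduces to a slop, sort, merge chain; a stand-alone sketch with plain pybedtools calls could look like this (file names and the 200 bp padding are placeholders).

from pybedtools import BedTool

# Pad capture regions, sort them, then collapse overlapping intervals into merged regions,
# mirroring the pattern used in _make_padded_bed above.
capture = BedTool('capture_targets.bed')                          # hypothetical target regions
padded = capture.slop(b=200, g='hg19.fa.fai').sort().merge()      # pad -> sort -> merge
padded.saveas('capture_targets_padded.bed')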

Example 7: average_gerp

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def average_gerp(vf, af, flank=50):
    print "inside average_gerp"
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    print "exiting average gerp"
    return gerp(flanks.fn, af, name="avg_gerp")
Author: amanchahar, Project: ComplexVariants, Lines: 8, Source: gwava_annotate.py

Example 8: Bedfile

# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
class Bedfile(object):
    """Class to crate a bed file object
    Parameters
    ----------
    filepath: string
        Absolute path to bedfile

    genome_table: string
        Absolute path to genome chromosome size file
    """
    def __init__(self, filepath, genome_table):
        self.filepath = filepath
        self.bed_format = None
        if not os.path.isfile(filepath):
            raise MocaException('Bed file {} not found'.format(self.filepath))
        self._read()
        self.bed_format = self.guess_bedformat()
        self.sort_bed()
        self.bed = BedTool(filepath)
        self.genome_table = genome_table
        assert self.bed_format is not None

    def _read(self):
        try:
            self.bed_df = pandas.read_table(self.filepath,
                                        header=None)
        except Exception as e:
            raise MocaException('Error reading bed file {}'.format(self.filepath),
                                'Traceback: {}'.format(e))

    def guess_bedformat(self):
        """Method to guess bed format
        Returns
        -------
        bed_format: string
            BED format

        Example:
            >>> bed_df = Bedfile('file.bed', 'genome.chrom.sizes')
            >>> print(bed_df.guess_bedformat())

        """
        self.bed_columns = self.bed_df.columns
        count = len(self.bed_columns)
        try:
            bed_format = __BED_TYPES__[count]
        except KeyError:
            raise MocaException('Bed file had {} columns. Supported column lengths are {}'.format(count, list(__BED_TYPES__.keys())))
        return bed_format

    def slop_bed(self, flank_length=5):
        """Add flanking sequences to bed file
        Parameters
        ----------
        flank_length: int
            the bed region is expanded in both directions by flank_length bases
        Returns
        -------
        slopped_bed: BedTool
            Slopped bed data object
        """
        slopped_bed = self.bed.slop(g=self.genome_table,
                                    b=flank_length)
        return slopped_bed

    def convert_to_scorefile(self):

        """
        filename, file_extension = os.path.splitext(self.filepath)
        filename += '.sorted'
        self.bed_df.to_csv(filename+file_extension,
                           sep='\t',
                           columns=['chrom', 'peak_positions', 'score'],
                           index=False,
                           header=False)
        """
        # NOTE: `filetype` and `df` are not defined in this excerpt; they are assumed
        # to come from parts of the source file that are not shown here.
        if filetype == 'narrowPeak':
            filter_df1 = df[df.peak.astype(int) == -1]
            filter_df2 = df[df.peak.astype(int) != -1]
            filter_df1['peak_positions'] = (filter_df1['chromStart'].astype(int) + filter_df1['chromEnd'].astype(int))
            filter_df1['peak_positions'] = [int(x/2) for x in filter_df1['peak_positions'].astype(int)]
            filter_df2['peak_positions'] = filter_df2['chromStart'].astype(int) + filter_df2['peak'].astype(int)
            df = pandas.concat([filter_df1, filter_df2])
        else:
            df['peak_positions'] = (df['chromStart'] + df['chromEnd'])
            df['peak_positions'] = [int(x/2) for x in df['peak_positions'].astype(int)]



    def extract_fasta(self, fasta_file):
        """Extract fasta of bed regions
        Parameters
        ----------
        fasta_file: string
            Absolute path to location of fasta file
        Returns
        -------
        fasta: string
            Fasta sequence combined
        """
#......... part of the code omitted here .........
Author: saketkc, Project: moca_web, Lines: 103, Source: model.py
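
A rough usage sketch of the Bedfile class above might look like this; the paths are placeholders, and the class itself depends on __BED_TYPES__, MocaException and other moca_web internals not shown in the excerpt.

# Rough usage sketch for the Bedfile class above (paths are placeholders):
bedfile = Bedfile('/data/peaks.bed', '/data/hg19.chrom.sizes')   # hypothetical input and genome table
print(bedfile.guess_bedformat())                                 # e.g. 'narrowPeak'
slopped = bedfile.slop_bed(flank_length=5)                       # extend every region by 5 bp on both sides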


Note: The pybedtools.BedTool.slop method examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and distribution and use should follow the corresponding project licenses. Please do not reproduce without permission.