当前位置: 首页>>代码示例>>Python>>正文


Python BedTool.field_count方法代码示例

本文整理汇总了Python中pybedtools.BedTool.field_count方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.field_count方法的具体用法?Python BedTool.field_count怎么用?Python BedTool.field_count使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pybedtools.BedTool的用法示例。


在下文中一共展示了BedTool.field_count方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: xstream

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def xstream(a, b, distance, updown, out):
    """
    find all things in b that are within
    distance of a in the given direction
    (up or down-stream)
    """
    direction = dict(u="l", d="r")[updown[0]]
    kwargs = {'sw':True, direction: distance}

    if "l" in kwargs: kwargs["r"] = 0
    else: kwargs["l"] = 0
    a = BedTool(a).saveas()

    kwargs['stream'] = True
    c = a.window(b, **kwargs)
    afields = a.field_count()

    seen = collections.defaultdict(set)
    for feat in c:
        key = "\t".join(feat[:afields])
        # keep track of all the feature names that overlap this one
        seen[key].update((feat[afields + 3],))

    # the entries that did appear in the window
    for row in seen:
        out.write(row + "\t" + ",".join(sorted(seen[row])) + "\n")

    # write the entries that did not appear in the window'ed Bed
    for row in a:
        key = "\t".join(row[:afields])
        if key in seen: continue
        out.write(str(row) + "\t.\n")
    out.flush()
    assert len(BedTool(out.name)) == len(a)
开发者ID:GunioRobot,项目名称:bio-playground,代码行数:36,代码来源:superanno.py

示例2: add_closest

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def add_closest(aname, bname):
    a, b = BedTool(aname), BedTool(bname)

    afields = a.field_count()
    c = a.closest(b, d=True)
    get_name = gen_get_name(b, afields)

    dbed = open(BedTool._tmp(), "w")
    # keep the name and distance
    seen_by_line = collections.defaultdict(list)
    for feat in c:
        key = "\t".join(feat[:afields])
        seen_by_line[key].append([feat[-1], get_name(feat)])

    for key, dist_names in seen_by_line.iteritems():
        if len(dist_names) > 0:
            assert len(set([d[0] for d in dist_names])) == 1
        names = ",".join(sorted(set(d[1] for d in dist_names)))
        new_line = "\t".join([key] + [names] + [dist_names[0][0]])
        dbed.write(new_line + "\n")
    dbed.close()
    d = BedTool(dbed.name)
    assert len(d) == len(a)
    return d
开发者ID:lbeltrame,项目名称:pybedtools,代码行数:26,代码来源:annotate.py

示例3: annotate

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def annotate(input_bed_fpath, output_fpath, work_dir, genome=None,
             reannotate=True, high_confidence=False, only_canonical=False,
             coding_only=False, short=False, extended=False, is_debug=False, **kwargs):

    debug('Getting features from storage')
    features_bed = ba.get_all_features(genome)
    if features_bed is None:
        critical('Genome ' + genome + ' is not supported. Supported: ' + ', '.join(ba.SUPPORTED_GENOMES))

    if genome:
        fai_fpath = reference_data.get_fai(genome)
        chr_order = reference_data.get_chrom_order(genome)
    else:
        fai_fpath = None
        chr_order = bed_chrom_order(input_bed_fpath)

    input_bed_fpath = sort_bed(input_bed_fpath, work_dir=work_dir, chr_order=chr_order, genome=genome)

    ori_bed = BedTool(input_bed_fpath)
    ori_col_num = ori_bed.field_count()
    reannotate = reannotate or ori_col_num == 3
    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'bedtools')))
    ori_bed = BedTool(input_bed_fpath)

    if high_confidence:
        features_bed = features_bed.filter(ba.high_confidence_filter)
    if only_canonical:
        features_bed = features_bed.filter(ba.get_only_canonical_filter(genome))
    if coding_only:
        features_bed = features_bed.filter(ba.protein_coding_filter)
    # unique_tx_by_gene = find_best_tx_by_gene(features_bed)

    info('Extracting features from Ensembl GTF')
    features_bed = features_bed.filter(lambda x:
                                       x[ba.BedCols.FEATURE] in ['exon', 'CDS', 'stop_codon', 'transcript'])
        # x[ebl.BedCols.ENSEMBL_ID] == unique_tx_by_gene[x[ebl.BedCols.GENE]])

    info('Overlapping regions with Ensembl data')
    if is_debug:
        ori_bed = ori_bed.saveas(join(work_dir, 'bed.bed'))
        debug(f'Saved regions to {ori_bed.fn}')
        features_bed = features_bed.saveas(join(work_dir, 'features.bed'))
        debug(f'Saved features to {features_bed.fn}')
    annotated = _annotate(ori_bed, features_bed, chr_order, fai_fpath, work_dir, ori_col_num,
                          high_confidence=False, reannotate=reannotate, is_debug=is_debug, **kwargs)

    full_header = [ba.BedCols.names[i] for i in ba.BedCols.cols]
    add_ori_extra_fields = ori_col_num > 3
    if not reannotate and ori_col_num == 4:
        add_ori_extra_fields = False  # no need to report the original gene field if we are not re-annotating

    info('Saving annotated regions...')
    total = 0
    with file_transaction(work_dir, output_fpath) as tx:
        with open(tx, 'w') as out:
            header = full_header[:6]
            if short:
                header = full_header[:4]
            if extended:
                header = full_header[:-1]
            if add_ori_extra_fields:
                header.append(full_header[-1])

            if extended:
                out.write('## ' + ba.BedCols.names[ba.BedCols.TX_OVERLAP_PERCENTAGE] +
                          ': part of region overlapping with transcripts\n')
                out.write('## ' + ba.BedCols.names[ba.BedCols.EXON_OVERLAPS_PERCENTAGE] +
                          ': part of region overlapping with exons\n')
                out.write('## ' + ba.BedCols.names[ba.BedCols.CDS_OVERLAPS_PERCENTAGE] +
                          ': part of region overlapping with protein coding regions\n')
                out.write('\t'.join(header) + '\n')
            for full_fields in annotated:
                fields = full_fields[:6]
                if short:
                    fields = full_fields[:4]
                if extended:
                    fields = full_fields[:-1]
                if add_ori_extra_fields:
                    fields.append(full_fields[-1])

                out.write('\t'.join(map(_format_field, fields)) + '\n')
                total += 1
    
    debug('Saved ' + str(total) + ' total annotated regions')
    return output_fpath
开发者ID:vladsaveliev,项目名称:GeneAnnotation,代码行数:87,代码来源:bed_annotation.py

示例4: _make_target_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
    def _make_target_bed(self, bed_fpath, work_dir, output_dir, is_debug,
                         padding=None, fai_fpath=None, genome=None, reannotate=False):
        clean_target_bed_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
        if not can_reuse(clean_target_bed_fpath, bed_fpath):
            debug()
            debug('Cleaning target BED file...')
            bed = BedTool(bed_fpath)
            if bed.field_count() > 4:
                bed = bed.cut(range(4))
            bed = bed\
                .filter(lambda x: x.chrom and not any(x.chrom.startswith(e) for e in ['#', ' ', 'track', 'browser']))\
                .remove_invalid()
            with file_transaction(work_dir, clean_target_bed_fpath) as tx:
                bed.saveas(tx)
            debug('Saved to ' + clean_target_bed_fpath)
            verify_file(clean_target_bed_fpath, is_critical=True)

        sort_target_bed_fpath = intermediate_fname(work_dir, clean_target_bed_fpath, 'sorted')
        if not can_reuse(sort_target_bed_fpath, clean_target_bed_fpath):
            debug()
            debug('Sorting target BED file...')
            sort_target_bed_fpath = sort_bed(clean_target_bed_fpath, output_bed_fpath=sort_target_bed_fpath, fai_fpath=fai_fpath)
            debug('Saved to ' + sort_target_bed_fpath)
            verify_file(sort_target_bed_fpath, is_critical=True)

        if genome in ebl.SUPPORTED_GENOMES:
            ann_target_bed_fpath = intermediate_fname(work_dir, sort_target_bed_fpath, 'ann_plus_features')
            if not can_reuse(ann_target_bed_fpath, sort_target_bed_fpath):
                debug()
                if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
                    debug('Annotating target BED file and collecting overlapping genome features')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
                else:
                    debug('Overlapping with genomic features:')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, only_canonical=True)
                debug('Saved to ' + ann_target_bed_fpath)
                verify_file(ann_target_bed_fpath, is_critical=True)
        else:
            ann_target_bed_fpath = sort_target_bed_fpath

        final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
        if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
            bed = BedTool(ann_target_bed_fpath).remove_invalid()
            with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
                bed.saveas(tx)
                pass
            verify_file(final_clean_target_bed_fpath, is_critical=True)

        self.bed_fpath = final_clean_target_bed_fpath
        self.bed = BedTool(self.bed_fpath)
        
        self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
        if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
            with file_transaction(work_dir, self.capture_bed_fpath) as tx:
                self.get_capture_bed().saveas(tx)

        gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
        self.gene_keys_set = gene_key_set
        self.gene_keys_list = gene_key_list
        self.regions_num = self.get_capture_bed().count()

        self._make_qualimap_bed(work_dir)
        if padding:
            self._make_padded_bed(work_dir, fai_fpath, padding)
开发者ID:vladsaveliev,项目名称:TargQC,代码行数:68,代码来源:Target.py

示例5: BedTool

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
exons = [];
for interval in BedTool(args.gff3):
	if('ID' in interval.attrs and interval.attrs['ID'].split(':')[0] == 'gene'):
		curname = interval.attrs['gene_id']
		enames = set()
	if(interval[2] == 'exon'):
		if(interval.name not in enames):
			enames.add(interval.name)
			interval.name = curname
			exons.append(gff2bed(interval))
		

	
#Get an intersection between circles and expms
bed = BedTool(args.path);
offset  = bed.field_count();
intersection = bed.intersect(b=exons, s=True, wao=True);

curname = ''
cexons = []
for interval in intersection:
	if(curname == interval.name):
		cexons.append(tuple(interval[offset:offset+6]))
	else:
		if(curname):
			get_exons(cinterval, cexons)
		cinterval = interval
		cexons = [tuple(interval[offset:offset+6])]
		curname = interval.name
else:
	get_exons(cinterval, cexons)
开发者ID:afilipch,项目名称:nrlbio,代码行数:33,代码来源:circle2exons.py

示例6: str

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
			for k2, i in d.items():
				l.append("%s:\t%s:\t%s" % (k1, k2, str(i).strip()));
			
		return "\n".join(l)	





chimeras = BedTool(args.path)
if(len(chimeras) == 0):
	sys.stderr.write("input file \'%s\' is empty\n" % args.path);
	sys.exit();

exons = BedTool(args.exons)
offset  = chimeras.field_count();
chimeras_vs_exons = chimeras.intersect(exons, s=args.stranded, wao=True)


first = chimeras_vs_exons[0]

curname, cur_pair_number = first.name.split("|");
intersections = defaultdict(dict);
intervals = OrderedDict();
intervals[int(cur_pair_number)] = first;
intersections[first[offset+3]][int(cur_pair_number)] = list2interval(first[offset:])

for i in chimeras_vs_exons[1:]:
	name, pair_number = i.name.split("|");
	if(name == curname):
		if(pair_number == cur_pair_number):
开发者ID:afilipch,项目名称:nrlbio,代码行数:33,代码来源:annotate_chimera.py


注:本文中的pybedtools.BedTool.field_count方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。