本文整理汇总了Python中pybedtools.BedTool.field_count方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.field_count方法的具体用法?Python BedTool.field_count怎么用?Python BedTool.field_count使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybedtools.BedTool
的用法示例。
在下文中一共展示了BedTool.field_count方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: xstream
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def xstream(a, b, distance, updown, out):
"""
find all things in b that are within
distance of a in the given direction
(up or down-stream)
"""
direction = dict(u="l", d="r")[updown[0]]
kwargs = {'sw':True, direction: distance}
if "l" in kwargs: kwargs["r"] = 0
else: kwargs["l"] = 0
a = BedTool(a).saveas()
kwargs['stream'] = True
c = a.window(b, **kwargs)
afields = a.field_count()
seen = collections.defaultdict(set)
for feat in c:
key = "\t".join(feat[:afields])
# keep track of all the feature names that overlap this one
seen[key].update((feat[afields + 3],))
# the entries that did appear in the window
for row in seen:
out.write(row + "\t" + ",".join(sorted(seen[row])) + "\n")
# write the entries that did not appear in the window'ed Bed
for row in a:
key = "\t".join(row[:afields])
if key in seen: continue
out.write(str(row) + "\t.\n")
out.flush()
assert len(BedTool(out.name)) == len(a)
示例2: add_closest
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def add_closest(aname, bname):
a, b = BedTool(aname), BedTool(bname)
afields = a.field_count()
c = a.closest(b, d=True)
get_name = gen_get_name(b, afields)
dbed = open(BedTool._tmp(), "w")
# keep the name and distance
seen_by_line = collections.defaultdict(list)
for feat in c:
key = "\t".join(feat[:afields])
seen_by_line[key].append([feat[-1], get_name(feat)])
for key, dist_names in seen_by_line.iteritems():
if len(dist_names) > 0:
assert len(set([d[0] for d in dist_names])) == 1
names = ",".join(sorted(set(d[1] for d in dist_names)))
new_line = "\t".join([key] + [names] + [dist_names[0][0]])
dbed.write(new_line + "\n")
dbed.close()
d = BedTool(dbed.name)
assert len(d) == len(a)
return d
示例3: annotate
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def annotate(input_bed_fpath, output_fpath, work_dir, genome=None,
reannotate=True, high_confidence=False, only_canonical=False,
coding_only=False, short=False, extended=False, is_debug=False, **kwargs):
debug('Getting features from storage')
features_bed = ba.get_all_features(genome)
if features_bed is None:
critical('Genome ' + genome + ' is not supported. Supported: ' + ', '.join(ba.SUPPORTED_GENOMES))
if genome:
fai_fpath = reference_data.get_fai(genome)
chr_order = reference_data.get_chrom_order(genome)
else:
fai_fpath = None
chr_order = bed_chrom_order(input_bed_fpath)
input_bed_fpath = sort_bed(input_bed_fpath, work_dir=work_dir, chr_order=chr_order, genome=genome)
ori_bed = BedTool(input_bed_fpath)
ori_col_num = ori_bed.field_count()
reannotate = reannotate or ori_col_num == 3
pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'bedtools')))
ori_bed = BedTool(input_bed_fpath)
if high_confidence:
features_bed = features_bed.filter(ba.high_confidence_filter)
if only_canonical:
features_bed = features_bed.filter(ba.get_only_canonical_filter(genome))
if coding_only:
features_bed = features_bed.filter(ba.protein_coding_filter)
# unique_tx_by_gene = find_best_tx_by_gene(features_bed)
info('Extracting features from Ensembl GTF')
features_bed = features_bed.filter(lambda x:
x[ba.BedCols.FEATURE] in ['exon', 'CDS', 'stop_codon', 'transcript'])
# x[ebl.BedCols.ENSEMBL_ID] == unique_tx_by_gene[x[ebl.BedCols.GENE]])
info('Overlapping regions with Ensembl data')
if is_debug:
ori_bed = ori_bed.saveas(join(work_dir, 'bed.bed'))
debug(f'Saved regions to {ori_bed.fn}')
features_bed = features_bed.saveas(join(work_dir, 'features.bed'))
debug(f'Saved features to {features_bed.fn}')
annotated = _annotate(ori_bed, features_bed, chr_order, fai_fpath, work_dir, ori_col_num,
high_confidence=False, reannotate=reannotate, is_debug=is_debug, **kwargs)
full_header = [ba.BedCols.names[i] for i in ba.BedCols.cols]
add_ori_extra_fields = ori_col_num > 3
if not reannotate and ori_col_num == 4:
add_ori_extra_fields = False # no need to report the original gene field if we are not re-annotating
info('Saving annotated regions...')
total = 0
with file_transaction(work_dir, output_fpath) as tx:
with open(tx, 'w') as out:
header = full_header[:6]
if short:
header = full_header[:4]
if extended:
header = full_header[:-1]
if add_ori_extra_fields:
header.append(full_header[-1])
if extended:
out.write('## ' + ba.BedCols.names[ba.BedCols.TX_OVERLAP_PERCENTAGE] +
': part of region overlapping with transcripts\n')
out.write('## ' + ba.BedCols.names[ba.BedCols.EXON_OVERLAPS_PERCENTAGE] +
': part of region overlapping with exons\n')
out.write('## ' + ba.BedCols.names[ba.BedCols.CDS_OVERLAPS_PERCENTAGE] +
': part of region overlapping with protein coding regions\n')
out.write('\t'.join(header) + '\n')
for full_fields in annotated:
fields = full_fields[:6]
if short:
fields = full_fields[:4]
if extended:
fields = full_fields[:-1]
if add_ori_extra_fields:
fields.append(full_fields[-1])
out.write('\t'.join(map(_format_field, fields)) + '\n')
total += 1
debug('Saved ' + str(total) + ' total annotated regions')
return output_fpath
示例4: _make_target_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
def _make_target_bed(self, bed_fpath, work_dir, output_dir, is_debug,
padding=None, fai_fpath=None, genome=None, reannotate=False):
clean_target_bed_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
if not can_reuse(clean_target_bed_fpath, bed_fpath):
debug()
debug('Cleaning target BED file...')
bed = BedTool(bed_fpath)
if bed.field_count() > 4:
bed = bed.cut(range(4))
bed = bed\
.filter(lambda x: x.chrom and not any(x.chrom.startswith(e) for e in ['#', ' ', 'track', 'browser']))\
.remove_invalid()
with file_transaction(work_dir, clean_target_bed_fpath) as tx:
bed.saveas(tx)
debug('Saved to ' + clean_target_bed_fpath)
verify_file(clean_target_bed_fpath, is_critical=True)
sort_target_bed_fpath = intermediate_fname(work_dir, clean_target_bed_fpath, 'sorted')
if not can_reuse(sort_target_bed_fpath, clean_target_bed_fpath):
debug()
debug('Sorting target BED file...')
sort_target_bed_fpath = sort_bed(clean_target_bed_fpath, output_bed_fpath=sort_target_bed_fpath, fai_fpath=fai_fpath)
debug('Saved to ' + sort_target_bed_fpath)
verify_file(sort_target_bed_fpath, is_critical=True)
if genome in ebl.SUPPORTED_GENOMES:
ann_target_bed_fpath = intermediate_fname(work_dir, sort_target_bed_fpath, 'ann_plus_features')
if not can_reuse(ann_target_bed_fpath, sort_target_bed_fpath):
debug()
if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
debug('Annotating target BED file and collecting overlapping genome features')
overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
else:
debug('Overlapping with genomic features:')
overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
genome=genome, extended=True, only_canonical=True)
debug('Saved to ' + ann_target_bed_fpath)
verify_file(ann_target_bed_fpath, is_critical=True)
else:
ann_target_bed_fpath = sort_target_bed_fpath
final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
bed = BedTool(ann_target_bed_fpath).remove_invalid()
with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
bed.saveas(tx)
pass
verify_file(final_clean_target_bed_fpath, is_critical=True)
self.bed_fpath = final_clean_target_bed_fpath
self.bed = BedTool(self.bed_fpath)
self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
with file_transaction(work_dir, self.capture_bed_fpath) as tx:
self.get_capture_bed().saveas(tx)
gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
self.gene_keys_set = gene_key_set
self.gene_keys_list = gene_key_list
self.regions_num = self.get_capture_bed().count()
self._make_qualimap_bed(work_dir)
if padding:
self._make_padded_bed(work_dir, fai_fpath, padding)
示例5: BedTool
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
exons = [];
for interval in BedTool(args.gff3):
if('ID' in interval.attrs and interval.attrs['ID'].split(':')[0] == 'gene'):
curname = interval.attrs['gene_id']
enames = set()
if(interval[2] == 'exon'):
if(interval.name not in enames):
enames.add(interval.name)
interval.name = curname
exons.append(gff2bed(interval))
#Get an intersection between circles and expms
bed = BedTool(args.path);
offset = bed.field_count();
intersection = bed.intersect(b=exons, s=True, wao=True);
curname = ''
cexons = []
for interval in intersection:
if(curname == interval.name):
cexons.append(tuple(interval[offset:offset+6]))
else:
if(curname):
get_exons(cinterval, cexons)
cinterval = interval
cexons = [tuple(interval[offset:offset+6])]
curname = interval.name
else:
get_exons(cinterval, cexons)
示例6: str
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import field_count [as 别名]
for k2, i in d.items():
l.append("%s:\t%s:\t%s" % (k1, k2, str(i).strip()));
return "\n".join(l)
chimeras = BedTool(args.path)
if(len(chimeras) == 0):
sys.stderr.write("input file \'%s\' is empty\n" % args.path);
sys.exit();
exons = BedTool(args.exons)
offset = chimeras.field_count();
chimeras_vs_exons = chimeras.intersect(exons, s=args.stranded, wao=True)
first = chimeras_vs_exons[0]
curname, cur_pair_number = first.name.split("|");
intersections = defaultdict(dict);
intervals = OrderedDict();
intervals[int(cur_pair_number)] = first;
intersections[first[offset+3]][int(cur_pair_number)] = list2interval(first[offset:])
for i in chimeras_vs_exons[1:]:
name, pair_number = i.name.split("|");
if(name == curname):
if(pair_number == cur_pair_number):