This article collects typical usage examples of the Python method pybedtools.BedTool.slop. If you are wondering what BedTool.slop does, how to use it, or what calling it looks like in practice, the curated code examples below may help. You can also explore further usage examples of the containing class, pybedtools.BedTool.
Eight code examples of BedTool.slop are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
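Before the examples, here is a minimal, self-contained sketch of what BedTool.slop does: it extends each interval by a given number of bases on both sides, clipped to chromosome boundaries. The interval and chromosome sizes below are made-up illustration values, not taken from any of the examples.

from pybedtools import BedTool

# One made-up interval; from_string lets us build a BedTool without a file.
intervals = BedTool('chr1 100 200 feature1', from_string=True)

# Extend by 50 bp on each side. `genome` may be an assembly name ('hg19')
# or a dict of chromosome sizes, used to clip intervals at chromosome ends.
extended = intervals.slop(b=50, genome={'chr1': (0, 1000)})
print(extended)  # chr1  50  250  feature1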
Example 1: gc_content
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def gc_content(vf, fa, flank=50):
    print("inside gc_content")
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    nc = flanks.nucleotide_content(fi=fa)
    results = dict([(r.name, float(r[5])) for r in nc])
    print("exiting gc_content")
    return Series(results, name="GC")
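As a usage sketch (the file paths below are placeholders, not from the original project), gc_content takes a variant interval file and a reference FASTA and returns a pandas Series of per-variant GC fractions:

# Hypothetical call; 'variants.bed' and 'hg19.fa' are placeholder paths.
# For a 4-column input, column index 5 of `bedtools nuc` output is the GC fraction.
gc = gc_content('variants.bed', 'hg19.fa', flank=50)
print(gc.head())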
Example 2: seq_context
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def seq_context(vf, fa):
    print("inside seq_context")
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=1)
    nc = flanks.nucleotide_content(fi=fa, seq=True, pattern="CG", C=True)
    cpg_context = Series(dict([(r.name, float(r[14])) for r in nc]))
    nucleotide = Series(dict([(r.name, r[13][1].upper()) for r in nc]))
    results = {}
    for b in 'ACGT':
        results['seq_' + b] = (nucleotide == b).apply(float)
    results['in_cpg'] = cpg_context
    print("exiting seq_context")
    return DataFrame(results)
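The hard-coded indices r[13] and r[14] above depend on how many columns the input file has, because bedtools nuc appends its statistics (and, with seq=True and pattern=..., the extracted sequence and pattern count) after the original columns. A quick way to check the layout for your own input is to read the header line nuc writes; the FASTA path below is a placeholder, so this sketch only runs against a real reference:

from pybedtools import BedTool

flanks = BedTool('chr1 100 101 var1', from_string=True).slop(
    b=1, genome={'chr1': (0, 1000)})
nc = flanks.nucleotide_content(fi='hg19.fa', seq=True, pattern='CG', C=True)
print(open(nc.fn).readline())  # the '#'-prefixed header names every column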
Example 3: snp_stats
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def snp_stats(vf, af, stat='avg_het', flank=500):
    v = BedTool(vf)
    feats = BedTool(af)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    intersection = feats.intersect(flanks, wb=True)
    results = {}
    if len(intersection) > 0:
        sort_cmd = 'sort -k6,6 -k7,7n -k8,8n -k9,9 %s -o %s' % (intersection.fn, intersection.fn)
        call(sort_cmd, shell=True)
        annots = intersection.groupby(g=[6, 7, 8, 9], c=5, ops='collapse')
        for entry in annots:
            rates = entry[4].split(',')
            tot = reduce(lambda x, y: x + float(y), rates, 0.)
            rate = tot / (flank * 2)
            results[entry.name] = rate
    return Series(results, name=stat)
Example 4: slop_bed
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def slop_bed(self, bed_in, flank_length=50):
    """Add flanking sequences to bed file

    Parameters
    ----------
    bed_in: string
        Path to input bed file
    flank_length: int
        The bed region is expanded in both directions by flank_length bases

    Returns
    -------
    filename: string
        Path to the slopped bed file
    """
    inbed = BedTool(bed_in)
    slopped_bed = inbed.slop(g=self.genome_table, b=flank_length)
    filename, extension = os.path.splitext(bed_in)
    filename += '_slop_{}'.format(flank_length) + extension
    slopped_bed.saveas(filename)
    return filename
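For comparison, a standalone equivalent of this method without the surrounding class (the paths are placeholders; the genome table is a two-column chromosome-size file):

import os
from pybedtools import BedTool

bed_in, flank_length = 'peaks.bed', 50           # placeholder input
slopped = BedTool(bed_in).slop(g='hg19.chrom.sizes', b=flank_length)

filename, extension = os.path.splitext(bed_in)
out_path = '{}_slop_{}{}'.format(filename, flank_length, extension)
slopped.saveas(out_path)                         # e.g. 'peaks_slop_50.bed'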
Example 5: average_gerp
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def average_gerp(vf, af, flank=50):
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    return gerp(flanks.fn, af, name="avg_gerp")
Example 6: __init__
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
# ......... (part of the code is omitted here) .........
        # The lines below continue inside an `if` block that belongs to the omitted code above.
        if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
            debug('Annotating target BED file and collecting overlapping genome features')
            overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                                  genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
        else:
            debug('Overlapping with genomic features:')
            overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                                  genome=genome, extended=True, only_canonical=True)
        debug('Saved to ' + ann_target_bed_fpath)
        verify_file(ann_target_bed_fpath, is_critical=True)
    else:
        ann_target_bed_fpath = sort_target_bed_fpath

    final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
    if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
        bed = BedTool(ann_target_bed_fpath).remove_invalid()
        with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
            bed.saveas(tx)
    verify_file(final_clean_target_bed_fpath, is_critical=True)

    self.bed_fpath = final_clean_target_bed_fpath
    self.bed = BedTool(self.bed_fpath)

    self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
    if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
        with file_transaction(work_dir, self.capture_bed_fpath) as tx:
            self.get_capture_bed().saveas(tx)

    gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
    self.gene_keys_set = gene_key_set
    self.gene_keys_list = gene_key_list
    self.regions_num = self.get_capture_bed().count()

    self._make_qualimap_bed(work_dir)
    if padding:
        self._make_padded_bed(work_dir, fai_fpath, padding)

def _make_padded_bed(self, work_dir, fai_fpath, padding):
    if self.is_wgs:
        return None
    self.padded_bed_fpath = intermediate_fname(work_dir, self.capture_bed_fpath, 'padded')
    if can_reuse(self.padded_bed_fpath, self.capture_bed_fpath):
        return BedTool(self.padded_bed_fpath)
    padded_bed = self.bed.slop(b=padding, g=fai_fpath).sort().merge()
    with file_transaction(work_dir, self.padded_bed_fpath) as tx:
        padded_bed.saveas(tx)
    verify_file(self.padded_bed_fpath, is_critical=True)
    return BedTool(self.padded_bed_fpath)

def _make_qualimap_bed(self, work_dir):
    if self.is_wgs:
        return None
    self.qualimap_bed_fpath = intermediate_fname(work_dir, self.capture_bed_fpath, 'qualimap_ready')
    if can_reuse(self.qualimap_bed_fpath, self.capture_bed_fpath):
        return self.qualimap_bed_fpath
    debug('Merging and saving BED into required bed6 format for Qualimap')
    bed = self.bed.sort().merge()
    with file_transaction(work_dir, self.qualimap_bed_fpath) as tx:
        with open(tx, 'w') as out:
            for i, region in enumerate(x for x in bed):
                region = [x for x in list(region) if x]
                fillers = [str(i), "1.0", "+"]
                full = region + fillers[:6 - len(region)]
                out.write("\t".join(full) + "\n")
    verify_file(self.qualimap_bed_fpath, is_critical=True)
    return self.qualimap_bed_fpath

def _make_wgs_regions_file(self, work_dir, genome=None):
    self.wgs_bed_fpath = join(work_dir, 'targqc_features_to_report.bed')
    if can_reuse(self.wgs_bed_fpath, ebl.ensembl_gtf_fpath(genome)):
        return self.wgs_bed_fpath
    chr_order = reference_data.get_chrom_order(genome or cfg.genome)
    r_by_tx_by_gene = OrderedDefaultDict(lambda: defaultdict(list))
    all_features = ebl.get_all_features(genome or cfg.genome, high_confidence=True)
    debug('Select best transcript to report')
    for r in all_features:
        if r[ebl.BedCols.FEATURE] != 'gene':
            gene = r[ebl.BedCols.GENE]
            tx = r[ebl.BedCols.ENSEMBL_ID]
            r_by_tx_by_gene[gene][tx].append(r.fields)
    with file_transaction(work_dir, self.wgs_bed_fpath) as tx:
        with open(tx, 'w') as out:
            for gname, r_by_tx in r_by_tx_by_gene.items():
                all_tx = (x for xx in r_by_tx.values() for x in xx if x[ebl.BedCols.FEATURE] == 'transcript')
                tx_sorted_list = [x[ebl.BedCols.ENSEMBL_ID] for x in sorted(all_tx, key=tx_priority_sort_key)]
                if not tx_sorted_list:
                    continue
                tx_id = tx_sorted_list[0]
                for r in sorted(r_by_tx[tx_id], key=get_sort_key(chr_order)):
                    out.write('\t'.join(str(f) for f in r) + '\n')
    return self.wgs_bed_fpath
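The padding step in _make_padded_bed reduces to a common slop/sort/merge pattern: extend every capture region, re-sort, then merge regions that now overlap. A standalone sketch with placeholder paths (a .fai index works as the genome file because bedtools only reads its first two columns):

from pybedtools import BedTool

padded = (BedTool('capture.bed')         # placeholder capture BED
          .slop(b=200, g='hg19.fa.fai')  # pad by 200 bp on each side
          .sort()
          .merge())                      # collapse regions that now overlap
padded.saveas('capture_padded.bed')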
Example 7: average_gerp
# Required import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import slop [as alias]
def average_gerp(vf, af, flank=50):
    print("inside average_gerp")
    v = BedTool(vf)
    flanks = v.slop(g=pybedtools.chromsizes('hg19'), b=flank)
    print("exiting average gerp")
    return gerp(flanks.fn, af, name="avg_gerp")
示例8: Bedfile
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import slop [as 别名]
class Bedfile(object):
    """Class to create a bed file object

    Parameters
    ----------
    filepath: string
        Absolute path to bedfile
    genome_table: string
        Absolute path to genome chromosome size file
    """
    def __init__(self, filepath, genome_table):
        self.filepath = filepath
        self.bed_format = None
        if not os.path.isfile(filepath):
            raise MocaException('Bed file {} not found'.format(self.filepath))
        self._read()
        self.bed_format = self.guess_bedformat()
        self.sort_bed()
        self.bed = BedTool(filepath)
        self.genome_table = genome_table
        assert self.bed_format is not None

    def _read(self):
        try:
            self.bed_df = pandas.read_table(self.filepath, header=None)
        except Exception as e:
            raise MocaException('Error reading bed file {}'.format(self.filepath),
                                'Traceback: {}'.format(e))

    def guess_bedformat(self):
        """Method to guess bed format

        Returns
        -------
        bed_format: string
            BED format

        Example:
            >>> bed_df = Bedfile('file.bed', 'genome_table.txt')
            >>> print(bed_df.guess_bedformat())
        """
        self.bed_columns = self.bed_df.columns
        count = len(self.bed_columns)
        try:
            bed_format = __BED_TYPES__[count]
        except KeyError:
            raise MocaException('Bed file had {} columns. Supported column lengths are {}'.format(
                count, list(__BED_TYPES__.keys())))
        return bed_format

    def slop_bed(self, flank_length=5):
        """Add flanking sequences to bed file

        Parameters
        ----------
        flank_length: int
            The bed region is expanded in both directions by flank_length bases

        Returns
        -------
        slop_bed: dataframe
            Slopped bed data object
        """
        # NOTE: the slopped BedTool is not stored or returned in this excerpt.
        self.bed.slop(g=self.genome_table, b=flank_length)

    def convert_to_scorefile(self):
        """
        filename, file_extension = os.path.splitext(self.filepath)
        filename += '.sorted'
        self.bed_df.to_csv(filename + file_extension,
                           sep='\t',
                           columns=['chrom', 'peak_positions', 'score'],
                           index=False,
                           header=False)
        """
        # NOTE: `filetype` and `df` are referenced below but defined in code not shown in this excerpt.
        if filetype == 'narrowPeak':
            filter_df1 = df[df.peak.astype(int) == -1]
            filter_df2 = df[df.peak.astype(int) != -1]
            filter_df1['peak_positions'] = (filter_df1['chromStart'].astype(int) + filter_df1['chromEnd'].astype(int))
            filter_df1['peak_positions'] = [int(x / 2) for x in filter_df1['peak_positions'].astype(int)]
            filter_df2['peak_positions'] = filter_df2['chromStart'].astype(int) + filter_df2['peak'].astype(int)
            df = pandas.concat([filter_df1, filter_df2])
        else:
            df['peak_positions'] = (df['chromStart'] + df['chromEnd'])
            df['peak_positions'] = [int(x / 2) for x in df['peak_positions'].astype(int)]

    def extract_fasta(self, fasta_file):
        """Extract fasta of bed regions

        Parameters
        ----------
        fasta_file: string
            Absolute path to location of fasta file

        Returns
        -------
        fasta: string
            Fasta sequence combined
        """
# ......... (the rest of the code is omitted here) .........
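A hypothetical usage sketch for the Bedfile class above. Both paths are placeholders, and the sketch assumes the parts omitted from this excerpt (for example sort_bed() and the __BED_TYPES__ mapping) are available at runtime:

# Hypothetical usage; 'peaks.bed' and 'hg19.chrom.sizes' are placeholder paths.
bed = Bedfile('peaks.bed', 'hg19.chrom.sizes')
print(bed.bed_format)         # whatever __BED_TYPES__ maps the column count to
bed.slop_bed(flank_length=5)  # pads every region by 5 bp using the genome table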