本文整理汇总了Python中pybedtools.BedTool类的典型用法代码示例。如果您正苦于以下问题:Python pybedtools.BedTool的具体用法?Python pybedtools.BedTool怎么用?Python pybedtools.BedTool使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pybedtools的用法示例。
在下文中一共展示了pybedtools.BedTool类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def __init__(self,
             intervals_file,
             fasta_file,
             dnase_file,
             use_linecache=True):
    """Create the interval reader and remember the data file paths.

    Args:
        intervals_file: handed to the interval reader constructor.
        fasta_file: stored unchanged as ``self.fasta_file``.
        dnase_file: stored unchanged as ``self.dnase_file``.
        use_linecache: when true, ``linecache`` is cleared and the
            intervals are read with ``BedToolLinecache``; otherwise a
            plain ``BedTool`` is used.
    """
    if use_linecache:
        # Drop anything cached earlier before the cached reader is built.
        linecache.clearcache()
    reader_cls = BedToolLinecache if use_linecache else BedTool
    self.bt = reader_cls(intervals_file)

    # Extractors start out as None; they are meant to be initialized later.
    self.fasta_file, self.fasta_extractor = fasta_file, None
    self.dnase_file, self.dnase_extractor = dnase_file, None
示例2: ensure_file_is_bed
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def ensure_file_is_bed(path):
    """Return the file at ``path`` as a sorted ``BedTool``.

    Paths ending in ``.bedpe`` are read with pandas and flattened: the
    first seven columns are treated as
    ``chromx, startx, endx, chromy, starty, endy, name`` and each row
    yields two BED records (the x side and the y side), both carrying the
    row's name. Any other path is passed directly to ``pybedtools.BedTool``.

    Args:
        path: file path (string); the ``.bedpe`` suffix selects the
            flattening branch.

    Returns:
        A sorted ``pybedtools.BedTool``.

    Raises:
        ValueError: propagated from pandas when the table has fewer than
            seven columns (the column labels cannot be assigned).
    """
    if not path.endswith(".bedpe"):
        return pybedtools.BedTool(path).sort()

    table = pandas.read_table(path)
    columns = ["chromx", "startx", "endx", "chromy", "starty", "endy", "name"]
    # Label every surplus column uniquely (extra_0, extra_1, ...). The
    # previous loop never advanced its counter, so all surplus columns
    # ended up with the same label "extra_0".
    n_extra = len(table.columns) - len(columns)
    columns.extend("extra_{}".format(i) for i in range(n_extra))
    table.columns = columns

    # Build one 4-column frame per side, x first, then stack them.
    sides = []
    for suffix in ("x", "y"):
        side = pandas.DataFrame()
        side["chrom"] = table["chrom" + suffix]
        side["start"] = table["start" + suffix]
        side["end"] = table["end" + suffix]
        side["name"] = table["name"]
        sides.append(side)

    bed = pandas.concat(sides, ignore_index=True)
    return pybedtools.BedTool.from_dataframe(bed).sort()
示例3: merge_for_each_sv
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def merge_for_each_sv(bedtool, c, o, svs_to_softclip=SVS_SOFTCLIP_SUPPORTED,
                      overlap_ratio=OVERLAP_RATIO, d=0, reciprocal_for_2bp=True,
                      sv_type_field=[3, 1], inter_tools=False):
    """Merge the intervals of ``bedtool`` separately for each SV type.

    For every type in ``svs_to_softclip`` the intervals whose type string
    contains that type are selected, sorted and merged; the per-type
    results are concatenated (without post-merging) and sorted.

    Args:
        bedtool: BedTool holding the intervals to merge.
        c, o: forwarded as the ``c``/``o`` arguments of the merge step.
        svs_to_softclip: iterable of SV type names to process.
        overlap_ratio: forwarded to ``merge_intervals_bed``.
        d: forwarded as ``d`` to ``BedTool.merge``.
        reciprocal_for_2bp: when false, every type uses ``BedTool.merge``;
            when true, only "INS" does and the other types go through
            ``merge_intervals_bed``.
        sv_type_field: ``[field_index, part_index]``; the type string is
            part ``part_index`` of the comma-split field ``field_index``.
        inter_tools: not used by this function.

    Returns:
        A sorted BedTool with the merged intervals of all processed types.
    """
    combined = pybedtools.BedTool([])
    for svtype in svs_to_softclip:

        def has_svtype(feature):
            # Containment test against the selected comma-separated part.
            parts = feature.fields[sv_type_field[0]].split(',')
            return svtype in parts[sv_type_field[1]]

        # sort() consumes the filter right away, while svtype is current.
        per_type = bedtool.filter(has_svtype).sort()
        if per_type.count() == 0:
            continue

        plain_merge = svtype == "INS" or not reciprocal_for_2bp
        if plain_merge:
            per_type = per_type.merge(c=c, o=o, d=d)
        else:
            per_type = merge_intervals_bed(per_type, overlap_ratio=overlap_ratio,
                                           c=c, o=o)

        combined = (per_type.cat(combined, postmerge=False)
                    if len(combined) > 0 else per_type)
    return combined.sort()
示例4: add_weighted_score
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def add_weighted_score(in_bed, score_bed):
    """Append an overlap-weighted score to every interval of ``in_bed``.

    ``in_bed`` is intersected with ``score_bed`` using ``wao=True`` and the
    result is saved as ``score.bed`` under the module-level
    ``args.tmpdir``. Consecutive output rows with the same chrom/start/end
    are treated as one input interval; for each of them the products
    ``last_field * second_to_last_field / interval_length`` of the rows
    whose last field is positive are summed.

    NOTE(review): presumably the last field is the overlap size reported
    by ``-wao`` and the second-to-last is the score column of
    ``score_bed``; dropping five trailing fields presumably assumes a
    four-column ``score_bed`` - confirm against callers.

    Args:
        in_bed: BedTool with the intervals to score.
        score_bed: BedTool providing the scores.

    Returns:
        A BedTool whose rows are the grouped rows minus their last five
        fields, plus the summed score as a string.
    """
    scored_path = os.path.join(args.tmpdir, "score.bed")
    overlaps = in_bed.intersect(score_bed, wao=True).saveas(scored_path)

    rows = []
    weighted_sum = 0.0
    # Placeholder with an empty chrom so the first real row opens a group.
    current = pybedtools.Interval("", 0, 0)
    for record in overlaps:
        same_feature = (record.chrom == current.chrom
                        and record.start == current.start
                        and record.end == current.end)
        if not same_feature:
            # Emit the finished group (skipped for the initial placeholder).
            if current.chrom:
                rows.append(tuple(current.fields[:-5]) + (str(weighted_sum),))
            weighted_sum = 0.0
            current = record
        last_value = float(record.fields[-1])
        if last_value > 0:
            weighted_sum += last_value * float(record.fields[-2]) / float(record.length)

    # Emit the group that was still open when the rows ran out.
    if current.chrom:
        rows.append(tuple(current.fields[:-5]) + (str(weighted_sum),))
    return pybedtools.BedTool(rows)
示例5: make_annot_files
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def make_annot_files(args, bed_for_annot):
    """Write a one-column 0/1 annotation file for the SNPs of a .bim file.

    Each row of ``args.bimfile`` becomes a 1-bp interval
    ``chr<CHR>:BP-1..BP``; the intervals are intersected with
    ``bed_for_annot`` and a SNP gets ``ANNOT = 1`` when its ``BP`` value is
    among the intersected positions, otherwise ``0``. The output has
    exactly one row per .bim row, in the same order.

    NOTE(review): membership is decided on ``BP`` alone (as before), so
    this presumably expects a single-chromosome .bim file - confirm.

    Args:
        args: object with ``bimfile`` (input path) and ``annot_file``
            (output path; a ``.gz`` suffix selects gzip compression).
        bed_for_annot: BedTool (or anything ``BedTool.intersect`` accepts)
            with the annotation regions.
    """
    print('making annot file')
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    df_bim = pd.read_csv(args.bimfile,
                         sep=r'\s+', usecols=[0, 1, 2, 3],
                         names=['CHR', 'SNP', 'CM', 'BP'])
    iter_bim = [['chr' + str(x1), x2 - 1, x2]
                for (x1, x2) in np.array(df_bim[['CHR', 'BP']])]
    bimbed = BedTool(iter_bim)
    annotbed = bimbed.intersect(bed_for_annot)

    # A set collapses positions reported more than once (e.g. overlapping
    # annotation regions). The previous left merge repeated such SNP rows
    # in the output and could fail on an empty intersection.
    hit_positions = set(x.start + 1 for x in annotbed)
    df_annot = pd.DataFrame(
        {'ANNOT': df_bim['BP'].isin(hit_positions).astype(int)})

    # Let pandas do the gzip encoding instead of writing text to a binary
    # gzip handle.
    compression = 'gzip' if args.annot_file.endswith('.gz') else None
    df_annot.to_csv(args.annot_file, sep="\t", index=False,
                    compression=compression)
示例6: group_reads
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def group_reads(bedfile):
    """Collapse the reads of a BED file into per-strand groups.

    Reads are split by strand. A non-empty "+" set is sorted with
    ``sort_by_strand``, grouped on columns 1, 3, 6 (minimum of column 2,
    count of column 4) and re-cut; a non-empty "-" set is grouped on
    columns 1, 2, 6 (maximum of column 3, count of column 4) and re-cut.
    Both sets are concatenated without merging and passed through
    ``sort_bed``.

    NOTE(review): the column choices presumably assume 6-column BED input
    (chrom, start, end, name, score, strand) - confirm.

    Args:
        bedfile: anything accepted by ``pybedtools.BedTool``.

    Returns:
        The saved BedTool produced by ``sort_bed`` on the combined reads.
    """
    all_reads = pybedtools.BedTool(bedfile)

    plus = all_reads.filter(lambda read: read.strand == "+").saveas()
    if len(plus) > 0:
        plus_sorted = sort_by_strand(plus, strand="+")
        plus_grouped = plus_sorted.groupby(
            g=[1, 3, 6], c=[2, 4], o=['min', 'count'], full=True)
        # With full=True the two summary columns follow the original ones.
        plus = plus_grouped.cut([0, 6, 2, 3, 7, 5]).saveas()

    minus = all_reads.filter(lambda read: read.strand == "-").saveas()
    if len(minus) > 0:
        minus_sorted = sort_by_strand(minus, strand="-")
        minus_grouped = minus_sorted.groupby(
            g=[1, 2, 6], c=[3, 4], o=['max', 'count'], full=True)
        minus = minus_grouped.cut([0, 1, 6, 3, 7, 5]).saveas()

    combined = plus.cat(minus, postmerge=False)
    return sort_bed(combined).saveas()
示例7: is_bed
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def is_bed(fname):
    """Tell whether pybedtools reports ``fname`` as a BED file.

    Args:
        fname: name of the file to inspect.

    Returns:
        True when ``BedTool(fname).file_type`` equals "bed"; False for any
        other file type or when the check raises ``IndexError``.
    """
    try:
        # Both opening the file and reading file_type stay inside the try.
        return pybedtools.BedTool(fname).file_type == "bed"
    except IndexError:
        return False
示例8: is_gff
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def is_gff(fname):
    """Tell whether pybedtools reports ``fname`` as a GFF file.

    Args:
        fname: name of the file to inspect.

    Returns:
        True when ``BedTool(fname).file_type`` equals "gff"; False for any
        other file type or when the check raises ``IndexError``.
        NOTE(review): the original docstring says GTF files also count;
        presumably pybedtools reports them as "gff" - confirm.
    """
    try:
        # Both opening the file and reading file_type stay inside the try.
        return pybedtools.BedTool(fname).file_type == "gff"
    except IndexError:
        return False
示例9: is_bam
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def is_bam(fname):
    """Tell whether pybedtools reports ``fname`` as a BAM file.

    Args:
        fname: name of the file to inspect.

    Returns:
        True when ``BedTool(fname).file_type`` equals "bam"; False for any
        other file type or when the check raises ``ValueError``.
    """
    try:
        # Previously a file of any other type fell through to a final
        # ``return True``, so e.g. a BED file was reported as BAM. The
        # comparison result is now returned directly.
        return pybedtools.BedTool(fname).file_type == "bam"
    except ValueError:
        return False
示例10: find_closest_bound
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def find_closest_bound(sj_bound, ref_bounds):
    """Find the reference bound closest to one side of a splice junction.

    The BED string from ``sj_bound.getBED()`` is turned into a BedTool and
    ``closest`` is run against ``ref_bounds`` with ``s=True``, ``D="ref"``,
    ``t="first"`` and ``nonamecheck=True``; only the first reported row is
    used.

    Args:
        sj_bound: object providing ``getBED()``, which returns a BED line.
        ref_bounds: BedTool (or compatible) with the reference bounds.

    Returns:
        A ``dstruct.Struct`` with ``chrom``, ``start``, ``end`` (0-based,
        since they come from BED) and ``dist`` (the last output field).
    """
    query = pybedtools.BedTool(sj_bound.getBED(), from_string=True)
    hits = query.closest(ref_bounds, s=True, D="ref", t="first",
                         nonamecheck=True)
    best = hits[0]

    # Fields 6-8 are read as the matched reference interval.
    # NOTE(review): presumably the query line has six columns - confirm.
    match = dstruct.Struct()
    match.chrom = best[6]
    match.start = int(best[7])
    match.end = int(best[8])
    match.dist = int(best[-1])
    return match
示例11: __init__
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def __init__(self, intervals_file, fasta_file):
    """Open the intervals with ``BedTool`` and remember the FASTA path.

    Args:
        intervals_file: passed to ``BedTool``.
        fasta_file: stored unchanged as ``self.fasta_file``.
    """
    self.bt = BedTool(intervals_file)
    # The extractor is left unset here.
    self.fasta_file, self.fasta_extractor = fasta_file, None
示例12: __init__
# 需要导入模块: import pybedtools [as 别名]
# 或者: from pybedtools import BedTool [as 别名]
def __init__(self,
             intervals_file,
             fasta_file,
             dnase_file,
             mappability_file=None,
             use_linecache=True):
    """Create the interval reader and record the data file paths.

    Args:
        intervals_file: handed to the interval reader constructor.
        fasta_file: stored unchanged as ``self.fasta_file``.
        dnase_file: stored unchanged as ``self.dnase_file``.
        mappability_file: path of the mappability bigWig. When None, a
            default path under ``this_dir`` is used and the file is
            fetched through ``RemoteFile`` if it is missing or fails
            ``validate``.
        use_linecache: when true, ``linecache`` is cleared and the
            intervals are read with ``BedToolLinecache``; otherwise a
            plain ``BedTool`` is used.
    """
    if use_linecache:
        # Drop anything cached earlier before the cached reader is built.
        linecache.clearcache()
    reader_cls = BedToolLinecache if use_linecache else BedTool
    self.bt = reader_cls(intervals_file)

    # Extractors start out as None; they are meant to be initialized later.
    self.fasta_file, self.fasta_extractor = fasta_file, None
    self.dnase_file, self.dnase_extractor = dnase_file, None

    if mappability_file is None:
        # Fall back to the shared download directory and the UCSC track.
        download_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
        makedir_exist_ok(download_dir)
        remote = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
                            md5="1d15ddafe2c8df51cf08495db96679e7")
        mappability_file = os.path.join(download_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
        # validate() is only consulted when the file already exists.
        already_valid = os.path.exists(mappability_file) and remote.validate(mappability_file)
        if not already_valid:
            remote.get_file(mappability_file)
    self.mappability_file = mappability_file
    self.mappability_extractor = None