本文整理汇总了 Python 中 pybedtools.BedTool.merge 方法的典型用法代码示例。如果您正苦于以下问题：Python BedTool.merge 方法的具体用法？Python BedTool.merge 怎么用？Python BedTool.merge 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybedtools.BedTool 的用法示例。
在下文中一共展示了BedTool.merge方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: merge_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
def merge_bed(bed_name, tempdir='/sc/orga/scratch/richtf01'):
    """Merge the intervals of a filtered BED file and save the result.

    Reads ``<bed_name>.sorted.noRmsk.noSD.bed``, runs ``BedTool.merge()``
    on it and writes ``<bed_name>.merged.sorted.noRmsk.noSD.bed``.  The
    work is skipped when the output file already exists.

    Args:
        bed_name: Path prefix shared by the input and output file names.
        tempdir: Directory handed to ``pybedtools.set_tempdir``.  The
            default is the location the original code hard-coded.

    Returns:
        None.  Progress messages are printed to stdout.
    """
    # NOTE(review): "noRmsk"/"noSD" presumably mean RepeatMasker and
    # segmental-duplication regions were removed upstream -- confirm.
    pybedtools.set_tempdir(tempdir)
    bed_in = bed_name + '.sorted.noRmsk.noSD.bed'
    bed_out = bed_name + '.merged.sorted.noRmsk.noSD.bed'

    if os.path.isfile(bed_out):
        print(bed_out + " already merged")
        return

    bed = BedTool(bed_in)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Merging " + bed_in + "...")
    bed.merge().saveas(bed_out)
    print(bed_name + " done!")
示例2: mergeLoc
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
def mergeLoc(loc_list):
    """Collapse possibly redundant locations into unique merged intervals.

    For data types where there can be redundancy, the locations are merged
    first so that lengths are computed on non-overlapping intervals.

    Args:
        loc_list: Iterable of strings formatted as ``"chrom:start-end"``.

    Returns:
        A list of ``(chrom, start, stop, length)`` tuples, one per merged
        interval, where ``length`` is ``stop - start``.  An empty input
        yields an empty list.

    Raises:
        ValueError: If an entry does not split into exactly one ``:`` and
            one ``-`` separated pair.
    """
    bed_lines = []
    for loc in loc_list:
        chrom, span = loc.split(":")
        start, end = span.split("-")
        bed_lines.append("%s\t%s\t%s" % (chrom, start, end))

    # Nothing to merge: avoid handing an empty string to bedtools.
    if not bed_lines:
        return []

    non_uniq = BedTool("\n".join(bed_lines), from_string=True)
    # bedtools merge requires input sorted by chromosome and start, and the
    # caller's list carries no ordering guarantee, so sort before merging.
    uniq_int = non_uniq.sort().merge()
    return [
        (inter.chrom, inter.start, inter.stop, inter.stop - inter.start)
        for inter in uniq_int
    ]
示例3: consolidate
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
def consolidate(nbedfile, obedfile, cbedfile):
    """Combine two BED files into one consolidated BED file.

    Features of ``nbedfile`` and ``obedfile`` that overlap each other on the
    same strand are pooled and merged; features of ``nbedfile`` with no
    same-strand overlap against that merged set are kept as they are.  Both
    groups are sorted together and written to ``cbedfile``, with duplicate
    ``;``-separated accessions collapsed.

    Args:
        nbedfile: Path of the first ("n") BED file.
        obedfile: Path of the second ("o") BED file.
        cbedfile: Path of the consolidated BED file to write.
    """
    from pybedtools import BedTool

    nbedtool = BedTool(nbedfile)
    obedtool = BedTool(obedfile)

    # s=True: require same strand; u=True: report each feature once.
    ab = nbedtool.intersect(obedtool, s=True, u=True)
    ba = obedtool.intersect(nbedtool, s=True, u=True)

    cmd = "cat {0} {1} | sort -k1,1 -k2,2n".format(ab.fn, ba.fn)
    fp = popen(cmd)
    ovl = BedTool(fp.readlines())

    abmerge = ovl.merge(s=True, nms=True, scores="mean").sort()
    cmd = "cat {0}".format(abmerge.fn)
    fp = popen(cmd, debug=False)
    ovl = BedTool(fp.readlines())

    # v=True: keep only the features of nbedfile with no overlap.
    notovl = nbedtool.intersect(ovl.sort(), s=True, v=True)

    infile = "{0} {1}".format(notovl.fn, ovl.fn)
    tmpfile = "/tmp/reformat.{0}.bed".format(os.getpid())
    cmd = "sort -k1,1 -k2,2n"
    sh(cmd, infile=infile, outfile=tmpfile)

    try:
        # The context manager closes the output even if a record fails.
        with open(cbedfile, "w") as fw:
            for b in Bed(tmpfile):
                if ";" in b.accn:
                    # Merged features concatenate names; drop duplicates.
                    b.accn = ";".join(set(b.accn.split(";")))
                fw.write(str(b) + "\n")
    finally:
        # Always clean up the intermediate file, including on error.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    # NOTE(review): presumably sorts cbedfile in place ("-i") -- confirm
    # against the project's sort() helper.
    sort([cbedfile, "-i"])
示例4: getMergedBedIntervals
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
def getMergedBedIntervals(bedPath, ncol=3, sort=False, ignoreBed12=True):
    """Merge all contiguous and overlapping intervals of a BED file.

    Args:
        bedPath: Path of the BED file to read.
        ncol: Number of columns to report per interval.  3 gives
            ``(chrom, start, end)``; 4 or more appends the feature name;
            5 or more also appends the score.
        sort: When truthy, sort the intervals before merging.
        ignoreBed12: When falsy, convert the input with ``BedTool.bed6()``
            before merging.

    Returns:
        A list of tuples, one per merged interval.

    Raises:
        RuntimeError: If ``bedPath`` is not an existing file.
    """
    if not os.path.isfile(bedPath):
        raise RuntimeError("Bed interval file %s not found" % bedPath)
    logger.debug("mergeBedIntervals(%s)", bedPath)

    bedTool = BedTool(bedPath)
    # Truthiness (not identity with True/False) so that flags such as 1 or
    # numpy.bool_ values are honoured instead of being silently ignored.
    if sort:
        bedTool = bedTool.sort()
        logger.debug("sortBed(%s)", bedPath)
    if not ignoreBed12:
        logger.debug("bed6(%s)", bedPath)
        bedTool = bedTool.bed6()

    outIntervals = []
    for feat in bedTool.merge():
        outInterval = (feat.chrom, feat.start, feat.end)
        if ncol >= 4:
            outInterval += (feat.name,)
        if ncol >= 5:
            outInterval += (feat.score,)
        outIntervals.append(outInterval)

    logger.debug("finished mergeBedIntervals(%s)", bedPath)
    return outIntervals
示例5: gene_set_to_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
df_int = pd.DataFrame({'BP': bp, 'ANNOT':1})
df_annot = pd.merge(df_bim, df_int, how='left', on='BP')
df_annot.fillna(0, inplace=True)
df_annot = df_annot[['ANNOT']].astype(int)
if args.annot_file.endswith('.gz'):
with gzip.open(args.annot_file, 'wb') as f:
df_annot.to_csv(f, sep = "\t", index = False)
else:
df_annot.to_csv(args.annot_file, sep="\t", index=False)
if __name__ == '__main__':
    # Command-line entry point: build the BED intervals for the annotation
    # (from a gene set or from a user-supplied BED file) and hand them to
    # make_annot_files().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gene-set-file', type=str,
        help='a file of gene names, one line per gene.')
    parser.add_argument(
        '--gene-coord-file', type=str, default='ENSG_coord.txt',
        help='a file with columns GENE, CHR, START, and END, where START and '
             'END are base pair coordinates of TSS and TES. This file can '
             'contain more genes than are in the gene set. We provide '
             'ENSG_coord.txt as a default.')
    parser.add_argument(
        '--windowsize', type=int,
        help='how many base pairs to add around the transcribed region to '
             'make the annotation?')
    parser.add_argument(
        '--bed-file', type=str,
        help='the UCSC bed file with the regions that make up your annotation')
    parser.add_argument(
        '--nomerge', action='store_true', default=False,
        help="don't merge the bed file; make an annot file with values "
             "proportional to the number of intervals in the bedfile "
             "overlapping the SNP.")
    parser.add_argument(
        '--bimfile', type=str,
        help='plink bim file for the dataset you will use to compute LD '
             'scores.')
    parser.add_argument(
        '--annot-file', type=str,
        help='the name of the annot file to output.')
    args = parser.parse_args()

    # Without either input there is nothing to annotate; fail with a usage
    # message instead of passing None to BedTool.
    if args.gene_set_file is None and args.bed_file is None:
        parser.error('one of --gene-set-file or --bed-file is required')

    if args.gene_set_file is not None:
        bed_for_annot = gene_set_to_bed(args)
    else:
        bed_for_annot = BedTool(args.bed_file).sort()
        if not args.nomerge:
            bed_for_annot = bed_for_annot.merge()

    make_annot_files(args, bed_for_annot)
示例6: type_filter
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import merge [as 别名]
# Optional filters: each accepts one or more values; None disables the filter.
parser.add_argument(
    '-t', '--transcription',
    nargs='+',
    default=None,
    choices=['exon', 'intron', 'None'],
    type=str,
    help="type of transcription of intervals to choose")
parser.add_argument(
    '-mt', '--regulation',
    nargs='+',
    default=None,
    type=str,
    help="type of regulation of intervals to choose")
args = parser.parse_args()
def type_filter(feature, transcription, regulation):
    """Tell whether a feature passes the transcription/regulation filters.

    Args:
        feature: Object whose ``attrs`` mapping holds the ``'transcription'``
            and ``'regulation'`` values of the interval.
        transcription: Collection of accepted transcription types; a falsy
            value (``None`` or empty) disables this filter.
        regulation: Collection of accepted regulation types; a falsy value
            disables this filter.

    Returns:
        ``True`` if every enabled filter accepts the feature, else ``False``.
    """
    # An attribute is only looked up when its filter is enabled.
    if transcription and feature.attrs['transcription'] not in transcription:
        return False
    if regulation and feature.attrs['regulation'] not in regulation:
        return False
    return True
# Group the intervals that pass type_filter by their gene_id attribute.
genes_features = defaultdict(list)
selected = BedTool(args.path).filter(
    type_filter,
    transcription=args.transcription,
    regulation=args.regulation)
for feature in selected:
    genes_features[feature.attrs['gene_id']].append(feature)

# For each gene, merge its same-strand intervals and print the result.
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for count, (gene_id, features) in enumerate(genes_features.items()):
    per_gene = BedTool(
        Interval(f.chrom, f.start, f.stop,
                 strand=f.strand, name=gene_id, score="0")
        for f in features)
    merged = per_gene.merge(s=True, d=0)
    sys.stderr.write("%d\n%d\n\n" % (count, len(merged)))
    for interval in merged:
        sys.stdout.write(str(interval))
    # NOTE(review): the flattened source does not show whether this cap sat
    # inside the inner loop; it is assumed to limit the number of genes.
    if count > 10000:
        sys.exit()