当前位置: 首页>>代码示例>>Python>>正文


Python BedTool.sort方法代码示例

本文整理汇总了Python中pybedtools.BedTool.sort方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.sort方法的具体用法?Python BedTool.sort怎么用?Python BedTool.sort使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pybedtools.BedTool的用法示例。


在下文中一共展示了BedTool.sort方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_remainder

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def generate_remainder(whole_bed, bed_prefix, bed_list):
    """
    Calculate the remaining regions that are not included in the truth set
    :param whole_bed: path to the truth regions for the whole panel
    :param bed_prefix: prefix used for all the bed files
    :param bed_list: list of all the bed files for that panel
    :return: BEDTool containing any regions that are completely missing from the truth regions
    """

    whole_truth = BedTool(whole_bed)
    whole_truth.saveas()
    whole = BedTool()

    for bed in bed_list:
        print bed
        tool = BedTool(bed)
        tool.saveas()
        if bed == bed_list[0]:
            whole = tool
        else:
            whole = whole.cat(tool)
            whole.saveas()

    whole_sorted = whole.sort()
    whole_merged = whole_sorted.merge()
    whole_merged.saveas()

    remainder = whole_merged.subtract(whole_truth)
    remainder.moveto('/results/Analysis/MiSeq/MasterBED/GIAB/' + bed_prefix + '.remainder.bed')
    missing_regions = whole_merged.subtract(whole_truth, A=True)
    return missing_regions
开发者ID:sch-sdgs,项目名称:SDGSValidation,代码行数:33,代码来源:giab_comparison_freebayes.py

示例2: check_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
    def check_bed(self, bed_file, stream):
        bed = BedTool(bed_file, from_string=stream)
        try:
            sorted_bed = bed.sort()
            merged_bed = sorted_bed.merge(c="4", o="distinct")

            return merged_bed

        except Exception as exception:
            print ("ERROR: " + str(exception))
开发者ID:sch-sdgs,项目名称:PanelPal,代码行数:12,代码来源:db_commands.py

示例3: readBedIntervals

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def readBedIntervals(bedPath, ncol = 3, 
                     chrom = None, start = None, end = None,
                     sort = False, ignoreBed12 = True):
    """ Read bed intervals from a bed file (or a specifeid range therein).
    NOTE: intervals are sorted by their coordinates"""
    
    if not os.path.isfile(bedPath):
        raise RuntimeError("Bed interval file %s not found" % bedPath)
    assert ncol == 3 or ncol == 4 or ncol == 5
    outIntervals = []
    logger.debug("readBedIntervals(%s)" % bedPath)
    bedTool = BedTool(bedPath)
    if sort is True:
        bedTool = bedTool.sort()
        logger.debug("sortBed(%s)" % bedPath)
    if ignoreBed12 is False:
        bedTool = bedTool.bed6()
        logger.debug("bed6(%s)" % bedPath)
    if chrom is None:
        bedIntervals = bedTool
    else:
        assert start is not None and end is not None
        interval = Interval(chrom, start, end)
        logger.debug("intersecting (%s,%d,%d) and %s" % (chrom, start, end,
                                                          bedPath))
        # Below, we try switching from all_hits to intersect()
        # all_hits seems to leak a ton of memory for big files, so
        # we hope intersect (which creates a temp file) will be better
        #bedIntervals = bedTool.all_hits(interval)
        tempTool = BedTool(str(interval), from_string = True)
        bedIntervals = bedTool.intersect(tempTool)
        tempTool.delete_temporary_history(ask=False)

    logger.debug("appending bed intervals")
    for feat in bedIntervals:
        outInterval = (feat.chrom, feat.start, feat.end)
        if ncol >= 4:
            outInterval += (feat.name,)
        if ncol >= 5:
            outInterval += (feat.score,)
        outIntervals.append(outInterval)
    logger.debug("finished readBedIntervals(%s)" % bedPath)
        
    return outIntervals
开发者ID:glennhickey,项目名称:teHmm,代码行数:46,代码来源:trackIO.py

示例4: generate_remainder

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def generate_remainder(whole_bed, out_dir, bed_list):
    """
    Calculate the remaining regions that are not included in the truth set

    :param whole_bed: Path to the truth regions for the whole panel
    :type whole_bed: String
    :param out_dir: Prefix used for all the bed files
    :type out_dir: String
    :param bed_list: List of all the bed files for that panel
    :type bed_list: List of String
    :return: BEDTool containing any regions that are completely missing from the truth regions
    :rtype: BedTool
    """
    try:
        whole_truth = BedTool(whole_bed)
        whole_truth.saveas()
        whole = BedTool()

        for bed in bed_list:
            print(bed)
            tool = BedTool(bed)
            tool.saveas()
            if bed == bed_list[0]:
                whole = tool
            else:
                whole = whole.cat(tool)
                whole.saveas()

        whole_sorted = whole.sort()
        whole_merged = whole_sorted.merge()
        whole_merged.saveas()

        remainder = whole_merged.subtract(whole_truth)
        remainder.moveto(out_dir + '/remainder.bed')
        missing_regions = whole_merged.subtract(whole_truth, A=True)
    except UnicodeDecodeError:
        missing_regions = None
    return missing_regions
开发者ID:sch-sdgs,项目名称:SDGSValidation,代码行数:40,代码来源:giab_comparison.py

示例5: exonunion

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def exonunion(args):
    """
    %prog exonunion gencode.v26.annotation.exon.bed

    Collapse overlapping exons within the same gene. File
    `gencode.v26.annotation.exon.bed` can be generated by:

    $ zcat gencode.v26.annotation.gtf.gz |  awk 'OFS="\t" {if ($3=="exon")
    {print $1,$4-1,$5,$10,$12,$14,$16,$7}}' | tr -d '";'
    """
    p = OptionParser(exonunion.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    gencodebed, = args
    beds = BedTool(gencodebed)
    # fields[3] is gene_id; fields[6] is gene_name
    for g, gb in groupby(beds, key=lambda x: x.fields[3]):
        gb = BedTool(gb)
        sys.stdout.write(str(gb.sort().merge(c="4,5,6,7",
                             o=','.join(['first'] * 4))))
开发者ID:xuanblo,项目名称:jcvi,代码行数:25,代码来源:cnv.py

示例6: consolidate

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def consolidate(nbedfile, obedfile, cbedfile):
    from pybedtools import BedTool
    nbedtool = BedTool(nbedfile)
    obedtool = BedTool(obedfile)

    ab = nbedtool.intersect(obedtool, s=True, u=True)
    ba = obedtool.intersect(nbedtool, s=True, u=True)

    cmd = "cat {0} {1} | sort -k1,1 -k2,2n".format(ab.fn, ba.fn)
    fp = popen(cmd)
    ovl = BedTool(fp.readlines())

    abmerge = ovl.merge(s=True, nms=True, scores="mean").sort()
    cmd = "cat {0}".format(abmerge.fn)
    fp = popen(cmd, debug=False)
    ovl = BedTool(fp.readlines())

    notovl = nbedtool.intersect(ovl.sort(), s=True, v=True)

    infile = "{0} {1}".format(notovl.fn, ovl.fn)
    tmpfile = "/tmp/reformat.{0}.bed".format(os.getpid())
    cmd = "sort -k1,1 -k2,2n"
    sh(cmd, infile=infile, outfile=tmpfile)

    fp = open(cbedfile, "w")
    bed = Bed(tmpfile)
    for b in bed:
        if ";" in b.accn:
            accns = set()
            for accn in b.accn.split(";"):
                accns.add(accn)
            b.accn = ";".join(accns)
        print >> fp, b
    fp.close()
    os.remove(tmpfile)

    sort([cbedfile, "-i"])
开发者ID:Hensonmw,项目名称:jcvi,代码行数:39,代码来源:reformat.py

示例7: getMergedBedIntervals

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def getMergedBedIntervals(bedPath, ncol=3, sort = False, ignoreBed12 = True):
    """ Merge all contiguous and overlapping intervals""" 

    if not os.path.isfile(bedPath):
        raise RuntimeError("Bed interval file %s not found" % bedPath)
    logger.debug("mergeBedIntervals(%s)" % bedPath)
    outIntervals = []
    bedTool = BedTool(bedPath)
    if sort is True:
        bedTool = bedTool.sort()
        logger.debug("sortBed(%s)" % bedPath)
    if ignoreBed12 is False:
        logger.debug("bed6(%s)" % bedPath)
        bedTool = bedTool.bed6()
    for feat in bedTool.merge():
        outInterval = (feat.chrom, feat.start, feat.end)
        if ncol >= 4:
            outInterval += (feat.name,)
        if ncol >= 5:
            outInterval += (feat.score,)
        outIntervals.append(outInterval)
    logger.debug("finished mergeBedIntervals(%s)" % bedPath)

    return outIntervals
开发者ID:glennhickey,项目名称:teHmm,代码行数:26,代码来源:trackIO.py

示例8: print_split_sort_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
        split_region_list = []
        for region in self.bed:
            pass


    def print_split_sort_bed(self):
        pass

split_num = 100

split_region_list = [[]*5]
print split_region_list
bed = BedTool('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test.bed')


bed = BedTool(bed.sort().merge().window_maker(b=bed.fn, w=100))

bed.all_hits()

# x = BedTool().window_maker(genome='hg38', w=1000000)
bed.saveas('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test_w100.bed')

split_num = bed.count() if bed.count() < split_num else split_num

print bed.count()/split_num

# print bed.split(10, 'out')

# print x

n = 0
开发者ID:huangzhibo,项目名称:HelloWorld,代码行数:33,代码来源:bed_split.py

示例9: vcf_to_long_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def vcf_to_long_bed(*in_vcfs):

    intervals = bed_generator(in_vcfs)
    region_file = BedTool(intervals)

    return region_file.sort()
开发者ID:kylessmith,项目名称:SASE-hunter,代码行数:8,代码来源:SASE_hunter.py

示例10: __init__

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
class Target:
    def __init__(self, work_dir, output_dir, fai_fpath, bed_fpath=None,
                 padding=None, reannotate=False, genome=None, is_debug=False):
        self.bed = None
        self.original_bed_fpath = None
        self.bed_fpath = None  # with genomic features
        self.capture_bed_fpath = None  # w/o genomic features
        self.qualimap_bed_fpath = None
        self.padded_bed_fpath = None

        self.gene_keys_set = set()  # set of pairs (gene_name, chrom)
        self.gene_keys_list = list()  # list of pairs (gene_name, chrom)
        self.regions_num = None

        self.bases_num = None
        self.fraction = None

        if bed_fpath:
            debug('Using target BED file ' + bed_fpath)
            self.is_wgs = False
            verify_bed(bed_fpath, is_critical=True)
            self.original_bed_fpath = bed_fpath
            self._make_target_bed(bed_fpath, work_dir, output_dir, padding=padding,
                is_debug=is_debug, fai_fpath=fai_fpath, genome=genome, reannotate=reannotate)
        else:
            debug('No input BED. Assuming whole genome. For region-based reports, analysing RefSeq CDS.')
            self.is_wgs = True
            self._make_wgs_regions_file(work_dir, genome=genome)

    def get_capture_bed(self):
        if not self.is_wgs:
            if self.bed.field_count() > ebl.BedCols.FEATURE:
                return self.bed.filter(lambda x: x[ebl.BedCols.FEATURE] == 'capture')
            else:
                return self.bed
        else:
            return None

    def _make_target_bed(self, bed_fpath, work_dir, output_dir, is_debug,
                         padding=None, fai_fpath=None, genome=None, reannotate=False):
        clean_target_bed_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
        if not can_reuse(clean_target_bed_fpath, bed_fpath):
            debug()
            debug('Cleaning target BED file...')
            bed = BedTool(bed_fpath)
            if bed.field_count() > 4:
                bed = bed.cut(range(4))
            bed = bed\
                .filter(lambda x: x.chrom and not any(x.chrom.startswith(e) for e in ['#', ' ', 'track', 'browser']))\
                .remove_invalid()
            with file_transaction(work_dir, clean_target_bed_fpath) as tx:
                bed.saveas(tx)
            debug('Saved to ' + clean_target_bed_fpath)
            verify_file(clean_target_bed_fpath, is_critical=True)

        sort_target_bed_fpath = intermediate_fname(work_dir, clean_target_bed_fpath, 'sorted')
        if not can_reuse(sort_target_bed_fpath, clean_target_bed_fpath):
            debug()
            debug('Sorting target BED file...')
            sort_target_bed_fpath = sort_bed(clean_target_bed_fpath, output_bed_fpath=sort_target_bed_fpath, fai_fpath=fai_fpath)
            debug('Saved to ' + sort_target_bed_fpath)
            verify_file(sort_target_bed_fpath, is_critical=True)

        if genome in ebl.SUPPORTED_GENOMES:
            ann_target_bed_fpath = intermediate_fname(work_dir, sort_target_bed_fpath, 'ann_plus_features')
            if not can_reuse(ann_target_bed_fpath, sort_target_bed_fpath):
                debug()
                if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
                    debug('Annotating target BED file and collecting overlapping genome features')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
                else:
                    debug('Overlapping with genomic features:')
                    overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                         genome=genome, extended=True, only_canonical=True)
                debug('Saved to ' + ann_target_bed_fpath)
                verify_file(ann_target_bed_fpath, is_critical=True)
        else:
            ann_target_bed_fpath = sort_target_bed_fpath

        final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
        if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
            bed = BedTool(ann_target_bed_fpath).remove_invalid()
            with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
                bed.saveas(tx)
                pass
            verify_file(final_clean_target_bed_fpath, is_critical=True)

        self.bed_fpath = final_clean_target_bed_fpath
        self.bed = BedTool(self.bed_fpath)
        
        self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
        if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
            with file_transaction(work_dir, self.capture_bed_fpath) as tx:
                self.get_capture_bed().saveas(tx)

        gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
        self.gene_keys_set = gene_key_set
        self.gene_keys_list = gene_key_list
        self.regions_num = self.get_capture_bed().count()
#.........这里部分代码省略.........
开发者ID:vladsaveliev,项目名称:TargQC,代码行数:103,代码来源:Target.py

示例11: BedTool

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
		#sys.stderr.write("_"*140	+ "\n")
		return None;
			
	


bed = BedTool(args.path);
if(not len(bed)):
	sys.exit('Provided bed file is empty. Interactions cannot be generated\n')
	
	
name2intervals = doublebed2dict(bed);
name2interaction = defaultdict(list);
interaction2name = defaultdict(list);

for c, i in enumerate(bed.sort().merge(s=True, d=args.distance, c='4,6', o='distinct', delim=';')):
	print i.name
	for name in i.name.split(";"):
		 n = "|".join(name.split("|")[:-1])
		 name2interaction[n].append(c);
		  
		 
for k, v in	name2interaction.iteritems():
	key = tuple(sorted(v));
	interaction2name[key].append(k)
		 
number_chimeras = len(name2intervals);
number_interactions = 0
number_self_intersection = 0;
	
	
开发者ID:afilipch,项目名称:nrlbio,代码行数:31,代码来源:chimera2interaction.py

示例12: run

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def run(chipdir, refseq, filedir, DMSO, CA):
    TSS = (-200, 1000)

    a = BedTool(chipdir)
    b = a.closest(refseq, d=True)
    b.cut([9, 10, 11, 12, 13, 14, 21]).saveas(filedir + "/SRF_closest.bed")
    d = dict()
    with open(filedir + "/SRF_closest.bed") as F:
        for line in F:
            line = line.strip().split()
            chrom, start, stop = line[0:3]
            d[chrom + "\t" + start + "\t" + stop + "\t"] = "\t".join(line[3:])
    outfile = open(filedir + "/SRF_closest.rmdup.bed", "w")
    for key in d:
        if "." not in key.split():
            outfile.write(key + d[key] + "\n")
    outfile.close()
    # os.system("sort -k1,1 -k2,2n " + filedir + "/SRF_closest.rmdup.bed > " + filedir + "/SRF_closest.rmdup.sorted.bed")
    a = BedTool(filedir + "/SRF_closest.rmdup.bed")
    a.sort().saveas(filedir + "/SRF_closest.rmdup.sorted.bed")
    outfile = open(filedir + "/SRF.TSS.bed", "w")
    outfile2 = open(filedir + "/SRF.gene.bed", "w")
    with open(filedir + "/SRF_closest.rmdup.sorted.bed") as F:
        for line in F:
            chrom, start, stop, gene, number, strand, distance = line.strip().split()
            if int(stop) - int(start) > 2000 and int(distance) > 10000:
                if strand is "+":
                    outfile.write(chrom + "\t" + str(int(start) + TSS[0]) + "\t" + str(int(start) + TSS[1]) + "\n")
                    outfile2.write(chrom + "\t" + str(int(start) + TSS[1]) + "\t" + stop + "\n")
                else:
                    outfile.write(chrom + "\t" + str(int(stop) - TSS[1]) + "\t" + str(int(stop) - TSS[0]) + "\n")
                    outfile2.write(chrom + "\t" + start + "\t" + str(int(stop) - TSS[1]) + "\n")
    outfile.close()
    outfile2.close()
    a = BedTool(filedir + "/SRF.TSS.bed")
    a.sort().saveas(filedir + "/SRF.TSS.bed")
    a = BedTool(filedir + "/SRF.gene.bed")
    a.sort().saveas(filedir + "/SRF.gene.bed")

    TSS = filedir + "/SRF.TSS.bed"
    genes = filedir + "/SRF.gene.bed"

    os.system("bedtools map -a " + genes + " -b " + DMSO + " -c 4 -o sum > " + filedir + "/DMSO.genes.bed")
    os.system("bedtools map -a " + TSS + " -b " + DMSO + " -c 4 -o sum > " + filedir + "/DMSO.TSS.bed")
    os.system("bedtools map -a " + genes + " -b " + CA + " -c 4 -o sum > " + filedir + "/CA.genes.bed")
    os.system("bedtools map -a " + TSS + " -b " + CA + " -c 4 -o sum > " + filedir + "/CA.TSS.bed")

    TRx = list()
    TRy = list()
    expressionlist = list()

    with open(filedir + "/DMSO.genes.bed") as a, open(filedir + "/DMSO.TSS.bed") as b, open(
        filedir + "/CA.genes.bed"
    ) as c, open(filedir + "/CA.TSS.bed") as d:
        for line in a:
            bline = b.readline().strip().split()[-1]
            cline = c.readline().strip().split()[-1]
            dline = d.readline().strip().split()[-1]
            if line.strip().split()[-1] is ".":
                DMSOgene = 0.0
            else:
                DMSOgene = float(line.strip().split()[-1])
            if bline is ".":
                DMSOTSS = 0.0
            else:
                DMSOTSS = float(bline)
            if cline is ".":
                CAgene = 0.0
            else:
                CAgene = float(cline)
            if dline is ".":
                CATSS = 0.0
            else:
                CATSS = float(dline)
            if DMSOgene == 0.0:
                TRx.append(0.0)
            else:
                TRx.append((DMSOTSS / DMSOgene))
            if CAgene == 0.0:
                TRy.append(0.0)
            else:
                TRy.append((CATSS / CAgene))
            expressionlist.append((np.log2(DMSOgene) + np.log2(CAgene)) / 2.0)

    F6 = plt.figure()
    ax1 = F6.add_subplot(111)
    xy = np.vstack([TRx, TRy])
    z = gaussian_kde(xy)(xy)
    ax1.scatter(TRx, TRy, c=z, edgecolor="")
    # ax1.scatter(TRx2,TRy2,c='red',edgecolor="",s=expressionlist2)
    ax1.set_title("Pausing Index")
    ax1.set_ylabel("CA")
    ax1.set_xlabel("DMSO")
    ax1.get_xaxis().tick_bottom()
    ax1.get_yaxis().tick_left()
    # ax1.plot([0,1/slope1],[intercept1,1],color = 'r')
    ax1.set_xlim([0, 20])
    ax1.set_ylim([0, 20])
    ax1.plot([0, 50.0], [0, 50.0], color="k")
    # ax1.text(8,18, "Pearson = " + str(pearsons)[0:5])
#.........这里部分代码省略.........
开发者ID:jdrubin91,项目名称:GROAnalysis,代码行数:103,代码来源:closest_gene.py

示例13: isec_vcf

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sort [as 别名]
def isec_vcf(out=None,v1=None,s1=None,v1_prep=None,v2=None,s2=None,bed=None,bam=None,ref=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', help='Location of output')
    parser.add_argument('-v1', help='VCF 1')
    parser.add_argument('-s1', help='Sample name (as appears in vcf 1)')
    parser.add_argument('-v1_prep', help='Does the reference VCF need to be prepared', default=True)
    parser.add_argument('-v2', help='VCF 2')
    parser.add_argument('-s2', help='Sample name (as appears in vcf 2), if omitted it will be the same as s1', default=None)
    parser.add_argument('-b', help='BED file to filter to')
    parser.add_argument('-bam', help='BAM file for vcf 2 sample to generate coverage stats from')
    parser.add_argument('-ref', help="Path to the Reference Genome used", default='/results/Pipeline/program/GATK_resource_bundle/ucsc.hg19.nohap.masked.fasta')

    args = parser.parse_args()

    if not v1:
        v1 = args.v1
    if not s1:
        s1 = args.s1
    if not v2:
        v2 = args.v2
    if not s2 and not args.s2:
        s2 = s1
    elif not s2:
        s2 = args.s2
    if not bed:
        bed = args.b
    if not bam:
        bam = args.bam
    if not v1_prep:
        v1_prep = args.v1_prep

    if not out:
        out = args.o

    if not ref:
        ref = args.ref

    if out.endswith('/'):
        out_dir = os.path.dirname(out)
    else:
        out_dir = out

    if not os.path.exists(out_dir):
        print("Output directory does not exist")
        exit(1)

    no_header = os.path.basename(bed).replace('.bed', '_noheader.bed')
    no_header = out_dir + '/' + no_header
    command = "grep -i -v start " + bed + " > " + no_header
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as e:
        print('Error executing command: ' + str(e.returncode))
        exit(1)

    if os.path.basename(no_header) == "IEM_all_panels_header_noheader.bed":
        #IEM name column causes bedtools error
        tool = BedTool(no_header)
        sorted_tool = tool.sort()
        merged_tool = sorted_tool.merge()
        merged_tool.saveas(no_header.replace('.bed', '.merged.bed'))
        no_header = no_header.replace('.bed', '.merged.bed')

    print(no_header)

    #prepare vcfs
    if v1_prep != "False":
        v1_decomposed = prepare_vcf(v1, ref)
    else:
        print('no prep')
        v1_decomposed = v1
    if os.path.exists(v2.replace('.vcf', '.decomposed.normalised.vcf.gz')):
        v2_decomposed = v2.replace('.vcf', '.decomposed.normalised.vcf.gz')
    else:
        v2_decomposed = prepare_vcf(v2, ref)

    #isec
    command = '/results/Pipeline/program/bcftools-1.3.1/bcftools isec -R ' + \
                bed + ' -p ' + \
                out_dir + ' ' + \
                v1_decomposed + ' ' + \
                v2_decomposed
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as e:
        print('Error executing command: ' + str(e.returncode))
        exit(1)

    #annotate
    coverage = get_coverage(no_header, out_dir, s2, bam)
    false_negs_ann = annotate_false_negs(out_dir, s1, coverage)
    false_pos_ann = annotate_false_pos(out_dir, coverage, s2)
    matching, mismatching_genotype = check_genotype(out_dir, s2, s1, coverage)
    false_negs_indels = len(false_negs_ann['indels'])
    false_negs_cov = len(false_negs_ann['no_coverage'])
    false_negs_ev_alt = len(false_negs_ann['evidence_of_alt'])
    false_neg_oth = len(false_negs_ann['false_neg'])
    false_negs = false_neg_oth + false_negs_cov + false_negs_cov + false_negs_ev_alt + false_negs_indels
    false_pos = len(false_pos_ann)
    num_mismatch = len(mismatching_genotype)
#.........这里部分代码省略.........
开发者ID:sch-sdgs,项目名称:SDGSValidation,代码行数:103,代码来源:isec_vcf.py


注:本文中的pybedtools.BedTool.sort方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。