當前位置: 首頁>>代碼示例>>Python>>正文


Python GenomicRegionSet.add方法代碼示例

本文整理匯總了Python中rgt.GenomicRegionSet.GenomicRegionSet.add方法的典型用法代碼示例。如果您正苦於以下問題:Python GenomicRegionSet.add方法的具體用法?Python GenomicRegionSet.add怎麽用?Python GenomicRegionSet.add使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在rgt.GenomicRegionSet.GenomicRegionSet的用法示例。


在下文中一共展示了GenomicRegionSet.add方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: create_file

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
    def create_file(self):
        # Expanding summits
        tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
        tfbs_summit_regions.read_bed(self.tfbs_summit_fname)

        for region in iter(tfbs_summit_regions):
            summit = int(region.data.split()[-1]) + region.initial
            region.initial = max(summit - (self.peak_ext / 2), 0)
            region.final = summit + (self.peak_ext / 2)

        # Calculating intersections
        mpbs_regions = GenomicRegionSet("MPBS Regions")
        mpbs_regions.read_bed(self.mpbs_fname)

        tfbs_summit_regions.sort()
        mpbs_regions.sort()

        with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
        without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)
        tfbs_regions = GenomicRegionSet("TFBS Regions")

        for region in iter(with_overlap_regions):
            region.name = region.name.split(":")[0] + ":Y"
            tfbs_regions.add(region)

        for region in iter(without_overlap_regions):
            region.name = region.name.split(":")[0] + ":N"
            tfbs_regions.add(region)

        tfbs_regions.sort()

        tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
        tfbs_regions.write_bed(tfbs_fname)
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:35,代碼來源:evidence.py

示例2: fisher_table

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def fisher_table(motif_name, regions, mpbs, gene_set=False, mpbs_set=False):
    """
    TODO

    Keyword arguments:
    motif_name -- TODO
    regions -- TODO
    mpbs -- TODO
    gene_set -- TODO
    mpbs_set -- TODO

    Return:
    a -- TODO
    b -- TODO
    gene_set -- TODO
    mpbs_set -- TODO
    """

    # Fetching motif
    mpbs_motif = GenomicRegionSet(name="mpbs_motif")
    for region in mpbs.sequences:
        if motif_name in region.name:
            mpbs_motif.add(region)

    # Performing intersections
    if len(mpbs_motif) > 0:
        # regions which are overlapping with mpbs_motif
        intersect_original = regions.intersect(mpbs_motif, mode=OverlapType.ORIGINAL, rm_duplicates=True)
        # regions which are not overlapping with regions from mpbs_motif
        subtract_overlap = regions.subtract(mpbs_motif, whole_region=True)

        # Fetching genes
        if gene_set:
            gene_set_res = GeneSet(motif_name)
            for genomic_region in intersect_original.sequences:
                if genomic_region.name:
                    gene_list = [e if e[0] != "." else e[1:] for e in genomic_region.name.split(":")]
                    for g in gene_list:
                        gene_set_res.genes.append(g)
            gene_set_res.genes = list(set(gene_set_res.genes))  # Keep only unique genes
        else:
            gene_set_res = None

        # Fetching mpbs
        if mpbs_set:
            mpbs_set_res = mpbs_motif.intersect(regions, mode=OverlapType.ORIGINAL, rm_duplicates=True)
        else:
            mpbs_set_res = None

        return len(intersect_original), len(subtract_overlap), gene_set_res, mpbs_set_res

    else:
        gene_set_res = GeneSet(motif_name) if gene_set else None
        mpbs_set_res = GenomicRegionSet(mpbs_motif.name) if mpbs_set else None

        return 0, len(regions), gene_set_res, mpbs_set_res
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:58,代碼來源:Statistics.py

示例3: initialize

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
    """Initialize the MultiCoverageSet"""

    regionset = GenomicRegionSet(name)
    chrom_sizes_dict = {}
    #if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
    if regions is not None:
        print("Call DPs on specified regions.", file=sys.stderr)
        with open(regions) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                c, s, e = line[0], int(line[1]), int(line[2])
                regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
                chrom_sizes_dict[c] = e
    else:
        print("Call DPs on whole genome.", file=sys.stderr)
        with open(chrom_sizes) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                chrom, end = line[0], int(line[1])
                regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
                chrom_sizes_dict[chrom] = end
    
    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None
        
    if housekeeping_genes:
        scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)
    
    if scaling_factors_ip:
        tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")
    
    regionset.sequences.sort()
    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
    tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")
    
    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
                                  path_bamfiles = bamfiles, path_inputs = inputs, exts = exts, exts_inputs = exts_inputs, factors_inputs = factors_inputs, \
                                  chrom_sizes=chrom_sizes, verbose=verbose, no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug, \
                                  norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip, save_wig=save_wig)
    
    return multi_cov_set
開發者ID:jovesus,項目名稱:reg-gen,代碼行數:52,代碼來源:dpc_help.py

示例4: merge_delete

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def merge_delete(ext_size, merge, peak_list, pvalue_list):
#     peaks_gain = read_diffpeaks(path)
    
    regions_plus = GenomicRegionSet('regions') #pot. mergeable
    regions_minus = GenomicRegionSet('regions') #pot. mergeable
    regions_unmergable = GenomicRegionSet('regions')
    last_orientation = ""
    
    for i, t in enumerate(peak_list):
        chrom, start, end, c1, c2, strand, ratio = t[0], t[1], t[2], t[3], t[4], t[5], t[6]
        r = GenomicRegion(chrom = chrom, initial = start, final = end, name = '', \
                          orientation = strand, data = str((c1, c2, pvalue_list[i], ratio)))
        if end - start > ext_size:
            if strand == '+':
                if last_orientation == '+':
                    region_plus.add(r)
                else:
                    regions_unmergable.add(r)
            elif strand == '-':
                if last_orientation == '-':
                    region_mins.add(r)
                else:
                    regions_unmergable.add(r)
                    
                    
    if merge:
        regions_plus.extend(ext_size/2, ext_size/2)
        regions_plus.merge()
        regions_plus.extend(-ext_size/2, -ext_size/2)
        merge_data(regions_plus)
        
        regions_minus.extend(ext_size/2, ext_size/2)
        regions_minus.merge()
        regions_minus.extend(-ext_size/2, -ext_size/2)
        merge_data(regions_minus)
    
    results = GenomicRegionSet('regions')
    for el in regions_plus:
        results.add(el)
    for el in regions_minus:
        results.add(el)
    for el in regions_unmergable:
        results.add(el)
    results.sort()
    
    return results
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:48,代碼來源:postprocessing.py

示例5: get_training_regionset

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
    def get_training_regionset(self):
        r = GenomicRegionSet('')
        r.add(self.regionset[self.counter])
        
        if self.counter == len(self.chrom_sizes_dict):
            return None
        else:
            self.counter += 1
            return r
        
            
            
#if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
#     if test:
#         contained_chrom = ['chr1', 'chr2']
#     else:
#         #contained_chrom = get_all_chrom(bamfiles)
#         contained_chrom = ['chr1', 'chr2']
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:21,代碼來源:RegionGiver.py

示例6: rna_associated_gene

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def rna_associated_gene(rna_regions, name, organism):
    if rna_regions:
        s = [ rna_regions[0][0], min([e[1] for e in rna_regions]), 
              max([e[2] for e in rna_regions]), rna_regions[0][3] ]
        g = GenomicRegionSet("RNA associated genes")
        g.add( GenomicRegion(chrom=s[0], initial=s[1], final=s[2], name=name, orientation=s[3]) )
        asso_genes = g.gene_association(organism=organism, promoterLength=1000, show_dis=True)

        genes = asso_genes[0].name.split(":")
        closest_genes = []
        for n in genes:
            if name not in n: closest_genes.append(n)
        closest_genes = set(closest_genes)

        if len(closest_genes) == 0:
            return "."
        else:
            return ":".join(closest_genes)
    else:
        return "."
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:22,代碼來源:triplexTools.py

示例7: chip_evaluate

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
    def chip_evaluate(self):
        """
        This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
        to evaluate the footprint predictions.

        return:
        """

        # Evaluate Statistics
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        roc_auc_1 = dict()
        roc_auc_2 = dict()
        recall = dict()
        precision = dict()
        prc_auc = dict()

        if "SEG" in self.footprint_type:
            mpbs_regions = GenomicRegionSet("TFBS")
            mpbs_regions.read_bed(self.tfbs_file)
            mpbs_regions.sort()

            # Verifying the maximum score of the MPBS file
            max_score = -99999999
            for region in iter(mpbs_regions):
                score = int(region.data)
                if score > max_score:
                    max_score = score
            max_score += 1

        for i in range(len(self.footprint_file)):
            footprints_regions = GenomicRegionSet("Footprints Prediction")
            footprints_regions.read_bed(self.footprint_file[i])

            # Sort footprint prediction bed files
            footprints_regions.sort()

            if self.footprint_type[i] == "SEG":
                # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
                increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
                intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
                for region in iter(intersect_regions):
                    region.data = str(int(region.data) + max_score)
                    increased_score_mpbs_regions.add(region)


                # Keep the score of remained MPBS entry unchanged
                without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
                for region in iter(without_intersect_regions):
                    increased_score_mpbs_regions.add(region)

                increased_score_mpbs_regions.sort_score()

                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
            elif self.footprint_type[i] == "SC":
                footprints_regions.sort_score()
                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)

        # Output the statistics results into text
        stats_fname = self.output_location + self.tf_name + "_stats.txt"
        stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
        with open(stats_fname, "w") as stats_file:
            stats_file.write("\t".join(stats_header) + "\n")
            for i in range(len(self.footprint_name)):
                stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
                                 + str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")

        # Output the curves
        if self.print_roc_curve:
            label_x = "False Positive Rate"
            label_y = "True Positive Rate"
            curve_name = "ROC"
            self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
        if self.print_pr_curve:
            label_x = "Recall"
            label_y = "Precision"
            curve_name = "PRC"
            self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)

        self.output_points(self.tf_name, fpr, tpr, recall, precision)
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:85,代碼來源:evaluation.py

示例8: chip_evaluate

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def chip_evaluate(args):
    # Evaluate Statistics
    fpr = dict()
    tpr = dict()
    roc_auc_1 = dict()
    roc_auc_10 = dict()
    roc_auc_50 = dict()
    roc_auc_100 = dict()
    recall = dict()
    precision = dict()
    prc_auc_1 = dict()
    prc_auc_10 = dict()
    prc_auc_50 = dict()
    prc_auc_100 = dict()

    footprint_file = args.footprint_file.split(",")
    footprint_name = args.footprint_name.split(",")
    footprint_type = args.footprint_type.split(",")

    max_score = 0
    if "SEG" in footprint_type:
        mpbs_regions = GenomicRegionSet("TFBS")
        mpbs_regions.read(args.tfbs_file)

        # Verifying the maximum score of the MPBS file
        for region in iter(mpbs_regions):
            score = int(region.data.split("\t")[0])
            if score > max_score:
                max_score = score
        max_score += 1

    max_points = []
    for i in range(len(footprint_file)):
        footprints_regions = GenomicRegionSet("Footprints Prediction")
        footprints_regions.read(footprint_file[i])
        footprints_regions.sort()

        if footprint_type[i] == "SEG":
            # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
            increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
            intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
            for region in iter(intersect_regions):
                region.data = str(int(region.data.split("\t")[0]) + max_score)
                increased_score_mpbs_regions.add(region)

            # Keep the score of remained MPBS entry unchanged
            without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
            for region in iter(without_intersect_regions):
                increased_score_mpbs_regions.add(region)

            increased_score_mpbs_regions.sort_score()

            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(increased_score_mpbs_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(increased_score_mpbs_regions)

            max_points.append(len(intersect_regions))

        elif footprint_type[i] == "SC":
            footprints_regions.sort_score()
            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(footprints_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(footprints_regions)

            max_points.append(len(footprints_regions))

    # Output the statistics results into text
    stats_fname = os.path.join(args.output_location, "{}_stats.txt".format(args.output_prefix))
    stats_header = ["METHOD", "AUC_100", "AUC_50", "AUC_10", "AUC_1", "AUPR_100", "AUPR_50", "AUPR_10", "AUPR_1"]
    with open(stats_fname, "w") as stats_file:
        stats_file.write("\t".join(stats_header) + "\n")
        for i in range(len(footprint_name)):
            stats_file.write(footprint_name[i] + "\t" +
                             str(roc_auc_100[i]) + "\t" + str(roc_auc_50[i]) + "\t" + str(roc_auc_10[i]) + "\t" +
                             str(roc_auc_1[i]) + "\t" + str(prc_auc_100[i]) + "\t" + str(prc_auc_50[i]) + "\t" +
                             str(prc_auc_10[i]) + "\t" + str(prc_auc_1[i]) + "\n")

    # Output the curves
    if args.print_roc_curve:
        label_x = "False Positive Rate"
        label_y = "True Positive Rate"
        curve_name = "ROC"
        plot_curve(footprint_name, args.output_location, fpr, tpr, roc_auc_100, label_x, label_y, args.output_prefix,
                   curve_name, max_points=max_points)
    if args.print_pr_curve:
        label_x = "Recall"
        label_y = "Precision"
        curve_name = "PRC"
        plot_curve(footprint_name, args.output_location, recall, precision, prc_auc_100, label_x, label_y,
                   args.output_prefix, curve_name, max_points=max_points)

    output_points(footprint_name, args.output_location, args.output_prefix, fpr, tpr, recall, precision)
開發者ID:CostaLab,項目名稱:reg-gen,代碼行數:96,代碼來源:Evaluation.py

示例9: initialize

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def initialize(name, genome_path, regions, stepsize, binsize, bam_file_1, bam_file_2, ext_1, ext_2, \
               input_1, input_factor_1, ext_input_1, input_2, input_factor_2, ext_input_2, chrom_sizes, verbose, norm_strategy, no_gc_content, deadzones,\
               factor_input_1, factor_input_2, debug, tracker):
    
    regionset = GenomicRegionSet(name)
    chrom_sizes_dict = {}
    #if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
    if regions is not None:
        with open(regions) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                c, s, e = line[0], int(line[1]), int(line[2])
                regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
                chrom_sizes_dict[c] = e
    else:
        with open(chrom_sizes) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                chrom, end = line[0], int(line[1])
                regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
                chrom_sizes_dict[chrom] = end
    
    regionset.sequences.sort()
    
    start = 0
    end = 600
    ext_stepsize = 5
    #TODO: maybe for-loops?
    #compute extension size
    if [ext_1, ext_2, ext_input_1, ext_input_2].count(None) > 0:
        print("Computing read extension sizes...", file=sys.stderr)
    if ext_1 is None:
        ext_1, values_1 = get_extension_size(bam_file_1, start=start, end=end, stepsize=ext_stepsize)
    print("Read extension for first file: %s" %ext_1, file=sys.stderr)
    
    if ext_2 is None:
        ext_2, values_2 = get_extension_size(bam_file_2, start=start, end=end, stepsize=ext_stepsize)
    print("Read extension for second file: %s" %ext_2, file=sys.stderr)

    if input_1 is not None and ext_input_1 is None:
        ext_input_1, values_input_1 = get_extension_size(input_1, start=start, end=end, stepsize=ext_stepsize)
    print("Read extension for first input file: %s" %ext_input_1, file=sys.stderr)
    
    if input_1 is not None and input_2 is not None and input_1 == input_2 and 'ext_input_1' in locals() and 'values_input_1' in locals():
        ext_input_2, values_input_2 = ext_input_1, values_input_1
    elif input_2 is not None and ext_input_2 is None:
        ext_input_2, values_input_2 = get_extension_size(input_2, start=start, end=end, stepsize=ext_stepsize)
    print("Read extension for second input file: %s" %ext_input_2, file=sys.stderr)
    
    tracker.write(text=str(ext_1) + "," + str(ext_2), header="Extension size IP1, IP2")
    if input_1 is not None and input_2 is not None:
        tracker.write(text=str(ext_input_1) + "," + str(ext_input_2), header="Extension size Control1, Control2")
    
    if verbose:
        if 'values_1' in locals() and values_1 is not None:
            with open(name + '-read-ext-1', 'w') as f:
                for v, i in values_1:
                    print(i, v, sep='\t', file=f)
        
        if 'values_2' in locals() and values_2 is not None:
            with open(name + '-read-ext-2', 'w') as f:
                for v, i in values_2:
                    print(i, v, sep='\t', file=f)
        
        if 'values_input_1' in locals() and values_input_1 is not None:
            with open(name + '-read-ext-input-1', 'w') as f:
                for v, i in values_input_1:
                    print(i, v, sep='\t', file=f)
        
        if 'values_input_2' in locals() and values_input_2 is not None:
            with open(name + '-read-ext-input-2', 'w') as f:
                for v, i in values_input_2:
                    print(i, v, sep='\t', file=f)

    cov_cdp_mpp = DualCoverageSet(name=name, region=regionset, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
                                  file_1=bam_file_1, ext_1=ext_1,\
                                  file_2=bam_file_2, ext_2=ext_2, \
                                  input_1=input_1, ext_input_1=ext_input_1, input_factor_1=input_factor_1, \
                                  input_2=input_2, ext_input_2=ext_input_2, input_factor_2=input_factor_2, \
                                  chrom_sizes=chrom_sizes, verbose=verbose, norm_strategy=norm_strategy, no_gc_content=no_gc_content, deadzones=deadzones,\
                                  factor_input_1=factor_input_1, factor_input_2=factor_input_2, chrom_sizes_dict=chrom_sizes_dict, debug=debug, tracker=tracker)
    
    return cov_cdp_mpp, [ext_1, ext_2]
開發者ID:Marvin84,項目名稱:reg-gen,代碼行數:88,代碼來源:dpc_help.py

示例10: __iter__

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
 def __iter__(self):
     for el in self.regionset:
         tmp = GenomicRegionSet('')
         tmp.add(el)
         yield tmp
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:7,代碼來源:RegionGiver.py

示例11: dbd_regions

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def dbd_regions(exons, sig_region, rna_name, output,out_file=False, temp=None, fasta=True):
    """Generate the BED file of significant DBD regions and FASTA file of the sequences"""
    if len(sig_region) == 0:
        return
    #print(self.rna_regions)
    if not exons:
        pass
    else:
        dbd = GenomicRegionSet("DBD")
        dbdmap = {}
        if len(exons) == 1:
            print("## Warning: No information of exons in the given RNA sequence, the DBD position may be problematic. ")
        for rbs in sig_region:
            loop = True

            if exons[0][3] == "-":
                while loop:
                    cf = 0
                    for exon in exons:
                        #print(exon)

                        l = abs(exon[2] - exon[1])
                        tail = cf + l

                        if cf <= rbs.initial <=  tail:
                            dbdstart = exon[2] - rbs.initial + cf
                            
                            if rbs.final <= tail: 
                                #print("1")
                                dbdend = exon[2] - rbs.final + cf
                                if dbdstart > dbdend: dbdstart, dbdend = dbdend, dbdstart
                                dbd.add( GenomicRegion(chrom=exons[0][0], 
                                                       initial=dbdstart, final=dbdend, 
                                                       orientation=exons[0][3], 
                                                       name=str(rbs.initial)+"-"+str(rbs.final) ) )
                                dbdmap[str(rbs)] = dbd[-1].toString() + " strand:-"
                                loop = False
                                break
                            elif rbs.final > tail:

                                subtract = l + cf - rbs.initial
                                #print("2")
                                #print("Subtract: "+str(subtract))
                                if dbdstart > exon[1]: dbdstart, exon[1] = exon[1], dbdstart
                                dbd.add( GenomicRegion(chrom=exons[0][0], 
                                                       initial=dbdstart, final=exon[1], 
                                                       orientation=exons[0][3], 
                                                       name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
                        
                        elif rbs.initial < cf and rbs.final <= tail: 
                            #print("3")
                            dbdstart = exon[2]
                            dbdend = exon[2] - rbs.final + rbs.initial + subtract
                            if dbdstart > dbdend: dbdstart, dbdend = dbdend, dbdstart
                            dbd.add( GenomicRegion(chrom=exons[0][0], 
                                                   initial=dbdstart, final=dbdend, 
                                                   orientation=exons[0][3], 
                                                   name=str(cf)+"-"+str(rbs.final)+"_split2" ) )
                            dbdmap[str(rbs)] = dbd[-2].toString() + " & " + dbd[-1].toString() + " strand:-"
                            loop = False
                            break

                        elif rbs.initial > tail:
                            pass

                        cf += l
                        
                    loop = False
            else:

                while loop:
                    cf = 0
                    for exon in exons:
                        #print(exon)
                        l = exon[2] - exon[1]
                        tail = cf + l
                        #print("cf:   " + str(cf))
                        #print("tail: " + str(tail) )
                        if cf <= rbs.initial <=  tail:
                            dbdstart = exon[1] + rbs.initial - cf
                            
                            if rbs.final <= tail: 
                                #print("1")
                                dbdend = exon[1] + rbs.final -cf
                                dbd.add( GenomicRegion(chrom=exons[0][0], 
                                                       initial=dbdstart, final=dbdend, 
                                                       orientation=exons[0][3], 
                                                       name=str(rbs.initial)+"-"+str(rbs.final) ) )
                                dbdmap[str(rbs)] = dbd[-1].toString() + " strand:+"
                                loop = False
                                break
                            elif rbs.final > tail:

                                subtract = l + cf - rbs.initial
                                #print("2")
                                #print("Subtract: "+str(subtract))
                                dbd.add( GenomicRegion(chrom=exons[0][0], 
                                                       initial=dbdstart, final=exon[2], 
                                                       orientation=exons[0][3], 
                                                       name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
#.........這裏部分代碼省略.........
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:103,代碼來源:triplexTools.py

示例12: print

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
        print("output:\t" + args.o)
        print("organism:\t" + args.organism)
        gene = GenomicRegionSet("genes")
        ### Input BED file
        if args.i.endswith(".bed"):
            gene.read_bed(args.i)
            promoter = GenomicRegionSet("promoter")

            promoterLength = int(args.l)

            for s in gene:
                if s.orientation == "+": 
                    s.initial, s.final = max(s.initial-promoterLength, 0), s.initial

                else: s.initial, s.final = s.final, s.final+promoterLength
                promoter.add(s)
        ### Input gene list
        else:
            ann = AnnotationSet(gene_source=args.organism, alias_source=args.organism,
                                filter_havana=False, protein_coding=False, known_only=False)
            de_gene = GeneSet("de genes")
            de_gene.read(args.i)
            print(len(de_gene))
            promoter = ann.get_promoters(promoterLength=args.l, gene_set=de_gene, unmaplist=False)
            #print(len(de_prom))

        
        #print(len(promoter))
        promoter.write_bed(args.o)

開發者ID:Marvin84,項目名稱:reg-gen,代碼行數:31,代碼來源:rgt-convertor.py

示例13: _intersect

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
 def _intersect(self, y, rm_duplicates=False):
     """Return the overlapping regions with three different modes.
                
     (mode = OverlapType.ORIGINAL)
     Return the regions of original GenomicRegionSet which have any intersections with y.
     
         Keyword arguments:
         y -- the GenomicRegionSet which to compare with
         
         Return:
         z -- the regions of original GenomicRegionSet which have any intersections with y
         
         Graphical explanation:
         self       ----------              ------
         y              ----------                    ----
         Result     ----------
         
     """
     a = self
     b = y
     
     z = GenomicRegionSet(a.name + ' + ' + b.name)
     # XXX - someone putted an special symbol and spaces in the name! this is used as file name, never use strange characters.
     if len(a) == 0 or len(b) == 0: return z
     
     else:
         # If there is overlap within self or y, they should be merged first. 
         if a.sorted == False: a.sort()
         if b.sorted == False: b.sort()
         
         iter_a = iter(a)
         s = iter_a.next()
         last_j = len(b)-1
         j = 0
         cont_loop = True
              
         ########################### OverlapType.ORIGINAL ###################################
         while cont_loop:
             #print(str(s),"\t",str(b[j]))
             # When the regions overlap
             if s.overlap(b[j]):
                 z.add(s)
                 
                 
                 try: s = iter_a.next()
                 except: cont_loop = False
                 
             elif s < b[j]:
                 try: s = iter_a.next()
                 except: cont_loop = False
             elif s > b[j]:
                 if j == last_j: cont_loop = False
                 else: j = j + 1
             else:
                 try: s = iter_a.next()
                 except: cont_loop = False
                     
         if rm_duplicates: 
             z.remove_duplicates()
         
         return z
開發者ID:Marvin84,項目名稱:reg-gen,代碼行數:63,代碼來源:GenomicVariantSet.py

示例14: gen_html

# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
    def gen_html(self, directory, parameters, obed, align=50, alpha=0.05, score=False):
        """Generate the HTML file"""
        dir_name = os.path.basename(directory)
        html_header = "Genomic Region Test: " + dir_name
        link_ds = OrderedDict()
        link_ds["RNA"] = "index.html"
        link_ds["Sig Target Regions"] = "starget_regions.html"
        link_ds["Target Regions"] = "target_regions.html"
        link_ds["Parameters"] = "parameters.html"

        ##################################################
        # index.html

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
        # Plots
        html.add_figure("lineplot_region.png", align="left", width="45%", more_images=["boxplot_regions.png"])
        if self.showdbs:
            html.add_figure("lineplot_dbs.png", align="left", width="45%", more_images=["boxplot_dbs.png"])

        if self.showdbs:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics",
                            "Target Regions", "Non-target Regions", None, "Statistics"],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "NO. DBSs", "NO. DBSs (average)", "s.d.", "<i>p</i>-value"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions",
                              "Given target regions on DNA", "Regions from randomization", None,
                              "Statistics based on DNA Binding Sites"],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value",
                              "Number of related DNA Binding Sites binding to target regions",
                              "Average number of DNA Binding Sites binding to random regions",
                              "Standard deviation", "P-value"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:2pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\""]
        else:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", None],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "z-score"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions", None],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value", "Z-score"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", ""]

        type_list = 'ssssssssssssssss'
        col_size_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
        data_table = []

        for i, rbs in enumerate(self.rbss):
            if self.data["region"]["p"][i] < alpha:
                p_region = "<font color=\"red\">" + value2str(self.data["region"]["p"][i]) + "</font>"

            else:
                p_region = value2str(self.data["region"]["p"][i])
            zs = (self.counts_tr[rbs][0] - self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
            new_line = [str(i + 1),
                        rbs.str_rna(pa=False),
                        '<a href="dbd_region.html#' + rbs.str_rna() +
                        '" style="text-align:left">' + str(self.counts_tr[rbs][0]) + '</a>',
                        str(self.counts_tr[rbs][1]),
                        value2str(self.data["region"]["ave"][i]),
                        value2str(self.data["region"]["sd"][i]),
                        p_region,
                        value2str(zs)]
            if self.showdbs:
                if self.data["dbs"]["p"][i] < alpha:
                    p_dbs = "<font color=\"red\">" + value2str(self.data["dbs"]["p"][i]) + "</font>"
                else:
                    p_dbs = value2str(self.data["dbs"]["p"][i])

                new_line += [str(self.counts_dbs[rbs]),
                             value2str(self.data["dbs"]["ave"][i]),
                             value2str(self.data["dbs"]["sd"][i]),
                             p_dbs]
            data_table.append(new_line)

        data_table = natsort.natsorted(data_table, key=lambda x: x[6])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True, header_titles=header_titles, border_list=border_list, sortable=True)

        html.add_heading("Notes")
        html.add_list(["RNA name: " + self.rna_name,
#.........這裏部分代碼省略.........
開發者ID:eggduzao,項目名稱:reg-gen,代碼行數:103,代碼來源:tdf_regiontest.py


注:本文中的rgt.GenomicRegionSet.GenomicRegionSet.add方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。