当前位置: 首页>>代码示例>>Python>>正文


Python GenomicRegionSet.combine方法代码示例

本文整理汇总了Python中rgt.GenomicRegionSet.GenomicRegionSet.combine方法的典型用法代码示例。如果您正苦于以下问题:Python GenomicRegionSet.combine方法的具体用法?Python GenomicRegionSet.combine怎么用?Python GenomicRegionSet.combine使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在rgt.GenomicRegionSet.GenomicRegionSet的用法示例。


在下文中一共展示了GenomicRegionSet.combine方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: merge_DBD_regions

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 别名]
def merge_DBD_regions(path):
    """Merge all available DBD regions in BED format. """

    for t in os.listdir(path):
        if os.path.isdir(os.path.join(path, t)):
            dbd_pool = GenomicRegionSet(t)
            for rna in os.listdir(os.path.join(path,t)):
                f = os.path.join(path, t, rna, "DBD_"+rna+".bed")
                if os.path.exists(f):
                    dbd = GenomicRegionSet(rna)
                    dbd.read_bed(f)
                    for r in dbd: r.name = rna+"_"+r.name
                    dbd_pool.combine(dbd)
            dbd_pool.write_bed(os.path.join(path, t, "DBD_"+t+".bed"))
开发者ID:eggduzao,项目名称:reg-gen,代码行数:16,代码来源:triplexTools.py

示例2: get_bc_signal

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 别名]
def get_bc_signal(arguments):
    (mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
     window_size, forward_shift, reverse_shift, bias_table1, bias_table2) = arguments

    mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
    mpbs1.read(mpbs_file1)

    mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
    mpbs2.read(mpbs_file2)

    mpbs = mpbs1.combine(mpbs2, output=True)
    mpbs.sort()

    bam1 = Samfile(reads_file1, "rb")
    bam2 = Samfile(reads_file2, "rb")

    genome_data = GenomeData(organism)
    fasta = Fastafile(genome_data.get_genome())

    signal_1 = np.zeros(window_size)
    signal_2 = np.zeros(window_size)
    motif_len = None
    pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
                ("G", [0.0] * window_size), ("T", [0.0] * window_size),
                ("N", [0.0] * window_size)])

    mpbs_regions = mpbs.by_names([mpbs_name])
    num_motif = len(mpbs_regions)

    # Fetch bias corrected signal
    for region in mpbs_regions:
        if motif_len is None:
            motif_len = region.final - region.initial

        mid = (region.final + region.initial) / 2
        p1 = mid - window_size / 2
        p2 = mid + window_size / 2

        if p1 <= 0:
            continue
        # Fetch raw signal
        signal1 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam1,
                                  bias_table=bias_table1, genome_file_name=genome_data.get_genome(),
                                  forward_shift=forward_shift, reverse_shift=reverse_shift)

        signal2 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam2,
                                  bias_table=bias_table2, genome_file_name=genome_data.get_genome(),
                                  forward_shift=forward_shift, reverse_shift=reverse_shift)

        if len(signal1) != len(signal_1) or len(signal2) != len(signal_2):
            continue

        # smooth the signal
        signal_1 = np.add(signal_1, np.array(signal1))
        signal_2 = np.add(signal_2, np.array(signal2))

        update_pwm(pwm, fasta, region, p1, p2)

    return signal_1, signal_2, motif_len, pwm, num_motif
开发者ID:CostaLab,项目名称:reg-gen,代码行数:61,代码来源:DifferentialAnalysis.py

示例3: get_raw_signal

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 别名]
def get_raw_signal(arguments):
    (mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
     window_size, forward_shift, reverse_shift) = arguments

    mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
    mpbs1.read(mpbs_file1)

    mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
    mpbs2.read(mpbs_file2)

    mpbs = mpbs1.combine(mpbs2, output=True)
    mpbs.sort()

    bam1 = Samfile(reads_file1, "rb")
    bam2 = Samfile(reads_file2, "rb")

    genome_data = GenomeData(organism)
    fasta = Fastafile(genome_data.get_genome())

    signal_1 = np.zeros(window_size)
    signal_2 = np.zeros(window_size)
    motif_len = None
    pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
                ("G", [0.0] * window_size), ("T", [0.0] * window_size),
                ("N", [0.0] * window_size)])

    mpbs_regions = mpbs.by_names([mpbs_name])
    num_motif = len(mpbs_regions)

    for region in mpbs_regions:
        if motif_len is None:
            motif_len = region.final - region.initial

        mid = (region.final + region.initial) / 2
        p1 = mid - window_size / 2
        p2 = mid + window_size / 2

        if p1 <= 0:
            continue

        # Fetch raw signal
        for read in bam1.fetch(region.chrom, p1, p2):
            # check if the read is unmapped, according to issue #112
            if read.is_unmapped:
                continue

            if not read.is_reverse:
                cut_site = read.pos + forward_shift
                if p1 <= cut_site < p2:
                    signal_1[cut_site - p1] += 1.0
            else:
                cut_site = read.aend + reverse_shift - 1
                if p1 <= cut_site < p2:
                    signal_1[cut_site - p1] += 1.0

        for read in bam2.fetch(region.chrom, p1, p2):
            # check if the read is unmapped, according to issue #112
            if read.is_unmapped:
                continue

            if not read.is_reverse:
                cut_site = read.pos + forward_shift
                if p1 <= cut_site < p2:
                    signal_2[cut_site - p1] += 1.0
            else:
                cut_site = read.aend + reverse_shift - 1
                if p1 <= cut_site < p2:
                    signal_2[cut_site - p1] += 1.0

        update_pwm(pwm, fasta, region, p1, p2)

    return signal_1, signal_2, motif_len, pwm, num_motif
开发者ID:CostaLab,项目名称:reg-gen,代码行数:74,代码来源:DifferentialAnalysis.py

示例4: diff_analysis_run

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 别名]
def diff_analysis_run(args):
    # Initializing Error Handler
    err = ErrorHandler()

    output_location = os.path.join(args.output_location, "Lineplots")
    try:
        if not os.path.isdir(output_location):
            os.makedirs(output_location)
    except Exception:
        err.throw_error("MM_OUT_FOLDER_CREATION")

    # Check if the index file exists
    base_name1 = "{}.bai".format(args.reads_file1)
    if not os.path.exists(base_name1):
        pysam.index(args.reads_file1)

    base_name2 = "{}.bai".format(args.reads_file2)
    if not os.path.exists(base_name2):
        pysam.index(args.reads_file2)

    mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
    mpbs1.read(args.mpbs_file1)

    mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
    mpbs2.read(args.mpbs_file2)

    mpbs = mpbs1.combine(mpbs2, output=True)
    mpbs.sort()
    mpbs.remove_duplicates()
    mpbs_name_list = list(set(mpbs.get_names()))

    signal_dict_by_tf_1 = dict()
    signal_dict_by_tf_2 = dict()
    motif_len_dict = dict()
    motif_num_dict = dict()
    pwm_dict_by_tf = dict()

    pool = Pool(processes=args.nc)
    # differential analysis using bias corrected signal
    if args.bc:
        hmm_data = HmmData()
        table_F = hmm_data.get_default_bias_table_F_ATAC()
        table_R = hmm_data.get_default_bias_table_R_ATAC()
        bias_table1 = BiasTable().load_table(table_file_name_F=table_F, table_file_name_R=table_R)
        bias_table2 = BiasTable().load_table(table_file_name_F=table_F, table_file_name_R=table_R)

        mpbs_list = list()
        for mpbs_name in mpbs_name_list:
            mpbs_list.append((mpbs_name, args.mpbs_file1, args.mpbs_file2, args.reads_file1, args.reads_file2,
                              args.organism, args.window_size, args.forward_shift, args.reverse_shift,
                              bias_table1, bias_table2))
        try:
            res = pool.map(get_bc_signal, mpbs_list)
        except Exception:
            logging.exception("get bias corrected signal failed")

    # differential analysis using raw signal
    else:
        mpbs_list = list()
        for mpbs_name in mpbs_name_list:
            mpbs_list.append((mpbs_name, args.mpbs_file1, args.mpbs_file2, args.reads_file1, args.reads_file2,
                              args.organism, args.window_size, args.forward_shift, args.reverse_shift))
        try:
            res = pool.map(get_raw_signal, mpbs_list)
        except Exception:
            logging.exception("get raw signal failed")

    for idx, mpbs_name in enumerate(mpbs_name_list):
        signal_dict_by_tf_1[mpbs_name] = res[idx][0]
        signal_dict_by_tf_2[mpbs_name] = res[idx][1]
        motif_len_dict[mpbs_name] = res[idx][2]
        pwm_dict_by_tf[mpbs_name] = res[idx][3]
        motif_num_dict[mpbs_name] = res[idx][4]

    if args.factor1 is None or args.factor2 is None:
        args.factor1, args.factor2 = compute_factors(signal_dict_by_tf_1, signal_dict_by_tf_2)
        output_factor(args, args.factor1, args.factor2)

    if args.output_profiles:
        output_profiles(mpbs_name_list, signal_dict_by_tf_1, output_location, args.condition1)
        output_profiles(mpbs_name_list, signal_dict_by_tf_2, output_location, args.condition2)

    ps_tc_results_by_tf = dict()

    plots_list = list()
    for mpbs_name in mpbs_name_list:
        plots_list.append((mpbs_name, motif_num_dict[mpbs_name], signal_dict_by_tf_1[mpbs_name],
                           signal_dict_by_tf_2[mpbs_name], args.factor1, args.factor2, args.condition1,
                           args.condition2, pwm_dict_by_tf[mpbs_name], output_location, args.window_size,
                           args.standardize))

    pool.map(line_plot, plots_list)

    for mpbs_name in mpbs_name_list:
        res = get_ps_tc_results(signal_dict_by_tf_1[mpbs_name], signal_dict_by_tf_2[mpbs_name],
                                args.factor1, args.factor2, motif_num_dict[mpbs_name], motif_len_dict[mpbs_name])
        #
        #     # only use the factors whose protection scores are greater than 0
        #     if res[0] > 0 and res[1] < 0:
        ps_tc_results_by_tf[mpbs_name] = res
#.........这里部分代码省略.........
开发者ID:CostaLab,项目名称:reg-gen,代码行数:103,代码来源:DifferentialAnalysis.py

示例5: figure

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 别名]
class Boxplot:
    """
    input:
        exps: input experimental matrix
        title: Default = boxplot
        groupby: Group the data by the given factor in the header of experimental matrix

    output:
        parameters: list of records
        figs: a list of figure(s)
    """

    def __init__(self, EMpath, fields, title="boxplot", df=False):
        # Read the Experimental Matrix
        self.title = title
        self.exps = ExperimentalMatrix()
        self.exps.read(EMpath)
        for f in fields:
            if f not in ["None", "reads", "regions", "factor"]:
                self.exps.match_ms_tags(f)
        self.exps.remove_name()
        self.beds = self.exps.get_regionsets()  # A list of GenomicRegionSets
        self.bednames = self.exps.get_regionsnames()
        self.reads = self.exps.get_readsfiles()
        self.readsnames = self.exps.get_readsnames()
        self.fieldsDict = self.exps.fieldsDict
        self.parameter = []
        self.df = df

    def combine_allregions(self):

        self.all_bed = GenomicRegionSet("All regions")
        for bed in self.beds:
            self.all_bed.combine(bed)
        self.all_bed.remove_duplicates()  # all_bed is sorted!!

    def bedCoverage(self):
        """ Return coverage matrix of multiple reads on one bed.
        bed --> GenomicRegionSet
        """
        c = []
        for rp in self.reads:
            print("    processing: ..." + rp[-45:])
            r = os.path.abspath(rp)  # Here change the relative path into absolute path
            cov = CoverageSet(r, self.all_bed)
            cov.coverage_from_genomicset(r)
            cov.normRPM()
            c.append(cov.coverage)
        self.all_table = numpy.transpose(c)

    def quantile_normalization(self):
        """ Return the np.array which contains the normalized values
        """
        rank_matrix = []
        for c in range(self.all_table.shape[1]):
            col = self.all_table[:, c]
            rank_col = mstats.rankdata(col)
            rank_matrix.append(rank_col)

        ranks = numpy.array(rank_matrix)
        trans_rank = numpy.transpose(ranks)

        # Calculate for means of ranks
        print("    Calculating for the mean of ranked data...")
        sort_matrix = numpy.sort(self.all_table, axis=0)
        means = []
        for r in range(self.all_table.shape[0]):
            row = [x for x in sort_matrix[r, :]]
            means.append(numpy.mean(row))

        # Replace the value by new means
        print("    Replacing the data value by normalized mean...")
        normalized_table = numpy.around(trans_rank)
        for i, v in enumerate(means):
            normalized_table[normalized_table == i + 1] = v
        # print(rounded_rank)
        self.norm_table = normalized_table

    def tables_for_plot(self):
        """ Return a Dict which stores all tables for each bed with file name as its key. """
        self.tableDict = OrderedDict()  # Storage all tables for each bed with bedname as the key
        conList = []  # Store containers of beds
        iterList = []

        for i, bed in enumerate(self.beds):
            self.tableDict[bed.name] = []
            bed.sort()
            conList.append(bed.__iter__())
            iterList.append(conList[-1].next())

        for i, r in enumerate(self.all_bed.sequences):
            for j in range(len(self.beds)):
                while r > iterList[j]:
                    try:
                        iterList[j] = conList[j].next()
                    except:
                        break
                if r == iterList[j]:
                    self.tableDict[self.beds[j].name].append(self.norm_table[i])
                elif r < iterList[j]:
#.........这里部分代码省略.........
开发者ID:eggduzao,项目名称:reg-gen,代码行数:103,代码来源:boxplot.py


注:本文中的rgt.GenomicRegionSet.GenomicRegionSet.combine方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。