当前位置: 首页>>代码示例>>Python>>正文


Python extraviews.tadbit_savefig函数代码示例

本文整理汇总了Python中pytadbit.utils.extraviews.tadbit_savefig函数的典型用法代码示例。如果您正苦于以下问题:Python tadbit_savefig函数的具体用法?Python tadbit_savefig怎么用?Python tadbit_savefig使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了tadbit_savefig函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: do_3d_plot

def do_3d_plot(nam, outfile, size, count, minmax, sigma=0, log=False):
    fig = plt.figure(figsize=(12,8))
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    beg = -size / 2
    end =  size / 2
    X = np.arange(beg, end, 1)
    Y = np.arange(beg, end, 1)
    X, Y = np.meshgrid(X, Y)
    Z = np.array([np.array([float(i) for i in l.split()]) for l in open(nam) if not l.startswith('#')])
    plt.title(nam + '\nMean: %.3f, median: %.3f, standard-deviation: %.3f (N=%d)' % (np.mean(Z), np.median(Z), np.std(Z), count))
    if sigma:
        Z = ndimage.gaussian_filter(Z, sigma=sigma, order=0)
    if log:
        Z = np.log(Z)
        zspan = minmax if minmax else np.max(np.abs(Z))
        zmax =  zspan
        zmin = -zspan
    else:
        zspan = minmax if minmax else np.max(np.abs(Z - 1))
        zmin = -zspan + 1
        zmax =  zspan + 1
    cmap = 'coolwarm'  # 'coolwarm'
    _ = ax.contourf(X, Y, Z, zdir='z', offset=zmin,
                    cmap=cmap, vmin=zmin, vmax=zmax)
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cmap,
                           linewidth=0, antialiased=True, alpha=1,
                           vmin=zmin, vmax=zmax, shade=True)
    ax.set_zlim3d(zmin, zmax)
    ax.view_init(elev=15, azim=25)
    cb = fig.colorbar(surf, shrink=0.5, aspect=20)
    cb.set_label('%sverage normalized interactions%s' %
                 ('Log a' if log else 'A',
                  '\nSmoothed with $\sigma=%s$' % sigma))
    tadbit_savefig(outfile)
开发者ID:3DGenomes,项目名称:TADbit,代码行数:34,代码来源:average_submatrix.py

示例2: correlate_matrices

def correlate_matrices(hic_data1, hic_data2, max_dist=10, intra=False,
                       savefig=None, show=False, savedata=None):
    """
    Compare the iteractions of two Hi-C matrices at a given distance,
    with spearman rank correlation

    :param hic_data1: Hi-C-data object
    :param hic_data2: Hi-C-data object
    :param 1 resolution: to be used for scaling the plot
    :param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
       not look further than 10 times the resolution)
    :param None savefig: path to save the plot
    :param False intra: only takes into account intra-chromosomal contacts
    :param False show: displays the plot

    :returns: list of correlations and list of genomic distances
    """
    corr = []
    dist = []
    if (intra and hic_data1.sections and hic_data2.sections and 
        hic_data1.sections == hic_data2.sections):
        for i in xrange(1, max_dist + 1):
            diag1 = []
            diag2 = []
            for crm in hic_data1.section_pos:
                for j in xrange(hic_data1.section_pos[crm][0],
                                hic_data1.section_pos[crm][1] - i):
                    diag1.append(hic_data1[j, i + j])
                    diag2.append(hic_data2[j, i + j])
            corr.append(spearmanr(diag1, diag2)[0])
            dist.append(i)
    else:
        if intra:
            warn('WARNING: hic_dta does not contain chromosome coordinates, ' +
                 'intra set to False')
        for i in xrange(1, max_dist + 1):
            diag1 = []
            diag2 = []
            for j in xrange(len(hic_data1) - i):
                diag1.append(hic_data1[j, i + j])
                diag2.append(hic_data2[j, i + j])
            corr.append(spearmanr(diag1, diag2)[0])
            dist.append(i)
    if show or savefig:
        plt.plot(dist, corr, color='orange', linewidth=3, alpha=.8)
        plt.xlabel('Genomic distance in bins')
        plt.ylabel('Spearman rank correlation')
        plt.xlim((0, dist[-1]))
        if savefig:
            tadbit_savefig(savefig)
        if show:
            plt.show()
        plt.close('all')
    if savedata:
        out = open(savedata, 'w')
        out.write('# genomic distance\tSpearman rank correlation\n')
        for i in xrange(len(corr)):
            out.write('%s\t%s\n' % (dist[i], corr[i]))
        out.close()
    return corr, dist
开发者ID:kipal1988,项目名称:tadbit,代码行数:60,代码来源:analyze.py

示例3: objective_function

    def objective_function(self, log=False, smooth=True,
                           axe=None, savefig=None):
        """
        This function plots the objective function value per each Monte-Carlo
        step.

        :param False log: log plot
        :param True smooth: curve smoothing
        :param None axe: a matplotlib.axes.Axes object to define the plot
           appearance
        :param None savefig: path to a file where to save the image generated;
           if None, the image will be shown using matplotlib GUI (the extension
           of the file name will determine the desired format).

        """
        show = False
        if not axe:
            fig = plt.figure(figsize=(7, 7))
            axe = fig.add_subplot(111)
            show = True
            axe.patch.set_facecolor('lightgrey')
            axe.patch.set_alpha(0.4)
            axe.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
            axe.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
            axe.set_axisbelow(True)
            axe.minorticks_on() # always on, not only for log
            # remove tick marks
            axe.tick_params(axis='both', direction='out', top=False,
                            right=False, left=False, bottom=False)
            axe.tick_params(axis='both', direction='out', top=False,
                            right=False, left=False, bottom=False,
                            which='minor')
        else:
            fig = axe.get_figure()
        # text
        plt.xlabel('Iteration number')
        plt.ylabel('IMP Objective Function Value')
        plt.title('Model ' + str(self['rand_init']))
        # smooth
        nrjz = self['log_objfun'][1:]
        if smooth:
            xnew = linspace(0, len(nrjz), 10000)
            nrjz_smooth = spline(range(len(nrjz)), nrjz, xnew,
                                 order=3)
            axe.plot(xnew, nrjz_smooth, color='darkred')
        else:
            axe.plot(nrjz, color='darkred')
        # plot
        axe.plot(nrjz, color='darkred', marker='o', alpha=.5, ms=4, ls='None')
        # log
        if log:
            axe.set_yscale('log')
        if savefig:
            tadbit_savefig(savefig)
        elif show:
            plt.show()
开发者ID:MarcoDiS,项目名称:TADbit,代码行数:56,代码来源:impmodel.py

示例4: plot_iterative_mapping

def plot_iterative_mapping(fnam1, fnam2, total_reads=None, axe=None, savefig=None):
    """
    :param fnam: input file name
    :param total_reads: total number of reads in the initial FASTQ file
    :param None axe: a matplotlib.axes.Axes object to define the plot
       appearance
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    :returns: a dictionary with the number of reads per mapped length
    """
    count_by_len = {}
    total_reads = total_reads or 1
    if not axe:
        fig=plt.figure()
        _ = fig.add_subplot(111)
    colors = ['olive', 'darkcyan']
    for i, fnam in enumerate([fnam1, fnam2]):
        fhandler = open(fnam)
        line = fhandler.next()
        while line.startswith('#'):
            line = fhandler.next()
        try:
            count_by_len[i] = {}
            while True:
                _, length, _, _ = line.rsplit('\t', 3)
                try:
                    count_by_len[i][int(length)] += 1
                except KeyError:
                    count_by_len[i][int(length)] = 1
                line = fhandler.next()
        except StopIteration:
            pass
        fhandler.close()
        lengths = sorted(count_by_len[i].keys())
        for k in lengths[::-1]:
            count_by_len[i][k] += sum([count_by_len[i][j]
                                       for j in lengths if j < k])
        plt.plot(lengths, [float(count_by_len[i][l]) / total_reads
                           for l in lengths],
                 label='read' + str(i + 1), linewidth=2, color=colors[i])
    plt.xlabel('read length (bp)')
    if total_reads != 1:
        plt.ylabel('Proportion of mapped reads')
    else:
        plt.ylabel('Number of mapped reads')
    plt.legend(loc=4)
    if savefig:
        tadbit_savefig(savefig)
    elif not axe:
        plt.show()
    return count_by_len
开发者ID:Tong-Chen,项目名称:tadbit,代码行数:52,代码来源:analyze.py

示例5: plot_genomic_distribution

def plot_genomic_distribution(fnam, first_read=True, resolution=10000,
                              genome_seq=None, axe=None, ylim=None, savefig=None):
    """
    :param fnam: input file name
    :param True first_read: map first read.
    :param 100 resolution: group reads that are closer than this resolution
       parameter
    :param None axe: a matplotlib.axes.Axes object to define the plot
       appearance
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    
    """

    distr = {}
    idx1, idx2 = (1, 3) if first_read else (7, 9)
    for line in open(fnam):
        crm, pos = line.split()[idx1:idx2]
        pos = int(pos) / resolution
        try:
            distr[crm][pos] += 1
        except KeyError:
            try:
                distr[crm][pos] = 1
            except KeyError:
                distr[crm] = {pos: 1}

    if not axe:
        fig=plt.figure(figsize=(15, 3 * len(distr.keys())))

    max_y = max([max(distr[c].values()) for c in distr])
    max_x = max([len(distr[c].values()) for c in distr])
    for i, crm in enumerate(genome_seq if genome_seq else distr):
        plt.subplot(len(distr.keys()), 1, i + 1)
        plt.plot(range(max(distr[crm])),
                 [distr[crm].get(j, 0) for j in xrange(max(distr[crm]))],
                 color='red', lw=1.5, alpha=0.7)
        if genome_seq:
            if ylim:
                plt.vlines(len(genome_seq[crm]) / resolution, ylim[0], ylim[1])
            else:
                plt.vlines(len(genome_seq[crm]) / resolution, 0, max_y)
        plt.xlim((0, max_x))
        plt.ylim(ylim or (0, max_y))
        plt.title(crm)

    if savefig:
        tadbit_savefig(savefig)
    elif not axe:
        plt.show()
开发者ID:Adrimel,项目名称:tadbit,代码行数:51,代码来源:analyze.py

示例6: hic_map

def hic_map(data, genome_seq, biases=None, masked=None, resolution=100000,
            savefig=None, show=False, savedata=None, focus=None):
    if isinstance(data, str):
        fnam = data
        cumcs = {} 
        total = 0
        for crm in genome_seq:
            cumcs[crm] = total
            total += len(genome_seq[crm]) / resolution + 1
        # bin the data
        data = [[0 for _ in xrange(total + 1)] for _ in xrange(total + 1)]
        masked = masked or set()
        for line in open(fnam):
            read, cr1, ps1, _, _, _, _, cr2, ps2, _, _, _, _ = line.split()
            if read in masked:
                continue
            ps1 = int(ps1) / resolution
            ps2 = int(ps2) / resolution
            try:
                data[cumcs[cr1] + ps1][cumcs[cr2] + ps2] += 1
            except:
                break
    else:
        hic_data = data
        beg, end = focus if focus else (0, len(hic_data))
        beg -= 1 if focus else 0
        if biases:
            data = [[hic_data[len(hic_data) * i + j] / (biases[i] * biases[j])
                     for j in xrange(beg, end)]
                    for i in xrange(beg, end)]
        else: 
            data = [[hic_data[len(hic_data) * i + j]
                     for j in xrange(beg, end)]
                    for i in xrange(beg, end)]
    # do the plot
    if show or savefig:
        import numpy as np
        plt.figure(figsize=(16, 12))
        plt.imshow(np.log2(data), origin='lower', cmap='gist_earth',
                   interpolation='nearest')
        plt.colorbar()
        if savefig:
            tadbit_savefig(savefig)
        elif show:
            plt.show()
    if savedata:
        out = open(savedata, 'w')
        for line in data:
            out.write('\t'.join([str(cell) for cell in line]) + '\n')
        out.close()
开发者ID:Adrimel,项目名称:tadbit,代码行数:50,代码来源:analyze.py

示例7: plot_distance_vs_interactions

def plot_distance_vs_interactions(fnam, min_diff=100, max_diff=1000000,
                                  resolution=100, axe=None, savefig=None):
    """
    :param fnam: input file name
    :param 100 min_diff: lower limit kn genomic distance (usually equal to read
       length)
    :param 1000000 max_diff: upper limit in genomic distance to look for
    :param 100 resolution: group reads that are closer than this resolution
       parameter
    :param None axe: a matplotlib.axes.Axes object to define the plot
       appearance
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    
    """
    dist_intr = {}
    for line in open(fnam):
        _, cr1, ps1, _, _, _, _, cr2, ps2, _ = line.rsplit('\t', 9)
        if cr1 != cr2:
            continue
        diff = resolution * (abs(int(ps1) - int(ps2)) / resolution)
        if max_diff > diff > min_diff:
            dist_intr.setdefault(diff, 0)
            dist_intr[diff] += 1
            
    for k in dist_intr.keys()[:]:
        if dist_intr[k] <= 2:
            del(dist_intr[k])
                    
    if not axe:
        fig=plt.figure()
        ax = fig.add_subplot(111)

    x, y = zip(*sorted(dist_intr.items(), key=lambda x:x[0]))
    plt.plot(x, y, 'k.')
    # sigma = 10
    # p_x = gaussian_filter1d(x, sigma)
    # p_y = gaussian_filter1d(y, sigma)
    # plot line of best fit
    # plt.plot(p_x, p_y,color= 'darkgreen', lw=2, label='Gaussian fit')
    plt.yscale('log')
    plt.xscale('log')
    plt.xlabel('Log genomic distance (binned by %d bp)' % resolution)
    plt.ylabel('Log interaction count')
    # plt.legend()
    if savefig:
        tadbit_savefig(savefig)
    elif not axe:
        plt.show()
开发者ID:Adrimel,项目名称:tadbit,代码行数:50,代码来源:analyze.py

示例8: correlate_matrices

def correlate_matrices(hic_data1, hic_data2, max_dist=10,
                       savefig=None, show=False, savedata=None):
    """
    Compare the iteractions of two Hi-C matrices at a given distance,
    with spearman rank correlation

    :param hic_data1: Hi-C-data object
    :param hic_data2: Hi-C-data object
    :param 1 resolution: to be used for scaling the plot
    :param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
       not look further than 10 times the resolution)
    :param None savefig: path to save the plot
    :param False show: displays the plot

    :returns: list of correlations and list of genomic distances
    """
    corr = []
    dist = []
    for i in xrange(1, max_dist + 1):
        diag1 = []
        diag2 = []
        for j in xrange(len(hic_data1) - i):
            diag1.append(hic_data1[j, i + j])
            diag2.append(hic_data2[j, i + j])
        corr.append(spearmanr(diag1, diag2)[0])
        dist.append(i)
    if show or savefig:
        plt.plot(dist, corr, color='orange', linewidth=3, alpha=.8)
        plt.xlabel('Genomic distance in bins')
        plt.ylabel('Spearman rank correlation')
        plt.xlim((0, dist[-1]))
        if savefig:
            tadbit_savefig(savefig)
        if show:
            plt.show()
        plt.close('all')
    if savedata:
        out = open(savedata, 'w')
        out.write('# genomic distance\tSpearman rank correlation\n')
        for i in xrange(len(corr)):
            out.write('%s\t%s\n' % (dist[i], corr[i]))
        out.close()

    return corr, dist
开发者ID:yuanbaowen521,项目名称:tadbit,代码行数:44,代码来源:analyze.py

示例9: eig_correlate_matrices

def eig_correlate_matrices(hic_data1, hic_data2, nvect=6, normalized=False, 
                           savefig=None, show=False, savedata=None,
                           remove_bad_columns=True, **kwargs):
    """
    Compare the iteractions of two Hi-C matrices using their 6 first
    eigenvectors, with pearson correlation

    :param hic_data1: Hi-C-data object
    :param hic_data2: Hi-C-data object
    :param 6 nvect: number of eigenvectors to compare
    :param None savefig: path to save the plot
    :param False show: displays the plot
    :param False normalized: use normalized data
    :param True remove_bads: computes the union of bad columns between samples
       and exclude them from the comparison
    :param kwargs: any argument to pass to matplotlib imshow function

    :returns: matrix of correlations
    """
    data1 = hic_data1.get_matrix(normalized=normalized)
    data2 = hic_data2.get_matrix(normalized=normalized)
    ## reduce matrices to remove bad columns
    if remove_bad_columns:
        # union of bad columns
        bads = hic_data1.bads.copy()
        bads.update(hic_data2.bads)
        # remove them form both matrices
        for bad in sorted(bads, reverse=True):
            del(data1[bad])
            del(data2[bad])
            for i in xrange(len(data1)):
                _ = data1[i].pop(bad)
                _ = data2[i].pop(bad)
    # get the log
    size = len(data1)
    data1 = nozero_log(data1, np.log2)
    data2 = nozero_log(data2, np.log2)
    # get the eigenvectors
    ev1, evect1 = eigh(data1)
    ev2, evect2 = eigh(data2)
    corr = [[0 for _ in xrange(nvect)] for _ in xrange(nvect)]
    # sort eigenvectors according to their eigenvalues => first is last!!
    sort_perm = ev1.argsort()
    ev1.sort()
    evect1 = evect1[sort_perm]
    sort_perm = ev2.argsort()
    ev2.sort()
    evect2 = evect2[sort_perm]
    # calculate Pearson correlation
    for i in xrange(nvect):
        for j in xrange(nvect):
            corr[i][j] = abs(pearsonr(evect1[:,-i-1],
                                      evect2[:,-j-1])[0])
    # plot
    axe    = plt.axes([0.1, 0.1, 0.6, 0.8])
    cbaxes = plt.axes([0.85, 0.1, 0.03, 0.8])
    if show or savefig:
        im = axe.imshow(corr, interpolation="nearest",origin='lower', **kwargs)
        axe.set_xlabel('Eigen Vectors exp. 1')
        axe.set_ylabel('Eigen Vectors exp. 2')
        axe.set_xticks(range(nvect))
        axe.set_yticks(range(nvect))
        axe.set_xticklabels(range(1, nvect + 2))
        axe.set_yticklabels(range(1, nvect + 2))
        axe.xaxis.set_tick_params(length=0, width=0)
        axe.yaxis.set_tick_params(length=0, width=0)
        
        cbar = plt.colorbar(im, cax = cbaxes )
        cbar.ax.set_ylabel('Pearson correlation', rotation=90*3,
                           verticalalignment='bottom')
        axe2 = axe.twinx()
        axe2.set_yticks(range(nvect))
        axe2.set_yticklabels(['%.1f' % (e) for e in ev2[-nvect:][::-1]])
        axe2.set_ylabel('corresponding Eigen Values exp. 2', rotation=90*3,
                        verticalalignment='bottom')
        axe2.set_ylim((-0.5, nvect - 0.5))
        axe2.yaxis.set_tick_params(length=0, width=0)
        
        axe3 = axe.twiny()
        axe3.set_xticks(range(nvect))
        axe3.set_xticklabels(['%.1f' % (e) for e in ev1[-nvect:][::-1]])
        axe3.set_xlabel('corresponding Eigen Values exp. 1')
        axe3.set_xlim((-0.5, nvect - 0.5))
        axe3.xaxis.set_tick_params(length=0, width=0)
        
        axe.set_ylim((-0.5, nvect - 0.5))
        axe.set_xlim((-0.5, nvect - 0.5))
        if savefig:
            tadbit_savefig(savefig)
        if show:
            plt.show()
        plt.close('all')

    if savedata:
        out = open(savedata, 'w')
        out.write('# ' + '\t'.join(['Eigen Vector %s'% i
                                    for i in xrange(nvect)]) + '\n')
        for i in xrange(nvect):
            out.write('\t'.join([str(corr[i][j])
                                 for j in xrange(nvect)]) + '\n')
#.........这里部分代码省略.........
开发者ID:kangk1204,项目名称:TADbit,代码行数:101,代码来源:analyze.py

示例10: quality_plot


#.........这里部分代码省略.........
            ax2.plot(sites[r_enz], linewidth=2, color = color.next(),
                     alpha=0.9,
                     label='Undigested RE site (%s: %s)' % (r_enz, r_sites[r_enz])
                     if any([f > 0 for f in fixes[r_enz]])
                     else 'Undigested & Dangling-Ends (%s: %s)' % (r_enz, r_sites[r_enz]))
        ax2.set_ylabel('Undigested')
        ax2.yaxis.label.set_color('darkred')
        ax2.tick_params(axis='y', colors='darkred', **tkw)

        lines, labels = ax2.get_legend_handles_labels()

        ax3 = ax2.twinx()
        color = iter(plt.cm.Blues(linspace(0.3, 0.95, len(liges))))
        for r1, r2 in liges:
            # print 'ligated', r1, r2
            # print liges[(r1, r2)][:20]
            ax3.plot(liges[(r1, r2)], linewidth=2, color=color.next(),
                     alpha=0.9,
                     label = 'Ligated (%s-%s: %s)' % (r1, r2, l_sites[(r1, r2)].upper()))
        ax3.yaxis.label.set_color('darkblue')
        ax3.tick_params(axis='y', colors='darkblue', **tkw)
        ax3.set_ylabel('Ligated')

        tmp_lines, tmp_labels = ax3.get_legend_handles_labels()
        lines.extend(tmp_lines)
        labels.extend(tmp_labels)

        color = iter(plt.cm.Greens(linspace(0.3, 0.95, len(r_enzs))))
        for i, r_enz in enumerate(r_enzs):
            if any([f > 0 for f in fixes[r_enz]]):
                ax4 = ax2.twinx()
                ax4.spines["right"].set_position(("axes", 1.07))
                make_patch_spines_invisible(ax4)
                ax4.spines["right"].set_visible(True)
                # print 'repaired', r_enz
                # print fixes[r_enz][:20]
                ax4.plot(fixes[r_enz], linewidth=2, color=color.next(),
                         alpha=0.9,
                         label='Dangling-ends (%s: %s)' % (r_enz, d_sites[r_enz]))
                ax4.yaxis.label.set_color('darkgreen')
                ax4.tick_params(axis='y', colors='darkgreen', **tkw)
                ax4.set_ylabel('Dangling-ends')
                tmp_lines, tmp_labels = ax4.get_legend_handles_labels()
                lines.extend(tmp_lines)
                labels.extend(tmp_labels)
            else:
                ax2.set_ylabel('Undigested & Dangling-ends')
        ax2.set_xlim((0, max_seq_len))

        # Count ligation sites
        lig_cnt = {}
        for k in liges:
            lig_cnt[k] = (nansum(liges[k]) - liges[k][0] -
                              liges[k][max_seq_len / 2])

        # Count undigested sites
        sit_cnt = {}
        for r_enz in r_enzs:
            sit_cnt[r_enz] = (nansum(sites[r_enz]) - sites[r_enz][0] -
                              sites[r_enz][max_seq_len / 2])

        # Count Dangling-Ends
        des = {}
        for r_enz in r_enzs:
            if any([f > 0 for f in fixes[r_enz]]):
                des[r_enz] = ((100. * (fixes[r_enz][0] + (fixes[r_enz][(max_seq_len / 2)]
                                                          if paired else 0))) / nreads)
            else:
                des[r_enz] = (100. * (sites[r_enz][0] + (sites[r_enz][(max_seq_len / 2)]
                                                         if paired else 0))) / nreads

        # Decorate plot
        title = ''
        for r_enz in r_enzs:
            lcnt = float(sum([lig_cnt[(r_enz1, r_enz2)] * (2 if r_enz1 == r_enz2 else 1)
                              for r_enz1 in r_enzs for r_enz2 in r_enzs
                              if r_enz1 == r_enz or r_enz2 == r_enz]))
            title += ('Percentage of digested sites (not considering Dangling-Ends) '
                      '%s: %.1f%%\n' % (r_enz,
                                        100. * float(lcnt) / (lcnt + sit_cnt[r_enz])))
        for r_enz in r_enzs:
            title += 'Percentage of dangling-ends %s: %.1f%%\n' % (r_enz, des[r_enz])

        for r_enz1 in r_enzs:
            for r_enz2 in r_enzs:
                title += ('Percentage of reads with ligation site (%s-%s): %.1f%% \n' %
                          (r_enz1, r_enz2, (ligep[(r_enz1, r_enz2)] * 100.) / nreads))
        plt.title(title.strip(), size=10, ha='left', x=0)
        plt.subplots_adjust(right=0.85)
        ax2.legend(lines, labels, bbox_to_anchor=(0.75, 1.0),
                   loc=3, borderaxespad=0., frameon=False, fontsize=9)
    plt.tight_layout()
    if savefig:
        tadbit_savefig(savefig)
        plt.close('all')
    elif not axe:
        plt.show()
    for k in ligep:
        ligep[k] = (ligep[k] * 100.) / nreads
    return des, ligep
开发者ID:3DGenomes,项目名称:TADbit,代码行数:101,代码来源:fastq_utils.py

示例11: plot_genomic_distribution

def plot_genomic_distribution(fnam, first_read=True, resolution=10000,
                              axe=None, ylim=None, savefig=None, show=False,
                              savedata=None, chr_names=None, nreads=None):
    """
    :param fnam: input file name
    :param True first_read: uses first read.
    :param 100 resolution: group reads that are closer than this resolution
       parameter
    :param None axe: a matplotlib.axes.Axes object to define the plot
       appearance
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    :param None savedata: path where to store the output read counts per bin.
    :param None chr_names: can pass a list of chromosome names in case only some
       them the need to be plotted (this option may last even more than default)
    
    """
    distr = {}
    idx1, idx2 = (1, 3) if first_read else (7, 9)
    genome_seq = OrderedDict()
    fhandler = open(fnam)
    line = fhandler.next()
    if chr_names:
        chr_names = set(chr_names)
        cond1 = lambda x: x not in chr_names
    else:
        cond1 = lambda x: False
    if nreads:
        cond2 = lambda x: x >= nreads
    else:
        cond2 = lambda x: False
    cond = lambda x, y: cond1(x) and cond2(y)
    count = 0
    while line.startswith('#'):
        if line.startswith('# CRM '):
            crm, clen = line[6:].split('\t')
            genome_seq[crm] = int(clen)
        line = fhandler.next()
    try:
        while True:
            crm, pos = line.strip().split('\t')[idx1:idx2]
            count += 1
            if cond(crm, count):
                line = fhandler.next()
                if cond2(count):
                    break
                continue
            pos = int(pos) / resolution
            try:
                distr[crm][pos] += 1
            except KeyError:
                try:
                    distr[crm][pos] = 1
                except KeyError:
                    distr[crm] = {pos: 1}
            line = fhandler.next()
    except StopIteration:
        pass
    fhandler.close()
    if not axe:
        _ = plt.figure(figsize=(15, 1 + 3 * len(
                              chr_names if chr_names else distr.keys())))

    max_y = max([max(distr[c].values()) for c in distr])
    max_x = max([len(distr[c].values()) for c in distr])
    ncrms = len(chr_names if chr_names else genome_seq if genome_seq else distr)
    data = {}
    for i, crm in enumerate(chr_names if chr_names else genome_seq
                            if genome_seq else distr):
        try:
            data[crm] = [distr[crm].get(j, 0) for j in xrange(max(distr[crm]))]
            if savefig:
                plt.subplot(ncrms, 1, i + 1)
                plt.plot(range(max(distr[crm])), data[crm],
                         color='red', lw=1.5, alpha=0.7)
        except KeyError:
            pass
        if savefig:
            if ylim:
                plt.vlines(genome_seq[crm] / resolution, ylim[0], ylim[1])
            else:
                plt.vlines(genome_seq[crm] / resolution, 0, max_y)
            plt.xlim((0, max_x))
            plt.ylim(ylim or (0, max_y))
            plt.title(crm)

    if savefig:
        tadbit_savefig(savefig)
        plt.close('all')
    elif show:
        plt.show()

    if savedata:
        out = open(savedata, 'w')
        out.write('# CRM\tstart-end\tcount\n')
        out.write('\n'.join('%s\t%d-%d\t%d' % (c, (i * resolution) + 1, ((i + 1) * resolution), v) for c in data
                            for i, v in enumerate(data[c])))
        out.write('\n')
        out.close()
开发者ID:kangk1204,项目名称:TADbit,代码行数:100,代码来源:analyze.py

示例12: correlate_matrices

def correlate_matrices(hic_data1, hic_data2, max_dist=10, intra=False, axe=None,
                       savefig=None, show=False, savedata=None,
                       normalized=False, remove_bad_columns=True, **kwargs):
    """
    Compare the iteractions of two Hi-C matrices at a given distance,
    with spearman rank correlation

    :param hic_data1: Hi-C-data object
    :param hic_data2: Hi-C-data object
    :param 1 resolution: to be used for scaling the plot
    :param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
       not look further than 10 times the resolution)
    :param None savefig: path to save the plot
    :param False intra: only takes into account intra-chromosomal contacts
    :param False show: displays the plot
    :param False normalized: use normalized data
    :param True remove_bads: computes the union of bad columns between samples
       and exclude them from the comparison

    :returns: list of correlations and list of genomic distances
    """
    corrs = []
    dists = []

    if normalized:
        get_the_guy1 = lambda i, j: (hic_data1[j, i] / hic_data1.bias[i] /
                                     hic_data1.bias[j])
        get_the_guy2 = lambda i, j: (hic_data2[j, i] / hic_data2.bias[i] /
                                     hic_data2.bias[j])
    else:
        get_the_guy1 = lambda i, j: hic_data1[j, i]
        get_the_guy2 = lambda i, j: hic_data2[j, i]
    
    if remove_bad_columns:
        # union of bad columns
        bads = hic_data1.bads.copy()
        bads.update(hic_data2.bads)

    if (intra and hic_data1.sections and hic_data2.sections and 
        hic_data1.sections == hic_data2.sections):
        for dist in xrange(1, max_dist + 1):
            diag1 = []
            diag2 = []
            for crm in hic_data1.section_pos:
                for j in xrange(hic_data1.section_pos[crm][0],
                                hic_data1.section_pos[crm][1] - dist):
                    i = j + dist
                    if j in bads or i in bads:
                        continue
                    diag1.append(get_the_guy1(i, j))
                    diag2.append(get_the_guy2(i, j))
            corrs.append(spearmanr(diag1, diag2)[0])
            dists.append(dist)
    else:
        if intra:
            warn('WARNING: hic_dta does not contain chromosome coordinates, ' +
                 'intra set to False')
        for dist in xrange(1, max_dist + 1):
            diag1 = []
            diag2 = []
            for j in xrange(len(hic_data1) - dist):
                i = j + dist
                if j in bads or i in bads:
                    continue
                diag1.append(get_the_guy1(i, j))
                diag2.append(get_the_guy2(i, j))
            corrs.append(spearmanr(diag1, diag2)[0])
            dists.append(dist)
    if show or savefig or axe:
        if not axe:
            fig = plt.figure()
            axe = fig.add_subplot(111)
            given_axe = False
        else:
            given_axe = True
        axe.plot(dists, corrs, color='orange', linewidth=3, alpha=.8)
        axe.set_xlabel('Genomic distance in bins')
        axe.set_ylabel('Spearman rank correlation')
        axe.set_xlim((0, dists[-1]))
        if savefig:
            tadbit_savefig(savefig)
        if show:
            plt.show()
        if not given_axe:
            plt.close('all')
    if savedata:
        out = open(savedata, 'w')
        out.write('# genomic distance\tSpearman rank correlation\n')
        for i in xrange(len(corrs)):
            out.write('%s\t%s\n' % (dists[i], corrs[i]))
        out.close()
    if kwargs.get('get_bads', False):
        return corrs, dists, bads
    else:
        return corrs, dists
开发者ID:kangk1204,项目名称:TADbit,代码行数:95,代码来源:analyze.py

示例13: plot_distance_vs_interactions


#.........这里部分代码省略.........
    for k in xrange(len(xp)):
        if yp[k]:
            x.append(xp[k])
            y.append(yp[k])
    axe.plot(x, y, 'k.')
    best = (float('-inf'), 0, 0, 0, 0, 0, 0, 0, 0, 0)
    logx = np.log(x)
    logy = np.log(y)
    ntries = 100
    # set k for better fit
    # for k in xrange(1, ntries/5, ntries/5/5):
    if resolution == 1:
        k = 1
        for i in xrange(3, ntries-2-k):
            v1 = i * len(x) / ntries
            try:
                a1, b1, r21, _, _ = linregress(logx[ :v1], logy[ :v1])
            except ValueError:
                a1 = b1 = r21 = 0
            r21 *= r21
            for j in xrange(i + 1 + k, ntries - 2 - k):
                v2 = j * len(x) / ntries
                try:
                    a2, b2, r22, _, _ = linregress(logx[v1+k:v2], logy[v1+k:v2])
                    a3, b3, r23, _, _ = linregress(logx[v2+k:  ], logy[v2+k: ])
                except ValueError:
                    a2 = b2 = r22 = 0
                    a3 = b3 = r23 = 0
                r2 = r21 + r22**2 + r23**2
                if r2 > best[0]:
                    best = (r2, v1, v2, a1, a2, a3,
                            b1, b2, b3, k)
        # plot line of best fit
        (v1, v2, 
         a1, a2, a3,
         b1, b2, b3, k) = best[1:]
        yfit1 = lambda xx: np.exp(b1 + a1*np.array (np.log(xx)))
        yfit2 = lambda xx: np.exp(b2 + a2*np.array (np.log(xx)))
        yfit3 = lambda xx: np.exp(b3 + a3*np.array (np.log(xx)))
        axe.plot(x[  :v1], yfit1(x[  :v1] ), color= 'yellow', lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '0-0.7 \mathrm{ Mb}' if resolution != 1 else '1', a1))
                 #label = r'$\alpha_1=%.2f$ (0-%d)' % (a1, x[v1]))
        axe.plot(x[v1+k:v2], yfit2(x[v1+k:v2]),  color= 'orange', lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '0.7-10 \mathrm{ Mb}' if resolution != 1 else '2', a2))
                 # label = r'$\alpha_2=%.2f$ (%d-%d)' % (a2, x[v1], x[v2]))
        axe.plot(x[v2+k:  ], yfit3(x[v2+k:  ] ), color= 'red'   , lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '10 \mathrm{ Mb}-\infty' if resolution != 1 else '3', a3))
                 # label = r'$\alpha_3=%.2f$ (%d-$\infty$)' % (a3, x[v2+k]))
    else:
        # from 0.7 Mb
        v1 = 700000   / resolution
        # to 10 Mb
        v2 = 10000000 / resolution
        try:
            a1, b1, r21, _, _ = linregress(logx[  :v1], logy[  :v1])
        except ValueError:
            a1, b1, r21 = 0, 0, 0
        try:
            a2, b2, r22, _, _ = linregress(logx[v1:v2], logy[v1:v2])
        except ValueError:
            a2, b2, r22 = 0, 0, 0
        try:
            a3, b3, r23, _, _ = linregress(logx[v2:  ], logy[v2:  ])
        except ValueError:
            a3, b3, r23 = 0, 0, 0
        yfit1 = lambda xx: np.exp(b1 + a1*np.array (np.log(xx)))
        yfit2 = lambda xx: np.exp(b2 + a2*np.array (np.log(xx)))
        yfit3 = lambda xx: np.exp(b3 + a3*np.array (np.log(xx)))
        axe.plot(x[  :v1], yfit1(x[  :v1] ), color= 'yellow', lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '0-0.7 \mathrm{ Mb}' if resolution != 1 else '1', a1))
                 #label = r'$\alpha_1=%.2f$ (0-%d)' % (a1, x[v1]))
        axe.plot(x[v1:v2], yfit2(x[v1:v2]),  color= 'orange', lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '0.7-10 \mathrm{ Mb}' if resolution != 1 else '2', a2))
                 # label = r'$\alpha_2=%.2f$ (%d-%d)' % (a2, x[v1], x[v2]))
        axe.plot(x[v2:  ], yfit3(x[v2:  ] ), color= 'red'   , lw=2,
                 label = r'$\alpha_{%s}=%.2f$' % (
                     '10 \mathrm{ Mb}-\infty' if resolution != 1 else '3', a3))
                 # label = r'$\alpha_3=%.2f$ (%d-$\infty$)' % (a3, x[v2+k]))
    axe.set_ylabel('Log interaction count')
    axe.set_xlabel('Log genomic distance (resolution: %s)' % nicer(resolution))
    axe.legend(loc='lower left', frameon=False)
    axe.set_xscale('log')
    axe.set_yscale('log')
    axe.set_xlim((min_diff, max_diff))
    try:
        axe.set_ylim((0, max(y)))
    except ValueError:
        pass
    if savefig:
        tadbit_savefig(savefig)
        plt.close('all')
    elif show==True:
        plt.show()
        plt.close('all')
    return (a1, b1, r21), (a2, b2, r22), (a3, b3, r23)
开发者ID:kangk1204,项目名称:TADbit,代码行数:101,代码来源:analyze.py

示例14: insert_sizes

def insert_sizes(fnam, savefig=None, nreads=None, max_size=99.9, axe=None,
                 show=False, xlog=False, stats=('median', 'perc_max')):
    """
    Plots the distribution of dangling-ends lengths
    :param fnam: input file name
    :param None savefig: path where to store the output images.
    :param 99.9 max_size: top percentage of distances to consider, within the
       top 0.01% are usually found very long outliers.
    :param False xlog: represent x axis in logarithmic scale
    :param ('median', 'perc_max') stats: returns this set of values calculated from the
       distribution of insert/fragment sizes. Possible values are:
        - 'median' median of the distribution
        - 'perc_max' percentil defined by the other parameter 'max_size'
        - 'first_deacay' starting from the median of the distribution to the
            first window where 10 consecutive insert sizes are counted less than
            a given value (this given value is equal to the sum of all
            sizes divided by 100 000)
        - 'MAD' Double Median Adjusted Deviation

    :returns: the median value and the percentile inputed as max_size.
    """
    distr = {}
    genome_seq = OrderedDict()
    fhandler = open(fnam)
    line = fhandler.next()
    while line.startswith('#'):
        if line.startswith('# CRM '):
            crm, clen = line[6:].split()
            genome_seq[crm] = int(clen)
        line = fhandler.next()
    des = []
    if nreads:
        nreads /= 2
    try:
        while True:
            (crm1, pos1, dir1, _, re1, _,
             crm2, pos2, dir2, _, re2) = line.strip().split('\t')[1:12]
            if re1==re2 and crm1 == crm2 and dir1 != dir2:
                pos1, pos2 = int(pos1), int(pos2)
                if (pos2 > pos1) == int(dir1):
                    des.append(abs(pos2 - pos1))
                if len(des) == nreads:
                    break
            line = fhandler.next()
    except StopIteration:
        pass
    fhandler.close()
    max_perc = np.percentile(des, max_size)
    perc99   = np.percentile(des, 99)
    perc01   = np.percentile(des, 1)
    perc50   = np.percentile(des, 50)
    perc95   = np.percentile(des, 95)
    perc05   = np.percentile(des, 5)
    to_return = {'median': perc50}
    cutoff = len(des) / 100000.
    count  = 0
    for v in xrange(int(perc50), int(max(des))):
        if des.count(v) < cutoff:
            count += 1
        else:
            count = 0
        if count >= 10:
            to_return['first_decay'] = v - 10
            break
    else:
        raise Exception('ERROR: not found')
    to_return['perc_max'] = max_perc
    to_return['MAD'] = mad(des)
    if not savefig and not axe and not show:
        return [to_return[k] for k in stats]
    
    ax = setup_plot(axe, figsize=(10, 5.5))
    desapan = ax.axvspan(perc95, perc99, facecolor='darkolivegreen', alpha=.3,
                         label='1-99%% DEs\n(%.0f-%.0f nts)' % (perc01, perc99))
    ax.axvspan(perc01, perc05, facecolor='darkolivegreen', alpha=.3)
    desapan = ax.axvspan(perc05, perc95, facecolor='darkseagreen', alpha=.3,
                         label='5-95%% DEs\n(%.0f-%.0f nts)' % (perc05, perc95))
    deshist = ax.hist(des, bins=100, range=(0, max_perc),
                      alpha=.7, color='darkred', label='Dangling-ends')
    ylims   = ax.get_ylim()
    plots   = []
    ax.set_xlabel('Genomic distance between reads')
    ax.set_ylabel('Count')
    ax.set_title('Distribution of dangling-ends ' +
                 'lenghts\n(median: %s, top %.1f%%, up to %0.f nts)' % (
                     perc50, max_size, max_perc))
    if xlog:
        ax.set_xscale('log')
    ax.set_xlim((50, max_perc))
    plt.subplots_adjust(left=0.1, right=0.75)
    ax.legend(bbox_to_anchor=(1.4, 1), frameon=False)
    if savefig:
        tadbit_savefig(savefig)
    elif show and not axe:
        plt.show()
    plt.close('all')
    return [to_return[k] for k in stats]
开发者ID:kangk1204,项目名称:TADbit,代码行数:97,代码来源:analyze.py

示例15: filter_by_zero_count

def filter_by_zero_count(matrx, draw_hist=False, savefig=None):
    """
    fits the distribution of Hi-C interaction count by column in the matrix to
    a polynomial. Then searches for the first possible 
    """
    nbins = 100
    # get sum of columns
    cols = []
    for c in sorted(matrx, key=sum):
        cols.append(len(c) - c.count(0))
    cols = np.array(cols)
    if draw_hist:
        plt.figure(figsize=(9, 9))
    median = np.median(cols)
    # mad = np.median([abs(median - c ) for c in cols])
    best =(None, None, None, None)
    # bin the sum of columns
    xmin = min(cols)
    xmax = max(cols)
    y = np.linspace(xmin, xmax, nbins)
    hist = np.digitize(cols, y)
    x = [sum(hist == i) for i in range(1, nbins + 1)]
    if draw_hist:
        hist = plt.hist(cols, bins=100, alpha=.3, color='grey')
    xp = range(0, cols[-1])
    # check if the binning is correct
    # we want at list half of the bins with some data
    while list(x).count(0) > 2*len(x)/3:
        cols = cols[:-1]
        xmin = min(cols)
        xmax = max(cols)
        y = np.linspace(xmin, xmax, nbins)
        hist = np.digitize(cols, y)
        x = [sum(hist == i) for i in range(1, nbins + 1)]
        if draw_hist:
            plt.clf()
            hist = plt.hist(cols, bins=100, alpha=.3, color='grey')
        xp = range(0, cols[-1])
    # find best polynomial fit in a given range
    for order in range(7, 14):
        z = np.polyfit(y, x, order)
        zpp = np.polyder(z, m=1)
        roots = np.roots(np.polyder(z))
        # check that we are concave down, otherwise take next root
        pente = np.polyval(zpp, abs(roots[-2] - roots[-1]) / 2 + roots[-1])
        if pente > 0:
            root = roots[-1]
        else:
            root = roots[-2]
        # root must be higher than zero
        if root <= 0:
            continue
        # and lower than the median
        if root >= median:
            continue
        p  = np.poly1d(z)
        R2 = get_r2(p, x, y)
        if best[0] < R2:
            best = (R2, order, p, z, root)
    p, z, root = best[2:]
    if draw_hist:
        a = plt.plot(xp, p(xp), "--", color='k')
        b = plt.vlines(root, 0, plt.ylim()[1], colors='r', linestyles='dashed')
        try:
            plt.legend(a + [b], ['polyfit \n%s' % (
                ''.join([sub('e([-+][0-9]+)', 'e^{\\1}',
                             '$%s%.1fx^%s$' % ('+' if j>0 else '', j,
                                               '{' + str(i) + '}'))
                         for i, j in enumerate(list(p)[::-1])])),
                                   'first solution of polynomial derivation'],
                       fontsize='x-small')
        except TypeError:
            plt.legend(a + [b], ['polyfit \n%s' % (
                ''.join([sub('e([-+][0-9]+)', 'e^{\\1}',
                             '$%s%.1fx^%s$' % ('+' if j>0 else '', j,
                                               '{' + str(i) + '}'))
                         for i, j in enumerate(list(p)[::-1])])),
                                   'first solution of polynomial derivation'])
        plt.ylim(0, plt.ylim()[1])
        if savefig:
            tadbit_savefig(savefig)
        else:
            plt.show()
    # label as bad the columns with sums lower than the root
    bads = {}
    for i, col in enumerate(matrx):
        if sum(col) < root:
            bads[i] = None
    # now stored in Experiment._zeros, used for getting more accurate z-scores
    return bads
开发者ID:Adrimel,项目名称:tadbit,代码行数:90,代码来源:hic_filtering.py


注:本文中的pytadbit.utils.extraviews.tadbit_savefig函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。