当前位置: 首页>>代码示例>>Python>>正文


Python scipy.in1d函数代码示例

本文整理汇总了Python中scipy.in1d函数的典型用法代码示例。如果您正苦于以下问题:Python in1d函数的具体用法?Python in1d怎么用?Python in1d使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了in1d函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _generate_masked_mesh

 def _generate_masked_mesh(self, cell_mask=None):
     r"""
     Generates the mesh based on the cell mask provided
     """
     #
     if cell_mask is None:
         cell_mask = sp.ones(self.data_map.shape, dtype=bool)
     #
     # initializing arrays
     self._edges = sp.ones(0, dtype=str)
     self._merge_patch_pairs = sp.ones(0, dtype=str)
     self._create_blocks(cell_mask)
     #
     # building face arrays
     mapper = sp.ravel(sp.array(cell_mask, dtype=int))
     mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
     mapper = sp.reshape(mapper, (self.nz, self.nx))
     mapper[~cell_mask] = -sp.iinfo(int).max
     #
     boundary_dict = {
         'bottom':
             {'bottom': mapper[0, :][cell_mask[0, :]]},
         'top':
             {'top': mapper[-1, :][cell_mask[-1, :]]},
         'left':
             {'left': mapper[:, 0][cell_mask[:, 0]]},
         'right':
             {'right': mapper[:, -1][cell_mask[:, -1]]},
         'front':
             {'front': mapper[cell_mask]},
         'back':
             {'back': mapper[cell_mask]},
         'internal':
             {'bottom': [], 'top': [], 'left': [], 'right': []}
     }
     #
     # determining cells linked to a masked cell
     cell_mask = sp.where(~sp.ravel(cell_mask))[0]
     inds = sp.in1d(self._field._cell_interfaces, cell_mask)
     inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
     inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
     inds = (inds == 1)
     links = self._field._cell_interfaces[inds]
     #
     # adjusting order so masked cells are all on links[:, 1]
     swap = sp.in1d(links[:, 0], cell_mask)
     links[swap] = links[swap, ::-1]
     #
     # setting side based on index difference
     sides = sp.ndarray(len(links), dtype='<U6')
     sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom'
     sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top'
     sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left'
     sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right'
     #
     # adding each block to the internal face dictionary
     inds = sp.ravel(mapper)[links[:, 0]]
     for side, block_id in zip(sides, inds):
         boundary_dict['internal'][side].append(block_id)
     self.set_boundary_patches(boundary_dict, reset=True)
开发者ID:stadelmanma,项目名称:netl-AP_MAP_FLOW,代码行数:60,代码来源:__BlockMeshDict__.py

示例2: plot_overlap_ps

def plot_overlap_ps(result_file, ss_file='/Users/bjarnivilhjalmsson/data/GIANT/GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt',
                   fig_filename='/Users/bjarnivilhjalmsson/data/tmp/manhattan_combPC_HGT.png', method='combPC',
                   ylabel='Comb. PC (HIP,WC,HGT,BMI) $-log_{10}(P$-value$)$', xlabel='Height $-log_{10}(P$-value$)$', p_thres=0.00001):
    # Parse results ans SS file
    res_table = pandas.read_table(result_file)
    ss_table = pandas.read_table(ss_file)
    # Parse 
    res_sids = sp.array(res_table['SNPid'])
    if method == 'MVT':
        comb_ps = sp.array(res_table['pval'])
    elif method == 'combPC':
        comb_ps = sp.array(res_table['combPC'])
    if 'MarkerName' in ss_table.keys():
        ss_sids = sp.array(ss_table['MarkerName'])
    elif 'SNP' in ss_table.keys():
        ss_sids = sp.array(ss_table['SNP'])
    else:
        raise Exception("Don't know where to look for rs IDs")
    marg_ps = sp.array(ss_table['p'])
    
    # Filtering boring p-values
    res_p_filter = comb_ps < p_thres
    res_sids = res_sids[res_p_filter]
    comb_ps = comb_ps[res_p_filter]
#     ss_p_filter = marg_ps<p_thres
#     ss_sids = ss_sids[ss_p_filter]
#     marg_ps = marg_ps[ss_p_filter]
    
    common_sids = sp.intersect1d(res_sids, ss_sids)
    print 'Found %d SNPs in common' % (len(common_sids))
    ss_filter = sp.in1d(ss_sids, common_sids)
    res_filter = sp.in1d(res_sids, common_sids)
    
    ss_sids = ss_sids[ss_filter]
    res_sids = res_sids[res_filter]
    marg_ps = marg_ps[ss_filter]
    comb_ps = comb_ps[res_filter]
    
    print 'Now sorting'
    ss_index = sp.argsort(ss_sids)
    res_index = sp.argsort(res_sids)
    
    marg_ps = -sp.log10(marg_ps[ss_index])
    comb_ps = -sp.log10(comb_ps[res_index])
    
    with plt.style.context('fivethirtyeight'):
        plt.plot(marg_ps, comb_ps, 'b.', alpha=0.2)
        (x_min, x_max) = plt.xlim()
        (y_min, y_max) = plt.ylim()
        
        plt.plot([x_min, x_max], [y_min, y_max], 'k--', alpha=0.2)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.tight_layout()
        plt.savefig(fig_filename)
    plt.clf()
开发者ID:bvilhjal,项目名称:pypcma,代码行数:56,代码来源:analyze_results.py

示例3: test_trim_extend

def test_trim_extend():
    pn = OpenPNM.Network.Cubic(shape=[5, 5, 5])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [125, 300]
    pn.trim(pores=[0])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [124, 297]
    pn.extend(pore_coords=[0, 0, 0], throat_conns=[[124, 0]])
    assert [pn.Np, pn.Nt] == [125, 298]
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25, 124]))
开发者ID:TomTranter,项目名称:OpenPNM,代码行数:10,代码来源:test_topological_manipulations.py

示例4: make_unique_by_event

def make_unique_by_event(event_list):
    # function event_list = make_unique_by_event(event_list)
    #
    # This script removes all events that share the sam alternative evnt coordinates
    # but differ in the flanking size. The longest of several equal events is kept.

    rm_idx = []
    last_kept = 0
    for i in range(1, event_list.shape[0]):
        if i % 1000 == 0:
            print '.',
            if i % 10000 == 0:
                print '%i' % i
        
        old_coords = event_list[last_kept].get_inner_coords(trafo=True)
        curr_coords = event_list[i].get_inner_coords(trafo=True) 

        if old_coords.shape[0] == curr_coords.shape[0] and sp.all(old_coords == curr_coords):

            ### assertion that we did everything right
            assert(event_list[last_kept].chr == event_list[i].chr)
            assert(event_list[last_kept].strand == event_list[i].strand)
            
            ### check, which event is longer -> keep longer event
            len1 = event_list[last_kept].get_len()
            len2 = event_list[i].get_len()

            if len1 > len2:
                keep_idx = last_kept
                not_keep_idx = i
            else:
                keep_idx = i
                not_keep_idx = last_kept

            ### check if we would loose strains 
            idx = sp.where(~sp.in1d(event_list[not_keep_idx].strain, event_list[keep_idx].strain))[0]
            if idx.shape[0] > 0:
                event_list[keep_idx].strain = sp.r_[event_list[keep_idx].strain, event_list[not_keep_idx].strain[idx]]
                ### TODO !!!!!!!!!!!!! make sure that we keep different coordinates if the strains differ ...
                event_list[keep_idx].gene_name = sp.union1d(event_list[keep_idx].gene_name, event_list[not_keep_idx].gene_name)

            rm_idx.append(not_keep_idx)
            last_kept = keep_idx
        else:
            last_kept = i

    print 'events dropped: %i' % len(rm_idx)
    keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
    event_list = event_list[keep_idx]

    return event_list
开发者ID:ccwang12,项目名称:spladder,代码行数:51,代码来源:events.py

示例5: intersect_rows

def intersect_rows(array1, array2, index=False):
    """Return array with rows that intersect between array1 and array2"""

    tmp1 = sp.array(['-'.join(array1[i, :].astype('str')) for i in range(array1.shape[0])])
    tmp2 = sp.array(['-'.join(array2[i, :].astype('str')) for i in range(array2.shape[0])])
    
    idx = sp.where(sp.in1d(tmp1, tmp2))[0]
    if index:
        idx2 = sp.where(sp.in1d(tmp2, tmp1))[0]

    if index:
        return (array1[idx, :], idx, idx2)
    else:
        return (array1[idx, :], None, None)
开发者ID:ratschlab,项目名称:spladder,代码行数:14,代码来源:utils.py

示例6: calculate_hapmap_pcs

def calculate_hapmap_pcs(hapmap_file, pc_weights_dict, snps_filter=None):
    """
    Calculates the principal components for the hapmap project

    :param hapmap_file: Hapmap file in HDF5 format
    :param pc_weights_dict: dictionary with SNP weights (key = snpid)
    :param snps_filter: list of snp-ids to subset (optional)
    :return: dictionary with pcs and number of snps that were used
    """
    log.info('Calculating Principal components for Hapmap file %s' % hapmap_file)
    ok_sids = np.asarray(list(pc_weights_dict.keys()))
    log.info('Loaded PC weight for %d SNPs' % (len(ok_sids)))
    # Load genotypes
    log.info('Load Hapmap dataset')
    h5f = h5py.File(hapmap_file, 'r')
    num_indivs = len(h5f['indivs']['continent'][...])
    log.info('Found genotypes for %d individuals' % num_indivs)
    pcs = sp.zeros((num_indivs, 2))
    num_nt_issues = 0
    num_snps_used = 0
    log.info('Calculating PCs')
    for chrom in range(1, 23):
        log.info('Working on Chromosome %d' % chrom)
        chrom_str = 'chr%d' % chrom

        log.info('Identifying overlap')
        ok_snp_filter = sp.in1d(ok_sids, snps_filter[chrom_str])
        ok_chrom_sids = ok_sids.compress(ok_snp_filter, axis=0)
        sids = h5f[chrom_str]['variants']['ID'][...]
        ok_snp_filter = sp.in1d(sids, ok_chrom_sids)
        #         assert sids[ok_snp_filter]==ok_sids, 'WTF?'
        sids = sids.compress(ok_snp_filter, axis=0)

        log.info('Loading SNPs')
        snps = h5f[chrom_str]['calldata']['snps'][...]
        snps = snps.compress(ok_snp_filter, axis=0)
        length = len(h5f[chrom_str]['variants/REF'])
        nts = np.hstack((h5f[chrom_str]['variants/REF'][:].reshape(length, 1),
                         h5f[chrom_str]['variants/ALT'][:].reshape(length, 1)))
        nts = nts.compress(ok_snp_filter, axis=0)
        log.info('Updating PCs')
        pcs_per_chr = _calc_pcs(pc_weights_dict, sids, nts, snps)
        pcs += pcs_per_chr['pcs']
        num_nt_issues += pcs_per_chr['num_nt_issues']
        num_snps_used += pcs_per_chr['num_snps_used']

    h5f.close()
    log.info('%d SNPs were excluded from the analysis due to nucleotide issues.' % (num_nt_issues))
    log.info('%d SNPs were used for the analysis.' % (num_snps_used))
    return {'pcs': pcs, 'num_snps_used': num_snps_used}
开发者ID:TheHonestGene,项目名称:ancestor,代码行数:50,代码来源:ancestry.py

示例7: find_interface_throats

    def find_interface_throats(self, labels=[]):
        r"""
        Finds the throats that join two pore labels.

        Parameters
        ----------
        labels : list of strings
            The labels of the two pore groups whose interface is sought

        Returns
        -------
        An array of throat numbers that connect the given pore groups

        Notes
        -----
        This method is meant to find interfaces between TWO groups, regions or
        clusters of pores (as defined by their label).  If the input labels
        overlap or are not adjacent, an empty array is returned.

        Examples
        --------
        >>> import OpenPNM
        >>> pn = OpenPNM.Network.TestNet()
        >>> pn['pore.domain1'] = False
        >>> pn['pore.domain2'] = False
        >>> pn['pore.domain1'][[0, 1, 2]] = True
        >>> pn['pore.domain2'][[5, 6, 7]] = True
        >>> pn.find_interface_throats(labels=['domain1', 'domain2'])
        array([1, 4, 7])

        TODO: It might be a good idea to allow overlapping regions
        """
        Tind = sp.array([], ndmin=1)
        if sp.shape(labels)[0] != 2:
            logger.error('Exactly two labels must be given')
            pass
        else:
            P1 = self.pores(labels=labels[0])
            P2 = self.pores(labels=labels[1])
            # Check if labels overlap
            if sp.sum(sp.in1d(P1, P2)) > 0:
                logger.error('Some labels overlap, iterface cannot be found')
                pass
            else:
                T1 = self.find_neighbor_throats(P1)
                T2 = self.find_neighbor_throats(P2)
                Tmask = sp.in1d(T1, T2)
                Tind = T1[Tmask]
        return Tind
开发者ID:TomTranter,项目名称:OpenPNM,代码行数:49,代码来源:__GenericNetwork__.py

示例8: on_shifted_dwp_curves

    def on_shifted_dwp_curves(self, t):
        a = P4Rm()
        if a.AllDataDict['model'] == 0:
            temp_1 = arange(2, len(a.ParamDict['dwp'])+1)
            temp_2 = temp_1 * t / (len(a.ParamDict['dwp']))
            P4Rm.ParamDict['x_dwp'] = t - temp_2
            shifted_dwp = a.ParamDict['dwp'][:-1:]
            temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                          around(a.ParamDict['x_dwp'], decimals=3))
            temp_4 = a.ParamDict['DW_i'][temp_3]
            P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
            P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.

            P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
            P4Rm.ParamDict['dw_out'] = a.ParamDict['dwp'][-1]

        elif a.AllDataDict['model'] == 1:
            temp_1 = arange(0, len(a.ParamDict['dwp'])+1-3)
            temp_2 = temp_1 * t / (len(a.ParamDict['dwp'])-3)
            P4Rm.ParamDict['x_dwp'] = t - temp_2
            shifted_dwp = a.ParamDict['dwp'][1:-1:]
            temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                          around(a.ParamDict['x_dwp'], decimals=3))
            temp_4 = a.ParamDict['DW_i'][temp_3]
            P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
            P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.

            P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
            temp_5 = array([a.ParamDict['dwp'][0], a.ParamDict['dwp'][-1]])
            P4Rm.ParamDict['dw_out'] = temp_5

        elif a.AllDataDict['model'] == 2:
            x_dw_temp = []
            x_dw_temp.append(t*(1-a.ParamDict['dwp'][1]))
            x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] +
                             a.ParamDict['dwp'][2]/2))
            x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] -
                             a.ParamDict['dwp'][3]/2))
            x_dw_temp.append(t*0.05)
            P4Rm.ParamDict['x_dwp'] = x_dw_temp

            y_dw_temp = []
            y_dw_temp.append(a.ParamDict['dwp'][0])
            y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2)
            y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2 -
                             (1-a.ParamDict['dwp'][6])/2)
            y_dw_temp.append(a.ParamDict['dwp'][6])
            P4Rm.ParamDict['DW_shifted'] = y_dw_temp
开发者ID:aboulle,项目名称:RaDMaX,代码行数:48,代码来源:Calcul4Radmax.py

示例9: _check_trapping

    def _check_trapping(self, inv_val):
        r"""
        Determine which pores and throats are trapped by invading phase.  This
        method is called by ``run`` if 'trapping' is set to True.
        """
        # Generate a list containing boolean values for throat state
        Tinvaded = self['throat.inv_Pc'] < sp.inf
        # Add residual throats, if any, to list of invaded throats
        Tinvaded = Tinvaded + self['throat.residual']
        # Invert logic to find defending throats
        Tdefended = ~Tinvaded
        [pclusters, tclusters] = self._net.find_clusters2(mask=Tdefended,
                                                          t_labels=True)
        # See which outlet pores remain uninvaded
        outlets = self['pore.outlets']*(self['pore.inv_Pc'] == sp.inf)
        # Identify clusters connected to remaining outlet sites
        def_clusters = sp.unique(pclusters[outlets])
        temp = sp.in1d(sp.unique(pclusters), def_clusters, invert=True)
        trapped_clusters = sp.unique(pclusters)[temp]
        trapped_clusters = trapped_clusters[trapped_clusters >= 0]

        # Find defending clusters NOT connected to the outlet pores
        pmask = np.in1d(pclusters, trapped_clusters)
        # Store current applied pressure in newly trapped pores
        pinds = (self['pore.trapped'] == sp.inf) * (pmask)
        self['pore.trapped'][pinds] = inv_val

        # Find throats on the trapped defending clusters
        tinds = self._net.find_neighbor_throats(pores=pinds,
                                                mode='intersection')
        self['throat.trapped'][tinds] = inv_val
        self['throat.entry_pressure'][tinds] = 1000000
开发者ID:MichaelHoeh,项目名称:OpenPNM,代码行数:32,代码来源:__Drainage__.py

示例10: test_find_nearby_pores_distance_2_flattened_inclself

 def test_find_nearby_pores_distance_2_flattened_inclself(self):
     a = self.net.find_nearby_pores(pores=[0, 1],
                                    distance=2,
                                    flatten=True,
                                    excl_self=False)
     assert sp.size(a) == 17
     assert sp.all(sp.in1d([0, 1], a))
开发者ID:amirdezashibi,项目名称:OpenPNM,代码行数:7,代码来源:GenericNetworkTest.py

示例11: regenerate

 def regenerate(self, prop_list='',mode=None):
     r'''
     This updates all properties using the selected methods
     
     Parameters
     ----------
     prop_list : string or list of strings
         The names of the properties that should be updated, defaults to all
     mode : string
         Control how the regeneration occurs.  
         
     Examples
     --------
     >>> pn = OpenPNM.Network.TestNet()
     >>> pind = pn.get_pore_indices()
     >>> geom = OpenPNM.Geometry.Stick_and_Ball(network=pn, name='geo_test', locations=pind)
     >>> geom.regenerate()  # Regenerate all properties at once
     >>> geom.regenerate('pore_seed')  # only one property
     >>> geom.regenerate(['pore_seed', 'pore_diameter'])  # or several
     '''
     if prop_list == '':
         prop_list = self._prop_list
     elif type(prop_list) == str:
         prop_list = [prop_list]
     if mode == 'exclude':
         a = sp.array(self._prop_list)
         b = sp.array(prop_list)
         c = a[sp.where(~sp.in1d(a,b))[0]]
         prop_list = list(c)
     for item in prop_list:
         self._logger.debug('Refreshing: '+item)
         getattr(self,item)()
开发者ID:AgustinPerez,项目名称:OpenPNM,代码行数:32,代码来源:__GenericGeometry__.py

示例12: remove_isolated_clusters

def remove_isolated_clusters(conns, nonzero_locs, num_to_keep):
    r"""
    Identifies and removes all disconnected clusters except the number of
    groups specified by "num_to_keep". num_to_keep=N retains the N largest
    clusters
    """
    #
    adj_mat = generate_adjacency_matrix(conns, nonzero_locs)
    #
    logger.info('determining connected components...')
    cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
    groups, counts = sp.unique(cs_ids, return_counts=True)
    order = sp.argsort(counts)[::-1]
    groups = groups[order]
    counts = counts[order]
    #
    msg = '    {} component groups for {} total nodes'
    logger.debug(msg.format(groups.size, cs_ids.size))
    msg = '    largest group number: {}, size {}'
    logger.debug(msg.format(groups[0], counts[0]))
    msg = '    {} % of nodes contained in largest group'
    logger.debug(msg.format(counts[0]/cs_ids.size*100))
    msg = '    {} % of nodes contained in {} retained groups'
    num = sp.sum(counts[0:num_to_keep])/cs_ids.size*100
    logger.debug(msg.format(num, num_to_keep))
    #
    inds = sp.where(sp.in1d(cs_ids, groups[0:num_to_keep]))[0]
    num = nonzero_locs.size
    nonzero_locs = nonzero_locs[inds]
    msg = '    removed {} disconnected nodes'
    logger.debug(msg.format(num - nonzero_locs.size))
    #
    return nonzero_locs
开发者ID:stadelmanma,项目名称:netl-AP_MAP_FLOW,代码行数:33,代码来源:apm_calculate_offset_map.py

示例13: getSizeFactor

def getSizeFactor(fn_anno, data, gid, mode = 'sum', withXYMT = True, filterbyPC = True):
    '''
    input annotation, counts and gene ids
    output sum of protein coding gene levels excluding sex chromosomes and mitochondria genes
    '''
    anno  = sp.loadtxt(fn_anno, delimiter = '\t', dtype = 'string', usecols=[0,2,8])
    anno  = anno[anno[:,1] == 'gene', :]
    if not withXYMT: ### filter xymt
        anno  = anno[anno[:,0] != 'MT',:]
        anno  = anno[anno[:,0] != 'Y',:]
        anno  = anno[anno[:,0] != 'X',:]

    agid   = [x.split(';')[0] for x in anno[:,2]] ### clean gene id's
    agid   = sp.array([x.split(" ")[1].strip('\"') for x in agid])

    if filterbyPC: ### filter protein coding
        gtpe  = [x.split(';')[2] for x in anno[:,2]]
        gtpe  = sp.array([x.split('\"')[1].split('\"')[0] for x in gtpe])
        iPC   = sp.where(gtpe == 'protein_coding')[0]
        agid  = agid[iPC]

    iGn = sp.in1d(gid, agid)
    libsize = sp.sum(data[iGn,:], axis = 0) 
    if mode == 'uq':
         libsize = sp.array([sp.percentile(x[x!=0] ,75) for x in data[iGn,:].T])  * iGn.sum() 

    return libsize
开发者ID:KjongLehmann,项目名称:ICGC_libsizenorm,代码行数:27,代码来源:plotBoxPlot_GeneLevel.UCSCfreeze.py

示例14: make_introns_feasible

def make_introns_feasible(introns, genes, CFG):
# introns = make_introns_feasible(introns, genes, CFG)

    tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
    tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])
    
    unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
    print >> CFG['fd_log'], 'found %i unfeasible genes' % unfeas.shape[0]

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        CFG['read_filter']['exon_len'] = min(36, CFG['read_filter']['exon_len'] + 4)
        CFG['read_filter']['mincount'] = 2 * CFG['read_filter']['mincount']
        CFG['read_filter']['mismatch'] = max(CFG['read_filter']['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### stil unfeasible?
        tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
        tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])

        still_unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]

        for i in unfeas[idx]:
            print >> CFG['fd_log'], '[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i' % (genes[i].name, CFG['read_filter']['exon_len'], CFG['read_filter']['mincount'], CFG['read_filter']['mismatch'])
        unfeas = still_unfeas;

    return introns
开发者ID:ccwang12,项目名称:spladder,代码行数:31,代码来源:helpers.py

示例15: calculate_ld

def calculate_ld(nt_map_file, kgenomes_file, output_folder, window_size):
    """
    Calculate LD in windows for a reference genome dataset for a given set of SNPIds that are defined in the genotype_file
    """
    log.info('Calculating LD')
    # Load 1K genome
    kg_h5f = h5py.File(kgenomes_file, 'r')

    # load map file.
    with open(nt_map_file, 'rb') as f:
        snp_map_dict = pickle.load(f, encoding='latin1')

    # Figure out overlap (all genotype SNPs should be in the 1K genomes data)..
    for chrom in range(1, 23):
        log.info('Working on Chromosome %s' % chrom)
        chrom_str1 = 'chr%s' % chrom
        kg_cg = kg_h5f[chrom_str1]
        kg_sids = kg_cg['snp_ids'][...]
        chrom_dict = snp_map_dict[chrom_str1]
        g_sids = chrom_dict['sids']

        kg_filter = sp.in1d(kg_sids, g_sids)

        assert sp.sum(kg_filter) == len(g_sids), '..bug...'
        assert sp.all(kg_sids[kg_filter] == g_sids), '...bug'

        snps = kg_cg['snps'][...]
        snps = snps.compress(kg_filter, axis=0)

        snp_stds = kg_cg['snp_stds'][...]
        snp_stds = snp_stds.compress(kg_filter, axis=0)

        snp_means = kg_cg['snp_means'][...]
        snp_means = snp_means.compress(kg_filter, axis=0)

        norm_snps = sp.array((snps - snp_means) / snp_stds, dtype='single')

        # Iterate over SNPs and calculate LD
        num_snps, num_indivs = snps.shape

        ld_mats = []
        boundaries = []

        for snp_i in range(num_snps):
            start_i = max(0, snp_i - window_size / 2)
            end_i = min(snp_i + (window_size / 2) + 1, num_snps)

            X = norm_snps[start_i:end_i]
            D = sp.dot(X, X.T) / num_indivs

            ld_mats.append(D)
            boundaries.append([start_i, end_i])

        ld_dict = {'Ds':ld_mats, 'boundaries':boundaries, 'snp_means':snp_means, 'snp_stds':snp_stds, 'window_size':window_size}
        # Store things

        ld_file = '%s/LD' % output_folder + '_' + chrom_str1 + '.pickled.gz'
        log.info('Saving LD in %s' % ld_file)
        with gzip.open(ld_file, 'w') as f:
            pickle.dump(ld_dict, f, protocol=2)
开发者ID:TheHonestGene,项目名称:imputor,代码行数:60,代码来源:impute.py


注:本文中的scipy.in1d函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。