当前位置: 首页>>代码示例>>Python>>正文


Python GenomicRegionSet.remove_duplicates方法代码示例

本文整理汇总了Python中rgt.GenomicRegionSet.GenomicRegionSet.remove_duplicates方法的典型用法代码示例。如果您正苦于以下问题:Python GenomicRegionSet.remove_duplicates方法的具体用法?Python GenomicRegionSet.remove_duplicates怎么用?Python GenomicRegionSet.remove_duplicates使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类rgt.GenomicRegionSet.GenomicRegionSet的用法示例。


在下文中一共展示了GenomicRegionSet.remove_duplicates方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _intersect

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import remove_duplicates [as 别名]
 def _intersect(self, y, rm_duplicates=False):
     """Return the regions of self that overlap at least one region of y.

     Only the OverlapType.ORIGINAL mode is implemented here: each region of
     this set that overlaps some region of ``y`` is added, as is, to the
     result.

     Keyword arguments:
     y -- the GenomicRegionSet which to compare with
     rm_duplicates -- when true, remove_duplicates() is called on the
                      result before it is returned (default False)

     Return:
     z -- a new GenomicRegionSet named "<self.name> + <y.name>" holding the
          regions of self which have any intersection with y; it is
          returned empty when either input has length 0

     Side effects:
     sort() is called on self and on y when their ``sorted`` flag is falsy.

     Graphical explanation:
     self       ----------              ------
     y              ----------                    ----
     Result     ----------

     """
     a = self
     b = y

     # NOTE: per the original author's remark this name is also used as a
     # file name, although it contains spaces and a '+' sign.
     z = GenomicRegionSet(a.name + ' + ' + b.name)
     if len(a) == 0 or len(b) == 0:
         return z

     # The sweep below walks both sets in order, so both must be sorted.
     if not a.sorted:
         a.sort()
     if not b.sorted:
         b.sort()

     iter_a = iter(a)
     # Built-in next() works on Python 2.6+ and Python 3 alike, unlike the
     # Python-2-only ``iterator.next()`` method. ``a`` is non-empty here.
     s = next(iter_a)
     last_j = len(b) - 1
     j = 0

     ########################### OverlapType.ORIGINAL ###################################
     while True:
         if s.overlap(b[j]):
             # Overlapping region of self: keep it, then move on in self.
             z.add(s)
         elif s < b[j]:
             # s lies before b[j] without overlapping: move on in self.
             pass
         elif s > b[j]:
             # s lies after b[j]: move on in y, or stop when y is exhausted.
             if j == last_j:
                 break
             j += 1
             continue
         # Every remaining case advances to the next region of self.
         try:
             s = next(iter_a)
         except StopIteration:
             # Only exhaustion of self ends the sweep; other errors propagate.
             break

     if rm_duplicates:
         z.remove_duplicates()

     return z
开发者ID:Marvin84,项目名称:reg-gen,代码行数:63,代码来源:GenomicVariantSet.py

示例2: figure

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import remove_duplicates [as 别名]
class Boxplot:
    """
    input:
        exps: input experimental matrix
        title: Default = boxplot
        groupby: Group the data by the given factor in the header of experimental matrix

    output:
        parameters: list of records
        figs: a list of figure(s)
    """

    def __init__(self, EMpath, fields, title="boxplot", df=False):
        """Read the experimental matrix and cache the listings taken from it.

        EMpath: value handed to ExperimentalMatrix.read()
        fields: iterable of field names; every entry other than "None",
            "reads", "regions" and "factor" is passed to match_ms_tags()
        title: stored as self.title (default "boxplot")
        df: stored as self.df (default False)
        """
        # Field names that are never forwarded to match_ms_tags().
        skipped_fields = ("None", "reads", "regions", "factor")

        self.title = title

        # Read the Experimental Matrix
        matrix = ExperimentalMatrix()
        self.exps = matrix
        matrix.read(EMpath)
        for field in fields:
            if field in skipped_fields:
                continue
            matrix.match_ms_tags(field)
        matrix.remove_name()

        # Per the original comment, get_regionsets() yields a list of
        # GenomicRegionSets.
        self.beds = matrix.get_regionsets()
        self.bednames = matrix.get_regionsnames()
        self.reads = matrix.get_readsfiles()
        self.readsnames = matrix.get_readsnames()
        self.fieldsDict = matrix.fieldsDict

        self.parameter = []
        self.df = df

    def combine_allregions(self):
        """Build self.all_bed from every region set in self.beds.

        A GenomicRegionSet named "All regions" is created, each set of
        self.beds is combined into it, and remove_duplicates() is called on
        the result.
        """
        merged = GenomicRegionSet("All regions")
        self.all_bed = merged
        for region_set in self.beds:
            merged.combine(region_set)
        # Original author's note: all_bed is sorted once this has run.
        merged.remove_duplicates()

    def bedCoverage(self):
        """Compute the coverage of every reads file over self.all_bed.

        For each path in self.reads a CoverageSet is created on
        self.all_bed, filled through coverage_from_genomicset(), normalised
        with normRPM(), and its ``coverage`` attribute is collected. The
        transpose of the collected coverages is stored in self.all_table;
        the method itself returns nothing.
        """
        coverages = []
        for reads_path in self.reads:
            print("    processing: ..." + reads_path[-45:])
            # Turn a possibly relative path into an absolute one.
            absolute_path = os.path.abspath(reads_path)
            coverage_set = CoverageSet(absolute_path, self.all_bed)
            coverage_set.coverage_from_genomicset(absolute_path)
            coverage_set.normRPM()
            coverages.append(coverage_set.coverage)
        self.all_table = numpy.transpose(coverages)

    def quantile_normalization(self):
        """Quantile-normalise self.all_table into self.norm_table.

        Each column of self.all_table is ranked on its own, the ranks are
        rounded with numpy.around, and every cell is replaced by the mean
        of the row of the column-wise sorted table that corresponds to its
        rank (rank 1 -> mean of the smallest values, and so on).

        The result is stored in self.norm_table; nothing is returned.
        Progress messages are printed to standard output.

        The means are looked up by index rather than by repeatedly matching
        cell values, so a mean that happens to equal a later rank number
        (e.g. a mean of 2.0) is not overwritten by a later replacement.
        """
        table = self.all_table

        # Rank every column independently, then restore the table layout.
        rank_columns = [mstats.rankdata(table[:, c])
                        for c in range(table.shape[1])]
        trans_rank = numpy.transpose(numpy.array(rank_columns))

        # Calculate for means of ranks
        print("    Calculating for the mean of ranked data...")
        sort_matrix = numpy.sort(table, axis=0)
        means = numpy.array([numpy.mean(sort_matrix[r, :])
                             for r in range(table.shape[0])], dtype=float)

        # Replace the value by new means
        print("    Replacing the data value by normalized mean...")
        rounded_rank = numpy.around(trans_rank)
        positions = rounded_rank.astype(int) - 1  # rank k -> means[k - 1]
        # Ranks outside 1..len(means) keep their rounded rank value,
        # as no replacement would have matched them before either.
        in_range = (positions >= 0) & (positions < len(means))
        normalized_table = rounded_rank.copy()
        normalized_table[in_range] = means[positions[in_range]]
        self.norm_table = normalized_table

    def tables_for_plot(self):
        """ Return a Dict which stores all tables for each bed with file name as its key. """
        self.tableDict = OrderedDict()  # Storage all tables for each bed with bedname as the key
        conList = []  # Store containers of beds
        iterList = []

        for i, bed in enumerate(self.beds):
            self.tableDict[bed.name] = []
            bed.sort()
            conList.append(bed.__iter__())
            iterList.append(conList[-1].next())

        for i, r in enumerate(self.all_bed.sequences):
            for j in range(len(self.beds)):
                while r > iterList[j]:
                    try:
                        iterList[j] = conList[j].next()
                    except:
                        break
                if r == iterList[j]:
                    self.tableDict[self.beds[j].name].append(self.norm_table[i])
                elif r < iterList[j]:
#.........这里部分代码省略.........
开发者ID:eggduzao,项目名称:reg-gen,代码行数:103,代码来源:boxplot.py


注:本文中的rgt.GenomicRegionSet.GenomicRegionSet.remove_duplicates方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。