本文整理汇总了 Python 中 rgt.GenomicRegionSet.GenomicRegionSet.remove_duplicates 方法的典型用法代码示例。如果您正苦于以下问题：Python GenomicRegionSet.remove_duplicates 方法的具体用法？Python GenomicRegionSet.remove_duplicates 怎么用？Python GenomicRegionSet.remove_duplicates 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 rgt.GenomicRegionSet.GenomicRegionSet 的用法示例。
在下文中一共展示了GenomicRegionSet.remove_duplicates方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _intersect
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import remove_duplicates [as 别名]
def _intersect(self, y, rm_duplicates=False):
    """Return the regions of this set that overlap any region of ``y``.

    Only the ``OverlapType.ORIGINAL`` behaviour is implemented here: a region
    of ``self`` is added, unchanged, to the result as soon as it overlaps the
    region of ``y`` it is currently being compared with.

    Keyword arguments:
    y -- the GenomicRegionSet to compare with
    rm_duplicates -- when true, ``remove_duplicates()`` is called on the
        result before it is returned (default False)

    Return:
    z -- a new GenomicRegionSet named ``self.name + ' + ' + y.name`` holding
        the regions of ``self`` that intersect ``y``; it is empty when either
        input is empty

    Side effects:
    ``self`` and ``y`` are sorted in place when their ``sorted`` flag is not
    set.

    Graphical explanation:
    self       ----------              ------
    y                 ----------                    ----
    Result     ----------
    """
    a = self
    b = y
    # XXX - the result name contains spaces and a '+' although it is used as
    # a file name; avoid unusual characters there.
    z = GenomicRegionSet(a.name + ' + ' + b.name)
    if len(a) == 0 or len(b) == 0:
        return z

    # The sweep below relies on both sets being sorted.
    # If there is overlap within self or y, they should be merged first.
    if not a.sorted:
        a.sort()
    if not b.sorted:
        b.sort()

    iter_a = iter(a)
    # Built-in next() works on both Python 2 and Python 3 iterators, unlike
    # the Python-2-only ``iterator.next()`` method.
    s = next(iter_a)
    last_j = len(b) - 1
    j = 0
    ########################### OverlapType.ORIGINAL ###################################
    while True:
        other = b[j]
        if s.overlap(other):
            z.add(s)
        elif not (s < other) and s > other:
            # s compares greater than the current region of y without
            # overlapping it: step forward in y, or stop when y is exhausted.
            if j == last_j:
                break
            j += 1
            continue
        # s overlapped, compares lower than, or is unordered relative to the
        # current region of y: move on to the next region of self.
        try:
            s = next(iter_a)
        except StopIteration:
            # Only exhaustion of self ends the sweep; any other error is a
            # real failure and must propagate instead of being swallowed.
            break

    if rm_duplicates:
        z.remove_duplicates()
    return z
示例2: figure
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import remove_duplicates [as 别名]
class Boxplot:
"""
input:
exps: input experimental matrix
title: Default = boxplot
groupby: Group the data by the given factor in the header of experimental matrix
output:
parameters: list of records
figs: a list of figure(s)
"""
def __init__(self, EMpath, fields, title="boxplot", df=False):
# Load the experimental matrix read from EMpath and cache its region sets,
# reads files, their names and its fieldsDict on the instance.
#
# EMpath -- passed unchanged to ExperimentalMatrix.read()
# fields -- iterable of field names; every name other than "None", "reads",
#           "regions" and "factor" is passed to match_ms_tags()
# title  -- stored as self.title (default "boxplot")
# df     -- stored as self.df (default False); not otherwise used here
#
# NOTE(review): indentation was lost in this listing, so it cannot be told
# whether remove_name() runs once after the loop or once per matching field --
# confirm against the upstream source before re-indenting.
# Read the Experimental Matrix
self.title = title
self.exps = ExperimentalMatrix()
self.exps.read(EMpath)
for f in fields:
if f not in ["None", "reads", "regions", "factor"]:
self.exps.match_ms_tags(f)
self.exps.remove_name()
self.beds = self.exps.get_regionsets() # A list of GenomicRegionSets
self.bednames = self.exps.get_regionsnames()
self.reads = self.exps.get_readsfiles()
self.readsnames = self.exps.get_readsnames()
self.fieldsDict = self.exps.fieldsDict
# Starts empty; nothing in this method fills it.
self.parameter = []
self.df = df
def combine_allregions(self):
    """Merge every region set in ``self.beds`` into ``self.all_bed``.

    A fresh GenomicRegionSet named "All regions" is stored on the instance,
    each set of ``self.beds`` is combined into it, and duplicates are removed
    afterwards.
    """
    merged = GenomicRegionSet("All regions")
    self.all_bed = merged
    for region_set in self.beds:
        merged.combine(region_set)
    # all_bed is sorted after this call (per the original author's note).
    merged.remove_duplicates()
def bedCoverage(self):
    """Compute the coverage of every reads file over ``self.all_bed``.

    For each path in ``self.reads`` a CoverageSet is built on
    ``self.all_bed``, filled with ``coverage_from_genomicset()``, normalised
    with ``normRPM()``, and its ``coverage`` attribute is collected. The
    collected list is passed through ``numpy.transpose`` and stored in
    ``self.all_table``; nothing is returned.
    """
    coverages = []
    for reads_path in self.reads:
        print(" processing: ..." + reads_path[-45:])
        # The relative path is turned into an absolute one before use.
        absolute_path = os.path.abspath(reads_path)
        coverage_set = CoverageSet(absolute_path, self.all_bed)
        coverage_set.coverage_from_genomicset(absolute_path)
        coverage_set.normRPM()
        coverages.append(coverage_set.coverage)
    self.all_table = numpy.transpose(coverages)
def quantile_normalization(self):
    """Quantile-normalise ``self.all_table`` and store it in ``self.norm_table``.

    ``self.all_table`` is read as a 2-D array. Every column is ranked with
    ``mstats.rankdata``; the columns are sorted independently and the mean of
    each sorted row becomes the reference value for that rank. Each entry is
    then replaced by the reference value of its rank rounded with
    ``numpy.around`` (so tied, fractional ranks are rounded first).

    The result has the same shape as the rank table and is stored in
    ``self.norm_table``; nothing is returned.
    """
    table = self.all_table

    # Rank each column separately, then put the ranks back in table layout.
    rank_matrix = [mstats.rankdata(table[:, c]) for c in range(table.shape[1])]
    trans_rank = numpy.transpose(numpy.array(rank_matrix))

    # Calculate for means of ranks
    print(" Calculating for the mean of ranked data...")
    sort_matrix = numpy.sort(table, axis=0)
    means = [numpy.mean(sort_matrix[r, :]) for r in range(table.shape[0])]

    # Replace the value by new means
    print(" Replacing the data value by normalized mean...")
    rounded_rank = numpy.around(trans_rank)
    # Write into a copy and keep comparing against the untouched ranks:
    # replacing in place would let a mean that happens to equal a later rank
    # number (e.g. mean 3.0 vs rank 3) be overwritten a second time.
    normalized_table = rounded_rank.copy()
    for i, v in enumerate(means):
        normalized_table[rounded_rank == i + 1] = v
    self.norm_table = normalized_table
def tables_for_plot(self):
""" Return a Dict which stores all tables for each bed with file name as its key. """
self.tableDict = OrderedDict() # Storage all tables for each bed with bedname as the key
conList = [] # Store containers of beds
iterList = []
for i, bed in enumerate(self.beds):
self.tableDict[bed.name] = []
bed.sort()
conList.append(bed.__iter__())
iterList.append(conList[-1].next())
for i, r in enumerate(self.all_bed.sequences):
for j in range(len(self.beds)):
while r > iterList[j]:
try:
iterList[j] = conList[j].next()
except:
break
if r == iterList[j]:
self.tableDict[self.beds[j].name].append(self.norm_table[i])
elif r < iterList[j]:
#.........这里部分代码省略.........