本文整理匯總了Python中rgt.GenomicRegionSet.GenomicRegionSet.combine方法的典型用法代碼示例。如果您正苦於以下問題:Python GenomicRegionSet.combine方法的具體用法?Python GenomicRegionSet.combine怎麽用?Python GenomicRegionSet.combine使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rgt.GenomicRegionSet.GenomicRegionSet
的用法示例。
在下文中一共展示了GenomicRegionSet.combine方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: merge_DBD_regions
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 別名]
def merge_DBD_regions(path):
"""Merge all available DBD regions in BED format. """
for t in os.listdir(path):
if os.path.isdir(os.path.join(path, t)):
dbd_pool = GenomicRegionSet(t)
for rna in os.listdir(os.path.join(path,t)):
f = os.path.join(path, t, rna, "DBD_"+rna+".bed")
if os.path.exists(f):
dbd = GenomicRegionSet(rna)
dbd.read_bed(f)
for r in dbd: r.name = rna+"_"+r.name
dbd_pool.combine(dbd)
dbd_pool.write_bed(os.path.join(path, t, "DBD_"+t+".bed"))
示例2: get_bc_signal
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 別名]
def get_bc_signal(arguments):
(mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
window_size, forward_shift, reverse_shift, bias_table1, bias_table2) = arguments
mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
mpbs1.read(mpbs_file1)
mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
mpbs2.read(mpbs_file2)
mpbs = mpbs1.combine(mpbs2, output=True)
mpbs.sort()
bam1 = Samfile(reads_file1, "rb")
bam2 = Samfile(reads_file2, "rb")
genome_data = GenomeData(organism)
fasta = Fastafile(genome_data.get_genome())
signal_1 = np.zeros(window_size)
signal_2 = np.zeros(window_size)
motif_len = None
pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
("G", [0.0] * window_size), ("T", [0.0] * window_size),
("N", [0.0] * window_size)])
mpbs_regions = mpbs.by_names([mpbs_name])
num_motif = len(mpbs_regions)
# Fetch bias corrected signal
for region in mpbs_regions:
if motif_len is None:
motif_len = region.final - region.initial
mid = (region.final + region.initial) / 2
p1 = mid - window_size / 2
p2 = mid + window_size / 2
if p1 <= 0:
continue
# Fetch raw signal
signal1 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam1,
bias_table=bias_table1, genome_file_name=genome_data.get_genome(),
forward_shift=forward_shift, reverse_shift=reverse_shift)
signal2 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam2,
bias_table=bias_table2, genome_file_name=genome_data.get_genome(),
forward_shift=forward_shift, reverse_shift=reverse_shift)
if len(signal1) != len(signal_1) or len(signal2) != len(signal_2):
continue
# smooth the signal
signal_1 = np.add(signal_1, np.array(signal1))
signal_2 = np.add(signal_2, np.array(signal2))
update_pwm(pwm, fasta, region, p1, p2)
return signal_1, signal_2, motif_len, pwm, num_motif
示例3: get_raw_signal
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 別名]
def get_raw_signal(arguments):
(mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
window_size, forward_shift, reverse_shift) = arguments
mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
mpbs1.read(mpbs_file1)
mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
mpbs2.read(mpbs_file2)
mpbs = mpbs1.combine(mpbs2, output=True)
mpbs.sort()
bam1 = Samfile(reads_file1, "rb")
bam2 = Samfile(reads_file2, "rb")
genome_data = GenomeData(organism)
fasta = Fastafile(genome_data.get_genome())
signal_1 = np.zeros(window_size)
signal_2 = np.zeros(window_size)
motif_len = None
pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
("G", [0.0] * window_size), ("T", [0.0] * window_size),
("N", [0.0] * window_size)])
mpbs_regions = mpbs.by_names([mpbs_name])
num_motif = len(mpbs_regions)
for region in mpbs_regions:
if motif_len is None:
motif_len = region.final - region.initial
mid = (region.final + region.initial) / 2
p1 = mid - window_size / 2
p2 = mid + window_size / 2
if p1 <= 0:
continue
# Fetch raw signal
for read in bam1.fetch(region.chrom, p1, p2):
# check if the read is unmapped, according to issue #112
if read.is_unmapped:
continue
if not read.is_reverse:
cut_site = read.pos + forward_shift
if p1 <= cut_site < p2:
signal_1[cut_site - p1] += 1.0
else:
cut_site = read.aend + reverse_shift - 1
if p1 <= cut_site < p2:
signal_1[cut_site - p1] += 1.0
for read in bam2.fetch(region.chrom, p1, p2):
# check if the read is unmapped, according to issue #112
if read.is_unmapped:
continue
if not read.is_reverse:
cut_site = read.pos + forward_shift
if p1 <= cut_site < p2:
signal_2[cut_site - p1] += 1.0
else:
cut_site = read.aend + reverse_shift - 1
if p1 <= cut_site < p2:
signal_2[cut_site - p1] += 1.0
update_pwm(pwm, fasta, region, p1, p2)
return signal_1, signal_2, motif_len, pwm, num_motif
示例4: diff_analysis_run
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 別名]
def diff_analysis_run(args):
# Initializing Error Handler
err = ErrorHandler()
output_location = os.path.join(args.output_location, "Lineplots")
try:
if not os.path.isdir(output_location):
os.makedirs(output_location)
except Exception:
err.throw_error("MM_OUT_FOLDER_CREATION")
# Check if the index file exists
base_name1 = "{}.bai".format(args.reads_file1)
if not os.path.exists(base_name1):
pysam.index(args.reads_file1)
base_name2 = "{}.bai".format(args.reads_file2)
if not os.path.exists(base_name2):
pysam.index(args.reads_file2)
mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
mpbs1.read(args.mpbs_file1)
mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
mpbs2.read(args.mpbs_file2)
mpbs = mpbs1.combine(mpbs2, output=True)
mpbs.sort()
mpbs.remove_duplicates()
mpbs_name_list = list(set(mpbs.get_names()))
signal_dict_by_tf_1 = dict()
signal_dict_by_tf_2 = dict()
motif_len_dict = dict()
motif_num_dict = dict()
pwm_dict_by_tf = dict()
pool = Pool(processes=args.nc)
# differential analysis using bias corrected signal
if args.bc:
hmm_data = HmmData()
table_F = hmm_data.get_default_bias_table_F_ATAC()
table_R = hmm_data.get_default_bias_table_R_ATAC()
bias_table1 = BiasTable().load_table(table_file_name_F=table_F, table_file_name_R=table_R)
bias_table2 = BiasTable().load_table(table_file_name_F=table_F, table_file_name_R=table_R)
mpbs_list = list()
for mpbs_name in mpbs_name_list:
mpbs_list.append((mpbs_name, args.mpbs_file1, args.mpbs_file2, args.reads_file1, args.reads_file2,
args.organism, args.window_size, args.forward_shift, args.reverse_shift,
bias_table1, bias_table2))
try:
res = pool.map(get_bc_signal, mpbs_list)
except Exception:
logging.exception("get bias corrected signal failed")
# differential analysis using raw signal
else:
mpbs_list = list()
for mpbs_name in mpbs_name_list:
mpbs_list.append((mpbs_name, args.mpbs_file1, args.mpbs_file2, args.reads_file1, args.reads_file2,
args.organism, args.window_size, args.forward_shift, args.reverse_shift))
try:
res = pool.map(get_raw_signal, mpbs_list)
except Exception:
logging.exception("get raw signal failed")
for idx, mpbs_name in enumerate(mpbs_name_list):
signal_dict_by_tf_1[mpbs_name] = res[idx][0]
signal_dict_by_tf_2[mpbs_name] = res[idx][1]
motif_len_dict[mpbs_name] = res[idx][2]
pwm_dict_by_tf[mpbs_name] = res[idx][3]
motif_num_dict[mpbs_name] = res[idx][4]
if args.factor1 is None or args.factor2 is None:
args.factor1, args.factor2 = compute_factors(signal_dict_by_tf_1, signal_dict_by_tf_2)
output_factor(args, args.factor1, args.factor2)
if args.output_profiles:
output_profiles(mpbs_name_list, signal_dict_by_tf_1, output_location, args.condition1)
output_profiles(mpbs_name_list, signal_dict_by_tf_2, output_location, args.condition2)
ps_tc_results_by_tf = dict()
plots_list = list()
for mpbs_name in mpbs_name_list:
plots_list.append((mpbs_name, motif_num_dict[mpbs_name], signal_dict_by_tf_1[mpbs_name],
signal_dict_by_tf_2[mpbs_name], args.factor1, args.factor2, args.condition1,
args.condition2, pwm_dict_by_tf[mpbs_name], output_location, args.window_size,
args.standardize))
pool.map(line_plot, plots_list)
for mpbs_name in mpbs_name_list:
res = get_ps_tc_results(signal_dict_by_tf_1[mpbs_name], signal_dict_by_tf_2[mpbs_name],
args.factor1, args.factor2, motif_num_dict[mpbs_name], motif_len_dict[mpbs_name])
#
# # only use the factors whose protection scores are greater than 0
# if res[0] > 0 and res[1] < 0:
ps_tc_results_by_tf[mpbs_name] = res
#.........這裏部分代碼省略.........
示例5: figure
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import combine [as 別名]
class Boxplot:
"""
input:
exps: input experimental matrix
title: Default = boxplot
groupby: Group the data by the given factor in the header of experimental matrix
output:
parameters: list of records
figs: a list of figure(s)
"""
def __init__(self, EMpath, fields, title="boxplot", df=False):
# Read the Experimental Matrix
self.title = title
self.exps = ExperimentalMatrix()
self.exps.read(EMpath)
for f in fields:
if f not in ["None", "reads", "regions", "factor"]:
self.exps.match_ms_tags(f)
self.exps.remove_name()
self.beds = self.exps.get_regionsets() # A list of GenomicRegionSets
self.bednames = self.exps.get_regionsnames()
self.reads = self.exps.get_readsfiles()
self.readsnames = self.exps.get_readsnames()
self.fieldsDict = self.exps.fieldsDict
self.parameter = []
self.df = df
def combine_allregions(self):
self.all_bed = GenomicRegionSet("All regions")
for bed in self.beds:
self.all_bed.combine(bed)
self.all_bed.remove_duplicates() # all_bed is sorted!!
def bedCoverage(self):
""" Return coverage matrix of multiple reads on one bed.
bed --> GenomicRegionSet
"""
c = []
for rp in self.reads:
print(" processing: ..." + rp[-45:])
r = os.path.abspath(rp) # Here change the relative path into absolute path
cov = CoverageSet(r, self.all_bed)
cov.coverage_from_genomicset(r)
cov.normRPM()
c.append(cov.coverage)
self.all_table = numpy.transpose(c)
def quantile_normalization(self):
""" Return the np.array which contains the normalized values
"""
rank_matrix = []
for c in range(self.all_table.shape[1]):
col = self.all_table[:, c]
rank_col = mstats.rankdata(col)
rank_matrix.append(rank_col)
ranks = numpy.array(rank_matrix)
trans_rank = numpy.transpose(ranks)
# Calculate for means of ranks
print(" Calculating for the mean of ranked data...")
sort_matrix = numpy.sort(self.all_table, axis=0)
means = []
for r in range(self.all_table.shape[0]):
row = [x for x in sort_matrix[r, :]]
means.append(numpy.mean(row))
# Replace the value by new means
print(" Replacing the data value by normalized mean...")
normalized_table = numpy.around(trans_rank)
for i, v in enumerate(means):
normalized_table[normalized_table == i + 1] = v
# print(rounded_rank)
self.norm_table = normalized_table
def tables_for_plot(self):
""" Return a Dict which stores all tables for each bed with file name as its key. """
self.tableDict = OrderedDict() # Storage all tables for each bed with bedname as the key
conList = [] # Store containers of beds
iterList = []
for i, bed in enumerate(self.beds):
self.tableDict[bed.name] = []
bed.sort()
conList.append(bed.__iter__())
iterList.append(conList[-1].next())
for i, r in enumerate(self.all_bed.sequences):
for j in range(len(self.beds)):
while r > iterList[j]:
try:
iterList[j] = conList[j].next()
except:
break
if r == iterList[j]:
self.tableDict[self.beds[j].name].append(self.norm_table[i])
elif r < iterList[j]:
#.........這裏部分代碼省略.........