本文整理匯總了Python中rgt.GenomicRegionSet.GenomicRegionSet.add方法的典型用法代碼示例。如果您正苦於以下問題:Python GenomicRegionSet.add方法的具體用法?Python GenomicRegionSet.add怎麽用?Python GenomicRegionSet.add使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rgt.GenomicRegionSet.GenomicRegionSet
的用法示例。
在下文中一共展示了GenomicRegionSet.add方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: create_file
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def create_file(self):
# Expanding summits
tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
tfbs_summit_regions.read_bed(self.tfbs_summit_fname)
for region in iter(tfbs_summit_regions):
summit = int(region.data.split()[-1]) + region.initial
region.initial = max(summit - (self.peak_ext / 2), 0)
region.final = summit + (self.peak_ext / 2)
# Calculating intersections
mpbs_regions = GenomicRegionSet("MPBS Regions")
mpbs_regions.read_bed(self.mpbs_fname)
tfbs_summit_regions.sort()
mpbs_regions.sort()
with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)
tfbs_regions = GenomicRegionSet("TFBS Regions")
for region in iter(with_overlap_regions):
region.name = region.name.split(":")[0] + ":Y"
tfbs_regions.add(region)
for region in iter(without_overlap_regions):
region.name = region.name.split(":")[0] + ":N"
tfbs_regions.add(region)
tfbs_regions.sort()
tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
tfbs_regions.write_bed(tfbs_fname)
示例2: fisher_table
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def fisher_table(motif_name, regions, mpbs, gene_set=False, mpbs_set=False):
"""
TODO
Keyword arguments:
motif_name -- TODO
regions -- TODO
mpbs -- TODO
gene_set -- TODO
mpbs_set -- TODO
Return:
a -- TODO
b -- TODO
gene_set -- TODO
mpbs_set -- TODO
"""
# Fetching motif
mpbs_motif = GenomicRegionSet(name="mpbs_motif")
for region in mpbs.sequences:
if motif_name in region.name:
mpbs_motif.add(region)
# Performing intersections
if len(mpbs_motif) > 0:
# regions which are overlapping with mpbs_motif
intersect_original = regions.intersect(mpbs_motif, mode=OverlapType.ORIGINAL, rm_duplicates=True)
# regions which are not overlapping with regions from mpbs_motif
subtract_overlap = regions.subtract(mpbs_motif, whole_region=True)
# Fetching genes
if gene_set:
gene_set_res = GeneSet(motif_name)
for genomic_region in intersect_original.sequences:
if genomic_region.name:
gene_list = [e if e[0] != "." else e[1:] for e in genomic_region.name.split(":")]
for g in gene_list:
gene_set_res.genes.append(g)
gene_set_res.genes = list(set(gene_set_res.genes)) # Keep only unique genes
else:
gene_set_res = None
# Fetching mpbs
if mpbs_set:
mpbs_set_res = mpbs_motif.intersect(regions, mode=OverlapType.ORIGINAL, rm_duplicates=True)
else:
mpbs_set_res = None
return len(intersect_original), len(subtract_overlap), gene_set_res, mpbs_set_res
else:
gene_set_res = GeneSet(motif_name) if gene_set else None
mpbs_set_res = GenomicRegionSet(mpbs_motif.name) if mpbs_set else None
return 0, len(regions), gene_set_res, mpbs_set_res
示例3: initialize
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
"""Initialize the MultiCoverageSet"""
regionset = GenomicRegionSet(name)
chrom_sizes_dict = {}
#if regions option is set, take the values, otherwise the whole set of
#chromosomes as region to search for DPs
if regions is not None:
print("Call DPs on specified regions.", file=sys.stderr)
with open(regions) as f:
for line in f:
line = line.strip()
line = line.split('\t')
c, s, e = line[0], int(line[1]), int(line[2])
regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
chrom_sizes_dict[c] = e
else:
print("Call DPs on whole genome.", file=sys.stderr)
with open(chrom_sizes) as f:
for line in f:
line = line.strip()
line = line.split('\t')
chrom, end = line[0], int(line[1])
regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
chrom_sizes_dict[chrom] = end
if norm_regions:
norm_regionset = GenomicRegionSet('norm_regions')
norm_regionset.read_bed(norm_regions)
else:
norm_regionset = None
if housekeeping_genes:
scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)
if scaling_factors_ip:
tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")
regionset.sequences.sort()
exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")
multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
path_bamfiles = bamfiles, path_inputs = inputs, exts = exts, exts_inputs = exts_inputs, factors_inputs = factors_inputs, \
chrom_sizes=chrom_sizes, verbose=verbose, no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug, \
norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip, save_wig=save_wig)
return multi_cov_set
示例4: merge_delete
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def merge_delete(ext_size, merge, peak_list, pvalue_list):
# peaks_gain = read_diffpeaks(path)
regions_plus = GenomicRegionSet('regions') #pot. mergeable
regions_minus = GenomicRegionSet('regions') #pot. mergeable
regions_unmergable = GenomicRegionSet('regions')
last_orientation = ""
for i, t in enumerate(peak_list):
chrom, start, end, c1, c2, strand, ratio = t[0], t[1], t[2], t[3], t[4], t[5], t[6]
r = GenomicRegion(chrom = chrom, initial = start, final = end, name = '', \
orientation = strand, data = str((c1, c2, pvalue_list[i], ratio)))
if end - start > ext_size:
if strand == '+':
if last_orientation == '+':
region_plus.add(r)
else:
regions_unmergable.add(r)
elif strand == '-':
if last_orientation == '-':
region_mins.add(r)
else:
regions_unmergable.add(r)
if merge:
regions_plus.extend(ext_size/2, ext_size/2)
regions_plus.merge()
regions_plus.extend(-ext_size/2, -ext_size/2)
merge_data(regions_plus)
regions_minus.extend(ext_size/2, ext_size/2)
regions_minus.merge()
regions_minus.extend(-ext_size/2, -ext_size/2)
merge_data(regions_minus)
results = GenomicRegionSet('regions')
for el in regions_plus:
results.add(el)
for el in regions_minus:
results.add(el)
for el in regions_unmergable:
results.add(el)
results.sort()
return results
示例5: get_training_regionset
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def get_training_regionset(self):
r = GenomicRegionSet('')
r.add(self.regionset[self.counter])
if self.counter == len(self.chrom_sizes_dict):
return None
else:
self.counter += 1
return r
#if regions option is set, take the values, otherwise the whole set of
#chromosomes as region to search for DPs
# if test:
# contained_chrom = ['chr1', 'chr2']
# else:
# #contained_chrom = get_all_chrom(bamfiles)
# contained_chrom = ['chr1', 'chr2']
示例6: rna_associated_gene
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def rna_associated_gene(rna_regions, name, organism):
if rna_regions:
s = [ rna_regions[0][0], min([e[1] for e in rna_regions]),
max([e[2] for e in rna_regions]), rna_regions[0][3] ]
g = GenomicRegionSet("RNA associated genes")
g.add( GenomicRegion(chrom=s[0], initial=s[1], final=s[2], name=name, orientation=s[3]) )
asso_genes = g.gene_association(organism=organism, promoterLength=1000, show_dis=True)
genes = asso_genes[0].name.split(":")
closest_genes = []
for n in genes:
if name not in n: closest_genes.append(n)
closest_genes = set(closest_genes)
if len(closest_genes) == 0:
return "."
else:
return ":".join(closest_genes)
else:
return "."
示例7: chip_evaluate
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def chip_evaluate(self):
"""
This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
to evaluate the footprint predictions.
return:
"""
# Evaluate Statistics
fpr = dict()
tpr = dict()
roc_auc = dict()
roc_auc_1 = dict()
roc_auc_2 = dict()
recall = dict()
precision = dict()
prc_auc = dict()
if "SEG" in self.footprint_type:
mpbs_regions = GenomicRegionSet("TFBS")
mpbs_regions.read_bed(self.tfbs_file)
mpbs_regions.sort()
# Verifying the maximum score of the MPBS file
max_score = -99999999
for region in iter(mpbs_regions):
score = int(region.data)
if score > max_score:
max_score = score
max_score += 1
for i in range(len(self.footprint_file)):
footprints_regions = GenomicRegionSet("Footprints Prediction")
footprints_regions.read_bed(self.footprint_file[i])
# Sort footprint prediction bed files
footprints_regions.sort()
if self.footprint_type[i] == "SEG":
# Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
for region in iter(intersect_regions):
region.data = str(int(region.data) + max_score)
increased_score_mpbs_regions.add(region)
# Keep the score of remained MPBS entry unchanged
without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
for region in iter(without_intersect_regions):
increased_score_mpbs_regions.add(region)
increased_score_mpbs_regions.sort_score()
fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
elif self.footprint_type[i] == "SC":
footprints_regions.sort_score()
fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)
# Output the statistics results into text
stats_fname = self.output_location + self.tf_name + "_stats.txt"
stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
with open(stats_fname, "w") as stats_file:
stats_file.write("\t".join(stats_header) + "\n")
for i in range(len(self.footprint_name)):
stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
+ str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")
# Output the curves
if self.print_roc_curve:
label_x = "False Positive Rate"
label_y = "True Positive Rate"
curve_name = "ROC"
self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
if self.print_pr_curve:
label_x = "Recall"
label_y = "Precision"
curve_name = "PRC"
self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)
self.output_points(self.tf_name, fpr, tpr, recall, precision)
示例8: chip_evaluate
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def chip_evaluate(args):
# Evaluate Statistics
fpr = dict()
tpr = dict()
roc_auc_1 = dict()
roc_auc_10 = dict()
roc_auc_50 = dict()
roc_auc_100 = dict()
recall = dict()
precision = dict()
prc_auc_1 = dict()
prc_auc_10 = dict()
prc_auc_50 = dict()
prc_auc_100 = dict()
footprint_file = args.footprint_file.split(",")
footprint_name = args.footprint_name.split(",")
footprint_type = args.footprint_type.split(",")
max_score = 0
if "SEG" in footprint_type:
mpbs_regions = GenomicRegionSet("TFBS")
mpbs_regions.read(args.tfbs_file)
# Verifying the maximum score of the MPBS file
for region in iter(mpbs_regions):
score = int(region.data.split("\t")[0])
if score > max_score:
max_score = score
max_score += 1
max_points = []
for i in range(len(footprint_file)):
footprints_regions = GenomicRegionSet("Footprints Prediction")
footprints_regions.read(footprint_file[i])
footprints_regions.sort()
if footprint_type[i] == "SEG":
# Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
for region in iter(intersect_regions):
region.data = str(int(region.data.split("\t")[0]) + max_score)
increased_score_mpbs_regions.add(region)
# Keep the score of remained MPBS entry unchanged
without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
for region in iter(without_intersect_regions):
increased_score_mpbs_regions.add(region)
increased_score_mpbs_regions.sort_score()
fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
roc_curve(increased_score_mpbs_regions)
recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
precision_recall_curve(increased_score_mpbs_regions)
max_points.append(len(intersect_regions))
elif footprint_type[i] == "SC":
footprints_regions.sort_score()
fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
roc_curve(footprints_regions)
recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
precision_recall_curve(footprints_regions)
max_points.append(len(footprints_regions))
# Output the statistics results into text
stats_fname = os.path.join(args.output_location, "{}_stats.txt".format(args.output_prefix))
stats_header = ["METHOD", "AUC_100", "AUC_50", "AUC_10", "AUC_1", "AUPR_100", "AUPR_50", "AUPR_10", "AUPR_1"]
with open(stats_fname, "w") as stats_file:
stats_file.write("\t".join(stats_header) + "\n")
for i in range(len(footprint_name)):
stats_file.write(footprint_name[i] + "\t" +
str(roc_auc_100[i]) + "\t" + str(roc_auc_50[i]) + "\t" + str(roc_auc_10[i]) + "\t" +
str(roc_auc_1[i]) + "\t" + str(prc_auc_100[i]) + "\t" + str(prc_auc_50[i]) + "\t" +
str(prc_auc_10[i]) + "\t" + str(prc_auc_1[i]) + "\n")
# Output the curves
if args.print_roc_curve:
label_x = "False Positive Rate"
label_y = "True Positive Rate"
curve_name = "ROC"
plot_curve(footprint_name, args.output_location, fpr, tpr, roc_auc_100, label_x, label_y, args.output_prefix,
curve_name, max_points=max_points)
if args.print_pr_curve:
label_x = "Recall"
label_y = "Precision"
curve_name = "PRC"
plot_curve(footprint_name, args.output_location, recall, precision, prc_auc_100, label_x, label_y,
args.output_prefix, curve_name, max_points=max_points)
output_points(footprint_name, args.output_location, args.output_prefix, fpr, tpr, recall, precision)
示例9: initialize
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def initialize(name, genome_path, regions, stepsize, binsize, bam_file_1, bam_file_2, ext_1, ext_2, \
input_1, input_factor_1, ext_input_1, input_2, input_factor_2, ext_input_2, chrom_sizes, verbose, norm_strategy, no_gc_content, deadzones,\
factor_input_1, factor_input_2, debug, tracker):
regionset = GenomicRegionSet(name)
chrom_sizes_dict = {}
#if regions option is set, take the values, otherwise the whole set of
#chromosomes as region to search for DPs
if regions is not None:
with open(regions) as f:
for line in f:
line = line.strip()
line = line.split('\t')
c, s, e = line[0], int(line[1]), int(line[2])
regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
chrom_sizes_dict[c] = e
else:
with open(chrom_sizes) as f:
for line in f:
line = line.strip()
line = line.split('\t')
chrom, end = line[0], int(line[1])
regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
chrom_sizes_dict[chrom] = end
regionset.sequences.sort()
start = 0
end = 600
ext_stepsize = 5
#TODO: maybe for-loops?
#compute extension size
if [ext_1, ext_2, ext_input_1, ext_input_2].count(None) > 0:
print("Computing read extension sizes...", file=sys.stderr)
if ext_1 is None:
ext_1, values_1 = get_extension_size(bam_file_1, start=start, end=end, stepsize=ext_stepsize)
print("Read extension for first file: %s" %ext_1, file=sys.stderr)
if ext_2 is None:
ext_2, values_2 = get_extension_size(bam_file_2, start=start, end=end, stepsize=ext_stepsize)
print("Read extension for second file: %s" %ext_2, file=sys.stderr)
if input_1 is not None and ext_input_1 is None:
ext_input_1, values_input_1 = get_extension_size(input_1, start=start, end=end, stepsize=ext_stepsize)
print("Read extension for first input file: %s" %ext_input_1, file=sys.stderr)
if input_1 is not None and input_2 is not None and input_1 == input_2 and 'ext_input_1' in locals() and 'values_input_1' in locals():
ext_input_2, values_input_2 = ext_input_1, values_input_1
elif input_2 is not None and ext_input_2 is None:
ext_input_2, values_input_2 = get_extension_size(input_2, start=start, end=end, stepsize=ext_stepsize)
print("Read extension for second input file: %s" %ext_input_2, file=sys.stderr)
tracker.write(text=str(ext_1) + "," + str(ext_2), header="Extension size IP1, IP2")
if input_1 is not None and input_2 is not None:
tracker.write(text=str(ext_input_1) + "," + str(ext_input_2), header="Extension size Control1, Control2")
if verbose:
if 'values_1' in locals() and values_1 is not None:
with open(name + '-read-ext-1', 'w') as f:
for v, i in values_1:
print(i, v, sep='\t', file=f)
if 'values_2' in locals() and values_2 is not None:
with open(name + '-read-ext-2', 'w') as f:
for v, i in values_2:
print(i, v, sep='\t', file=f)
if 'values_input_1' in locals() and values_input_1 is not None:
with open(name + '-read-ext-input-1', 'w') as f:
for v, i in values_input_1:
print(i, v, sep='\t', file=f)
if 'values_input_2' in locals() and values_input_2 is not None:
with open(name + '-read-ext-input-2', 'w') as f:
for v, i in values_input_2:
print(i, v, sep='\t', file=f)
cov_cdp_mpp = DualCoverageSet(name=name, region=regionset, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
file_1=bam_file_1, ext_1=ext_1,\
file_2=bam_file_2, ext_2=ext_2, \
input_1=input_1, ext_input_1=ext_input_1, input_factor_1=input_factor_1, \
input_2=input_2, ext_input_2=ext_input_2, input_factor_2=input_factor_2, \
chrom_sizes=chrom_sizes, verbose=verbose, norm_strategy=norm_strategy, no_gc_content=no_gc_content, deadzones=deadzones,\
factor_input_1=factor_input_1, factor_input_2=factor_input_2, chrom_sizes_dict=chrom_sizes_dict, debug=debug, tracker=tracker)
return cov_cdp_mpp, [ext_1, ext_2]
示例10: __iter__
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def __iter__(self):
for el in self.regionset:
tmp = GenomicRegionSet('')
tmp.add(el)
yield tmp
示例11: dbd_regions
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def dbd_regions(exons, sig_region, rna_name, output,out_file=False, temp=None, fasta=True):
"""Generate the BED file of significant DBD regions and FASTA file of the sequences"""
if len(sig_region) == 0:
return
#print(self.rna_regions)
if not exons:
pass
else:
dbd = GenomicRegionSet("DBD")
dbdmap = {}
if len(exons) == 1:
print("## Warning: No information of exons in the given RNA sequence, the DBD position may be problematic. ")
for rbs in sig_region:
loop = True
if exons[0][3] == "-":
while loop:
cf = 0
for exon in exons:
#print(exon)
l = abs(exon[2] - exon[1])
tail = cf + l
if cf <= rbs.initial <= tail:
dbdstart = exon[2] - rbs.initial + cf
if rbs.final <= tail:
#print("1")
dbdend = exon[2] - rbs.final + cf
if dbdstart > dbdend: dbdstart, dbdend = dbdend, dbdstart
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.final) ) )
dbdmap[str(rbs)] = dbd[-1].toString() + " strand:-"
loop = False
break
elif rbs.final > tail:
subtract = l + cf - rbs.initial
#print("2")
#print("Subtract: "+str(subtract))
if dbdstart > exon[1]: dbdstart, exon[1] = exon[1], dbdstart
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=exon[1],
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
elif rbs.initial < cf and rbs.final <= tail:
#print("3")
dbdstart = exon[2]
dbdend = exon[2] - rbs.final + rbs.initial + subtract
if dbdstart > dbdend: dbdstart, dbdend = dbdend, dbdstart
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(cf)+"-"+str(rbs.final)+"_split2" ) )
dbdmap[str(rbs)] = dbd[-2].toString() + " & " + dbd[-1].toString() + " strand:-"
loop = False
break
elif rbs.initial > tail:
pass
cf += l
loop = False
else:
while loop:
cf = 0
for exon in exons:
#print(exon)
l = exon[2] - exon[1]
tail = cf + l
#print("cf: " + str(cf))
#print("tail: " + str(tail) )
if cf <= rbs.initial <= tail:
dbdstart = exon[1] + rbs.initial - cf
if rbs.final <= tail:
#print("1")
dbdend = exon[1] + rbs.final -cf
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.final) ) )
dbdmap[str(rbs)] = dbd[-1].toString() + " strand:+"
loop = False
break
elif rbs.final > tail:
subtract = l + cf - rbs.initial
#print("2")
#print("Subtract: "+str(subtract))
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=exon[2],
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
#.........這裏部分代碼省略.........
示例12: print
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
print("output:\t" + args.o)
print("organism:\t" + args.organism)
gene = GenomicRegionSet("genes")
### Input BED file
if args.i.endswith(".bed"):
gene.read_bed(args.i)
promoter = GenomicRegionSet("promoter")
promoterLength = int(args.l)
for s in gene:
if s.orientation == "+":
s.initial, s.final = max(s.initial-promoterLength, 0), s.initial
else: s.initial, s.final = s.final, s.final+promoterLength
promoter.add(s)
### Input gene list
else:
ann = AnnotationSet(gene_source=args.organism, alias_source=args.organism,
filter_havana=False, protein_coding=False, known_only=False)
de_gene = GeneSet("de genes")
de_gene.read(args.i)
print(len(de_gene))
promoter = ann.get_promoters(promoterLength=args.l, gene_set=de_gene, unmaplist=False)
#print(len(de_prom))
#print(len(promoter))
promoter.write_bed(args.o)
示例13: _intersect
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def _intersect(self, y, rm_duplicates=False):
"""Return the overlapping regions with three different modes.
(mode = OverlapType.ORIGINAL)
Return the regions of original GenomicRegionSet which have any intersections with y.
Keyword arguments:
y -- the GenomicRegionSet which to compare with
Return:
z -- the regions of original GenomicRegionSet which have any intersections with y
Graphical explanation:
self ---------- ------
y ---------- ----
Result ----------
"""
a = self
b = y
z = GenomicRegionSet(a.name + ' + ' + b.name)
# XXX - someone putted an special symbol and spaces in the name! this is used as file name, never use strange characters.
if len(a) == 0 or len(b) == 0: return z
else:
# If there is overlap within self or y, they should be merged first.
if a.sorted == False: a.sort()
if b.sorted == False: b.sort()
iter_a = iter(a)
s = iter_a.next()
last_j = len(b)-1
j = 0
cont_loop = True
########################### OverlapType.ORIGINAL ###################################
while cont_loop:
#print(str(s),"\t",str(b[j]))
# When the regions overlap
if s.overlap(b[j]):
z.add(s)
try: s = iter_a.next()
except: cont_loop = False
elif s < b[j]:
try: s = iter_a.next()
except: cont_loop = False
elif s > b[j]:
if j == last_j: cont_loop = False
else: j = j + 1
else:
try: s = iter_a.next()
except: cont_loop = False
if rm_duplicates:
z.remove_duplicates()
return z
示例14: gen_html
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import add [as 別名]
def gen_html(self, directory, parameters, obed, align=50, alpha=0.05, score=False):
"""Generate the HTML file"""
dir_name = os.path.basename(directory)
html_header = "Genomic Region Test: " + dir_name
link_ds = OrderedDict()
link_ds["RNA"] = "index.html"
link_ds["Sig Target Regions"] = "starget_regions.html"
link_ds["Target Regions"] = "target_regions.html"
link_ds["Parameters"] = "parameters.html"
##################################################
# index.html
html = Html(name=html_header, links_dict=link_ds, # fig_dir=os.path.join(directory,"style"),
fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
# Plots
html.add_figure("lineplot_region.png", align="left", width="45%", more_images=["boxplot_regions.png"])
if self.showdbs:
html.add_figure("lineplot_dbs.png", align="left", width="45%", more_images=["boxplot_dbs.png"])
if self.showdbs:
header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics",
"Target Regions", "Non-target Regions", None, "Statistics"],
["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
"NO. DBSs", "NO. DBSs (average)", "s.d.", "<i>p</i>-value"]]
header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
"Regions from randomization", None, "Statistics based on target regions",
"Given target regions on DNA", "Regions from randomization", None,
"Statistics based on DNA Binding Sites"],
["", "",
"Number of target regions with DBS binding",
"Number of target regions without DBS binding",
"Average number of regions from randomization with DBS binding",
"Standard deviation", "P value",
"Number of related DNA Binding Sites binding to target regions",
"Average number of DNA Binding Sites binding to random regions",
"Standard deviation", "P-value"]]
border_list = [" style=\"border-right:1pt solid gray\"",
" style=\"border-right:1pt solid gray\"", "",
" style=\"border-right:1pt solid gray\"", "",
" style=\"border-right:1pt solid gray\"",
" style=\"border-right:2pt solid gray\"",
" style=\"border-right:1pt solid gray\"", "",
" style=\"border-right:1pt solid gray\"",
" style=\"border-right:1pt solid gray\""]
else:
header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", None],
["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
"z-score"]]
header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
"Regions from randomization", None, "Statistics based on target regions", None],
["", "",
"Number of target regions with DBS binding",
"Number of target regions without DBS binding",
"Average number of regions from randomization with DBS binding",
"Standard deviation", "P value", "Z-score"]]
border_list = [" style=\"border-right:1pt solid gray\"",
" style=\"border-right:1pt solid gray\"", "",
" style=\"border-right:1pt solid gray\"", "",
" style=\"border-right:1pt solid gray\"",
" style=\"border-right:1pt solid gray\"", ""]
type_list = 'ssssssssssssssss'
col_size_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
data_table = []
for i, rbs in enumerate(self.rbss):
if self.data["region"]["p"][i] < alpha:
p_region = "<font color=\"red\">" + value2str(self.data["region"]["p"][i]) + "</font>"
else:
p_region = value2str(self.data["region"]["p"][i])
zs = (self.counts_tr[rbs][0] - self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
new_line = [str(i + 1),
rbs.str_rna(pa=False),
'<a href="dbd_region.html#' + rbs.str_rna() +
'" style="text-align:left">' + str(self.counts_tr[rbs][0]) + '</a>',
str(self.counts_tr[rbs][1]),
value2str(self.data["region"]["ave"][i]),
value2str(self.data["region"]["sd"][i]),
p_region,
value2str(zs)]
if self.showdbs:
if self.data["dbs"]["p"][i] < alpha:
p_dbs = "<font color=\"red\">" + value2str(self.data["dbs"]["p"][i]) + "</font>"
else:
p_dbs = value2str(self.data["dbs"]["p"][i])
new_line += [str(self.counts_dbs[rbs]),
value2str(self.data["dbs"]["ave"][i]),
value2str(self.data["dbs"]["sd"][i]),
p_dbs]
data_table.append(new_line)
data_table = natsort.natsorted(data_table, key=lambda x: x[6])
html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
auto_width=True, header_titles=header_titles, border_list=border_list, sortable=True)
html.add_heading("Notes")
html.add_list(["RNA name: " + self.rna_name,
#.........這裏部分代碼省略.........