本文整理匯總了Python中rgt.GenomicRegionSet.GenomicRegionSet.intersect方法的典型用法代碼示例。如果您正苦於以下問題:Python GenomicRegionSet.intersect方法的具體用法?Python GenomicRegionSet.intersect怎麽用?Python GenomicRegionSet.intersect使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rgt.GenomicRegionSet.GenomicRegionSet
的用法示例。
在下文中一共展示了GenomicRegionSet.intersect方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: create_file
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import intersect [as 別名]
def create_file(self):
# Expanding summits
tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
tfbs_summit_regions.read_bed(self.tfbs_summit_fname)
for region in iter(tfbs_summit_regions):
summit = int(region.data.split()[-1]) + region.initial
region.initial = max(summit - (self.peak_ext / 2), 0)
region.final = summit + (self.peak_ext / 2)
# Calculating intersections
mpbs_regions = GenomicRegionSet("MPBS Regions")
mpbs_regions.read_bed(self.mpbs_fname)
tfbs_summit_regions.sort()
mpbs_regions.sort()
with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)
tfbs_regions = GenomicRegionSet("TFBS Regions")
for region in iter(with_overlap_regions):
region.name = region.name.split(":")[0] + ":Y"
tfbs_regions.add(region)
for region in iter(without_overlap_regions):
region.name = region.name.split(":")[0] + ":N"
tfbs_regions.add(region)
tfbs_regions.sort()
tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
tfbs_regions.write_bed(tfbs_fname)
示例2: fisher_table
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import intersect [as 別名]
def fisher_table(motif_name, regions, mpbs, gene_set=False, mpbs_set=False):
"""
TODO
Keyword arguments:
motif_name -- TODO
regions -- TODO
mpbs -- TODO
gene_set -- TODO
mpbs_set -- TODO
Return:
a -- TODO
b -- TODO
gene_set -- TODO
mpbs_set -- TODO
"""
# Fetching motif
mpbs_motif = GenomicRegionSet(name="mpbs_motif")
for region in mpbs.sequences:
if motif_name in region.name:
mpbs_motif.add(region)
# Performing intersections
if len(mpbs_motif) > 0:
# regions which are overlapping with mpbs_motif
intersect_original = regions.intersect(mpbs_motif, mode=OverlapType.ORIGINAL, rm_duplicates=True)
# regions which are not overlapping with regions from mpbs_motif
subtract_overlap = regions.subtract(mpbs_motif, whole_region=True)
# Fetching genes
if gene_set:
gene_set_res = GeneSet(motif_name)
for genomic_region in intersect_original.sequences:
if genomic_region.name:
gene_list = [e if e[0] != "." else e[1:] for e in genomic_region.name.split(":")]
for g in gene_list:
gene_set_res.genes.append(g)
gene_set_res.genes = list(set(gene_set_res.genes)) # Keep only unique genes
else:
gene_set_res = None
# Fetching mpbs
if mpbs_set:
mpbs_set_res = mpbs_motif.intersect(regions, mode=OverlapType.ORIGINAL, rm_duplicates=True)
else:
mpbs_set_res = None
return len(intersect_original), len(subtract_overlap), gene_set_res, mpbs_set_res
else:
gene_set_res = GeneSet(motif_name) if gene_set else None
mpbs_set_res = GenomicRegionSet(mpbs_motif.name) if mpbs_set else None
return 0, len(regions), gene_set_res, mpbs_set_res
示例3: chip_evaluate
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import intersect [as 別名]
def chip_evaluate(self):
"""
This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
to evaluate the footprint predictions.
return:
"""
# Evaluate Statistics
fpr = dict()
tpr = dict()
roc_auc = dict()
roc_auc_1 = dict()
roc_auc_2 = dict()
recall = dict()
precision = dict()
prc_auc = dict()
if "SEG" in self.footprint_type:
mpbs_regions = GenomicRegionSet("TFBS")
mpbs_regions.read_bed(self.tfbs_file)
mpbs_regions.sort()
# Verifying the maximum score of the MPBS file
max_score = -99999999
for region in iter(mpbs_regions):
score = int(region.data)
if score > max_score:
max_score = score
max_score += 1
for i in range(len(self.footprint_file)):
footprints_regions = GenomicRegionSet("Footprints Prediction")
footprints_regions.read_bed(self.footprint_file[i])
# Sort footprint prediction bed files
footprints_regions.sort()
if self.footprint_type[i] == "SEG":
# Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
for region in iter(intersect_regions):
region.data = str(int(region.data) + max_score)
increased_score_mpbs_regions.add(region)
# Keep the score of remained MPBS entry unchanged
without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
for region in iter(without_intersect_regions):
increased_score_mpbs_regions.add(region)
increased_score_mpbs_regions.sort_score()
fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
elif self.footprint_type[i] == "SC":
footprints_regions.sort_score()
fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)
# Output the statistics results into text
stats_fname = self.output_location + self.tf_name + "_stats.txt"
stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
with open(stats_fname, "w") as stats_file:
stats_file.write("\t".join(stats_header) + "\n")
for i in range(len(self.footprint_name)):
stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
+ str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")
# Output the curves
if self.print_roc_curve:
label_x = "False Positive Rate"
label_y = "True Positive Rate"
curve_name = "ROC"
self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
if self.print_pr_curve:
label_x = "Recall"
label_y = "Precision"
curve_name = "PRC"
self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)
self.output_points(self.tf_name, fpr, tpr, recall, precision)
示例4: chip_evaluate
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import intersect [as 別名]
def chip_evaluate(args):
# Evaluate Statistics
fpr = dict()
tpr = dict()
roc_auc_1 = dict()
roc_auc_10 = dict()
roc_auc_50 = dict()
roc_auc_100 = dict()
recall = dict()
precision = dict()
prc_auc_1 = dict()
prc_auc_10 = dict()
prc_auc_50 = dict()
prc_auc_100 = dict()
footprint_file = args.footprint_file.split(",")
footprint_name = args.footprint_name.split(",")
footprint_type = args.footprint_type.split(",")
max_score = 0
if "SEG" in footprint_type:
mpbs_regions = GenomicRegionSet("TFBS")
mpbs_regions.read(args.tfbs_file)
# Verifying the maximum score of the MPBS file
for region in iter(mpbs_regions):
score = int(region.data.split("\t")[0])
if score > max_score:
max_score = score
max_score += 1
max_points = []
for i in range(len(footprint_file)):
footprints_regions = GenomicRegionSet("Footprints Prediction")
footprints_regions.read(footprint_file[i])
footprints_regions.sort()
if footprint_type[i] == "SEG":
# Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
for region in iter(intersect_regions):
region.data = str(int(region.data.split("\t")[0]) + max_score)
increased_score_mpbs_regions.add(region)
# Keep the score of remained MPBS entry unchanged
without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
for region in iter(without_intersect_regions):
increased_score_mpbs_regions.add(region)
increased_score_mpbs_regions.sort_score()
fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
roc_curve(increased_score_mpbs_regions)
recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
precision_recall_curve(increased_score_mpbs_regions)
max_points.append(len(intersect_regions))
elif footprint_type[i] == "SC":
footprints_regions.sort_score()
fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
roc_curve(footprints_regions)
recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
precision_recall_curve(footprints_regions)
max_points.append(len(footprints_regions))
# Output the statistics results into text
stats_fname = os.path.join(args.output_location, "{}_stats.txt".format(args.output_prefix))
stats_header = ["METHOD", "AUC_100", "AUC_50", "AUC_10", "AUC_1", "AUPR_100", "AUPR_50", "AUPR_10", "AUPR_1"]
with open(stats_fname, "w") as stats_file:
stats_file.write("\t".join(stats_header) + "\n")
for i in range(len(footprint_name)):
stats_file.write(footprint_name[i] + "\t" +
str(roc_auc_100[i]) + "\t" + str(roc_auc_50[i]) + "\t" + str(roc_auc_10[i]) + "\t" +
str(roc_auc_1[i]) + "\t" + str(prc_auc_100[i]) + "\t" + str(prc_auc_50[i]) + "\t" +
str(prc_auc_10[i]) + "\t" + str(prc_auc_1[i]) + "\n")
# Output the curves
if args.print_roc_curve:
label_x = "False Positive Rate"
label_y = "True Positive Rate"
curve_name = "ROC"
plot_curve(footprint_name, args.output_location, fpr, tpr, roc_auc_100, label_x, label_y, args.output_prefix,
curve_name, max_points=max_points)
if args.print_pr_curve:
label_x = "Recall"
label_y = "Precision"
curve_name = "PRC"
plot_curve(footprint_name, args.output_location, recall, precision, prc_auc_100, label_x, label_y,
args.output_prefix, curve_name, max_points=max_points)
output_points(footprint_name, args.output_location, args.output_prefix, fpr, tpr, recall, precision)
示例5: str
# 需要導入模塊: from rgt.GenomicRegionSet import GenomicRegionSet [as 別名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import intersect [as 別名]
line[6], line[2], str(min(donor)), str(max(acceptor)),
"255,0,0", "2",
",".join([ str(abs(donor[1] - donor[0])),
str(abs(acceptor[1] - acceptor[0])) ]),
"0,"+str(abs(min(donor)-min(acceptor))) ]), file=g)
else:
pass
#print(line)
#sys.exit()
print("tcons:\t" + args.t)
tcons = GenomicRegionSet("tcons")
tcons.read_bed(args.t)
circrna = GenomicRegionSet("circRNA")
circrna.read_bed(args.o)
circ_inTCON = circrna.intersect(y=tcons, mode = OverlapType.COMP_INCL)
circ_TCONs = tcons.intersect(y=circ_inTCON, mode = OverlapType.ORIGINAL)
#print(len(circ_TCONs))
circ_TCONs.write_bed(args.c)
# 0 1 2 3 4 5 6 7 8
# chr1 39449029 + chr1 39448068 + 0 0 0
# 9 10 11 12 13
# 97ZZTR1:411:C4VC3ACXX:5:1102:16097:34171 39448994 35M15S 39448069 35S15M868p50M
############### FASTA slicing #######################################
elif args.mode == "sliceFASTA":
print(os.path.basename(args.i) + " -start "+str(args.p)+" -end "+str(args.p+args.l))
from rgt.SequenceSet import SequenceSet
seq = SequenceSet(name=args.i, seq_type="RNA")