This article collects typical usage examples of the Python method rgt.GenomicRegionSet.GenomicRegionSet.read_bed. If you are wondering what GenomicRegionSet.read_bed does, or how to call it in practice, the curated code examples below may help. You can also explore further usage examples of its containing class, rgt.GenomicRegionSet.GenomicRegionSet.
The following shows 15 code examples of the GenomicRegionSet.read_bed method, ordered by popularity by default.
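Before the examples, here is a minimal sketch of the basic pattern they all share: construct a GenomicRegionSet, load a BED file with read_bed, and then sort, intersect, or iterate over the loaded regions. The BED file path and set name below are illustrative placeholders, not taken from any of the examples.

# Minimal usage sketch ("regions.bed" is a hypothetical placeholder path)
from rgt.GenomicRegionSet import GenomicRegionSet

regions = GenomicRegionSet("my_regions")   # the name argument is an arbitrary label
regions.read_bed("regions.bed")            # load regions from a BED file
regions.sort()                             # the examples below sort before intersect/subtract
for region in regions:
    print(region.chrom, region.initial, region.final, region.name)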
Example 1: create_file
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def create_file(self):
    # Expanding summits
    tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
    tfbs_summit_regions.read_bed(self.tfbs_summit_fname)

    for region in iter(tfbs_summit_regions):
        summit = int(region.data.split()[-1]) + region.initial
        region.initial = max(summit - (self.peak_ext / 2), 0)
        region.final = summit + (self.peak_ext / 2)

    # Calculating intersections
    mpbs_regions = GenomicRegionSet("MPBS Regions")
    mpbs_regions.read_bed(self.mpbs_fname)
    tfbs_summit_regions.sort()
    mpbs_regions.sort()
    with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
    without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)

    tfbs_regions = GenomicRegionSet("TFBS Regions")
    for region in iter(with_overlap_regions):
        region.name = region.name.split(":")[0] + ":Y"
        tfbs_regions.add(region)
    for region in iter(without_overlap_regions):
        region.name = region.name.split(":")[0] + ":N"
        tfbs_regions.add(region)
    tfbs_regions.sort()

    tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
    tfbs_regions.write_bed(tfbs_fname)
Example 2: filter_deadzones
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def filter_deadzones(bed_deadzones, peak_regions):
    """Filter peak regions by deadzones."""
    deadzones = GenomicRegionSet('deadzones')
    deadzones.read_bed(bed_deadzones)
    peak_regions = peak_regions.subtract(deadzones, whole_region=True)
    return peak_regions
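For context, a hypothetical call of this helper might look as follows; the BED file names and the peak set are illustrative placeholders, not part of the original example.

# Hypothetical usage of filter_deadzones
peaks = GenomicRegionSet("peaks")
peaks.read_bed("called_peaks.bed")               # placeholder peak file
peaks = filter_deadzones("deadzones.hg19.bed", peaks)  # placeholder deadzone file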
Example 3: initialize
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts,
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content,
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes,
               test, report, chrom_sizes_dict, counter, end, gc_content_cov=None, avg_gc_content=None,
               gc_hist=None, output_bw=True, save_input=False, m_threshold=80, a_threshold=95, rmdup=False):
    """Initialize the MultiCoverageSet"""
    regionset = regions
    regionset.sequences.sort()

    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None

    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, report)

    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path,
                                     binsize=binsize, stepsize=stepsize, rmdup=rmdup, path_bamfiles=bamfiles,
                                     path_inputs=inputs, exts=exts, exts_inputs=exts_inputs,
                                     factors_inputs=factors_inputs, chrom_sizes=chrom_sizes, verbose=verbose,
                                     no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug,
                                     norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip,
                                     save_wig=save_wig, strand_cov=True, housekeeping_genes=housekeeping_genes,
                                     tracker=tracker, gc_content_cov=gc_content_cov, avg_gc_content=avg_gc_content,
                                     gc_hist=gc_hist, end=end, counter=counter, output_bw=output_bw,
                                     folder_report=FOLDER_REPORT, report=report, save_input=save_input,
                                     m_threshold=m_threshold, a_threshold=a_threshold)
    return multi_cov_set
Example 4: read_bed
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def read_bed(self, bedfile, genome_file_dir):
    """Read the sequences defined by the BED file on the given genome."""
    # Read BED into GenomicRegionSet
    bed = GenomicRegionSet(os.path.basename(bedfile))
    bed.read_bed(bedfile)

    # Parse each chromosome and fetch the defined regions in this chromosome
    chroms = list(set(bed.get_chrom()))
    chro_files = [x.split(".")[0] for x in os.listdir(genome_file_dir)]

    for ch in chroms:
        if ch not in chro_files:
            print(" *** There is no genome FASTA file for: " + ch)

        # Read genome in FASTA according to the given chromosome
        ch_seq = SequenceSet(name=ch, seq_type=SequenceType.DNA)
        try:
            ch_seq.read_fasta(os.path.join(genome_file_dir, ch + ".fa"))
        except:
            continue

        # Regions in the given chromosome
        beds = bed.any_chrom(chrom=ch)
        for s in beds:
            seq = ch_seq[0].seq[s.initial:s.final]
            try:
                strand = s.strand
            except:
                strand = "+"
            self.sequences.append(Sequence(seq=seq, name=s.__repr__(), strand=strand))
Example 5: main
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def main():
    options, vcf_list = input()
    #thres_mq = 20
    #thres_dp = 20
    #filter_dbSNP = True
    #tfbs_motifs_path = '/home/manuel/workspace/cluster_p/human_genetics/exp/exp01_motifsearch_sox2/humangenetics_motifs/Match/chr11_mpbs.bed'
    sample_data = load_data(vcf_list)

    print("##Filter variants of samples", file=sys.stderr)
    pipeline(sample_data, options)

    if options.list_wt:
        wt_data = load_data(options.list_wt)
        print("##Filter variants of wildtypes", file=sys.stderr)
        pipeline(wt_data, options)

        union_wt = GenomicVariantSet(name="union_wt")
        for wt in wt_data:
            union_wt.sequences += wt.sequences

        print("#wildtype variants:", file=sys.stderr)
        print("union WT", len(union_wt), file=sys.stderr, sep="\t")

        # delete Wildtype
        for sample in sample_data:
            sample.subtract(union_wt)

        print_length(sample_data, "#variants after subtracting wildtypes")
    else:
        print("#Do not filter by wildtype", file=sys.stderr)

    if options.max_density:
        get_max_density(GenomicVariantSets=sample_data, lowerBound=options.lower_bound, upperBound=options.upper_bound)
    else:
        print("#Do not perform max. density search", file=sys.stderr)

    if options.list_bed:
        tfbs_motifs = GenomicRegionSet('tfbs_motifs')
        tfbs_motifs.read_bed(options.list_bed)
        for sample in sample_data:
            sample.intersect(tfbs_motifs)
        print_length(sample_data, "#variants after filtering by BED file")
    else:
        print("#Do not filter by BED file", file=sys.stderr)

    print("#Compute intersection of sample's subsets (give intersection's name and size)")
    output_intersections(sample_data)

    print("#Write filtered sample files")
    for sample in sample_data:
        sample.write_vcf("%s-filtered.vcf" % sample.name)
Example 6: initialize
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts,
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content,
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
    """Initialize the MultiCoverageSet"""
    regionset = GenomicRegionSet(name)
    chrom_sizes_dict = {}

    # If the regions option is set, use those regions; otherwise use the whole set of
    # chromosomes as the regions in which to search for DPs.
    if regions is not None:
        print("Call DPs on specified regions.", file=sys.stderr)
        with open(regions) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                c, s, e = line[0], int(line[1]), int(line[2])
                regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
                chrom_sizes_dict[c] = e
    else:
        print("Call DPs on whole genome.", file=sys.stderr)
        with open(chrom_sizes) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                chrom, end = line[0], int(line[1])
                regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
                chrom_sizes_dict[chrom] = end

    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None

    if housekeeping_genes:
        scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)

    if scaling_factors_ip:
        tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")

    regionset.sequences.sort()

    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
    tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")

    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path,
                                     binsize=binsize, stepsize=stepsize, rmdup=True,
                                     path_bamfiles=bamfiles, path_inputs=inputs, exts=exts, exts_inputs=exts_inputs,
                                     factors_inputs=factors_inputs, chrom_sizes=chrom_sizes, verbose=verbose,
                                     no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug,
                                     norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip,
                                     save_wig=save_wig)
    return multi_cov_set
Example 7: merge_DBD_regions
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def merge_DBD_regions(path):
    """Merge all available DBD regions in BED format."""
    for t in os.listdir(path):
        if os.path.isdir(os.path.join(path, t)):
            dbd_pool = GenomicRegionSet(t)
            for rna in os.listdir(os.path.join(path, t)):
                f = os.path.join(path, t, rna, "DBD_" + rna + ".bed")
                if os.path.exists(f):
                    dbd = GenomicRegionSet(rna)
                    dbd.read_bed(f)
                    for r in dbd:
                        r.name = rna + "_" + r.name
                    dbd_pool.combine(dbd)
            dbd_pool.write_bed(os.path.join(path, t, "DBD_" + t + ".bed"))
Example 8: read_bed
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def read_bed(self, bedfile, genome_file_dir):
    """Read the sequences defined by the BED file on the given genome.

    *Keyword arguments:*

        - bedfile -- The path to the BED file which defines the regions.
        - genome_file_dir -- A directory which contains the FASTA files for each chromosome.
    """
    # Read BED into GenomicRegionSet
    from rgt.GenomicRegionSet import GenomicRegionSet
    bed = GenomicRegionSet(os.path.basename(bedfile))
    bed.read_bed(bedfile)
    self.read_genomic_set(bed, genome_file_dir)
Example 9: get_experimental_matrix
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def get_experimental_matrix(bams, bed):
    """Load an artificial experimental matrix. Only the genes in the BED file are needed."""
    m = ExperimentalMatrix()
    m.fields = ['name', 'type', 'file']
    m.fieldsDict = {}
    names = []
    for bam in bams:
        n, _ = os.path.splitext(os.path.basename(bam))
        m.files[n] = bam
        names.append(n)
    m.names = np.array(['housekeep'] + names)
    m.types = np.array(['regions'] + ['reads'] * len(names))
    g = GenomicRegionSet('RegionSet')
    g.read_bed(bed)
    m.objectsDict['housekeep'] = g
    return m
Example 10: get_dbss
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def get_dbss(input_BED, output_BED, rna_fasta, output_rbss, organism, l, e, c, fr, fm, of, mf, rm, temp):
    regions = GenomicRegionSet("Target")
    regions.read_bed(input_BED)
    regions.gene_association(organism=organism, show_dis=True)

    connect_rna(rna_fasta, temp=temp, rna_name="RNA")
    rnas = SequenceSet(name="rna", seq_type=SequenceType.RNA)
    rnas.read_fasta(os.path.join(temp, "rna_temp.fa"))
    rna_regions = get_rna_region_str(os.path.join(temp, rna_fasta))
    # print(rna_regions)

    genome = GenomeData(organism)
    genome_path = genome.get_genome()
    txp = find_triplex(rna_fasta=rna_fasta, dna_region=regions,
                       temp=temp, organism=organism, remove_temp=False,
                       l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, genome_path=genome_path,
                       prefix="targeted_region", dna_fine_posi=True)
    print("Total binding events:\t", str(len(txp)))
    txp.write_bed(output_BED)
    txp.write_txp(filename=output_BED.replace(".bed", ".txp"))
    rbss = txp.get_rbs()
    dbd_regions(exons=rna_regions, sig_region=rbss, rna_name="rna", output=output_rbss,
                out_file=True, temp=temp, fasta=False)
Example 11: find
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def find(s, ch):
    return [i for i, ltr in enumerate(s) if ltr == ch]

##################################################################################

parser = argparse.ArgumentParser(description='Check the coding potential by PhyloCSF',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-i', metavar=' ', type=str, help="Input BED file")
parser.add_argument('-o', metavar=' ', type=str, help="Output BED file with the coding-potential score")
parser.add_argument('-organism', metavar=' ', type=str, help="Define the organism")
parser.add_argument('-rmcoding', metavar=' ', type=float, help="Define the cutoff to remove the entries with coding potential")
parser.add_argument('-mafdir', metavar=' ', type=str, help="Define the directory to MAF files")
# python /projects/reg-gen/tools/phylocsf_check.py -i
args = parser.parse_args()

bed = GenomicRegionSet("input")
bed.read_bed(args.i)
num = len(bed)

organisms = {"hg18": "Human",
             "panTro2": "Chimp",
             "rheMac2": "Rhesus",
             "tarSyr1": "Tarsier",
             "micMur1": "Mouse_lemur",
             "otoGar1": "Bushbaby",
             "tupBel1": "Shrew",
             "mm9": "Mouse",
             "rn4": "Rat",
             "dipOrd1": "Kangaroo_Rat",
             "cavPor2": "Guinea_Pig",
             "speTri1": "Squirrel",
             "oryCun1": "Rabbit",
Example 12: chip_evaluate
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def chip_evaluate(self):
    """
    This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
    to evaluate the footprint predictions.
    """
    # Evaluate Statistics
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    roc_auc_1 = dict()
    roc_auc_2 = dict()
    recall = dict()
    precision = dict()
    prc_auc = dict()

    if "SEG" in self.footprint_type:
        mpbs_regions = GenomicRegionSet("TFBS")
        mpbs_regions.read_bed(self.tfbs_file)
        mpbs_regions.sort()

        # Verifying the maximum score of the MPBS file
        max_score = -99999999
        for region in iter(mpbs_regions):
            score = int(region.data)
            if score > max_score:
                max_score = score
        max_score += 1

    for i in range(len(self.footprint_file)):
        footprints_regions = GenomicRegionSet("Footprints Prediction")
        footprints_regions.read_bed(self.footprint_file[i])
        # Sort footprint prediction bed files
        footprints_regions.sort()

        if self.footprint_type[i] == "SEG":
            # Increase the score of an MPBS entry once if any overlap is found in the predicted footprints.
            increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
            intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
            for region in iter(intersect_regions):
                region.data = str(int(region.data) + max_score)
                increased_score_mpbs_regions.add(region)

            # Keep the score of the remaining MPBS entries unchanged
            without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
            for region in iter(without_intersect_regions):
                increased_score_mpbs_regions.add(region)

            increased_score_mpbs_regions.sort_score()
            fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
            recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
        elif self.footprint_type[i] == "SC":
            footprints_regions.sort_score()
            fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
            recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)

    # Output the statistics results into text
    stats_fname = self.output_location + self.tf_name + "_stats.txt"
    stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
    with open(stats_fname, "w") as stats_file:
        stats_file.write("\t".join(stats_header) + "\n")
        for i in range(len(self.footprint_name)):
            stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
                             + str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")

    # Output the curves
    if self.print_roc_curve:
        label_x = "False Positive Rate"
        label_y = "True Positive Rate"
        curve_name = "ROC"
        self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
    if self.print_pr_curve:
        label_x = "Recall"
        label_y = "Precision"
        curve_name = "PRC"
        self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)

    self.output_points(self.tf_name, fpr, tpr, recall, precision)
Example 13: read_states_signals
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def read_states_signals(self):
    # Read states from the annotation file
    states = ""
    with open(self.annotate_fname) as annotate_file:
        for line in annotate_file:
            if len(line) < 2 or "#" in line or "=" in line:
                continue
            ll = line.strip().split(" ")
            for state in ll[1:-1]:
                states += state

    # If need to estimate bias table
    bias_table = BiasTable(output_loc=self.output_locaiton)
    genome_data = GenomeData(self.organism)
    table = None
    if self.estimate_bias_correction:
        regions = GenomicRegionSet("Bias Regions")
        if self.original_regions.split(".")[-1] == "bed":
            regions.read_bed(self.original_regions)
        if self.original_regions.split(".")[-1] == "fa":
            regions.read_sequence(self.original_regions)

        if self.estimate_bias_type == "FRE":
            table = bias_table.estimate_table(regions=regions, dnase_file_name=self.bam_file,
                                              genome_file_name=genome_data.get_genome(),
                                              k_nb=self.k_nb,
                                              forward_shift=self.atac_forward_shift,
                                              reverse_shift=self.atac_reverse_shift)
        elif self.estimate_bias_type == "PWM":
            table = bias_table.estimate_table_pwm(regions=regions, dnase_file_name=self.bam_file,
                                                  genome_file_name=genome_data.get_genome(),
                                                  k_nb=self.k_nb,
                                                  forward_shift=self.atac_forward_shift,
                                                  reverse_shift=self.atac_reverse_shift)

        bias_fname = os.path.join(self.output_locaiton, "Bias", "{}_{}".format(self.k_nb, self.atac_forward_shift))
        bias_table.write_tables(bias_fname, table)

    # If the bias table is provided
    if self.bias_table:
        bias_table_list = self.bias_table.split(",")
        table = bias_table.load_table(table_file_name_F=bias_table_list[0],
                                      table_file_name_R=bias_table_list[1])

    # Get the normalization and slope signal from the raw bam file
    raw_signal = GenomicSignal(self.bam_file)
    raw_signal.load_sg_coefs(slope_window_size=9)
    norm_signal, slope_signal = raw_signal.get_signal(ref=self.chrom, start=self.start, end=self.end,
                                                      downstream_ext=self.atac_downstream_ext,
                                                      upstream_ext=self.atac_upstream_ext,
                                                      forward_shift=self.atac_forward_shift,
                                                      reverse_shift=self.atac_reverse_shift,
                                                      initial_clip=self.atac_initial_clip,
                                                      bias_table=table,
                                                      genome_file_name=genome_data.get_genome(),
                                                      print_raw_signal=self.print_raw_signal,
                                                      print_bc_signal=self.print_bc_signal,
                                                      print_norm_signal=self.print_norm_signal,
                                                      print_slope_signal=self.print_slope_signal)

    if self.print_bed_file:
        self.output_bed_file(states)

    return states, norm_signal, slope_signal
Example 14: line
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
def line(self):
    signal = GenomicSignal(self.bam_file)
    signal.load_sg_coefs(slope_window_size=9)

    bias_table = BiasTable()
    bias_table_list = self.bias_table.split(",")
    table = bias_table.load_table(table_file_name_F=bias_table_list[0],
                                  table_file_name_R=bias_table_list[1])

    genome_data = GenomeData(self.organism)
    fasta = Fastafile(genome_data.get_genome())
    pwm_dict = dict([("A", [0.0] * self.window_size), ("C", [0.0] * self.window_size),
                     ("G", [0.0] * self.window_size), ("T", [0.0] * self.window_size),
                     ("N", [0.0] * self.window_size)])

    mean_raw_signal = np.zeros(self.window_size)
    mean_bc_signal = np.zeros(self.window_size)
    mean_raw_signal_f = np.zeros(self.window_size)
    mean_bc_signal_f = np.zeros(self.window_size)
    mean_raw_signal_r = np.zeros(self.window_size)
    mean_bc_signal_r = np.zeros(self.window_size)
    mean_bias_signal_f = np.zeros(self.window_size)
    mean_bias_signal_r = np.zeros(self.window_size)
    num_sites = 0

    mpbs_regions = GenomicRegionSet("Motif Predicted Binding Sites")
    mpbs_regions.read_bed(self.motif_file)

    total_nc_signal = 0
    total_nl_signal = 0
    total_nr_signal = 0

    for region in mpbs_regions:
        if str(region.name).split(":")[-1] == "Y":
            num_sites += 1
            # Extend by 50 bp
            mid = (region.initial + region.final) / 2
            p1 = mid - (self.window_size / 2)
            p2 = mid + (self.window_size / 2)

            if not self.strands_specific:
                # Fetch raw signal
                raw_signal, _ = signal.get_signal(ref=region.chrom, start=p1, end=p2,
                                                  downstream_ext=self.atac_downstream_ext,
                                                  upstream_ext=self.atac_upstream_ext,
                                                  forward_shift=self.atac_forward_shift,
                                                  reverse_shift=self.atac_reverse_shift,
                                                  genome_file_name=genome_data.get_genome())
                mean_raw_signal = np.add(mean_raw_signal, raw_signal)

                # Fetch bias-corrected signal
                bc_signal, _ = signal.get_signal(ref=region.chrom, start=p1, end=p2,
                                                 bias_table=table,
                                                 downstream_ext=self.atac_downstream_ext,
                                                 upstream_ext=self.atac_upstream_ext,
                                                 forward_shift=self.atac_forward_shift,
                                                 reverse_shift=self.atac_reverse_shift,
                                                 genome_file_name=genome_data.get_genome())
                mean_bc_signal = np.add(mean_bc_signal, bc_signal)
            else:
                raw_signal_f, _, raw_signal_r, _ = signal.get_signal_per_strand(ref=region.chrom, start=p1, end=p2,
                                                                                downstream_ext=self.atac_downstream_ext,
                                                                                upstream_ext=self.atac_upstream_ext,
                                                                                forward_shift=self.atac_forward_shift,
                                                                                reverse_shift=self.atac_reverse_shift,
                                                                                genome_file_name=genome_data.get_genome())
                mean_raw_signal_f = np.add(mean_raw_signal_f, raw_signal_f)
                mean_raw_signal_r = np.add(mean_raw_signal_r, raw_signal_r)

                bc_signal_f, _, bc_signal_r, _ = signal.get_signal_per_strand(ref=region.chrom, start=p1, end=p2,
                                                                              bias_table=table,
                                                                              downstream_ext=self.atac_downstream_ext,
                                                                              upstream_ext=self.atac_upstream_ext,
                                                                              forward_shift=self.atac_forward_shift,
                                                                              reverse_shift=self.atac_reverse_shift,
                                                                              genome_file_name=genome_data.get_genome())
                mean_bc_signal_f = np.add(mean_bc_signal_f, bc_signal_f)
                mean_bc_signal_r = np.add(mean_bc_signal_r, bc_signal_r)

            # Update pwm
            aux_plus = 1
            dna_seq = str(fasta.fetch(region.chrom, p1, p2)).upper()
            if (region.final - region.initial) % 2 == 0:
                aux_plus = 0
            dna_seq_rev = AuxiliaryFunctions.revcomp(str(fasta.fetch(region.chrom,
                                                                     p1 + aux_plus, p2 + aux_plus)).upper())
            if region.orientation == "+":
                for i in range(0, len(dna_seq)):
                    pwm_dict[dna_seq[i]][i] += 1
            elif region.orientation == "-":
                for i in range(0, len(dna_seq_rev)):
                    pwm_dict[dna_seq_rev[i]][i] += 1

            # Create bias signal
            bias_table_f = table[0]
            bias_table_r = table[1]
            self.k_nb = len(bias_table_f.keys()[0])
            bias_signal_f = []
            # ......... part of the code is omitted here .........
Example 15: print
# Required import: from rgt.GenomicRegionSet import GenomicRegionSet [as alias]
# Or: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as alias]
seq = "\t".join([ch, line[4], line[3], gn, ".", line[6]])
else:
continue
# print(seq)
if not args.g:
print(seq, file=g)
elif select_genes.check(gn) or select_genes.check(gi):
print(seq, file=g)
else:
continue
if args.b:
exons = GenomicRegionSet("output")
exons.read_bed(args.o)
exons.write_bed_blocks(args.o)
# sys.exit(1)
# if args.g:
# select_genes = GeneSet("genes")
# select_genes.read(args.g)
# # if args.t == "gene" or args.t == "transcript":
# with open(args.i, "r") as f,open(args.o, "w") as g:
# find_ind = False
# for line in f:
# if line[0] == "#":
# continue
# elif args.known_only: