本文整理匯總了Python中rgt.GenomicRegionSet.GenomicRegionSet類的典型用法代碼示例。如果您正苦於以下問題:Python GenomicRegionSet類的具體用法?Python GenomicRegionSet怎麽用?Python GenomicRegionSet使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了GenomicRegionSet類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: initialize
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes, \
test, report, chrom_sizes_dict, counter, end, gc_content_cov=None, avg_gc_content=None, \
gc_hist=None, output_bw=True, save_input=False, m_threshold=80, a_threshold=95, rmdup=False):
"""Initialize the MultiCoverageSet"""
regionset = regions
regionset.sequences.sort()
if norm_regions:
norm_regionset = GenomicRegionSet('norm_regions')
norm_regionset.read_bed(norm_regions)
else:
norm_regionset = None
exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, report)
multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path,
binsize=binsize, stepsize=stepsize, rmdup=rmdup, path_bamfiles=bamfiles,
path_inputs=inputs, exts=exts, exts_inputs=exts_inputs,
factors_inputs=factors_inputs, chrom_sizes=chrom_sizes, verbose=verbose,
no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug,
norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip,
save_wig=save_wig, strand_cov=True, housekeeping_genes=housekeeping_genes,
tracker=tracker, gc_content_cov=gc_content_cov, avg_gc_content=avg_gc_content,
gc_hist=gc_hist, end=end, counter=counter, output_bw=output_bw,
folder_report=FOLDER_REPORT, report=report, save_input=save_input,
m_threshold=m_threshold, a_threshold=a_threshold)
return multi_cov_set
示例2: read_bed
def read_bed(self, bedfile, genome_file_dir):
"""Read the sequences defined by BED file on the given genomce"""
# Read BED into GenomicRegionSet
bed = GenomicRegionSet(os.path.basename(bedfile))
bed.read_bed(bedfile)
# Parse each chromosome and fetch the defined region in this chromosome
chroms = list(set(bed.get_chrom()))
chro_files = [x.split(".")[0] for x in os.listdir(genome_file_dir)]
for ch in chroms:
if ch not in chro_files: print(" *** There is no genome FASTA file for: "+ch)
# Read genome in FASTA according to the given chromosome
ch_seq = SequenceSet(name=ch, seq_type=SequenceType.DNA)
try:
ch_seq.read_fasta(os.path.join(genome_file_dir, ch+".fa"))
except:
continue
# Regions in given chromosome
beds = bed.any_chrom(chrom=ch)
for s in beds:
seq = ch_seq[0].seq[s.initial:s.final]
try: strand = s.strand
except: strand = "+"
self.sequences.append(Sequence(seq=seq, name=s.__repr__(),
strand=strand))
示例3: estimate_bias_vom
def estimate_bias_vom(args):
regions = GenomicRegionSet("regions")
regions.read(args.regions_file)
create_signal(args, regions)
hmm_data = HmmData()
learn_dependency_model = hmm_data.get_dependency_model()
slim_dimont_predictor = hmm_data.get_slim_dimont_predictor()
test_fa = hmm_data.get_default_test_fa()
shutil.copy(test_fa, args.output_location)
os.chdir(args.output_location)
print(os.getcwd())
output_fname_f_obs = os.path.join(args.output_location, "{}_f_obs.fa".format(str(args.k_nb)))
output_fname_f_exp = os.path.join(args.output_location, "{}_f_exp.fa".format(str(args.k_nb)))
output_fname_r_obs = os.path.join(args.output_location, "{}_r_obs.fa".format(str(args.k_nb)))
output_fname_r_exp = os.path.join(args.output_location, "{}_r_exp.fa".format(str(args.k_nb)))
infix = "{}_f_obs".format(str(args.k_nb))
create_model(args, output_fname_f_obs, infix, learn_dependency_model, slim_dimont_predictor)
infix = "{}_f_exp".format(str(args.k_nb))
create_model(args, output_fname_f_exp, infix, learn_dependency_model, slim_dimont_predictor)
infix = "{}_r_obs".format(str(args.k_nb))
create_model(args, output_fname_r_obs, infix, learn_dependency_model, slim_dimont_predictor)
infix = "{}_r_exp".format(str(args.k_nb))
create_model(args, output_fname_r_exp, infix, learn_dependency_model, slim_dimont_predictor)
os.remove(os.path.join(args.output_location, "test.fa"))
compute_bias(args)
示例4: filter_deadzones
def filter_deadzones(bed_deadzones, peak_regions):
"""Filter by peaklist by deadzones"""
deadzones = GenomicRegionSet('deadzones')
deadzones.read_bed(bed_deadzones)
peak_regions = peak_regions.subtract(deadzones, whole_region=True)
return peak_regions
示例5: fisher_table
def fisher_table(motif_name, regions, mpbs, gene_set=False, mpbs_set=False):
"""
TODO
Keyword arguments:
motif_name -- TODO
regions -- TODO
mpbs -- TODO
gene_set -- TODO
mpbs_set -- TODO
Return:
a -- TODO
b -- TODO
gene_set -- TODO
mpbs_set -- TODO
"""
# Fetching motif
mpbs_motif = GenomicRegionSet(name="mpbs_motif")
for region in mpbs.sequences:
if motif_name in region.name:
mpbs_motif.add(region)
# Performing intersections
if len(mpbs_motif) > 0:
# regions which are overlapping with mpbs_motif
intersect_original = regions.intersect(mpbs_motif, mode=OverlapType.ORIGINAL, rm_duplicates=True)
# regions which are not overlapping with regions from mpbs_motif
subtract_overlap = regions.subtract(mpbs_motif, whole_region=True)
# Fetching genes
if gene_set:
gene_set_res = GeneSet(motif_name)
for genomic_region in intersect_original.sequences:
if genomic_region.name:
gene_list = [e if e[0] != "." else e[1:] for e in genomic_region.name.split(":")]
for g in gene_list:
gene_set_res.genes.append(g)
gene_set_res.genes = list(set(gene_set_res.genes)) # Keep only unique genes
else:
gene_set_res = None
# Fetching mpbs
if mpbs_set:
mpbs_set_res = mpbs_motif.intersect(regions, mode=OverlapType.ORIGINAL, rm_duplicates=True)
else:
mpbs_set_res = None
return len(intersect_original), len(subtract_overlap), gene_set_res, mpbs_set_res
else:
gene_set_res = GeneSet(motif_name) if gene_set else None
mpbs_set_res = GenomicRegionSet(mpbs_motif.name) if mpbs_set else None
return 0, len(regions), gene_set_res, mpbs_set_res
示例6: main
def main():
options, vcf_list = input()
#thres_mq = 20
#thres_dp = 20
#filter_dbSNP = True
#tfbs_motifs_path = '/home/manuel/workspace/cluster_p/human_genetics/exp/exp01_motifsearch_sox2/humangenetics_motifs/Match/chr11_mpbs.bed'
sample_data = load_data(vcf_list)
print("##Filter variants of samples", file=sys.stderr)
pipeline(sample_data, options)
if options.list_wt:
wt_data = load_data(options.list_wt)
print("##Filter variants of wildtypes", file=sys.stderr)
pipeline(wt_data, options)
union_wt = GenomicVariantSet(name = "union_wt")
for wt in wt_data:
union_wt.sequences += wt.sequences
print("#wildtype variants:", file=sys.stderr)
print("union WT", len(union_wt), file=sys.stderr, sep="\t")
#delete Wildtype
for sample in sample_data:
sample.subtract(union_wt)
print_length(sample_data, "#variants after subtracting wildtypes")
else:
print("#Do not filter by wildtype", file=sys.stderr)
if options.max_density:
get_max_density(GenomicVariantSets=sample_data, lowerBound=options.lower_bound, upperBound=options.upper_bound)
else:
print("#Do not perform max. density search", file=sys.stderr)
if options.list_bed:
tfbs_motifs = GenomicRegionSet('tfbs_motifs')
tfbs_motifs.read_bed(options.list_bed)
for sample in sample_data:
sample.intersect(tfbs_motifs)
print_length(sample_data, "#variants after filtering by BED file")
else:
print("#Do not filter by BED file", file=sys.stderr)
print("#Compute intersection of sample's subsets (give intersection's name and size)")
output_intersections(sample_data)
print("#Write filtered sample files")
for sample in sample_data:
sample.write_vcf("%s-filtered.vcf" %sample.name)
示例7: dbd_regions
def dbd_regions(self, sig_region, output):
"""Generate the BED file of significant DBD regions and FASTA file of the sequences"""
dbd_regions(exons=self.rna_regions, sig_region=sig_region, rna_name=self.rna_name, output=output)
self.stat["DBD_all"] = str(len(self.rbss))
self.stat["DBD_sig"] = str(len(self.data["region"]["sig_region"]))
sigDBD = GenomicRegionSet("DBD_sig")
sigDBD.sequences = self.data["region"]["sig_region"]
rbss = self.txp.get_rbs()
overlaps = rbss.intersect(y=sigDBD, mode=OverlapType.ORIGINAL)
self.stat["DBSs_target_DBD_sig"] = str(len(overlaps))
示例8: read_bed
def read_bed(self, bedfile, genome_file_dir):
"""Read the sequences defined by BED file on the given genomce.
*Keyword arguments:*
- bedfile -- The path to the BED file which defines the regions.
- genome_file_dir -- A directory which contains the FASTA files for each chromosome.
"""
# Read BED into GenomicRegionSet
from rgt.GenomicRegionSet import GenomicRegionSet
bed = GenomicRegionSet(os.path.basename(bedfile))
bed.read_bed(bedfile)
self.read_genomic_set(bed, genome_file_dir)
示例9: get_raw_tracks
def get_raw_tracks(args):
# Initializing Error Handler
err = ErrorHandler()
if len(args.input_files) != 2:
err.throw_error("ME_FEW_ARG", add_msg="You must specify reads and regions file.")
output_fname = os.path.join(args.output_location, "{}.wig".format(args.output_prefix))
bam = Samfile(args.input_files[0], "rb")
regions = GenomicRegionSet("Interested regions")
regions.read(args.input_files[1])
regions.merge()
reads_file = GenomicSignal()
with open(output_fname, "a") as output_f:
for region in regions:
# Raw counts
signal = [0.0] * (region.final - region.initial)
for read in bam.fetch(region.chrom, region.initial, region.final):
if not read.is_reverse:
cut_site = read.pos + args.forward_shift
if region.initial <= cut_site < region.final:
signal[cut_site - region.initial] += 1.0
else:
cut_site = read.aend + args.reverse_shift - 1
if region.initial <= cut_site < region.final:
signal[cut_site - region.initial] += 1.0
if args.norm:
signal = reads_file.boyle_norm(signal)
perc = scoreatpercentile(signal, 98)
std = np.std(signal)
signal = reads_file.hon_norm_atac(signal, perc, std)
output_f.write("fixedStep chrom=" + region.chrom + " start=" + str(region.initial + 1) + " step=1\n" +
"\n".join([str(e) for e in np.nan_to_num(signal)]) + "\n")
output_f.close()
if args.bigWig:
genome_data = GenomeData(args.organism)
chrom_sizes_file = genome_data.get_chromosome_sizes()
bw_filename = os.path.join(args.output_location, "{}.bw".format(args.output_prefix))
os.system(" ".join(["wigToBigWig", output_fname, chrom_sizes_file, bw_filename, "-verbose=0"]))
os.remove(output_fname)
示例10: get_bc_signal
def get_bc_signal(arguments):
(mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
window_size, forward_shift, reverse_shift, bias_table1, bias_table2) = arguments
mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
mpbs1.read(mpbs_file1)
mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
mpbs2.read(mpbs_file2)
mpbs = mpbs1.combine(mpbs2, output=True)
mpbs.sort()
bam1 = Samfile(reads_file1, "rb")
bam2 = Samfile(reads_file2, "rb")
genome_data = GenomeData(organism)
fasta = Fastafile(genome_data.get_genome())
signal_1 = np.zeros(window_size)
signal_2 = np.zeros(window_size)
motif_len = None
pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
("G", [0.0] * window_size), ("T", [0.0] * window_size),
("N", [0.0] * window_size)])
mpbs_regions = mpbs.by_names([mpbs_name])
num_motif = len(mpbs_regions)
# Fetch bias corrected signal
for region in mpbs_regions:
if motif_len is None:
motif_len = region.final - region.initial
mid = (region.final + region.initial) / 2
p1 = mid - window_size / 2
p2 = mid + window_size / 2
if p1 <= 0:
continue
# Fetch raw signal
signal1 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam1,
bias_table=bias_table1, genome_file_name=genome_data.get_genome(),
forward_shift=forward_shift, reverse_shift=reverse_shift)
signal2 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam2,
bias_table=bias_table2, genome_file_name=genome_data.get_genome(),
forward_shift=forward_shift, reverse_shift=reverse_shift)
if len(signal1) != len(signal_1) or len(signal2) != len(signal_2):
continue
# smooth the signal
signal_1 = np.add(signal_1, np.array(signal1))
signal_2 = np.add(signal_2, np.array(signal2))
update_pwm(pwm, fasta, region, p1, p2)
return signal_1, signal_2, motif_len, pwm, num_motif
示例11: get_training_regionset
def get_training_regionset(self):
r = GenomicRegionSet('')
r.add(self.regionset[self.counter])
if self.counter == len(self.chrom_sizes_dict):
return None
else:
self.counter += 1
return r
#if regions option is set, take the values, otherwise the whole set of
#chromosomes as region to search for DPs
# if test:
# contained_chrom = ['chr1', 'chr2']
# else:
# #contained_chrom = get_all_chrom(bamfiles)
# contained_chrom = ['chr1', 'chr2']
示例12: get_experimental_matrix
def get_experimental_matrix(bams, bed):
"""Load artificially experimental matrix. Only genes in BED file are needed."""
m = ExperimentalMatrix()
m.fields = ['name', 'type', 'file']
m.fieldsDict = {}
names = []
for bam in bams:
n, _ = os.path.splitext(os.path.basename(bam))
m.files[n] = bam
names.append(n)
m.names = np.array(['housekeep'] + names)
m.types = np.array(['regions'] + ['reads']*len(names))
g = GenomicRegionSet('RegionSet')
g.read_bed(bed)
m.objectsDict['housekeep'] = g
return m
示例13: rna_associated_gene
def rna_associated_gene(rna_regions, name, organism):
if rna_regions:
s = [ rna_regions[0][0], min([e[1] for e in rna_regions]),
max([e[2] for e in rna_regions]), rna_regions[0][3] ]
g = GenomicRegionSet("RNA associated genes")
g.add( GenomicRegion(chrom=s[0], initial=s[1], final=s[2], name=name, orientation=s[3]) )
asso_genes = g.gene_association(organism=organism, promoterLength=1000, show_dis=True)
genes = asso_genes[0].name.split(":")
closest_genes = []
for n in genes:
if name not in n: closest_genes.append(n)
closest_genes = set(closest_genes)
if len(closest_genes) == 0:
return "."
else:
return ":".join(closest_genes)
else:
return "."
示例14: subtract
def subtract(self, x):
"""
Subtract GenomicVariantSet.
*Keyword arguments:*
- x -- instance of GenomicVariantSet which is subtracted
"""
tmp = GenomicRegionSet.subtract(self, x, whole_region=False)
self.sequences = self._reconstruct_info(tmp)
示例15: initialize
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
"""Initialize the MultiCoverageSet"""
regionset = GenomicRegionSet(name)
chrom_sizes_dict = {}
#if regions option is set, take the values, otherwise the whole set of
#chromosomes as region to search for DPs
if regions is not None:
print("Call DPs on specified regions.", file=sys.stderr)
with open(regions) as f:
for line in f:
line = line.strip()
line = line.split('\t')
c, s, e = line[0], int(line[1]), int(line[2])
regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
chrom_sizes_dict[c] = e
else:
print("Call DPs on whole genome.", file=sys.stderr)
with open(chrom_sizes) as f:
for line in f:
line = line.strip()
line = line.split('\t')
chrom, end = line[0], int(line[1])
regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
chrom_sizes_dict[chrom] = end
if norm_regions:
norm_regionset = GenomicRegionSet('norm_regions')
norm_regionset.read_bed(norm_regions)
else:
norm_regionset = None
if housekeeping_genes:
scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)
if scaling_factors_ip:
tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")
regionset.sequences.sort()
exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")
multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
path_bamfiles = bamfiles, path_inputs = inputs, exts = exts, exts_inputs = exts_inputs, factors_inputs = factors_inputs, \
chrom_sizes=chrom_sizes, verbose=verbose, no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug, \
norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip, save_wig=save_wig)
return multi_cov_set