当前位置: 首页>>代码示例>>Python>>正文


Python GenomicRegionSet.GenomicRegionSet类代码示例

本文整理汇总了Python中rgt.GenomicRegionSet.GenomicRegionSet的典型用法代码示例。如果您正苦于以下问题:Python GenomicRegionSet类的具体用法?Python GenomicRegionSet怎么用?Python GenomicRegionSet使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了GenomicRegionSet类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: initialize

def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes, \
               test, report, chrom_sizes_dict, counter, end, gc_content_cov=None, avg_gc_content=None, \
               gc_hist=None, output_bw=True, save_input=False, m_threshold=80, a_threshold=95, rmdup=False):
    """Initialize the MultiCoverageSet"""
    regionset = regions
    regionset.sequences.sort()
    
    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None
        
    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, report)
    
    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path,
                                     binsize=binsize, stepsize=stepsize, rmdup=rmdup, path_bamfiles=bamfiles,
                                     path_inputs=inputs, exts=exts, exts_inputs=exts_inputs,
                                     factors_inputs=factors_inputs, chrom_sizes=chrom_sizes, verbose=verbose,
                                     no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug,
                                     norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip,
                                     save_wig=save_wig, strand_cov=True, housekeeping_genes=housekeeping_genes,
                                     tracker=tracker, gc_content_cov=gc_content_cov, avg_gc_content=avg_gc_content,
                                     gc_hist=gc_hist, end=end, counter=counter, output_bw=output_bw,
                                     folder_report=FOLDER_REPORT, report=report, save_input=save_input,
                                     m_threshold=m_threshold, a_threshold=a_threshold)
    return multi_cov_set
开发者ID:eggduzao,项目名称:reg-gen,代码行数:29,代码来源:dpc_help.py

示例2: read_bed

    def read_bed(self, bedfile, genome_file_dir):
        """Read the sequences defined by BED file on the given genomce"""

        # Read BED into GenomicRegionSet
        bed = GenomicRegionSet(os.path.basename(bedfile))
        bed.read_bed(bedfile)
        
        # Parse each chromosome and fetch the defined region in this chromosome
        chroms = list(set(bed.get_chrom()))

        chro_files = [x.split(".")[0] for x in os.listdir(genome_file_dir)]

        for ch in chroms:
            if ch not in chro_files: print(" *** There is no genome FASTA file for: "+ch)

            # Read genome in FASTA according to the given chromosome
            ch_seq = SequenceSet(name=ch, seq_type=SequenceType.DNA)
            try: 
                ch_seq.read_fasta(os.path.join(genome_file_dir, ch+".fa"))
            except:
                continue
            
            # Regions in given chromosome
            beds = bed.any_chrom(chrom=ch)

            for s in beds:
                seq = ch_seq[0].seq[s.initial:s.final]
                try: strand = s.strand
                except: strand = "+"
                self.sequences.append(Sequence(seq=seq, name=s.__repr__(), 
                                               strand=strand))
开发者ID:jovesus,项目名称:reg-gen,代码行数:31,代码来源:SequenceSet.py

示例3: estimate_bias_vom

def estimate_bias_vom(args):
    regions = GenomicRegionSet("regions")
    regions.read(args.regions_file)
    create_signal(args, regions)

    hmm_data = HmmData()

    learn_dependency_model = hmm_data.get_dependency_model()
    slim_dimont_predictor = hmm_data.get_slim_dimont_predictor()
    test_fa = hmm_data.get_default_test_fa()

    shutil.copy(test_fa, args.output_location)
    os.chdir(args.output_location)
    print(os.getcwd())

    output_fname_f_obs = os.path.join(args.output_location, "{}_f_obs.fa".format(str(args.k_nb)))
    output_fname_f_exp = os.path.join(args.output_location, "{}_f_exp.fa".format(str(args.k_nb)))
    output_fname_r_obs = os.path.join(args.output_location, "{}_r_obs.fa".format(str(args.k_nb)))
    output_fname_r_exp = os.path.join(args.output_location, "{}_r_exp.fa".format(str(args.k_nb)))

    infix = "{}_f_obs".format(str(args.k_nb))
    create_model(args, output_fname_f_obs, infix, learn_dependency_model, slim_dimont_predictor)

    infix = "{}_f_exp".format(str(args.k_nb))
    create_model(args, output_fname_f_exp, infix, learn_dependency_model, slim_dimont_predictor)

    infix = "{}_r_obs".format(str(args.k_nb))
    create_model(args, output_fname_r_obs, infix, learn_dependency_model, slim_dimont_predictor)

    infix = "{}_r_exp".format(str(args.k_nb))
    create_model(args, output_fname_r_exp, infix, learn_dependency_model, slim_dimont_predictor)

    os.remove(os.path.join(args.output_location, "test.fa"))

    compute_bias(args)
开发者ID:CostaLab,项目名称:reg-gen,代码行数:35,代码来源:Estimation.py

示例4: filter_deadzones

def filter_deadzones(bed_deadzones, peak_regions):
    """Filter by peaklist by deadzones"""
    deadzones = GenomicRegionSet('deadzones')
    deadzones.read_bed(bed_deadzones)
    peak_regions = peak_regions.subtract(deadzones, whole_region=True)
    
    return peak_regions
开发者ID:eggduzao,项目名称:reg-gen,代码行数:7,代码来源:postprocessing.py

示例5: fisher_table

def fisher_table(motif_name, regions, mpbs, gene_set=False, mpbs_set=False):
    """
    TODO

    Keyword arguments:
    motif_name -- TODO
    regions -- TODO
    mpbs -- TODO
    gene_set -- TODO
    mpbs_set -- TODO

    Return:
    a -- TODO
    b -- TODO
    gene_set -- TODO
    mpbs_set -- TODO
    """

    # Fetching motif
    mpbs_motif = GenomicRegionSet(name="mpbs_motif")
    for region in mpbs.sequences:
        if motif_name in region.name:
            mpbs_motif.add(region)

    # Performing intersections
    if len(mpbs_motif) > 0:
        # regions which are overlapping with mpbs_motif
        intersect_original = regions.intersect(mpbs_motif, mode=OverlapType.ORIGINAL, rm_duplicates=True)
        # regions which are not overlapping with regions from mpbs_motif
        subtract_overlap = regions.subtract(mpbs_motif, whole_region=True)

        # Fetching genes
        if gene_set:
            gene_set_res = GeneSet(motif_name)
            for genomic_region in intersect_original.sequences:
                if genomic_region.name:
                    gene_list = [e if e[0] != "." else e[1:] for e in genomic_region.name.split(":")]
                    for g in gene_list:
                        gene_set_res.genes.append(g)
            gene_set_res.genes = list(set(gene_set_res.genes))  # Keep only unique genes
        else:
            gene_set_res = None

        # Fetching mpbs
        if mpbs_set:
            mpbs_set_res = mpbs_motif.intersect(regions, mode=OverlapType.ORIGINAL, rm_duplicates=True)
        else:
            mpbs_set_res = None

        return len(intersect_original), len(subtract_overlap), gene_set_res, mpbs_set_res

    else:
        gene_set_res = GeneSet(motif_name) if gene_set else None
        mpbs_set_res = GenomicRegionSet(mpbs_motif.name) if mpbs_set else None

        return 0, len(regions), gene_set_res, mpbs_set_res
开发者ID:eggduzao,项目名称:reg-gen,代码行数:56,代码来源:Statistics.py

示例6: main

def main():
    options, vcf_list = input()
    
    #thres_mq = 20
    #thres_dp = 20 
    #filter_dbSNP = True
    #tfbs_motifs_path = '/home/manuel/workspace/cluster_p/human_genetics/exp/exp01_motifsearch_sox2/humangenetics_motifs/Match/chr11_mpbs.bed'
    
    sample_data = load_data(vcf_list)
    print("##Filter variants of samples", file=sys.stderr)
    pipeline(sample_data, options)
    
    if options.list_wt:
        wt_data = load_data(options.list_wt)
        print("##Filter variants of wildtypes", file=sys.stderr)
        pipeline(wt_data, options)
        union_wt = GenomicVariantSet(name = "union_wt")
        for wt in wt_data:
            union_wt.sequences += wt.sequences 
        
        print("#wildtype variants:", file=sys.stderr)
        print("union WT", len(union_wt), file=sys.stderr, sep="\t")
        
        #delete Wildtype
        for sample in sample_data:
            sample.subtract(union_wt)
        
        print_length(sample_data, "#variants after subtracting wildtypes")
    else:
        print("#Do not filter by wildtype", file=sys.stderr)
    
    if options.max_density:
        get_max_density(GenomicVariantSets=sample_data, lowerBound=options.lower_bound, upperBound=options.upper_bound)
    else:
        print("#Do not perform max. density search", file=sys.stderr)
        
    if options.list_bed:
        tfbs_motifs = GenomicRegionSet('tfbs_motifs')   
        tfbs_motifs.read_bed(options.list_bed)
        
        for sample in sample_data:
            sample.intersect(tfbs_motifs)
    
        print_length(sample_data, "#variants after filtering by BED file")
    else:
        print("#Do not filter by BED file", file=sys.stderr)
    
    print("#Compute intersection of sample's subsets (give intersection's name and size)")
    output_intersections(sample_data)
    
    print("#Write filtered sample files")
    for sample in sample_data:
        sample.write_vcf("%s-filtered.vcf" %sample.name)
开发者ID:Marvin84,项目名称:reg-gen,代码行数:53,代码来源:filterVCF.py

示例7: dbd_regions

    def dbd_regions(self, sig_region, output):
        """Generate the BED file of significant DBD regions and FASTA file of the sequences"""
        dbd_regions(exons=self.rna_regions, sig_region=sig_region, rna_name=self.rna_name, output=output)

        self.stat["DBD_all"] = str(len(self.rbss))
        self.stat["DBD_sig"] = str(len(self.data["region"]["sig_region"]))

        sigDBD = GenomicRegionSet("DBD_sig")
        sigDBD.sequences = self.data["region"]["sig_region"]
        rbss = self.txp.get_rbs()
        overlaps = rbss.intersect(y=sigDBD, mode=OverlapType.ORIGINAL)
        self.stat["DBSs_target_DBD_sig"] = str(len(overlaps))
开发者ID:eggduzao,项目名称:reg-gen,代码行数:12,代码来源:tdf_regiontest.py

示例8: read_bed

    def read_bed(self, bedfile, genome_file_dir):
        """Read the sequences defined by BED file on the given genomce.

        *Keyword arguments:*

            - bedfile -- The path to the BED file which defines the regions.
            - genome_file_dir -- A directory which contains the FASTA files for each chromosome.
        """

        # Read BED into GenomicRegionSet
        from rgt.GenomicRegionSet import GenomicRegionSet
        bed = GenomicRegionSet(os.path.basename(bedfile))
        bed.read_bed(bedfile)
        self.read_genomic_set(bed, genome_file_dir)
开发者ID:eggduzao,项目名称:reg-gen,代码行数:14,代码来源:SequenceSet.py

示例9: get_raw_tracks

def get_raw_tracks(args):
    # Initializing Error Handler
    err = ErrorHandler()

    if len(args.input_files) != 2:
        err.throw_error("ME_FEW_ARG", add_msg="You must specify reads and regions file.")

    output_fname = os.path.join(args.output_location, "{}.wig".format(args.output_prefix))

    bam = Samfile(args.input_files[0], "rb")
    regions = GenomicRegionSet("Interested regions")
    regions.read(args.input_files[1])
    regions.merge()
    reads_file = GenomicSignal()

    with open(output_fname, "a") as output_f:
        for region in regions:
            # Raw counts
            signal = [0.0] * (region.final - region.initial)
            for read in bam.fetch(region.chrom, region.initial, region.final):
                if not read.is_reverse:
                    cut_site = read.pos + args.forward_shift
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0
                else:
                    cut_site = read.aend + args.reverse_shift - 1
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0

            if args.norm:
                signal = reads_file.boyle_norm(signal)
                perc = scoreatpercentile(signal, 98)
                std = np.std(signal)
                signal = reads_file.hon_norm_atac(signal, perc, std)

            output_f.write("fixedStep chrom=" + region.chrom + " start=" + str(region.initial + 1) + " step=1\n" +
                           "\n".join([str(e) for e in np.nan_to_num(signal)]) + "\n")
    output_f.close()

    if args.bigWig:
        genome_data = GenomeData(args.organism)
        chrom_sizes_file = genome_data.get_chromosome_sizes()
        bw_filename = os.path.join(args.output_location, "{}.bw".format(args.output_prefix))
        os.system(" ".join(["wigToBigWig", output_fname, chrom_sizes_file, bw_filename, "-verbose=0"]))
        os.remove(output_fname)
开发者ID:CostaLab,项目名称:reg-gen,代码行数:45,代码来源:Tracks.py

示例10: get_bc_signal

def get_bc_signal(arguments):
    (mpbs_name, mpbs_file1, mpbs_file2, reads_file1, reads_file2, organism,
     window_size, forward_shift, reverse_shift, bias_table1, bias_table2) = arguments

    mpbs1 = GenomicRegionSet("Motif Predicted Binding Sites of Condition1")
    mpbs1.read(mpbs_file1)

    mpbs2 = GenomicRegionSet("Motif Predicted Binding Sites of Condition2")
    mpbs2.read(mpbs_file2)

    mpbs = mpbs1.combine(mpbs2, output=True)
    mpbs.sort()

    bam1 = Samfile(reads_file1, "rb")
    bam2 = Samfile(reads_file2, "rb")

    genome_data = GenomeData(organism)
    fasta = Fastafile(genome_data.get_genome())

    signal_1 = np.zeros(window_size)
    signal_2 = np.zeros(window_size)
    motif_len = None
    pwm = dict([("A", [0.0] * window_size), ("C", [0.0] * window_size),
                ("G", [0.0] * window_size), ("T", [0.0] * window_size),
                ("N", [0.0] * window_size)])

    mpbs_regions = mpbs.by_names([mpbs_name])
    num_motif = len(mpbs_regions)

    # Fetch bias corrected signal
    for region in mpbs_regions:
        if motif_len is None:
            motif_len = region.final - region.initial

        mid = (region.final + region.initial) / 2
        p1 = mid - window_size / 2
        p2 = mid + window_size / 2

        if p1 <= 0:
            continue
        # Fetch raw signal
        signal1 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam1,
                                  bias_table=bias_table1, genome_file_name=genome_data.get_genome(),
                                  forward_shift=forward_shift, reverse_shift=reverse_shift)

        signal2 = bias_correction(chrom=region.chrom, start=p1, end=p2, bam=bam2,
                                  bias_table=bias_table2, genome_file_name=genome_data.get_genome(),
                                  forward_shift=forward_shift, reverse_shift=reverse_shift)

        if len(signal1) != len(signal_1) or len(signal2) != len(signal_2):
            continue

        # smooth the signal
        signal_1 = np.add(signal_1, np.array(signal1))
        signal_2 = np.add(signal_2, np.array(signal2))

        update_pwm(pwm, fasta, region, p1, p2)

    return signal_1, signal_2, motif_len, pwm, num_motif
开发者ID:CostaLab,项目名称:reg-gen,代码行数:59,代码来源:DifferentialAnalysis.py

示例11: get_training_regionset

    def get_training_regionset(self):
        r = GenomicRegionSet('')
        r.add(self.regionset[self.counter])
        
        if self.counter == len(self.chrom_sizes_dict):
            return None
        else:
            self.counter += 1
            return r
        
            
            
#if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
#     if test:
#         contained_chrom = ['chr1', 'chr2']
#     else:
#         #contained_chrom = get_all_chrom(bamfiles)
#         contained_chrom = ['chr1', 'chr2']
开发者ID:eggduzao,项目名称:reg-gen,代码行数:19,代码来源:RegionGiver.py

示例12: get_experimental_matrix

def get_experimental_matrix(bams, bed):
    """Load artificially experimental matrix. Only genes in BED file are needed."""
    m = ExperimentalMatrix()
    
    m.fields = ['name', 'type', 'file']
    m.fieldsDict = {}
    
    names = []
    for bam in bams:
        n, _ = os.path.splitext(os.path.basename(bam))
        m.files[n] = bam
        names.append(n) 
    m.names = np.array(['housekeep'] + names)
    m.types = np.array(['regions'] + ['reads']*len(names))
    g = GenomicRegionSet('RegionSet')
    g.read_bed(bed)
    m.objectsDict['housekeep'] = g
    
    return m
开发者ID:Marvin84,项目名称:reg-gen,代码行数:19,代码来源:norm_genelevel.py

示例13: rna_associated_gene

def rna_associated_gene(rna_regions, name, organism):
    if rna_regions:
        s = [ rna_regions[0][0], min([e[1] for e in rna_regions]), 
              max([e[2] for e in rna_regions]), rna_regions[0][3] ]
        g = GenomicRegionSet("RNA associated genes")
        g.add( GenomicRegion(chrom=s[0], initial=s[1], final=s[2], name=name, orientation=s[3]) )
        asso_genes = g.gene_association(organism=organism, promoterLength=1000, show_dis=True)

        genes = asso_genes[0].name.split(":")
        closest_genes = []
        for n in genes:
            if name not in n: closest_genes.append(n)
        closest_genes = set(closest_genes)

        if len(closest_genes) == 0:
            return "."
        else:
            return ":".join(closest_genes)
    else:
        return "."
开发者ID:eggduzao,项目名称:reg-gen,代码行数:20,代码来源:triplexTools.py

示例14: subtract

    def subtract(self, x):
        """
        Subtract GenomicVariantSet.
        
        *Keyword arguments:*

        - x -- instance of GenomicVariantSet which is subtracted
        
        """
        tmp = GenomicRegionSet.subtract(self, x, whole_region=False)
        self.sequences = self._reconstruct_info(tmp)
开发者ID:Marvin84,项目名称:reg-gen,代码行数:11,代码来源:GenomicVariantSet.py

示例15: initialize

def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
    """Initialize the MultiCoverageSet"""

    regionset = GenomicRegionSet(name)
    chrom_sizes_dict = {}
    #if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
    if regions is not None:
        print("Call DPs on specified regions.", file=sys.stderr)
        with open(regions) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                c, s, e = line[0], int(line[1]), int(line[2])
                regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
                chrom_sizes_dict[c] = e
    else:
        print("Call DPs on whole genome.", file=sys.stderr)
        with open(chrom_sizes) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                chrom, end = line[0], int(line[1])
                regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
                chrom_sizes_dict[chrom] = end
    
    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None
        
    if housekeeping_genes:
        scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)
    
    if scaling_factors_ip:
        tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")
    
    regionset.sequences.sort()
    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
    tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")
    
    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
                                  path_bamfiles = bamfiles, path_inputs = inputs, exts = exts, exts_inputs = exts_inputs, factors_inputs = factors_inputs, \
                                  chrom_sizes=chrom_sizes, verbose=verbose, no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug, \
                                  norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip, save_wig=save_wig)
    
    return multi_cov_set
开发者ID:jovesus,项目名称:reg-gen,代码行数:50,代码来源:dpc_help.py


注:本文中的rgt.GenomicRegionSet.GenomicRegionSet类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。