当前位置: 首页>>代码示例>>Python>>正文


Python GenomicRegionSet.read_bed方法代码示例

本文整理汇总了Python中rgt.GenomicRegionSet.GenomicRegionSet.read_bed方法的典型用法代码示例。如果您正苦于以下问题:Python GenomicRegionSet.read_bed方法的具体用法?Python GenomicRegionSet.read_bed怎么用?Python GenomicRegionSet.read_bed使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在rgt.GenomicRegionSet.GenomicRegionSet的用法示例。


在下文中一共展示了GenomicRegionSet.read_bed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_file

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def create_file(self):
        # Expanding summits
        tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
        tfbs_summit_regions.read_bed(self.tfbs_summit_fname)

        for region in iter(tfbs_summit_regions):
            summit = int(region.data.split()[-1]) + region.initial
            region.initial = max(summit - (self.peak_ext / 2), 0)
            region.final = summit + (self.peak_ext / 2)

        # Calculating intersections
        mpbs_regions = GenomicRegionSet("MPBS Regions")
        mpbs_regions.read_bed(self.mpbs_fname)

        tfbs_summit_regions.sort()
        mpbs_regions.sort()

        with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
        without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)
        tfbs_regions = GenomicRegionSet("TFBS Regions")

        for region in iter(with_overlap_regions):
            region.name = region.name.split(":")[0] + ":Y"
            tfbs_regions.add(region)

        for region in iter(without_overlap_regions):
            region.name = region.name.split(":")[0] + ":N"
            tfbs_regions.add(region)

        tfbs_regions.sort()

        tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
        tfbs_regions.write_bed(tfbs_fname)
开发者ID:eggduzao,项目名称:reg-gen,代码行数:35,代码来源:evidence.py

示例2: filter_deadzones

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def filter_deadzones(bed_deadzones, peak_regions):
    """Filter by peaklist by deadzones"""
    deadzones = GenomicRegionSet('deadzones')
    deadzones.read_bed(bed_deadzones)
    peak_regions = peak_regions.subtract(deadzones, whole_region=True)
    
    return peak_regions
开发者ID:eggduzao,项目名称:reg-gen,代码行数:9,代码来源:postprocessing.py

示例3: initialize

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes, \
               test, report, chrom_sizes_dict, counter, end, gc_content_cov=None, avg_gc_content=None, \
               gc_hist=None, output_bw=True, save_input=False, m_threshold=80, a_threshold=95, rmdup=False):
    """Initialize the MultiCoverageSet"""
    regionset = regions
    regionset.sequences.sort()
    
    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None
        
    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, report)
    
    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path,
                                     binsize=binsize, stepsize=stepsize, rmdup=rmdup, path_bamfiles=bamfiles,
                                     path_inputs=inputs, exts=exts, exts_inputs=exts_inputs,
                                     factors_inputs=factors_inputs, chrom_sizes=chrom_sizes, verbose=verbose,
                                     no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug,
                                     norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip,
                                     save_wig=save_wig, strand_cov=True, housekeeping_genes=housekeeping_genes,
                                     tracker=tracker, gc_content_cov=gc_content_cov, avg_gc_content=avg_gc_content,
                                     gc_hist=gc_hist, end=end, counter=counter, output_bw=output_bw,
                                     folder_report=FOLDER_REPORT, report=report, save_input=save_input,
                                     m_threshold=m_threshold, a_threshold=a_threshold)
    return multi_cov_set
开发者ID:eggduzao,项目名称:reg-gen,代码行数:31,代码来源:dpc_help.py

示例4: read_bed

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def read_bed(self, bedfile, genome_file_dir):
        """Read the sequences defined by BED file on the given genomce"""

        # Read BED into GenomicRegionSet
        bed = GenomicRegionSet(os.path.basename(bedfile))
        bed.read_bed(bedfile)
        
        # Parse each chromosome and fetch the defined region in this chromosome
        chroms = list(set(bed.get_chrom()))

        chro_files = [x.split(".")[0] for x in os.listdir(genome_file_dir)]

        for ch in chroms:
            if ch not in chro_files: print(" *** There is no genome FASTA file for: "+ch)

            # Read genome in FASTA according to the given chromosome
            ch_seq = SequenceSet(name=ch, seq_type=SequenceType.DNA)
            try: 
                ch_seq.read_fasta(os.path.join(genome_file_dir, ch+".fa"))
            except:
                continue
            
            # Regions in given chromosome
            beds = bed.any_chrom(chrom=ch)

            for s in beds:
                seq = ch_seq[0].seq[s.initial:s.final]
                try: strand = s.strand
                except: strand = "+"
                self.sequences.append(Sequence(seq=seq, name=s.__repr__(), 
                                               strand=strand))
开发者ID:jovesus,项目名称:reg-gen,代码行数:33,代码来源:SequenceSet.py

示例5: main

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def main():
    options, vcf_list = input()
    
    #thres_mq = 20
    #thres_dp = 20 
    #filter_dbSNP = True
    #tfbs_motifs_path = '/home/manuel/workspace/cluster_p/human_genetics/exp/exp01_motifsearch_sox2/humangenetics_motifs/Match/chr11_mpbs.bed'
    
    sample_data = load_data(vcf_list)
    print("##Filter variants of samples", file=sys.stderr)
    pipeline(sample_data, options)
    
    if options.list_wt:
        wt_data = load_data(options.list_wt)
        print("##Filter variants of wildtypes", file=sys.stderr)
        pipeline(wt_data, options)
        union_wt = GenomicVariantSet(name = "union_wt")
        for wt in wt_data:
            union_wt.sequences += wt.sequences 
        
        print("#wildtype variants:", file=sys.stderr)
        print("union WT", len(union_wt), file=sys.stderr, sep="\t")
        
        #delete Wildtype
        for sample in sample_data:
            sample.subtract(union_wt)
        
        print_length(sample_data, "#variants after subtracting wildtypes")
    else:
        print("#Do not filter by wildtype", file=sys.stderr)
    
    if options.max_density:
        get_max_density(GenomicVariantSets=sample_data, lowerBound=options.lower_bound, upperBound=options.upper_bound)
    else:
        print("#Do not perform max. density search", file=sys.stderr)
        
    if options.list_bed:
        tfbs_motifs = GenomicRegionSet('tfbs_motifs')   
        tfbs_motifs.read_bed(options.list_bed)
        
        for sample in sample_data:
            sample.intersect(tfbs_motifs)
    
        print_length(sample_data, "#variants after filtering by BED file")
    else:
        print("#Do not filter by BED file", file=sys.stderr)
    
    print("#Compute intersection of sample's subsets (give intersection's name and size)")
    output_intersections(sample_data)
    
    print("#Write filtered sample files")
    for sample in sample_data:
        sample.write_vcf("%s-filtered.vcf" %sample.name)
开发者ID:Marvin84,项目名称:reg-gen,代码行数:55,代码来源:filterVCF.py

示例6: initialize

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def initialize(name, dims, genome_path, regions, stepsize, binsize, bamfiles, exts, \
               inputs, exts_inputs, factors_inputs, chrom_sizes, verbose, no_gc_content, \
               tracker, debug, norm_regions, scaling_factors_ip, save_wig, housekeeping_genes):
    """Initialize the MultiCoverageSet"""

    regionset = GenomicRegionSet(name)
    chrom_sizes_dict = {}
    #if regions option is set, take the values, otherwise the whole set of 
    #chromosomes as region to search for DPs
    if regions is not None:
        print("Call DPs on specified regions.", file=sys.stderr)
        with open(regions) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                c, s, e = line[0], int(line[1]), int(line[2])
                regionset.add(GenomicRegion(chrom=c, initial=s, final=e))
                chrom_sizes_dict[c] = e
    else:
        print("Call DPs on whole genome.", file=sys.stderr)
        with open(chrom_sizes) as f:
            for line in f:
                line = line.strip()
                line = line.split('\t')
                chrom, end = line[0], int(line[1])
                regionset.add(GenomicRegion(chrom=chrom, initial=0, final=end))
                chrom_sizes_dict[chrom] = end
    
    if norm_regions:
        norm_regionset = GenomicRegionSet('norm_regions')
        norm_regionset.read_bed(norm_regions)
    else:
        norm_regionset = None
        
    if housekeeping_genes:
        scaling_factors_ip, _ = norm_gene_level(bamfiles, housekeeping_genes, name, verbose=True)
    
    if scaling_factors_ip:
        tracker.write(text=map(lambda x: str(x), scaling_factors_ip), header="Scaling factors")
    
    regionset.sequences.sort()
    exts, exts_inputs = _compute_extension_sizes(bamfiles, exts, inputs, exts_inputs, verbose)
    tracker.write(text=str(exts).strip('[]'), header="Extension size (rep1, rep2, input1, input2)")
    
    multi_cov_set = MultiCoverageSet(name=name, regions=regionset, dims=dims, genome_path=genome_path, binsize=binsize, stepsize=stepsize,rmdup=True,\
                                  path_bamfiles = bamfiles, path_inputs = inputs, exts = exts, exts_inputs = exts_inputs, factors_inputs = factors_inputs, \
                                  chrom_sizes=chrom_sizes, verbose=verbose, no_gc_content=no_gc_content, chrom_sizes_dict=chrom_sizes_dict, debug=debug, \
                                  norm_regionset=norm_regionset, scaling_factors_ip=scaling_factors_ip, save_wig=save_wig)
    
    return multi_cov_set
开发者ID:jovesus,项目名称:reg-gen,代码行数:52,代码来源:dpc_help.py

示例7: merge_DBD_regions

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def merge_DBD_regions(path):
    """Merge all available DBD regions in BED format. """

    for t in os.listdir(path):
        if os.path.isdir(os.path.join(path, t)):
            dbd_pool = GenomicRegionSet(t)
            for rna in os.listdir(os.path.join(path,t)):
                f = os.path.join(path, t, rna, "DBD_"+rna+".bed")
                if os.path.exists(f):
                    dbd = GenomicRegionSet(rna)
                    dbd.read_bed(f)
                    for r in dbd: r.name = rna+"_"+r.name
                    dbd_pool.combine(dbd)
            dbd_pool.write_bed(os.path.join(path, t, "DBD_"+t+".bed"))
开发者ID:eggduzao,项目名称:reg-gen,代码行数:16,代码来源:triplexTools.py

示例8: read_bed

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def read_bed(self, bedfile, genome_file_dir):
        """Read the sequences defined by BED file on the given genomce.

        *Keyword arguments:*

            - bedfile -- The path to the BED file which defines the regions.
            - genome_file_dir -- A directory which contains the FASTA files for each chromosome.
        """

        # Read BED into GenomicRegionSet
        from rgt.GenomicRegionSet import GenomicRegionSet
        bed = GenomicRegionSet(os.path.basename(bedfile))
        bed.read_bed(bedfile)
        self.read_genomic_set(bed, genome_file_dir)
开发者ID:eggduzao,项目名称:reg-gen,代码行数:16,代码来源:SequenceSet.py

示例9: get_experimental_matrix

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def get_experimental_matrix(bams, bed):
    """Load artificially experimental matrix. Only genes in BED file are needed."""
    m = ExperimentalMatrix()
    
    m.fields = ['name', 'type', 'file']
    m.fieldsDict = {}
    
    names = []
    for bam in bams:
        n, _ = os.path.splitext(os.path.basename(bam))
        m.files[n] = bam
        names.append(n) 
    m.names = np.array(['housekeep'] + names)
    m.types = np.array(['regions'] + ['reads']*len(names))
    g = GenomicRegionSet('RegionSet')
    g.read_bed(bed)
    m.objectsDict['housekeep'] = g
    
    return m
开发者ID:Marvin84,项目名称:reg-gen,代码行数:21,代码来源:norm_genelevel.py

示例10: get_dbss

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def get_dbss(input_BED,output_BED,rna_fasta,output_rbss,organism,l,e,c,fr,fm,of,mf,rm,temp):
    regions = GenomicRegionSet("Target")
    regions.read_bed(input_BED)
    regions.gene_association(organism=organism, show_dis=True)

    connect_rna(rna_fasta, temp=temp, rna_name="RNA")
    rnas = SequenceSet(name="rna", seq_type=SequenceType.RNA)
    rnas.read_fasta(os.path.join(temp,"rna_temp.fa"))
    rna_regions = get_rna_region_str(os.path.join(temp,rna_fasta))
    # print(rna_regions)
    genome = GenomeData(organism)
    genome_path = genome.get_genome()
    txp = find_triplex(rna_fasta=rna_fasta, dna_region=regions, 
                       temp=temp, organism=organism, remove_temp=False,
                       l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, genome_path=genome_path,
                       prefix="targeted_region", dna_fine_posi=True)

    print("Total binding events:\t",str(len(txp)))
    txp.write_bed(output_BED)
    txp.write_txp(filename=output_BED.replace(".bed",".txp"))
    rbss = txp.get_rbs()
    dbd_regions(exons=rna_regions, sig_region=rbss, rna_name="rna", output=output_rbss, 
                out_file=True, temp=temp, fasta=False)
开发者ID:eggduzao,项目名称:reg-gen,代码行数:25,代码来源:triplexTools.py

示例11: find

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
def find(s, ch):
    return [i for i, ltr in enumerate(s) if ltr == ch]

##################################################################################
parser = argparse.ArgumentParser(description='Check the coding potential by PhyloCSF', 
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-i', metavar='  ', type=str, help="Input BED file")
parser.add_argument('-o', metavar='  ', type=str, help="Output BED file with the coding-potential score")
parser.add_argument('-organism', metavar='  ', type=str, help="Define the organism")
parser.add_argument('-rmcoding', metavar='  ', type=float, help="Define the cutoff to remove the entries with coding potential")
parser.add_argument('-mafdir', metavar='  ', type=str, help="Define the directory to MAF files")
# python /projects/reg-gen/tools/phylocsf_check.py -i
args = parser.parse_args()

bed = GenomicRegionSet("input")
bed.read_bed(args.i)
num = len(bed)

organisms = { "hg18": "Human",
              "panTro2": "Chimp",
              "rheMac2": "Rhesus",
              "tarSyr1": "Tarsier",
              "micMur1": "Mouse_lemur",
              "otoGar1": "Bushbaby",
              "tupBel1": "Shrew",
              "mm9": "Mouse",
              "rn4": "Rat",
              "dipOrd1": "Kangaroo_Rat",
              "cavPor2": "Guinea_Pig",
              "speTri1": "Squirrel",
              "oryCun1": "Rabbit",
开发者ID:Marvin84,项目名称:reg-gen,代码行数:33,代码来源:phylocsf_check.py

示例12: chip_evaluate

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def chip_evaluate(self):
        """
        This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
        to evaluate the footprint predictions.

        return:
        """

        # Evaluate Statistics
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        roc_auc_1 = dict()
        roc_auc_2 = dict()
        recall = dict()
        precision = dict()
        prc_auc = dict()

        if "SEG" in self.footprint_type:
            mpbs_regions = GenomicRegionSet("TFBS")
            mpbs_regions.read_bed(self.tfbs_file)
            mpbs_regions.sort()

            # Verifying the maximum score of the MPBS file
            max_score = -99999999
            for region in iter(mpbs_regions):
                score = int(region.data)
                if score > max_score:
                    max_score = score
            max_score += 1

        for i in range(len(self.footprint_file)):
            footprints_regions = GenomicRegionSet("Footprints Prediction")
            footprints_regions.read_bed(self.footprint_file[i])

            # Sort footprint prediction bed files
            footprints_regions.sort()

            if self.footprint_type[i] == "SEG":
                # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
                increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
                intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
                for region in iter(intersect_regions):
                    region.data = str(int(region.data) + max_score)
                    increased_score_mpbs_regions.add(region)


                # Keep the score of remained MPBS entry unchanged
                without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
                for region in iter(without_intersect_regions):
                    increased_score_mpbs_regions.add(region)

                increased_score_mpbs_regions.sort_score()

                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
            elif self.footprint_type[i] == "SC":
                footprints_regions.sort_score()
                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)

        # Output the statistics results into text
        stats_fname = self.output_location + self.tf_name + "_stats.txt"
        stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
        with open(stats_fname, "w") as stats_file:
            stats_file.write("\t".join(stats_header) + "\n")
            for i in range(len(self.footprint_name)):
                stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
                                 + str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")

        # Output the curves
        if self.print_roc_curve:
            label_x = "False Positive Rate"
            label_y = "True Positive Rate"
            curve_name = "ROC"
            self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
        if self.print_pr_curve:
            label_x = "Recall"
            label_y = "Precision"
            curve_name = "PRC"
            self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)

        self.output_points(self.tf_name, fpr, tpr, recall, precision)
开发者ID:eggduzao,项目名称:reg-gen,代码行数:85,代码来源:evaluation.py

示例13: read_states_signals

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def read_states_signals(self):
        # Read states from the annotation file
        states = ""
        with open(self.annotate_fname) as annotate_file:
            for line in annotate_file:
                if len(line) < 2 or "#" in line or "=" in line:
                    continue
                ll = line.strip().split(" ")
                for state in ll[1:-1]:
                    states += state

        # If need to estimate bias table
        bias_table = BiasTable(output_loc=self.output_locaiton)
        genome_data = GenomeData(self.organism)
        table = None
        if self.estimate_bias_correction:
            regions = GenomicRegionSet("Bias Regions")
            if self.original_regions.split(".")[-1] == "bed":
                regions.read_bed(self.original_regions)
            if self.original_regions.split(".")[-1] == "fa":
                regions.read_sequence(self.original_regions)

            if self.estimate_bias_type == "FRE":
                table = bias_table.estimate_table(regions=regions, dnase_file_name=self.bam_file,
                                                  genome_file_name=genome_data.get_genome(),
                                                  k_nb=self.k_nb,
                                                  forward_shift=self.atac_forward_shift,
                                                  reverse_shift=self.atac_reverse_shift)
            elif self.estimate_bias_type == "PWM":
                table = bias_table.estimate_table_pwm(regions=regions, dnase_file_name=self.bam_file,
                                                      genome_file_name=genome_data.get_genome(),
                                                      k_nb=self.k_nb,
                                                      forward_shift=self.atac_forward_shift,
                                                      reverse_shift=self.atac_reverse_shift)

            bias_fname = os.path.join(self.output_locaiton, "Bias", "{}_{}".format(self.k_nb, self.atac_forward_shift))
            bias_table.write_tables(bias_fname, table)

        # If the bias table is provided
        if self.bias_table:
            bias_table_list = self.bias_table.split(",")
            table = bias_table.load_table(table_file_name_F=bias_table_list[0],
                                          table_file_name_R=bias_table_list[1])

        # Get the normalization and slope signal from the raw bam file
        raw_signal = GenomicSignal(self.bam_file)
        raw_signal.load_sg_coefs(slope_window_size=9)
        norm_signal, slope_signal = raw_signal.get_signal(ref=self.chrom, start=self.start, end=self.end,
                                                          downstream_ext=self.atac_downstream_ext,
                                                          upstream_ext=self.atac_upstream_ext,
                                                          forward_shift=self.atac_forward_shift,
                                                          reverse_shift=self.atac_reverse_shift,
                                                          initial_clip=self.atac_initial_clip,
                                                          bias_table=table,
                                                          genome_file_name=genome_data.get_genome(),
                                                          print_raw_signal=self.print_raw_signal,
                                                          print_bc_signal=self.print_bc_signal,
                                                          print_norm_signal=self.print_norm_signal,
                                                          print_slope_signal=self.print_slope_signal)
        if self.print_bed_file:
            self.output_bed_file(states)

        return states, norm_signal, slope_signal
开发者ID:eggduzao,项目名称:reg-gen,代码行数:65,代码来源:train.py

示例14: line

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
    def line(self):
        signal = GenomicSignal(self.bam_file)
        signal.load_sg_coefs(slope_window_size=9)
        bias_table = BiasTable()
        bias_table_list = self.bias_table.split(",")
        table = bias_table.load_table(table_file_name_F=bias_table_list[0],
                                      table_file_name_R=bias_table_list[1])
        genome_data = GenomeData(self.organism)
        fasta = Fastafile(genome_data.get_genome())
        pwm_dict = dict([("A", [0.0] * self.window_size), ("C", [0.0] * self.window_size),
                        ("G", [0.0] * self.window_size), ("T", [0.0] * self.window_size),
                        ("N", [0.0] * self.window_size)])


        mean_raw_signal = np.zeros(self.window_size)
        mean_bc_signal = np.zeros(self.window_size)
        mean_raw_signal_f = np.zeros(self.window_size)
        mean_bc_signal_f = np.zeros(self.window_size)
        mean_raw_signal_r = np.zeros(self.window_size)
        mean_bc_signal_r = np.zeros(self.window_size)

        mean_bias_signal_f = np.zeros(self.window_size)
        mean_bias_signal_r = np.zeros(self.window_size)
        num_sites = 0

        mpbs_regions = GenomicRegionSet("Motif Predicted Binding Sites")
        mpbs_regions.read_bed(self.motif_file)

        total_nc_signal = 0
        total_nl_signal = 0
        total_nr_signal = 0

        for region in mpbs_regions:
            if str(region.name).split(":")[-1] == "Y":
                num_sites += 1
                # Extend by 50 bp
                mid = (region.initial + region.final) / 2
                p1 = mid - (self.window_size / 2)
                p2 = mid + (self.window_size / 2)

                if not self.strands_specific:
                    # Fetch raw signal
                    raw_signal, _ = signal.get_signal(ref=region.chrom, start=p1, end=p2,
                                                  downstream_ext=self.atac_downstream_ext,
                                                  upstream_ext=self.atac_upstream_ext,
                                                  forward_shift=self.atac_forward_shift,
                                                  reverse_shift=self.atac_reverse_shift,
                                                  genome_file_name=genome_data.get_genome())

                    mean_raw_signal = np.add(mean_raw_signal, raw_signal)

                    # Fetch bias correction signal
                    bc_signal, _ = signal.get_signal(ref=region.chrom, start=p1, end=p2,
                                                     bias_table=table,
                                                     downstream_ext=self.atac_downstream_ext,
                                                     upstream_ext=self.atac_upstream_ext,
                                                     forward_shift=self.atac_forward_shift,
                                                     reverse_shift=self.atac_reverse_shift,
                                                     genome_file_name=genome_data.get_genome())

                    mean_bc_signal = np.add(mean_bc_signal, bc_signal)
                else:
                    raw_signal_f, _, raw_signal_r, _ =  signal.get_signal_per_strand(ref=region.chrom, start=p1, end=p2,
                                                                        downstream_ext=self.atac_downstream_ext,
                                                                        upstream_ext=self.atac_upstream_ext,
                                                                        forward_shift=self.atac_forward_shift,
                                                                        reverse_shift=self.atac_reverse_shift,
                                                                        genome_file_name=genome_data.get_genome())
                    mean_raw_signal_f = np.add(mean_raw_signal_f, raw_signal_f)
                    mean_raw_signal_r = np.add(mean_raw_signal_r, raw_signal_r)

                    bc_signal_f, _, bc_signal_r, _ = signal.get_signal_per_strand(ref=region.chrom, start=p1, end=p2,
                                                                                  bias_table=table,
                                                                                  downstream_ext=self.atac_downstream_ext,
                                                                                  upstream_ext=self.atac_upstream_ext,
                                                                                  forward_shift=self.atac_forward_shift,
                                                                                  reverse_shift=self.atac_reverse_shift,
                                                                                  genome_file_name=genome_data.get_genome())
                    mean_bc_signal_f = np.add(mean_bc_signal_f, bc_signal_f)
                    mean_bc_signal_r = np.add(mean_bc_signal_r, bc_signal_r)

                # Update pwm
                aux_plus = 1
                dna_seq = str(fasta.fetch(region.chrom, p1, p2)).upper()
                if (region.final - region.initial) % 2 == 0:
                    aux_plus = 0
                dna_seq_rev = AuxiliaryFunctions.revcomp(str(fasta.fetch(region.chrom,
                                                                         p1 + aux_plus, p2 + aux_plus)).upper())
                if region.orientation == "+":
                    for i in range(0, len(dna_seq)):
                        pwm_dict[dna_seq[i]][i] += 1
                elif region.orientation == "-":
                    for i in range(0, len(dna_seq_rev)):
                        pwm_dict[dna_seq_rev[i]][i] += 1

                # Create bias signal
                bias_table_f = table[0]
                bias_table_r = table[1]
                self.k_nb = len(bias_table_f.keys()[0])
                bias_signal_f = []
#.........这里部分代码省略.........
开发者ID:eggduzao,项目名称:reg-gen,代码行数:103,代码来源:plot.py

示例15: print

# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import read_bed [as 别名]
                        seq = "\t".join([ch, line[4], line[3], gn, ".", line[6]])
                    else:
                        continue
                # print(seq)

                if not args.g:
                    print(seq, file=g)
                elif select_genes.check(gn) or select_genes.check(gi):
                    
                    print(seq, file=g)
                else:
                    continue

        if args.b:
            exons = GenomicRegionSet("output")
            exons.read_bed(args.o)
            exons.write_bed_blocks(args.o)

        # sys.exit(1)

        # if args.g:
        #     select_genes = GeneSet("genes")
        #     select_genes.read(args.g)

        # # if args.t == "gene" or args.t == "transcript":
        # with open(args.i, "r") as f,open(args.o, "w") as g:
        #     find_ind = False
        #     for line in f:
        #         if line[0] == "#": 
        #             continue
        #         elif args.known_only:
开发者ID:Marvin84,项目名称:reg-gen,代码行数:33,代码来源:rgt-convertor.py


注:本文中的rgt.GenomicRegionSet.GenomicRegionSet.read_bed方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。