当前位置: 首页>>代码示例>>Python>>正文


Python MotifConfig.get_gene_dir方法代码示例

本文整理汇总了Python中gimmemotifs.config.MotifConfig.get_gene_dir方法的典型用法代码示例。如果您正苦于以下问题:Python MotifConfig.get_gene_dir方法的具体用法?Python MotifConfig.get_gene_dir怎么用?Python MotifConfig.get_gene_dir使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gimmemotifs.config.MotifConfig的用法示例。


在下文中一共展示了MotifConfig.get_gene_dir方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_genome

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def get_genome(genomebuild, fastadir, indexdir=None):
    """Fetch a genome build with its gene annotation and index it.

    Parameters
    ----------
    genomebuild : str
        Name of the genome build. It names the sub-directory created below
        `fastadir` and below the index directory, and the annotation BED
        file (``<genomebuild>.bed``).

    fastadir : str
        Directory below which the genome directory is created.

    indexdir : str, optional
        Directory below which the index is created. When not given, the
        index directory from the GimmeMotifs configuration is used.

    Notes
    -----
    Writes a message to stderr and exits with status 1 when the genome
    directory does not exist and cannot be created.
    """
    config = MotifConfig()
    index_root = indexdir if indexdir else config.get_index_dir()

    genome_dir = os.path.join(fastadir, genomebuild)
    index_dir = os.path.join(index_root, genomebuild)

    # Creating the directory doubles as the check for write permission.
    if not os.path.exists(genome_dir):
        try:
            os.mkdir(genome_dir)
        except OSError:
            sys.stderr.write("Could not create genome dir {}\n".format(genome_dir))
            sys.exit(1)

    # The annotation lives in the configured gene directory.
    annotation_bed = os.path.join(config.get_gene_dir(), "%s.bed" % genomebuild)
    download_annotation(genomebuild, annotation_bed)

    # The genome FASTA goes into the directory prepared above.
    download_genome(genomebuild, genome_dir)

    sys.stderr.write("Creating index\n")
    GenomeIndex().create_index(genome_dir, index_dir)
    create_bedtools_fa(index_dir, genome_dir)
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:31,代码来源:genome_index.py

示例2: GimmeMotifs

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]

#.........这里部分代码省略.........


        self.prediction_num, self.validation_num = divide_fa_file(self.inputfile, self.prediction_fa, self.validation_fa, fraction, abs_max)


    def _create_background(self, bg_type, bedfile, fafile, outfile, organism="hg18", width=200, nr_times=10):
        """Write a background FASTA file of the requested type.

        Parameters
        ----------
        bg_type : str
            Background type: "random", "genomic", "gc", "promoter" or "user".

        bedfile : str
            Not used by this method.

        fafile : str
            Input FASTA file. The number of sequences it contains, times
            `nr_times`, is the number of background sequences requested.

        outfile : str
            Name of the FASTA file that is written.

        organism : str, optional
            Genome name, used to locate the index directory and gene file.

        width : int, optional
            Sequence length passed to the genomic and promoter generators,
            and the reference length for the "user" background check.

        nr_times : int, optional
            Multiplier applied to the number of input sequences.

        Returns
        -------
        int or None
            Number of sequences in the background, or None when `bg_type`
            is not one of the recognised types.

        Notes
        -----
        Exits with status 1 when a "user" background file does not exist.
        """
        fg = Fasta(fafile)
        nr_seqs = nr_times * len(fg)

        if bg_type == "random":
            markov_order = int(self.markov_model)
            if markov_order >= 6:
                self.logger.warning("Are you sure about the Markov model? It seems too high!")
            else:
                # Fall back to a generic "<n>th" so that an order outside
                # 1-3 (e.g. 0) cannot raise while building a debug message.
                ordinals = {1: "1st", 2: "2nd", 3: "3rd"}
                order = ordinals.get(markov_order, "%sth" % markov_order)
                self.logger.debug("Creating random background (%s order Markov)", order)

            m = MarkovFasta(fg, k=markov_order, n=nr_seqs)
            m.writefasta(outfile)
            self.logger.debug("Random background: %s", outfile)
            # return the number of random sequences created
            return len(m)

        if bg_type == "genomic":
            self.logger.debug("Creating genomic background")
            index_dir = os.path.join(self.config.get_index_dir(), organism)
            f = RandomGenomicFasta(index_dir, width, nr_seqs)
            f.writefasta(outfile)
            return len(f)

        if bg_type == "gc":
            self.logger.debug("Creating GC matched background")
            f = MatchedGcFasta(fafile, organism, nr_seqs)
            f.writefasta(outfile)
            self.logger.debug("GC matched background: %s", outfile)
            return len(f)

        if bg_type == "promoter":
            gene_file = os.path.join(self.config.get_gene_dir(), "%s.bed" % organism)
            index_dir = os.path.join(self.config.get_index_dir(), organism)

            self.logger.info(
                    "Creating random promoter background (%s, using genes in %s)",
                    organism, gene_file)
            f = PromoterFasta(gene_file, index_dir, width, nr_seqs)
            f.writefasta(outfile)
            self.logger.debug("Random promoter background: %s", outfile)
            return len(f)

        if bg_type == "user":
            bg_file = self.params["user_background"]
            if not os.path.exists(bg_file):
                self.logger.error(
                        "User-specified background file %s does not exist!",
                        bg_file)
                sys.exit(1)

            self.logger.info("Copying user-specified background file %s to %s.",
                    bg_file, outfile)
            fa = Fasta(bg_file)
            median_length = median([len(seq) for seq in fa.seqs])
            # Warn when the median length is more than 5% away from `width`.
            if median_length < width * 0.95 or median_length > width * 1.05:
                self.logger.warning("The user-specified background file %s contains sequences with a median length of %s, while GimmeMotifs predicts motifs in sequences of length %s. This will influence the statistics! It is recommended to use background sequences of the same length.", bg_file, median_length, width)
            fa.writefasta(outfile)
            return len(fa)

        # Unrecognised background type: nothing is written.
        return None

#    def filter_motifs(self, motif_ids, enrichmentfile, e_cutoff, p_cutoff):
#        filt_motifs = []
#        for line in open(enrichmentfile).readlines():
#            if not line.startswith("#"):
#                vals = line.strip().split("\t")
#                if vals[0] in motif_ids:
开发者ID:YichaoOU,项目名称:gimmemotifs,代码行数:70,代码来源:core.py

示例3: background

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def background(args):
    """Generate a background file from parsed command line arguments.

    Parameters
    ----------
    args : object
        Object with the attributes read here: ``inputfile``,
        ``outputfile``, ``bg_type``, ``outformat``, ``length``, ``genome``,
        ``number`` and ``markov_order``.

    Notes
    -----
    Prints a message and exits with status 1 when the arguments are
    inconsistent, when the genome index or gene file is missing, or when
    neither a number of sequences nor an input file is given.
    """
    inputfile = args.inputfile
    out = args.outputfile
    bg_type = args.bg_type
    outformat = args.outformat.lower()
    length = args.length
    # Both spellings select FASTA output everywhere in this function.
    fasta_out = outformat in ["fasta", "fa"]

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if bg_type not in BG_TYPES:
        print("The argument 'type' should be one of: %s" % (",".join(BG_TYPES)))
        sys.exit(1)

    if outformat == "bed" and bg_type == "random":
        print("Random background can only be generated in FASTA format!")
        sys.exit(1)

    if bg_type == "gc" and not inputfile:
        print("need a FASTA formatted input file for background gc")
        sys.exit(1)

    # GimmeMotifs configuration for file and directory locations
    config = MotifConfig()

    # Genome index location for creation of FASTA files
    index_dir = os.path.join(config.get_index_dir(), args.genome)
    if bg_type in ["gc", "genomic", "promoter"] and fasta_out:
        if not os.path.exists(index_dir):
            print("Index for %s does not exist. Has the genome been indexed for use with GimmeMotifs?" % args.genome)
            sys.exit(1)

    # Gene definition
    gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % args.genome)
    if bg_type == "promoter" and not os.path.exists(gene_file):
        print("Can't find gene definition for %s (%s). See GimmeMotifs documentation on how to add gene files." % (args.genome, gene_file))
        sys.exit(1)

    # Number of sequences: an explicit number wins over the input file size.
    if args.number:
        number = args.number
    elif inputfile:
        number = number_of_seqs_in_file(inputfile)
    else:
        sys.stderr.write("please provide either a number or an inputfile\n")
        sys.exit(1)

    if bg_type == "random":
        f = Fasta(inputfile)
        m = bg.MarkovFasta(f, n=number, k=args.markov_order)
        m.writefasta(out)
    elif bg_type == "gc":
        if fasta_out:
            m = bg.MatchedGcFasta(inputfile, args.genome, number=number)
            m.writefasta(out)
        else:
            bg.matched_gc_bedfile(out, inputfile, args.genome, number)
    elif bg_type == "promoter":
        if fasta_out:
            m = bg.PromoterFasta(gene_file, index_dir, length=length, n=number)
            m.writefasta(out)
        else:
            bg.create_promoter_bedfile(out, gene_file, length, number)
    elif bg_type == "genomic":
        if fasta_out:
            m = bg.RandomGenomicFasta(index_dir, length, number)
            m.writefasta(out)
        else:
            bg.create_random_genomic_bedfile(out, index_dir, length, number)
开发者ID:PanosFirmpas,项目名称:gimmemotifs,代码行数:71,代码来源:background.py

示例4: genome

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def genome(args):
    
    config = MotifConfig()
    
    if not os.path.exists(args.indexdir):
        print "Index_dir %s does not exist!" % (args.indexdir)
        sys.exit(1)

    if not os.path.exists(args.fastadir):
        print "FASTA dir %s does not exist!" % (args.fastadir)
        sys.exit(1)
    
    pred_bin = "genePredToBed"
    pred = find_executable(pred_bin)
    if not pred:
        sys.stderr.write("{} not found in path!\n".format(pred_bin))
        sys.exit(1)
    
    fastadir = args.fastadir
    genomebuild = args.genomebuild
    genome_dir = os.path.join(fastadir, genomebuild)
    index_dir = os.path.join(args.indexdir, args.genomebuild)

    # Check for rights to write to directory

    if not os.path.exists(genome_dir):
        try:
            os.mkdir(genome_dir)
        except:
            sys.stderr.write("Could not create genome dir {}\n".format(genome_dir))
            sys.exit(1)
    
    # Download gene file based on URL + genomebuild
    gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % genomebuild)
    tmp = NamedTemporaryFile(delete=False, suffix=".gz")
    
    anno = []
    f = urllib2.urlopen(UCSC_GENE_URL.format(genomebuild))
    p = re.compile(r'\w+.Gene.txt.gz')
    for line in f.readlines():
        m = p.search(line)
        if m:
            anno.append(m.group(0))

    sys.stderr.write("Retrieving gene annotation for {}\n".format(genomebuild))
    url = ""
    for a in ANNOS:
        if a in anno:
            url = UCSC_GENE_URL.format(genomebuild) + a
            break
    if url:
        urllib.urlretrieve(
                url,
                tmp.name 
                )

        sp.call("zcat {} | cut -f2-11 | {} /dev/stdin {}".format(tmp.name, pred, gene_file), shell=True)

    else: 
        sys.stderr.write("No annotation found!")
  
    # download genome based on URL + genomebuild
    sys.stderr.write("Downloading {} genome\n".format(genomebuild))
    for genome_url in [UCSC_GENOME_URL, ALT_UCSC_GENOME_URL]:
        
        remote = genome_url.format(genomebuild)

        genome_fa = os.path.join(
                genome_dir, 
                os.path.split(remote)[-1]
                )

        sys.stderr.write("Trying to download {}\n".format(genome_url.format(genomebuild)))
        urllib.urlretrieve(
                genome_url.format(genomebuild),
                genome_fa
                )
        
        if not check_genome_file(genome_fa):    
            continue
        
        break

    if not check_genome_file(genome_fa):
        sys.stderr.write("Failed to download genome\n")
        sys.exit(1)

    sys.stderr.write("Unpacking\n")
    if genome_fa.endswith("tar.gz"):
        cmd = "tar -C {0} -xvzf {1} && rm {1}".format(genome_dir, genome_fa)
    else:
        cmd = "gunzip {0} && rm {0}".format(genome_fa)

    sp.call(cmd, shell=True, cwd=genome_dir)

    fa_files = glob("{}/*.fa".format(genome_dir))
    if len(fa_files) == 1:
        f = Fasta(fa_files[0])
        for n,s in f.items():
            with open("{}/{}.fa".format(n)) as f:
#.........这里部分代码省略.........
开发者ID:georgeg9,项目名称:gimmemotifs,代码行数:103,代码来源:genome.py

示例5: create_background

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def create_background(bg_type, fafile, outfile, genome="hg18", width=200, nr_times=10, custom_background=None):
    """Create background of a specific type.

    Parameters
    ----------
    bg_type : str
        Name of background type: "random", "genomic", "gc", "promoter" or
        "custom".

    fafile : str
        Name of input FASTA file.

    outfile : str
        Name of output FASTA file.

    genome : str, optional
        Genome name.

    width : int, optional
        Size of regions.

    nr_times : int, optional
        Generate this times as many background sequences as compared to
        input file.

    custom_background : str, optional
        Name of the FASTA file used as background when `bg_type` is
        "custom".

    Returns
    -------
    nr_seqs  : int
        Number of sequences created.

    Raises
    ------
    ValueError
        If `bg_type` is not recognised, or if no gene file can be found
        for a "promoter" background.
    IOError
        If a "custom" background is requested but the file is not
        specified or does not exist.
    """
    width = int(width)
    config = MotifConfig()
    fg = Fasta(fafile)
    nr_seqs = nr_times * len(fg)

    if bg_type in ["genomic", "gc"]:
        if not genome:
            logger.error("Need a genome to create background")
            sys.exit(1)

    if bg_type == "random":
        f = MarkovFasta(fg, k=1, n=nr_seqs)
        logger.debug("Random background: %s", outfile)
    elif bg_type == "genomic":
        logger.debug("Creating genomic background")
        f = RandomGenomicFasta(genome, width, nr_seqs)
    elif bg_type == "gc":
        logger.debug("Creating GC matched background")
        f = MatchedGcFasta(fafile, genome, nr_seqs)
        logger.debug("GC matched background: %s", outfile)
    elif bg_type == "promoter":
        fname = Genome(genome).filename
        gene_file = fname.replace(".fa", ".annotation.bed.gz")
        # The derived name is never empty, so test for the file itself
        # before falling back to the configured gene directory.
        if not os.path.exists(gene_file):
            gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % genome)
        if not os.path.exists(gene_file):
            print("Could not find a gene file for genome {}".format(genome))
            print("Did you use the --annotation flag for genomepy?")
            print("Alternatively make sure there is a file called {}.bed in {}".format(genome, config.get_gene_dir()))
            raise ValueError(
                    "Could not find a gene file for genome {}".format(genome))

        logger.info(
                "Creating random promoter background (%s, using genes in %s)",
                genome, gene_file)
        f = PromoterFasta(gene_file, genome, width, nr_seqs)
        logger.debug("Random promoter background: %s", outfile)
    elif bg_type == "custom":
        bg_file = custom_background
        if not bg_file:
            raise IOError(
                    "Background file not specified!")

        if not os.path.exists(bg_file):
            # Exceptions do not interpolate %-style arguments themselves.
            raise IOError(
                    "Custom background file {} does not exist!".format(bg_file))

        logger.info("Copying custom background file %s to %s.",
                bg_file, outfile)
        f = Fasta(bg_file)
        median_length = np.median([len(seq) for seq in f.seqs])
        # Warn when the median length is more than 5% away from `width`.
        if median_length < (width * 0.95) or median_length > (width * 1.05):
            logger.warning(
                "The custom background file %s contains sequences with a "
                "median length of %s, while GimmeMotifs predicts motifs in sequences "
                "of length %s. This will influence the statistics! It is recommended "
                "to use background sequences of the same length.",
                bg_file, median_length, width)
    else:
        # Without this, `f` below would be unbound for an unknown type.
        raise ValueError("Unknown background type: {}".format(bg_type))

    f.writefasta(outfile)
    return len(f)
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:91,代码来源:denovo.py

示例6: background

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def background(args):
    """Generate a background file from parsed command line arguments.

    Parameters
    ----------
    args : object
        Object with the attributes read here: ``inputfile``,
        ``outputfile``, ``bg_type``, ``outformat``, ``length``, ``genome``,
        ``number`` and ``markov_order``.

    Notes
    -----
    Prints a message and exits with status 1 when the arguments are
    inconsistent, when no gene file can be found for a promoter
    background, or when neither a number of sequences nor an input file
    is given.
    """
    inputfile = args.inputfile
    out = args.outputfile
    bg_type = args.bg_type
    outformat = args.outformat.lower()
    length = args.length
    # Both spellings select FASTA output everywhere in this function.
    fasta_out = outformat in ["fasta", "fa"]

    if bg_type not in BG_TYPES:
        print("The argument 'type' should be one of: %s" % (",".join(BG_TYPES)))
        sys.exit(1)

    if outformat == "bed" and bg_type == "random":
        print("Random background can only be generated in FASTA format!")
        sys.exit(1)

    if bg_type == "gc" and not inputfile:
        print("need a FASTA formatted input file for background gc")
        sys.exit(1)

    # GimmeMotifs configuration for file and directory locations
    config = MotifConfig()

    # Gene definition. The genome is looked up for every background type.
    fname = Genome(args.genome).filename
    gene_file = fname.replace(".fa", ".annotation.bed.gz")
    # The derived name is never empty, so test for the file itself before
    # falling back to the configured gene directory.
    if not os.path.exists(gene_file):
        gene_file = os.path.join(config.get_gene_dir(), "{}.bed".format(args.genome))

    if bg_type == "promoter" and not os.path.exists(gene_file):
        print("Could not find a gene file for genome {}".format(args.genome))
        print("Did you use the --annotation flag for genomepy?")
        print("Alternatively make sure there is a file called {}.bed in {}".format(args.genome, config.get_gene_dir()))
        sys.exit(1)

    # Number of sequences: an explicit number wins over the input file size.
    if args.number:
        number = args.number
    elif inputfile:
        number = number_of_seqs_in_file(inputfile)
    else:
        sys.stderr.write("please provide either a number or an inputfile\n")
        sys.exit(1)

    if bg_type == "random":
        f = Fasta(inputfile)
        m = bg.MarkovFasta(f, n=number, k=args.markov_order)
        m.writefasta(out)
    elif bg_type == "gc":
        if fasta_out:
            m = bg.MatchedGcFasta(inputfile, args.genome, number=number)
            m.writefasta(out)
        else:
            bg.matched_gc_bedfile(out, inputfile, args.genome, number)
    elif bg_type == "promoter":
        if fasta_out:
            m = bg.PromoterFasta(gene_file, args.genome, length=length, n=number)
            m.writefasta(out)
        else:
            bg.create_promoter_bedfile(out, gene_file, length, number)
    elif bg_type == "genomic":
        if fasta_out:
            m = bg.RandomGenomicFasta(args.genome, length, number)
            m.writefasta(out)
        else:
            bg.create_random_genomic_bedfile(out, args.genome, length, number)
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:74,代码来源:background.py


注:本文中的gimmemotifs.config.MotifConfig.get_gene_dir方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。