本文整理汇总了Python中gimmemotifs.config.MotifConfig.get_gene_dir方法的典型用法代码示例。如果您正苦于以下问题：Python MotifConfig.get_gene_dir方法的具体用法？Python MotifConfig.get_gene_dir怎么用？Python MotifConfig.get_gene_dir使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gimmemotifs.config.MotifConfig的用法示例。
在下文中一共展示了MotifConfig.get_gene_dir方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_genome
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def get_genome(genomebuild, fastadir, indexdir=None):
    """Fetch annotation and FASTA for a genome build, then index it.

    Parameters
    ----------
    genomebuild : str
        Genome build name. Used as the sub-directory name below
        ``fastadir`` and ``indexdir`` and as the stem of the gene BED file.
    fastadir : str
        Directory below which the genome FASTA directory is created.
    indexdir : str, optional
        Directory below which the index is written. When falsy, the index
        directory reported by ``MotifConfig`` is used instead.

    Notes
    -----
    Exits the process with status 1 if the genome directory does not exist
    and cannot be created.
    """
    cfg = MotifConfig()
    index_root = indexdir if indexdir else cfg.get_index_dir()

    fasta_target = os.path.join(fastadir, genomebuild)
    index_target = os.path.join(index_root, genomebuild)

    # The FASTA directory must exist before anything is downloaded into it.
    if not os.path.exists(fasta_target):
        try:
            os.mkdir(fasta_target)
        except OSError:
            sys.stderr.write(
                "Could not create genome dir {}\n".format(fasta_target))
            sys.exit(1)

    # Gene annotation is stored as <gene_dir>/<genomebuild>.bed
    annotation_bed = os.path.join(cfg.get_gene_dir(), "%s.bed" % genomebuild)
    download_annotation(genomebuild, annotation_bed)

    # Genome FASTA file(s)
    download_genome(genomebuild, fasta_target)

    sys.stderr.write("Creating index\n")
    GenomeIndex().create_index(fasta_target, index_target)
    create_bedtools_fa(index_target, fasta_target)
示例2: GimmeMotifs
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
#.........这里部分代码省略.........
self.prediction_num, self.validation_num = divide_fa_file(self.inputfile, self.prediction_fa, self.validation_fa, fraction, abs_max)
def _create_background(self, bg_type, bedfile, fafile, outfile, organism="hg18", width=200, nr_times=10):
    """Write a background FASTA file of the requested type.

    Parameters
    ----------
    bg_type : str
        One of "random", "genomic", "gc", "promoter" or "user".
    bedfile : str
        Not used by this method; kept for interface compatibility.
    fafile : str
        Input (foreground) FASTA file.
    outfile : str
        Path of the background FASTA file to write.
    organism : str, optional
        Genome name, used to locate the index directory and gene file.
    width : int, optional
        Sequence length of the generated background.
    nr_times : int, optional
        The background contains ``nr_times`` times as many sequences as
        the input file (ignored for "user" backgrounds).

    Returns
    -------
    int or None
        Number of background sequences written. ``None`` when ``bg_type``
        matches none of the known types (no branch is taken).

    Notes
    -----
    For "user" backgrounds the process exits with status 1 when the file
    named in ``self.params["user_background"]`` does not exist.
    """
    fg = Fasta(fafile)
    nr_seqs = nr_times * len(fg)

    if bg_type == "random":
        markov_order = int(self.markov_model)
        if markov_order >= 6:
            self.logger.warning("Are you sure about the Markov model? It seems too high!")
        else:
            # Fall back to a generic "<n>th" label so that an order outside
            # 1-5 (e.g. 0) cannot raise KeyError while building a log line.
            ordinals = {1: "1st", 2: "2nd", 3: "3rd", 4: "4th", 5: "5th"}
            order = ordinals.get(markov_order, "%sth" % markov_order)
            self.logger.debug("Creating random background (%s order Markov)", order)

        m = MarkovFasta(fg, k=markov_order, n=nr_seqs)
        m.writefasta(outfile)
        self.logger.debug("Random background: %s", outfile)
        # return the number of random sequences created
        return len(m)

    elif bg_type == "genomic":
        self.logger.debug("Creating genomic background")
        index_dir = os.path.join(self.config.get_index_dir(), organism)
        f = RandomGenomicFasta(index_dir, width, nr_seqs)
        f.writefasta(outfile)
        return len(f)

    elif bg_type == "gc":
        self.logger.debug("Creating GC matched background")
        f = MatchedGcFasta(fafile, organism, nr_seqs)
        f.writefasta(outfile)
        self.logger.debug("GC matched background: %s", outfile)
        return len(f)

    elif bg_type == "promoter":
        gene_file = os.path.join(self.config.get_gene_dir(), "%s.bed" % organism)
        index_dir = os.path.join(self.config.get_index_dir(), organism)
        self.logger.info(
            "Creating random promoter background (%s, using genes in %s)",
            organism, gene_file)
        f = PromoterFasta(gene_file, index_dir, width, nr_seqs)
        f.writefasta(outfile)
        self.logger.debug("Random promoter background: %s", outfile)
        return len(f)

    elif bg_type == "user":
        bg_file = self.params["user_background"]
        if not os.path.exists(bg_file):
            self.logger.error(
                "User-specified background file %s does not exist!",
                bg_file)
            sys.exit(1)

        self.logger.info("Copying user-specified background file %s to %s.",
                         bg_file, outfile)
        fa = Fasta(bg_file)
        median_len = median([len(seq) for seq in fa.seqs])
        # Warn when the median length is more than 5% away from `width`.
        if median_len < width * 0.95 or median_len > width * 1.05:
            self.logger.warning("The user-specified background file %s contains sequences with a median length of %s, while GimmeMotifs predicts motifs in sequences of length %s. This will influence the statistics! It is recommended to use background sequences of the same length.", bg_file, median_len, width)
        fa.writefasta(outfile)
        return len(fa)
# def filter_motifs(self, motif_ids, enrichmentfile, e_cutoff, p_cutoff):
# filt_motifs = []
# for line in open(enrichmentfile).readlines():
# if not line.startswith("#"):
# vals = line.strip().split("\t")
# if vals[0] in motif_ids:
示例3: background
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def background(args):
    """Generate a background file (FASTA or BED) from parsed arguments.

    Reads ``inputfile``, ``outputfile``, ``bg_type``, ``outformat``,
    ``length``, ``genome``, ``number`` and ``markov_order`` from ``args``.

    The number of sequences is ``args.number`` when set, otherwise the
    number of sequences in ``args.inputfile``.

    Exits the process with status 1 when:

    * ``bg_type`` is not in ``BG_TYPES``;
    * a "random" background is requested in BED format;
    * a "gc" background is requested without an input file;
    * FASTA output is requested for a genome-based type and the genome
      index directory does not exist;
    * a "promoter" background is requested and the gene file is missing;
    * neither a number nor an input file is given.
    """
    inputfile = args.inputfile
    out = args.outputfile
    bg_type = args.bg_type
    outformat = args.outformat.lower()
    length = args.length
    # Both spellings select FASTA output; use one flag everywhere so the
    # index check below cannot be skipped for "fa".
    fasta_output = outformat in ("fasta", "fa")

    if bg_type not in BG_TYPES:
        print("The argument 'type' should be one of: %s" % (",".join(BG_TYPES)))
        sys.exit(1)

    if outformat == "bed" and bg_type == "random":
        print("Random background can only be generated in FASTA format!")
        sys.exit(1)

    if bg_type == "gc" and not inputfile:
        print("need a FASTA formatted input file for background gc")
        sys.exit(1)

    # GimmeMotifs configuration for file and directory locations
    config = MotifConfig()

    # Genome index location for creation of FASTA files
    index_dir = os.path.join(config.get_index_dir(), args.genome)
    if bg_type in ("gc", "genomic", "promoter") and fasta_output:
        if not os.path.exists(index_dir):
            print("Index for %s does not exist. Has the genome been indexed for use with GimmeMotifs?" % args.genome)
            sys.exit(1)

    # Gene definition
    gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % args.genome)
    if bg_type == "promoter":
        if not os.path.exists(gene_file):
            print("Can't find gene definition for %s (%s). See GimmeMotifs documentation on how to add gene files." % (args.genome, gene_file))
            sys.exit(1)

    # Number of sequences
    number = None
    if args.number:
        number = args.number
    elif inputfile:
        number = number_of_seqs_in_file(inputfile)
    else:
        sys.stderr.write("please provide either a number or an inputfile\n")
        sys.exit(1)

    if bg_type == "random":
        f = Fasta(inputfile)
        m = bg.MarkovFasta(f, n=number, k=args.markov_order)
        m.writefasta(out)
    elif bg_type == "gc":
        if fasta_output:
            m = bg.MatchedGcFasta(inputfile, args.genome, number=number)
            m.writefasta(out)
        else:
            bg.matched_gc_bedfile(out, inputfile, args.genome, number)
    elif bg_type == "promoter":
        if fasta_output:
            m = bg.PromoterFasta(gene_file, index_dir, length=length, n=number)
            m.writefasta(out)
        else:
            bg.create_promoter_bedfile(out, gene_file, length, number)
    elif bg_type == "genomic":
        if fasta_output:
            m = bg.RandomGenomicFasta(index_dir, length, number)
            m.writefasta(out)
        else:
            bg.create_random_genomic_bedfile(out, index_dir, length, number)
示例4: genome
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def genome(args):
config = MotifConfig()
if not os.path.exists(args.indexdir):
print "Index_dir %s does not exist!" % (args.indexdir)
sys.exit(1)
if not os.path.exists(args.fastadir):
print "FASTA dir %s does not exist!" % (args.fastadir)
sys.exit(1)
pred_bin = "genePredToBed"
pred = find_executable(pred_bin)
if not pred:
sys.stderr.write("{} not found in path!\n".format(pred_bin))
sys.exit(1)
fastadir = args.fastadir
genomebuild = args.genomebuild
genome_dir = os.path.join(fastadir, genomebuild)
index_dir = os.path.join(args.indexdir, args.genomebuild)
# Check for rights to write to directory
if not os.path.exists(genome_dir):
try:
os.mkdir(genome_dir)
except:
sys.stderr.write("Could not create genome dir {}\n".format(genome_dir))
sys.exit(1)
# Download gene file based on URL + genomebuild
gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % genomebuild)
tmp = NamedTemporaryFile(delete=False, suffix=".gz")
anno = []
f = urllib2.urlopen(UCSC_GENE_URL.format(genomebuild))
p = re.compile(r'\w+.Gene.txt.gz')
for line in f.readlines():
m = p.search(line)
if m:
anno.append(m.group(0))
sys.stderr.write("Retrieving gene annotation for {}\n".format(genomebuild))
url = ""
for a in ANNOS:
if a in anno:
url = UCSC_GENE_URL.format(genomebuild) + a
break
if url:
urllib.urlretrieve(
url,
tmp.name
)
sp.call("zcat {} | cut -f2-11 | {} /dev/stdin {}".format(tmp.name, pred, gene_file), shell=True)
else:
sys.stderr.write("No annotation found!")
# download genome based on URL + genomebuild
sys.stderr.write("Downloading {} genome\n".format(genomebuild))
for genome_url in [UCSC_GENOME_URL, ALT_UCSC_GENOME_URL]:
remote = genome_url.format(genomebuild)
genome_fa = os.path.join(
genome_dir,
os.path.split(remote)[-1]
)
sys.stderr.write("Trying to download {}\n".format(genome_url.format(genomebuild)))
urllib.urlretrieve(
genome_url.format(genomebuild),
genome_fa
)
if not check_genome_file(genome_fa):
continue
break
if not check_genome_file(genome_fa):
sys.stderr.write("Failed to download genome\n")
sys.exit(1)
sys.stderr.write("Unpacking\n")
if genome_fa.endswith("tar.gz"):
cmd = "tar -C {0} -xvzf {1} && rm {1}".format(genome_dir, genome_fa)
else:
cmd = "gunzip {0} && rm {0}".format(genome_fa)
sp.call(cmd, shell=True, cwd=genome_dir)
fa_files = glob("{}/*.fa".format(genome_dir))
if len(fa_files) == 1:
f = Fasta(fa_files[0])
for n,s in f.items():
with open("{}/{}.fa".format(n)) as f:
#.........这里部分代码省略.........
示例5: create_background
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def create_background(bg_type, fafile, outfile, genome="hg18", width=200, nr_times=10, custom_background=None):
    """Create background of a specific type.

    Parameters
    ----------
    bg_type : str
        Name of background type: "random", "genomic", "gc", "promoter"
        or "custom".

    fafile : str
        Name of input FASTA file.

    outfile : str
        Name of output FASTA file.

    genome : str, optional
        Genome name. Required for "genomic", "gc" and "promoter".

    width : int, optional
        Size of regions.

    nr_times : int, optional
        Generate this times as many background sequences as compared to
        input file.

    custom_background : str, optional
        FASTA file to use as background when ``bg_type`` is "custom".

    Returns
    -------
    nr_seqs : int
        Number of sequences created.

    Raises
    ------
    ValueError
        If no gene file can be found for a "promoter" background, or if
        ``bg_type`` is not one of the supported types.
    IOError
        If ``bg_type`` is "custom" and the background file is not
        specified or does not exist.
    """
    width = int(width)
    config = MotifConfig()
    fg = Fasta(fafile)
    nr_seqs = nr_times * len(fg)

    # Every genome-based background needs a genome, including "promoter".
    if bg_type in ["genomic", "gc", "promoter"]:
        if not genome:
            logger.error("Need a genome to create background")
            sys.exit(1)

    if bg_type == "random":
        f = MarkovFasta(fg, k=1, n=nr_seqs)
        logger.debug("Random background: %s", outfile)
    elif bg_type == "genomic":
        logger.debug("Creating genomic background")
        f = RandomGenomicFasta(genome, width, nr_seqs)
    elif bg_type == "gc":
        logger.debug("Creating GC matched background")
        f = MatchedGcFasta(fafile, genome, nr_seqs)
        logger.debug("GC matched background: %s", outfile)
    elif bg_type == "promoter":
        fname = Genome(genome).filename
        gene_file = fname.replace(".fa", ".annotation.bed.gz")
        # str.replace never returns an empty string for a real filename, so
        # decide on the fallback by checking whether the annotation exists.
        if not os.path.exists(gene_file):
            gene_file = os.path.join(config.get_gene_dir(), "%s.bed" % genome)
        if not os.path.exists(gene_file):
            print("Could not find a gene file for genome {}".format(genome))
            print("Did you use the --annotation flag for genomepy?")
            print("Alternatively make sure there is a file called {}.bed in {}".format(genome, config.get_gene_dir()))
            raise ValueError(
                "Could not find a gene file for genome {}".format(genome))

        logger.info(
            "Creating random promoter background (%s, using genes in %s)",
            genome, gene_file)
        f = PromoterFasta(gene_file, genome, width, nr_seqs)
        logger.debug("Random promoter background: %s", outfile)
    elif bg_type == "custom":
        bg_file = custom_background
        if not bg_file:
            raise IOError(
                "Background file not specified!")

        if not os.path.exists(bg_file):
            # Format the message here: exceptions do not interpolate
            # logging-style arguments.
            raise IOError(
                "Custom background file {} does not exist!".format(bg_file))

        logger.info("Copying custom background file %s to %s.",
                    bg_file, outfile)
        f = Fasta(bg_file)
        median_len = np.median([len(seq) for seq in f.seqs])
        # Warn when the median length is more than 5% away from `width`.
        if median_len < (width * 0.95) or median_len > (width * 1.05):
            logger.warning(
                "The custom background file %s contains sequences with a "
                "median length of %s, while GimmeMotifs predicts motifs in sequences "
                "of length %s. This will influence the statistics! It is recommended "
                "to use background sequences of the same length.",
                bg_file, median_len, width)
    else:
        # Without this branch `f` is unbound below and the caller only sees
        # an obscure UnboundLocalError.
        raise ValueError("Unknown background type: {}".format(bg_type))

    f.writefasta(outfile)
    return len(f)
示例6: background
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_gene_dir [as 别名]
def background(args):
    """Generate a background file (FASTA or BED) from parsed arguments.

    Reads ``inputfile``, ``outputfile``, ``bg_type``, ``outformat``,
    ``length``, ``genome``, ``number`` and ``markov_order`` from ``args``.

    The number of sequences is ``args.number`` when set, otherwise the
    number of sequences in ``args.inputfile``.

    Exits the process with status 1 when:

    * ``bg_type`` is not in ``BG_TYPES``;
    * a "random" background is requested in BED format;
    * a "gc" background is requested without an input file;
    * a "promoter" background is requested and no gene file is found;
    * neither a number nor an input file is given.
    """
    inputfile = args.inputfile
    out = args.outputfile
    bg_type = args.bg_type
    outformat = args.outformat.lower()
    length = args.length
    # Both spellings select FASTA output; use one flag everywhere so the
    # genome check below is not skipped for "fa".
    fasta_output = outformat in ("fasta", "fa")

    if bg_type not in BG_TYPES:
        print("The argument 'type' should be one of: %s" % (",".join(BG_TYPES)))
        sys.exit(1)

    if outformat == "bed" and bg_type == "random":
        print("Random background can only be generated in FASTA format!")
        sys.exit(1)

    if bg_type == "gc" and not inputfile:
        print("need a FASTA formatted input file for background gc")
        sys.exit(1)

    # GimmeMotifs configuration for file and directory locations
    config = MotifConfig()

    # Resolve the genome up front for genome-based FASTA output.
    # NOTE(review): presumably Genome() raises for an unknown genome - confirm.
    if bg_type in ("gc", "genomic", "promoter") and fasta_output:
        Genome(args.genome)

    # Gene definition: only "promoter" backgrounds need it, so other types
    # (e.g. "random") no longer require a resolvable genome.
    gene_file = None
    if bg_type == "promoter":
        fname = Genome(args.genome).filename
        gene_file = fname.replace(".fa", ".annotation.bed.gz")
        # str.replace never returns an empty string for a real filename, so
        # decide on the fallback by checking whether the annotation exists.
        if not os.path.exists(gene_file):
            gene_file = os.path.join(config.get_gene_dir(), "{}.bed".format(args.genome))
        if not os.path.exists(gene_file):
            print("Could not find a gene file for genome {}".format(args.genome))
            print("Did you use the --annotation flag for genomepy?")
            print("Alternatively make sure there is a file called {}.bed in {}".format(args.genome, config.get_gene_dir()))
            sys.exit(1)

    # Number of sequences
    number = None
    if args.number:
        number = args.number
    elif inputfile:
        number = number_of_seqs_in_file(inputfile)
    else:
        sys.stderr.write("please provide either a number or an inputfile\n")
        sys.exit(1)

    if bg_type == "random":
        f = Fasta(inputfile)
        m = bg.MarkovFasta(f, n=number, k=args.markov_order)
        m.writefasta(out)
    elif bg_type == "gc":
        if fasta_output:
            m = bg.MatchedGcFasta(inputfile, args.genome, number=number)
            m.writefasta(out)
        else:
            bg.matched_gc_bedfile(out, inputfile, args.genome, number)
    elif bg_type == "promoter":
        if fasta_output:
            m = bg.PromoterFasta(gene_file, args.genome, length=length, n=number)
            m.writefasta(out)
        else:
            bg.create_promoter_bedfile(out, gene_file, length, number)
    elif bg_type == "genomic":
        if fasta_output:
            m = bg.RandomGenomicFasta(args.genome, length, number)
            m.writefasta(out)
        else:
            bg.create_random_genomic_bedfile(out, args.genome, length, number)