本文整理汇总了Python中gimmemotifs.config.MotifConfig类的典型用法代码示例。如果您正苦于以下问题:Python MotifConfig类的具体用法?Python MotifConfig怎么用?Python MotifConfig使用的例子?那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MotifConfig类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_genome
def get_genome(genomebuild, fastadir, indexdir=None):
    """Download a genome build plus annotation and build its index.

    Args:
        genomebuild: Name of the genome build; used as the sub-directory
            name below both ``fastadir`` and the index directory.
        fastadir: Directory below which the genome directory is created.
        indexdir: Directory below which the index is written. When falsy,
            the index directory from ``MotifConfig`` is used instead.

    Exits the process with status 1 when the genome directory cannot be
    created.
    """
    config = MotifConfig()
    index_root = indexdir if indexdir else config.get_index_dir()

    genome_dir = os.path.join(fastadir, genomebuild)
    index_dir = os.path.join(index_root, genomebuild)

    # Creating the directory up front doubles as a write-permission check.
    if not os.path.exists(genome_dir):
        try:
            os.mkdir(genome_dir)
        except OSError:
            sys.stderr.write("Could not create genome dir {}\n".format(genome_dir))
            sys.exit(1)

    # Annotation goes to <gene_dir>/<genomebuild>.bed
    annotation_bed = os.path.join(config.get_gene_dir(), "%s.bed" % genomebuild)
    download_annotation(genomebuild, annotation_bed)

    # Genome FASTA goes into the genome directory
    download_genome(genomebuild, genome_dir)

    sys.stderr.write("Creating index\n")
    GenomeIndex().create_index(genome_dir, index_dir)
    create_bedtools_fa(index_dir, genome_dir)
示例2: scan_to_table
def scan_to_table(input_table, genome, data_dir, scoring, pwmfile=None):
    """Scan the regions listed in a table and return a table of motif scores.

    Args:
        input_table: Path of a tab-separated table; its first column is used
            as the index and interpreted as the list of regions to scan.
        genome: Genome passed to ``check_threshold`` and to the scanner.
        data_dir: Directory passed to ``check_threshold``.
        scoring: ``"count"`` to use ``Scanner.count`` with the threshold;
            any other value uses ``Scanner.best_score``.
        pwmfile: Motif file. When ``None``, the ``motif_db`` entry of the
            default parameters is used, resolved against the motif directory.

    Returns:
        A ``pandas.DataFrame`` indexed like the input table, with one column
        per motif id read from ``pwmfile``.

    Raises:
        ValueError: If no ``pwmfile`` is given and no default is configured.
    """
    threshold = check_threshold(data_dir, genome, scoring)

    config = MotifConfig()
    if pwmfile is None:
        # Only the configured default is resolved against the motif
        # directory; an explicitly supplied path is used as given.
        pwmfile = config.get_default_params().get("motif_db", None)
        if pwmfile is not None:
            pwmfile = os.path.join(config.get_motif_dir(), pwmfile)

    if pwmfile is None:
        raise ValueError("no pwmfile given and no default database specified")

    df = pd.read_table(input_table, index_col=0)
    regions = list(df.index)

    s = Scanner()
    s.set_motifs(pwmfile)
    s.set_genome(genome)

    if scoring == "count":
        scores = list(s.count(regions, cutoff=threshold))
    else:
        scores = list(s.best_score(regions))

    # Read the ids inside the ``with`` block so the handle is closed even if
    # read_motifs consumes the file lazily.
    with open(pwmfile) as handle:
        motif_names = [m.id for m in read_motifs(handle)]

    return pd.DataFrame(scores, index=df.index, columns=motif_names)
示例3: scan
def scan(infile, motifs, cutoff, nreport=1, it=False):
    """Scan a FASTA file with every motif and collect all results.

    The FASTA file is split into slices of ``CHUNK`` entries; every
    (motif, slice) pair is submitted to the module-level ``pool``.

    Args:
        infile: FASTA file, opened with ``Fasta``.
        motifs: Iterable of motif objects; each must have an ``id``.
        cutoff: Cutoff specification handed to ``parse_cutoff``.
        nreport: Forwarded to ``scan_fa_with_motif``.
        it: Unused; kept for interface compatibility.

    Returns:
        dict mapping each motif to the merged result dict of all its slices.
        Every motif gets an entry, even when the FASTA file is empty.
    """
    # Default cutoff is only used if a cutoff is not supplied
    default_cutoff = MotifConfig().get_default_params()['scan_cutoff']
    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    fa = Fasta(infile)
    total_result = {}
    jobs = []
    for motif in motifs:
        # Initialise once per motif (not once per chunk) so that motifs are
        # present in the result even if no chunk is ever submitted.
        total_result[motif] = {}
        for i in range(0, len(fa), CHUNK):
            jobs.append(pool.apply_async(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport),
            ))

    # Workers hand back a motif object; map it to the caller's instance by id.
    motifkey = {m.id: m for m in motifs}
    for job in jobs:
        done_motif, result = job.get()
        total_result[motifkey[done_motif.id]].update(result)

    return total_result
示例4: scan_it
def scan_it(infile, motifs, cutoff, nreport=1, rc=True):
    """Scan a FASTA file with every motif, yielding results as they finish.

    The FASTA file is split into slices of ``CHUNK`` entries and every
    (motif, slice) pair is submitted to the module-level ``pool``. Results
    are yielded in submission order.

    Args:
        infile: FASTA file, opened with ``Fasta``.
        motifs: Iterable of motif objects; each must have an ``id``.
        cutoff: Cutoff specification handed to ``parse_cutoff``.
        nreport: Forwarded to ``scan_fa_with_motif``.
        rc: Forwarded to ``scan_fa_with_motif``.

    Yields:
        ``(motif, result)`` tuples, where ``motif`` is the caller's motif
        instance with the same id as the one returned by the worker.
    """
    from collections import deque

    # Default cutoff is only used if a cutoff is not supplied
    default_cutoff = MotifConfig().get_default_params()['scan_cutoff']
    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    fa = Fasta(infile)
    motifkey = {m.id: m for m in motifs}
    # FIFO queue of pending jobs; deque gives O(1) removal from the front.
    jobs = deque()

    for motif in motifs:
        for i in range(0, len(fa), CHUNK):
            jobs.append(pool.apply_async(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport, rc),
            ))

        # Drain the oldest jobs while the backlog exceeds 10. The returned
        # motif gets its own name so the submission loop variable is never
        # overwritten.
        while len(jobs) > 10:
            done_motif, result = jobs.popleft().get()
            yield motifkey[done_motif.id], result

    # Flush whatever is still pending.
    while jobs:
        done_motif, result = jobs.popleft().get()
        yield motifkey[done_motif.id], result
示例5: check_threshold
def check_threshold(outdir, genome, scoring="count"):
    """Return the motif-scanning threshold file, creating it when missing.

    For ``scoring == "count"`` the file ``threshold.<genome>.txt`` in
    ``outdir`` is used. If it does not exist, a background FASTA file
    (``background.<genome>.fa`` in ``outdir``) is generated when needed and
    ``gimme threshold`` is run on it with the default motif database.

    Args:
        outdir: Directory holding the threshold and background files.
        genome: Genome name; also the sub-directory of the index directory.
        scoring: Only ``"count"`` needs a threshold file.

    Returns:
        Path of the threshold file, or ``None`` when ``scoring`` is not
        ``"count"``.

    Raises:
        ValueError: If no default motif database is configured.
        Exception: Whatever ``sp.check_call`` raises when ``gimme threshold``
            exits with a non-zero status.
    """
    # gimme_motifs config, to get defaults
    config = MotifConfig()

    if scoring != "count":
        return None

    # Motif scanning threshold
    threshold_file = os.path.join(outdir, "threshold.{}.txt".format(genome))
    if os.path.exists(threshold_file):
        return threshold_file

    # Random sequences from genome
    index_dir = os.path.join(config.get_index_dir(), genome)
    bg_file = os.path.join(outdir, "background.{}.fa".format(genome))
    if not os.path.exists(bg_file):
        m = RandomGenomicFasta(index_dir, BG_LENGTH, BG_NUMBER)
        m.writefasta(bg_file)

    pwmfile = config.get_default_params().get("motif_db")
    if pwmfile is None:
        raise ValueError("no default motif database specified")
    pwmfile = os.path.join(config.get_motif_dir(), pwmfile)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged.
    # NOTE(review): assumes ``sp`` is the stdlib ``subprocess`` module.
    cmd = ["gimme", "threshold", pwmfile, bg_file, "{}".format(FDR)]
    finished = False
    try:
        with open(threshold_file, "w") as out:
            sp.check_call(cmd, stdout=out)
        finished = True
    finally:
        # A partial file would otherwise be treated as a valid cached
        # threshold on the next call.
        if not finished and os.path.exists(threshold_file):
            os.remove(threshold_file)

    return threshold_file
示例6: scan
def scan(infile, motifs, cutoff, nreport=1, it=False):
    """Scan a FASTA file with every motif using a ``pp`` job server.

    The FASTA file is split into slices of ``CHUNK`` entries; every
    (motif, slice) pair is submitted to the job server.

    Args:
        infile: FASTA file, opened with ``Fasta``.
        motifs: Iterable of motif objects; each must have an ``id``.
        cutoff: Cutoff specification handed to ``parse_cutoff``.
        nreport: Forwarded to ``scan_fa_with_motif``.
        it: Unused; kept for interface compatibility.

    Returns:
        dict mapping each motif to the merged result dict of all its slices.
        Every motif gets an entry, even when the FASTA file is empty.
    """
    # Get configuration defaults
    params = MotifConfig().get_default_params()
    # Cutoff for motif scanning, only used if a cutoff is not supplied
    default_cutoff = params['scan_cutoff']
    # Number of CPUs to use. Cast to int so the comparison below is numeric
    # even when the configuration stores the value as a string.
    ncpus = int(params['ncpus'])

    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    job_server = pp.Server(secret="beetrootsoup")
    # Never use more CPUs than configured.
    if job_server.get_ncpus() > ncpus:
        job_server.set_ncpus(ncpus)

    fa = Fasta(infile)
    total_result = {}
    jobs = []
    for motif in motifs:
        # Initialise once per motif rather than once per chunk.
        total_result[motif] = {}
        for i in range(0, len(fa), CHUNK):
            jobs.append(job_server.submit(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport),
                (), ()))

    # Workers hand back a motif object; map it to the caller's instance by id.
    motifkey = {m.id: m for m in motifs}
    for job in jobs:
        done_motif, result = job()
        total_result[motifkey[done_motif.id]].update(result)

    return total_result
示例7: scan_it_moods
def scan_it_moods(infile, motifs, cutoff, bgfile, nreport=1, scan_rc=True, pvalue=None, count=False):
    """Scan a FASTA file with MOODS, yielding results slice by slice.

    Each motif's ``pwm`` is written (transposed) to a temporary ``.pfm``
    file and converted with ``MOODS.parsers.pfm_log_odds``. The FASTA file
    is then split into slices that are submitted to the module-level
    ``pool``.

    Args:
        infile: FASTA file to scan, opened with ``Fasta``.
        motifs: Motif objects; each must have ``id`` and ``pwm``.
        cutoff: Converted to float and passed to ``calc_threshold_moods``
            when ``pvalue`` is ``None``.
        bgfile: FASTA file whose concatenated sequences define the
            background distribution.
        nreport: Forwarded to the worker function.
        scan_rc: Forwarded to the worker function.
        pvalue: When not ``None``, thresholds are derived with
            ``MOODS.tools.threshold_from_p`` instead of ``cutoff``.
        count: Use ``scan_fa_with_motif_moods_count`` as worker instead of
            ``scan_fa_with_motif_moods``.

    Yields:
        The items of each worker's return value, in submission order.
    """
    import shutil

    pseudocount = 1e-3
    bg = MOODS.tools.bg_from_sequence_dna("".join(Fasta(bgfile).seqs), 1)

    matrices = []
    tmpdir = mkdtemp()
    try:
        for motif in motifs:
            pfmname = os.path.join(tmpdir, "{}.pfm".format(motif.id))
            with open(pfmname, "w") as f:
                matrix = np.array(motif.pwm).transpose()
                for row in matrix:
                    f.write("{}\n".format(" ".join([str(x) for x in row])))
            # Parse only after the file has been closed (and thus flushed).
            matrices.append(MOODS.parsers.pfm_log_odds(pfmname, bg, pseudocount))
    finally:
        # The .pfm files are only needed while parsing.
        # NOTE(review): assumes pfm_log_odds returns fully loaded matrices.
        shutil.rmtree(tmpdir, ignore_errors=True)

    if pvalue is not None:
        thresholds = [MOODS.tools.threshold_from_p(m, bg, float(pvalue)) for m in matrices]
    else:
        thresholds = [calc_threshold_moods(m, float(cutoff)) for m in matrices]

    # This scanner instance is not used further in this function.
    scanner = MOODS.scan.Scanner(7)
    scanner.set_motifs(matrices, bg, thresholds)

    config = MotifConfig()
    ncpus = int(config.get_default_params()['ncpus'])
    fa = Fasta(infile)

    # Shrink the slice size for small inputs so work is spread over the
    # CPUs. Integer division plus a floor of 1 keeps ``chunk`` a valid
    # ``range`` step (never 0, never a float).
    chunk = 500
    if (len(fa) // chunk) < ncpus:
        chunk = max(1, len(fa) // (ncpus + 1))

    func = scan_fa_with_motif_moods_count if count else scan_fa_with_motif_moods

    jobs = []
    for i in range(0, len(fa), chunk):
        jobs.append(pool.apply_async(
            func,
            (fa[i:i + chunk], motifs, matrices, bg, thresholds, nreport, scan_rc),
        ))

    for job in jobs:
        for ret in job.get():
            yield ret
示例8: default_motifs
def default_motifs():
    """Return list of Motif instances from default motif database.

    Raises:
        ValueError: If the motif directory or the ``motif_db`` default
            parameter is missing or empty.
    """
    config = MotifConfig()
    motif_dir = config.get_motif_dir()
    # ``.get`` so a missing key reaches the ValueError below instead of
    # surfacing as a KeyError.
    motif_db = config.get_default_params().get('motif_db')

    if not motif_dir or not motif_db:
        raise ValueError("default motif database not configured")

    with open(os.path.join(motif_dir, motif_db)) as f:
        return read_motifs(f)
示例9: __init__
def __init__(self, matchfile, genome="hg19", number=None):
    """Initialise from sequences produced by ``matched_gc_bedfile``.

    Args:
        matchfile: Passed to ``matched_gc_bedfile`` as the file to match.
        genome: Genome name; also the sub-directory of the index directory.
        number: Passed to ``matched_gc_bedfile``.
    """
    config = MotifConfig()
    index = os.path.join(config.get_index_dir(), genome)

    # Only the generated names are kept from the temporary-file objects.
    tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
    tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name

    try:
        # Create bed-file with coordinates of random sequences
        matched_gc_bedfile(tmpbed, matchfile, genome, number)

        # Convert track to fasta
        track2fasta(index, tmpbed, tmpfasta)

        # Initialize super Fasta object
        Fasta.__init__(self, tmpfasta)
    finally:
        # Delete the temporary files, also when one of the steps failed.
        # The existence check avoids masking the original error.
        for path in (tmpbed, tmpfasta):
            if os.path.exists(path):
                os.remove(path)
示例10: _write_report
def _write_report(outdir, ids, tree, clusters):
    """Write the cluster report, key file and clustered motifs to ``outdir``.

    Three files are written: ``cluster_report.html`` (rendered from
    ``cluster_template.jinja.html``), ``cluster_key.txt`` and
    ``clustered_motifs.pwm``.

    Args:
        outdir: Output directory.
        ids: Sequence of entries; ``entry[0]`` is written as the cluster id
            and ``entry[2]`` is a list of dicts with an ``"alt"`` key.
        tree: Object providing ``get_clustered_motifs()``; only used when
            there is more than one cluster or member.
        clusters: Sequence of ``(cluster, members)`` pairs.
    """
    config = MotifConfig()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([config.get_template_dir()]))
    template = env.get_template("cluster_template.jinja.html")
    result = template.render(motifs=ids)

    with open(os.path.join(outdir, "cluster_report.html"), "w") as f:
        f.write(result)

    # One line per cluster: id, then its comma-separated member names.
    with open(os.path.join(outdir, "cluster_key.txt"), "w") as f:
        for motif_id in ids:
            f.write("%s\t%s\n" % (motif_id[0], ",".join([x["alt"] for x in motif_id[2]])))

    with open(os.path.join(outdir, "clustered_motifs.pwm"), "w") as f:
        if len(clusters) == 1 and len(clusters[0][1]) == 1:
            # A single motif: write it directly, the tree is not consulted.
            f.write("%s\n" % clusters[0][0].to_pwm())
        else:
            for motif in tree.get_clustered_motifs():
                f.write("%s\n" % motif.to_pwm())
示例11: scan_it
def scan_it(infile, motifs, cutoff, nreport=1, rc=True):
    """Scan a FASTA file with every motif via ``pp``, yielding results.

    The FASTA file is split into slices of ``CHUNK`` entries and every
    (motif, slice) pair is submitted to a ``pp`` job server. Results are
    yielded in submission order.

    Args:
        infile: FASTA file, opened with ``Fasta``.
        motifs: Iterable of motif objects; each must have an ``id``.
        cutoff: Cutoff specification handed to ``parse_cutoff``.
        nreport: Forwarded to ``scan_fa_with_motif``.
        rc: Forwarded to ``scan_fa_with_motif``.

    Yields:
        ``(motif, result)`` tuples, where ``motif`` is the caller's motif
        instance with the same id as the one returned by the worker.
    """
    from collections import deque

    # Get configuration defaults
    params = MotifConfig().get_default_params()
    # Cutoff for motif scanning, only used if a cutoff is not supplied
    default_cutoff = params['scan_cutoff']
    # Number of CPUs to use. Cast to int so the comparison below is numeric
    # even when the configuration stores the value as a string.
    ncpus = int(params['ncpus'])

    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    job_server = pp.Server(secret="beetrootsoup")
    # NOTE(review): this statement only reads the attribute and has no
    # effect; an assignment may have been intended -- confirm.
    pp.SHOW_EXPECTED_EXCEPTIONS  # True
    if job_server.get_ncpus() > ncpus:
        job_server.set_ncpus(ncpus)

    fa = Fasta(infile)
    motifkey = {m.id: m for m in motifs}
    # FIFO queue of pending jobs; deque gives O(1) removal from the front.
    jobs = deque()

    for motif in motifs:
        for i in range(0, len(fa), CHUNK):
            jobs.append(job_server.submit(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport, rc),
                (), ()))

        # Drain the oldest jobs while the backlog exceeds 10. The returned
        # motif gets its own name so the submission loop variable is never
        # overwritten.
        while len(jobs) > 10:
            done_motif, result = jobs.popleft()()
            yield motifkey[done_motif.id], result

    # Flush whatever is still pending.
    while jobs:
        done_motif, result = jobs.popleft()()
        yield motifkey[done_motif.id], result
示例12: pwmfile_location
def pwmfile_location(infile):
    """Resolve a motif file name to an existing path.

    Args:
        infile: Motif file name or path, ``None`` to use the configured
            default (``motif_db``), or any non-string object.

    Returns:
        For strings: ``infile`` itself when it exists, otherwise the first
        existing candidate inside the motif directory (the name as given,
        then with ``.pfm``, then with ``.pwm`` appended). Non-string
        objects are returned unchanged.

    Raises:
        ValueError: If ``infile`` is ``None`` and no default is configured,
            or if no existing file can be found for a string input.
    """
    config = MotifConfig()

    if infile is None:
        infile = config.get_default_params().get("motif_db", None)
        if infile is None:
            raise ValueError("No motif file was given and no default "
                             "database specified in the config file.")

    # Anything that is not a path string is handed back untouched.
    if not isinstance(infile, six.string_types):
        return infile

    if not os.path.exists(infile):
        base = os.path.join(config.get_motif_dir(), infile)
        candidates = [base] + [base + ext for ext in ['.pfm', '.pwm']]
        for candidate in candidates:
            if os.path.exists(candidate):
                infile = candidate
                break

    if not os.path.exists(infile):
        raise ValueError("Motif file {} not found".format(infile))

    return infile
示例13: __init__
def __init__(self, name=None):
    """Set up configuration, output directory, logging and file names.

    Args:
        name: Run name. When falsy, ``<NAME>_<dd_mm_YYYY>`` is built from
            the class attribute ``NAME`` and today's date.
    """
    self.config = MotifConfig()
    self.server = None

    # Fall back to a date-stamped default name.
    self.name = name = name or "%s_%s" % (self.NAME, datetime.today().strftime("%d_%m_%Y"))

    # Directory for all the intermediate and output files
    self._setup_output_dir(name)

    # Logging has to be ready before the first message below.
    # NOTE(review): self.logger / self.outdir are presumably set by the two
    # setup calls above -- confirm.
    self._setup_logging()
    self.logger.info("%s version %s", self.NAME, GM_VERSION)
    self.logger.info("output dir: %s", self.outdir)

    # Names of the intermediate and output files
    self._setup_filenames()
示例14: __init__
def __init__(self):
    """Load the configuration, set the known metric and combine names, and load scores."""
    self.config = MotifConfig()
    # Names of the supported metrics and of the ways to combine scores.
    self.metrics, self.combine = ["pcc", "ed", "distance", "wic"], ["mean", "sum"]
    self._load_scores()
示例15: cluster
def cluster(args):
    """Cluster the motifs of an input file and write images plus a report.

    Reads ``args.inputfile``, ``args.outdir`` and ``args.threshold``.
    The output directory is created when missing and receives one PNG per
    cluster and per member motif, ``cluster_report.html``,
    ``cluster_key.txt`` and ``clustered_motifs.pwm``.

    Args:
        args: Object with the attributes ``inputfile``, ``outdir`` and
            ``threshold``.
    """
    import io

    outdir = os.path.abspath(args.outdir)
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    trim_ic = 0.2
    motifs = pwmfile_to_motifs(args.inputfile)
    if len(motifs) == 1:
        # A single motif is its own cluster; no tree is built.
        clusters = [[motifs[0], motifs]]
    else:
        tree = cluster_motifs(args.inputfile, "total", "wic", "mean", True, threshold=args.threshold, include_bg=True)
        clusters = tree.getResult()

    ids = []
    mc = MotifComparer()

    sys.stderr.write("Creating images\n")
    for clustered, members in clusters:
        clustered.trim(trim_ic)
        clustered.to_img(os.path.join(outdir, "%s.png" % clustered.id), format="PNG")
        ids.append([clustered.id, {"src": "%s.png" % clustered.id}, []])

        if len(members) > 1:
            scores = {}
            for motif in members:
                scores[motif] = mc.compare_motifs(clustered, motif, "total", "wic", "mean", pval=True)

            # Smallest position over all members; each member image is
            # padded on the left relative to it.
            add_pos = min(scores.values(), key=lambda x: x[1])[1]

            for motif in members:
                _, pos, strand = scores[motif]
                add = pos - add_pos

                if strand not in [1, "+"]:
                    # Draw the reverse complement under the original id.
                    rc = motif.rc()
                    rc.id = motif.id
                    motif = rc

                motif.to_img(os.path.join(outdir, "%s.png" % motif.id.replace(" ", "_")), format="PNG", add_left=add)

        ids[-1][2] = [dict([("src", "%s.png" % motif.id.replace(" ", "_")), ("alt", motif.id.replace(" ", "_"))]) for motif in members]

    config = MotifConfig()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([config.get_template_dir()]))
    template = env.get_template("cluster_template.jinja.html")
    result = template.render(motifs=ids)

    # Let the file object do the UTF-8 encoding; writing pre-encoded bytes
    # to a text-mode file fails on Python 3.
    with io.open(os.path.join(outdir, "cluster_report.html"), "w", encoding="utf-8") as f:
        f.write(result)

    # One line per cluster: id, then its comma-separated member names.
    with open(os.path.join(outdir, "cluster_key.txt"), "w") as f:
        for entry in ids:
            f.write("%s\t%s\n" % (entry[0], ",".join([x["alt"] for x in entry[2]])))

    with open(os.path.join(outdir, "clustered_motifs.pwm"), "w") as f:
        if len(clusters) == 1 and len(clusters[0][1]) == 1:
            f.write("%s\n" % clusters[0][0].to_pwm())
        else:
            for motif in tree.get_clustered_motifs():
                f.write("%s\n" % motif.to_pwm())