本文整理汇总了Python中gimmemotifs.config.MotifConfig.get_motif_dir方法的典型用法代码示例。如果您正苦于以下问题：Python MotifConfig.get_motif_dir方法的具体用法？Python MotifConfig.get_motif_dir怎么用？Python MotifConfig.get_motif_dir使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gimmemotifs.config.MotifConfig的用法示例。
在下文中一共展示了MotifConfig.get_motif_dir方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scan_to_table
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def scan_to_table(input_table, genome, data_dir, scoring, pwmfile=None):
    """Scan the regions listed in a table with motifs and tabulate the result.

    Parameters
    ----------
    input_table : str
        Path of a tab-delimited table. Its first column becomes the index
        and supplies the regions that are scanned.
    genome : str
        Genome identifier, handed to ``check_threshold`` and to
        ``Scanner.set_genome``.
    data_dir : str
        Directory handed to ``check_threshold``.
    scoring : str
        ``"count"`` uses ``Scanner.count`` with the threshold returned by
        ``check_threshold``; any other value uses ``Scanner.best_score``.
    pwmfile : str, optional
        Motif file. When omitted, the ``motif_db`` entry of the default
        parameters is joined onto the configured motif directory.

    Returns
    -------
    pandas.DataFrame
        One row per region (same index as the input table) and one column
        per motif id read from ``pwmfile``.

    Raises
    ------
    ValueError
        If no ``pwmfile`` is given and no default database is configured.
    """
    threshold = check_threshold(data_dir, genome, scoring)

    config = MotifConfig()
    if pwmfile is None:
        pwmfile = config.get_default_params().get("motif_db", None)
        if pwmfile is not None:
            pwmfile = os.path.join(config.get_motif_dir(), pwmfile)
    if pwmfile is None:
        raise ValueError("no pwmfile given and no default database specified")

    df = pd.read_table(input_table, index_col=0)
    regions = list(df.index)

    s = Scanner()
    s.set_motifs(pwmfile)
    s.set_genome(genome)

    if scoring == "count":
        scores = list(s.count(regions, cutoff=threshold))
    else:
        scores = list(s.best_score(regions))

    # Read the ids inside the ``with`` block so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(pwmfile) as handle:
        motif_names = [m.id for m in read_motifs(handle)]
    return pd.DataFrame(scores, index=df.index, columns=motif_names)
示例2: check_threshold
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def check_threshold(outdir, genome, scoring="count"):
    """Return the motif scanning threshold file for ``genome``, creating it if absent.

    Parameters
    ----------
    outdir : str
        Directory that holds (or will hold) ``threshold.<genome>.txt`` and
        ``background.<genome>.fa``.
    genome : str
        Genome name; used in the file names and joined onto the configured
        index directory.
    scoring : str, optional
        Only ``"count"`` uses a threshold file.

    Returns
    -------
    str or None
        Path of the threshold file, or ``None`` when ``scoring`` is not
        ``"count"``.

    Raises
    ------
    ValueError
        If a threshold has to be computed but no default motif database is
        configured.
    RuntimeError
        If the ``gimme threshold`` command exits with a non-zero status.
    """
    if scoring != "count":
        return None

    # Motif scanning threshold
    threshold_file = os.path.join(outdir, "threshold.{}.txt".format(genome))
    if os.path.exists(threshold_file):
        return threshold_file

    # gimme_motifs config, to get defaults (only needed when computing)
    config = MotifConfig()

    pwmfile = config.get_default_params().get("motif_db")
    if pwmfile is None:
        # Fail with a clear message instead of a TypeError in os.path.join.
        raise ValueError("no default motif database specified")
    pwmfile = os.path.join(config.get_motif_dir(), pwmfile)

    # Random sequences from genome
    index_dir = os.path.join(config.get_index_dir(), genome)
    bg_file = os.path.join(outdir, "background.{}.fa".format(genome))
    if not os.path.exists(bg_file):
        m = RandomGenomicFasta(index_dir, BG_LENGTH, BG_NUMBER)
        m.writefasta(bg_file)

    # Argument list + explicit stdout redirection: no shell is involved, so
    # paths containing spaces or shell metacharacters are passed verbatim.
    cmd = ["gimme", "threshold", pwmfile, bg_file, str(FDR)]
    with open(threshold_file, "w") as out:
        retcode = sp.call(cmd, stdout=out)
    if retcode != 0:
        # Do not leave a partial file behind: it would be picked up as a
        # valid cached threshold on the next call.
        os.remove(threshold_file)
        raise RuntimeError(
            "'gimme threshold' failed with exit status {}".format(retcode))
    return threshold_file
示例3: default_motifs
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def default_motifs():
    """Return list of Motif instances from default motif database.

    Raises
    ------
    ValueError
        If the motif directory or the ``motif_db`` default is missing or
        empty in the configuration.
    """
    config = MotifConfig()
    d = config.get_motif_dir()
    # ``.get`` so that a missing key reaches the ValueError below rather than
    # escaping as a KeyError.
    m = config.get_default_params().get("motif_db")
    if not d or not m:
        raise ValueError("default motif database not configured")

    fname = os.path.join(d, m)
    with open(fname) as f:
        return read_motifs(f)
示例4: pwmfile_location
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def pwmfile_location(infile):
    """Resolve a motif file argument to an existing file where possible.

    ``None`` is replaced by the ``motif_db`` default from the configuration.
    A string that is not an existing path is looked up in the configured
    motif directory, first as given and then with ``.pfm`` and ``.pwm``
    appended. Non-string arguments are returned unchanged.

    Raises
    ------
    ValueError
        If ``infile`` is ``None`` and no default database is configured, or
        if a string argument cannot be resolved to an existing file.
    """
    cfg = MotifConfig()

    if infile is None:
        infile = cfg.get_default_params().get("motif_db", None)
        if infile is None:
            raise ValueError("No motif file was given and no default "
                             "database specified in the config file.")

    # Anything that is not a path string is passed through as-is.
    if not isinstance(infile, six.string_types):
        return infile

    if not os.path.exists(infile):
        base = os.path.join(cfg.get_motif_dir(), infile)
        # Bare name first, then the known extensions, first hit wins.
        for candidate in (base, base + '.pfm', base + '.pwm'):
            if os.path.exists(candidate):
                infile = candidate
                break

    if not os.path.exists(infile):
        raise ValueError("Motif file {} not found".format(infile))
    return infile
示例5: MotifComparer
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
#.........这里部分代码省略.........
if batch_len <= 0:
batch_len = 1
jobs = []
for i in range(0, len(dbmotifs), batch_len):
# submit jobs to the job server
p = pool.apply_async(_get_all_scores,
args=(self, motifs, dbmotifs[i: i + batch_len], match, metric, combine, pval))
jobs.append(p)
pool.close()
for job in jobs:
# Get the job result
result = job.get()
# and update the result score
for m1,v in result.items():
for m2, s in v.items():
if m1 not in scores:
scores[m1] = {}
scores[m1][m2] = s
pool.join()
else:
# Do the whole thing at once if we don't want parallel
scores = _get_all_scores(self, motifs, dbmotifs, match, metric, combine, pval)
return scores
def get_closest_match(self, motifs, dbmotifs=None, match="partial", metric="wic", combine="mean", parallel=True, ncpus=None):
    """Find the highest-scoring database motif for each input motif.

    Parameters
    ----------
    motifs : list or str
        Filename of motifs or list of motifs.
    dbmotifs : list or str, optional
        Database motifs; the configured default database is used when
        not specified.
    match : str, optional
    metric : str, optional
    combine : str, optional
    parallel : bool, optional
        Forwarded to ``get_all_scores``.
    ncpus : int, optional
        Number of threads to use.

    Returns
    -------
    closest_match : dict
        Maps each motif id to ``[db_motif_id, score_values + [pval]]``.
    """
    if dbmotifs is None:
        db_name = self.config.get_default_params()["motif_db"]
        db_dir = self.config.get_motif_dir()
        dbmotifs = os.path.join(db_dir, db_name)

    motifs = parse_motifs(motifs)
    dbmotifs = parse_motifs(dbmotifs)

    by_id = {}
    for db_motif in dbmotifs:
        by_id[db_motif.id] = db_motif

    scores = self.get_all_scores(
        motifs, dbmotifs, match, metric, combine,
        parallel=parallel, ncpus=ncpus)

    # Keep only the top-ranked (db id, score) pair per motif. The last
    # element of a stable ascending sort is taken, so ties resolve to the
    # entry that comes last.
    for query_id in scores:
        ranked = sorted(scores[query_id].items(), key=lambda item: item[1][0])
        scores[query_id] = ranked[-1]

    # Append the p-value of the winning comparison to its score values.
    for motif in motifs:
        best_id, best_score = scores[motif.id]
        pval, _pos, _orient = self.compare_motifs(
            motif, by_id[best_id], match, metric, combine, True)
        scores[motif.id] = [best_id, (list(best_score) + [pval])]

    return scores
def generate_score_dist(self, motifs, match, metric, combine):
    """Write mean and standard deviation of scores per pair of motif lengths.

    Motifs are grouped by ``len(motif)``. For every ordered pair of lengths
    ``(l1, l2)`` the two groups are compared with ``get_all_scores`` and one
    tab-separated line ``l1, l2, mean, std`` is written to
    ``<match>_<metric>_<combine>_score_dist.txt`` in the configured score
    directory.

    Parameters
    ----------
    motifs : iterable
        Motifs supporting ``len()``.
    match, metric, combine : str
        Forwarded to ``get_all_scores`` and used in the output file name.
    """
    score_file = os.path.join(
        self.config.get_score_dir(),
        "%s_%s_%s_score_dist.txt" % (match, metric, combine))

    # Group motifs by length, keeping the first-seen order of the lengths.
    motifs_by_len = {}
    for motif in motifs:
        motifs_by_len.setdefault(len(motif), []).append(motif)

    # ``with`` guarantees the file is closed even if scoring raises.
    with open(score_file, "w") as f:
        for l1, group1 in motifs_by_len.items():
            for l2, group2 in motifs_by_len.items():
                scores = self.get_all_scores(group1, group2, match, metric, combine)
                # Flatten directly: after dropping empty entries the rows may
                # differ in length, and a ragged nested list cannot be turned
                # into a numeric array.
                flat = [y[0] for x in scores.values() for y in x.values() if y]
                f.write("%s\t%s\t%s\t%s\n" % (l1, l2, np.mean(flat), np.std(flat)))
示例6: _create_graphical_report
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def _create_graphical_report(inputfile, pwm, background, closest_match, outdir, stats, best_id=None):
    """Render the main gimme_motifs HTML report into ``outdir``.

    For every motif read from ``pwm`` a logo is written to
    ``<outdir>/images``, together with the logo of its closest database
    match, and a record of display values is collected. The records are
    rendered with the ``report_template.jinja.html`` template and saved as
    ``<outdir>/motif_report.html``.

    Parameters
    ----------
    inputfile : passed through to the template.
    pwm : motif source handed to ``read_motifs`` with ``fmt="pwm"``.
    background : dict
        Its keys are the background names used to index ``stats``.
    closest_match : dict
        Indexed by motif id; element 0 is the matching database motif id and
        the last item of element 1 is formatted as the match p-value.
    outdir : str
        Output directory.
    stats : dict
        Indexed by ``str(motif)`` and then by background name.
    best_id : dict, optional
        Currently unused.
    """
    if best_id is None:
        best_id = {}

    logger.debug("Creating graphical report")

    class ReportMotif(object):
        """Placeholder for motif stats."""

    cfg = MotifConfig()

    image_dir = os.path.join(outdir, "images")
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)

    motifs = read_motifs(pwm, fmt="pwm")

    roc_img_file = "%s_roc.%s"

    default_db = cfg.get_default_params()["motif_db"]
    db_path = os.path.join(cfg.get_motif_dir(), default_db)
    dbmotifs = read_motifs(db_path, as_dict=True)

    # (template key, format, stats key, fallback) for the per-background cells.
    # TODO: fix these stats
    stat_columns = (
        ("e", "%0.2f", "enr_at_fpr", 1.0),
        ("p", "%0.2f", "phyper_at_fpr", 1.0),
        ("auc", "%0.3f", "roc_auc", 0.5),
        ("mncp", "%0.3f", "mncp", 1.0),
    )

    report_motifs = []
    for motif in motifs:
        entry = ReportMotif()
        entry.id = motif.id
        entry.id_href = {"href": "#%s" % motif.id}
        entry.id_name = {"name": motif.id}
        entry.img = {"src": os.path.join("images", "%s.png" % motif.id)}
        motif.to_img(os.path.join(outdir, "images/{}.png".format(motif.id)), fmt="PNG")

        # TODO: fix best ID
        entry.best = "Gimme"
        entry.consensus = motif.to_consensus()

        # Mean star rating over all backgrounds, rounded half up.
        star_values = [stats[str(motif)][bg].get("stars", 0) for bg in background]
        entry.stars = int(np.mean(star_values) + 0.5)

        entry.bg = {}
        for bg in background:
            cell = {}
            entry.bg[bg] = cell
            this_stats = stats.get(str(motif), {}).get(bg)
            for key, fmt, stat_name, fallback in stat_columns:
                cell[key] = fmt % this_stats.get(stat_name, fallback)
            roc_png = "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"
            cell["roc_img"] = {"src": roc_png}
            cell[u"roc_img_link"] = {u"href": roc_png}

        histogram = "images/%s_histogram.svg" % motif.id
        entry.histogram_img = {"data": histogram}
        entry.histogram_link = {"href": histogram}

        best = closest_match[motif.id]
        match_id = best[0]
        dbmotifs[match_id].to_img(os.path.join(outdir, "images/{}.png".format(match_id)), fmt="PNG")
        entry.match_img = {"src": "images/{}.png".format(match_id)}
        entry.match_id = match_id
        entry.match_pval = "%0.2e" % best[1][-1]

        report_motifs.append(entry)

    total_report = os.path.join(outdir, "motif_report.html")

    shutil.copyfile(
        os.path.join(cfg.get_template_dir(), "star.png"),
        os.path.join(outdir, "images", "star.png"))

    loader = jinja2.FileSystemLoader([cfg.get_template_dir()])
    template = jinja2.Environment(loader=loader).get_template("report_template.jinja.html")

    # TODO: title
    html = template.render(
        motifs=report_motifs,
        inputfile=inputfile,
        date=datetime.today().strftime("%d/%m/%Y"),
        version=__version__,
        bg_types=list(background.keys()))

    with open(total_report, "wb") as f:
        f.write(html.encode('utf-8'))
示例7: GimmeMotifs
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
#.........这里部分代码省略.........
rm.stars = stats["%s_%s" % (motif.id, motif.to_consensus())]["stars"]
rm.bg = {}
for bg in background:
rm.bg[bg] = {}
rm.bg[bg]["e"] = "%0.2f" % self.e[bg].setdefault(motif.id, 0.0)
rm.bg[bg]["p"] = "%0.2f" % self.p[bg].setdefault(motif.id, 1.0)
rm.bg[bg]["auc"] = "%0.3f" % self.auc[bg][motif.id]
rm.bg[bg]["mncp"] = "%0.3f" % self.mncp[bg][motif.id]
rm.bg[bg]["roc_img"] = {"src": "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"}
rm.bg[bg]["roc_img_link"] = {"href": "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"}
rm.histogram_img = {"data":"images/%s_histogram.svg" % motif.id}
rm.histogram_link= {"href":"images/%s_histogram.svg" % motif.id}
rm.match_img = {"src": "images/%s.png" % self.closest_match[motif.id][0].id}
rm.match_id = self.closest_match[motif.id][0].id
rm.match_pval = "%0.2e" % self.closest_match[motif.id][1]
report_motifs.append(rm)
total_report = self.motif_report
env = jinja2.Environment(loader=jinja2.FileSystemLoader([self.config.get_template_dir()]))
template = env.get_template("report_template.jinja.html")
result = template.render(expname=self.basename, motifs=report_motifs, inputfile=self.inputfile, date=datetime.today().strftime("%d/%m/%Y"), version=GM_VERSION)
f = open(total_report, "w")
f.write(result.encode('utf-8'))
f.close()
def determine_closest_match(self, motifs):
    """Return the closest default-database motif and its p-value per motif.

    The default motif database is read (``pwm``/``pfm`` or ``transfac``,
    chosen by file-name suffix) and compared against ``motifs`` with a
    ``MotifComparer`` using partial matching, the ``wic`` metric and
    ``mean`` combination.

    Parameters
    ----------
    motifs : list
        Motifs to look up; each must expose an ``id``.

    Returns
    -------
    dict
        Maps motif id to ``[database_motif, pval]``.
    """
    self.logger.debug("Determining closest matching motifs in database")

    motif_db = self.config.get_default_params()["motif_db"]
    db = os.path.join(self.config.get_motif_dir(), motif_db)

    db_motifs = []
    if db.endswith(("pwm", "pfm")):
        # Close the handle deterministically instead of leaking it.
        with open(db) as handle:
            db_motifs = read_motifs(handle, fmt="pwm")
    elif db.endswith("transfac"):
        # NOTE(review): this branch passes a file name while the branch
        # above passes an open handle - confirm read_motifs accepts both.
        db_motifs = read_motifs(db, fmt="transfac")

    mc = MotifComparer()
    db_motif_lookup = {m.id: m for m in db_motifs}
    match = mc.get_closest_match(motifs, db_motifs, "partial", "wic", "mean", parallel=False)

    closest_match = {}
    for motif in motifs:
        best = db_motif_lookup[match[motif.id][0]]
        # Calculate p-value
        pval, _pos, _orient = mc.compare_motifs(motif, best, "partial", "wic", "mean", pval=True)
        closest_match[motif.id] = [best, pval]
    return closest_match
def _determine_best_motif_in_cluster(self, clusters, pwm, sample_fa, bg_fa, imgdir=None):
num_cluster = {}
best_id = {}
out = open(pwm, "w")
for i, (clus, singles) in enumerate(clusters):
motifs = [clus] + singles
tmp = NamedTemporaryFile(dir=mytmpdir())
tmp2 = NamedTemporaryFile(dir=mytmpdir())
for m in motifs:
tmp.write("%s\n" % m.to_pwm())
tmp.flush()
auc,mncp = self._roc_metrics(tmp.name, sample_fa, bg_fa, tmp2.name)
bla = sorted(motifs, cmp=lambda x,y: cmp(mncp[x.id], mncp[y.id]))
for m in bla:
self.logger.debug("sorted: %s %s %s",
str(m), mncp[m.id], auc[m.id])
示例8: scan_to_table
# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def scan_to_table(input_table, genome, scoring, pwmfile=None, ncpus=None):
    """Scan regions in input table with motifs.

    Parameters
    ----------
    input_table : str
        Filename of input table. Can be either a tab-separated text file or
        a feather file.
    genome : str
        Genome name. Can be either the name of a FASTA-formatted file or a
        genomepy genome name.
    scoring : str
        "count" or "score"
    pwmfile : str, optional
        Specify a PFM file for scanning. When omitted, the default motif
        database from the configuration is used.
    ncpus : int, optional
        If defined this specifies the number of cores to use.

    Returns
    -------
    table : pandas.DataFrame
        DataFrame with motif ids as column names and regions as index. Values
        are either counts or scores depending on the 'scoring' parameter.

    Raises
    ------
    ValueError
        If no ``pwmfile`` is given and no default database is configured.
    """
    config = MotifConfig()
    if pwmfile is None:
        pwmfile = config.get_default_params().get("motif_db", None)
        if pwmfile is not None:
            pwmfile = os.path.join(config.get_motif_dir(), pwmfile)
    if pwmfile is None:
        raise ValueError("no pwmfile given and no default database specified")

    logger.info("reading table")
    if input_table.endswith("feather"):
        # Feather files carry no index; the first column holds the regions.
        df = pd.read_feather(input_table)
        idx = df.iloc[:, 0].values
    else:
        df = pd.read_table(input_table, index_col=0, comment="#")
        idx = df.index
    regions = list(idx)

    s = Scanner(ncpus=ncpus)
    s.set_motifs(pwmfile)
    s.set_genome(genome)
    s.set_background(genome=genome)

    if scoring == "count":
        logger.info("setting threshold")
        s.set_threshold(fpr=FPR)
        logger.info("creating count table")
        scores = list(s.count(regions))
        logger.info("done")
    else:
        s.set_threshold(threshold=0.0)
        logger.info("creating score table")
        scores = list(s.best_score(regions, normalize=True))
        logger.info("done")

    motif_names = [m.id for m in read_motifs(pwmfile)]
    logger.info("creating dataframe")
    return pd.DataFrame(scores, index=idx, columns=motif_names)