当前位置: 首页>>代码示例>>Python>>正文


Python MotifConfig.get_motif_dir方法代码示例

本文整理汇总了Python中gimmemotifs.config.MotifConfig.get_motif_dir方法的典型用法代码示例。如果您正苦于以下问题:Python MotifConfig.get_motif_dir方法的具体用法?Python MotifConfig.get_motif_dir怎么用?Python MotifConfig.get_motif_dir使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gimmemotifs.config.MotifConfig的用法示例。


在下文中一共展示了MotifConfig.get_motif_dir方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scan_to_table

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def scan_to_table(input_table, genome, data_dir, scoring, pwmfile=None):
    """Scan the regions listed in a table with motifs and tabulate the result.

    Parameters
    ----------
    input_table : str
        Table read with ``pd.read_table``; its first column becomes the index
        and supplies the regions that are scanned.
    genome : str
        Genome handed to ``check_threshold`` and ``Scanner.set_genome``.
    data_dir : str
        Directory handed to ``check_threshold``.
    scoring : str
        ``"count"`` uses ``Scanner.count`` with the threshold as cutoff; any
        other value uses ``Scanner.best_score``.
    pwmfile : str, optional
        Motif file. When omitted, the configured default ``motif_db`` inside
        the configured motif directory is used.

    Returns
    -------
    pandas.DataFrame
        One row per region (indexed like the input table) and one column per
        motif id read from ``pwmfile``.

    Raises
    ------
    ValueError
        If no ``pwmfile`` is given and no default database is configured.
    """
    threshold = check_threshold(data_dir, genome, scoring)

    config = MotifConfig()

    if pwmfile is None:
        pwmfile = config.get_default_params().get("motif_db", None)
        if pwmfile is not None:
            pwmfile = os.path.join(config.get_motif_dir(), pwmfile)

    if pwmfile is None:
        raise ValueError("no pwmfile given and no default database specified")

    df = pd.read_table(input_table, index_col=0)
    regions = list(df.index)
    s = Scanner()
    s.set_motifs(pwmfile)
    s.set_genome(genome)

    if scoring == "count":
        scores = list(s.count(regions, cutoff=threshold))
    else:
        scores = list(s.best_score(regions))

    # Close the motif file deterministically instead of leaking the handle.
    with open(pwmfile) as handle:
        motif_names = [m.id for m in read_motifs(handle)]
    return pd.DataFrame(scores, index=df.index, columns=motif_names)
开发者ID:YichaoOU,项目名称:gimmemotifs,代码行数:31,代码来源:maelstrom.py

示例2: check_threshold

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def check_threshold(outdir, genome, scoring="count"):
    """Return the motif scanning threshold file, creating it when missing.

    Only the ``"count"`` scoring mode uses a threshold. For that mode the
    file ``threshold.<genome>.txt`` in ``outdir`` is returned; when it does
    not exist yet, a background FASTA file is generated (if needed) and
    ``gimme threshold`` is run to produce it.

    Parameters
    ----------
    outdir : str
        Directory holding the threshold and background files.
    genome : str
        Genome name, used in the file names and to locate the index
        directory.
    scoring : str, optional
        Scoring mode; anything other than ``"count"`` yields ``None``.

    Returns
    -------
    str or None
        Path of the threshold file, or ``None`` when no threshold applies.

    Raises
    ------
    ValueError
        If the threshold has to be computed but no default motif database
        is configured.
    """
    # gimme_motifs config, to get defaults
    config = MotifConfig()

    if scoring != "count":
        return None

    # Motif scanning threshold
    threshold_file = os.path.join(outdir, "threshold.{}.txt".format(genome))
    if not os.path.exists(threshold_file):
        # Random sequences from genome
        index_dir = os.path.join(config.get_index_dir(), genome)
        bg_file = os.path.join(outdir, "background.{}.fa".format(genome))
        if not os.path.exists(bg_file):
            m = RandomGenomicFasta(index_dir, BG_LENGTH, BG_NUMBER)
            m.writefasta(bg_file)

        pwmfile = config.get_default_params().get("motif_db")
        if pwmfile is None:
            # Fail with a clear message rather than inside os.path.join.
            raise ValueError("no default motif database specified")
        pwmfile = os.path.join(config.get_motif_dir(), pwmfile)

        # Pass an argument list and redirect stdout ourselves so that paths
        # containing spaces or shell metacharacters are handled safely.
        cmd = ["gimme", "threshold", pwmfile, bg_file, str(FDR)]
        with open(threshold_file, "w") as out:
            sp.call(cmd, stdout=out)
    return threshold_file
开发者ID:YichaoOU,项目名称:gimmemotifs,代码行数:28,代码来源:maelstrom.py

示例3: default_motifs

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def default_motifs():
    """Return list of Motif instances from default motif database.

    Returns
    -------
    The result of ``read_motifs`` applied to the default database file.

    Raises
    ------
    ValueError
        If the motif directory or the default ``motif_db`` is not configured.
    """
    config = MotifConfig()
    d = config.get_motif_dir()
    # .get() so that a missing key reaches the ValueError below instead of
    # surfacing as a bare KeyError.
    m = config.get_default_params().get('motif_db')

    if not d or not m:
        raise ValueError("default motif database not configured")

    fname = os.path.join(d, m)
    with open(fname) as f:
        motifs = read_motifs(f)

    return motifs
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:16,代码来源:motif.py

示例4: pwmfile_location

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def pwmfile_location(infile):
    """Resolve a motif file argument to an existing file location.

    Parameters
    ----------
    infile : str, None or other
        Motif file name. ``None`` selects the configured default
        ``motif_db``. Values that are not strings are returned unchanged.

    Returns
    -------
    The resolved path for string input. A name that does not exist as given
    is looked up in the configured motif directory, first verbatim and then
    with the ``.pfm`` and ``.pwm`` extensions appended.

    Raises
    ------
    ValueError
        If ``infile`` is ``None`` and no default database is configured, or
        if the file cannot be found in any of the candidate locations.
    """
    config = MotifConfig()

    if infile is None:
        infile = config.get_default_params().get("motif_db", None)
        if infile is None:
            raise ValueError("No motif file was given and no default "
                    "database specified in the config file.")

    if isinstance(infile, six.string_types) and not os.path.exists(infile):
        checkfile = os.path.join(config.get_motif_dir(), infile)
        candidates = [checkfile] + [checkfile + ext for ext in ('.pfm', '.pwm')]
        for candidate in candidates:
            if os.path.exists(candidate):
                infile = candidate
                # Stop at the first hit only; every extension must get a
                # chance before giving up.
                break
        else:
            raise ValueError("Motif file {} not found".format(infile))

    return infile
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:26,代码来源:utils.py

示例5: MotifComparer

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]

#.........这里部分代码省略.........
            if batch_len <= 0:
                batch_len = 1
            jobs = []
            for i in range(0, len(dbmotifs), batch_len): 
                # submit jobs to the job server
                
                p = pool.apply_async(_get_all_scores, 
                    args=(self, motifs, dbmotifs[i: i + batch_len], match, metric, combine, pval))
                jobs.append(p)
            
            pool.close()
            for job in jobs:
                # Get the job result
                result = job.get()
                # and update the result score
                for m1,v in result.items():
                    for m2, s in v.items():
                        if m1 not in scores:
                            scores[m1] = {}
                        scores[m1][m2] = s
        
            pool.join()
        else:
            # Do the whole thing at once if we don't want parallel
            scores = _get_all_scores(self, motifs, dbmotifs, match, metric, combine, pval)
        
        return scores

    def get_closest_match(self, motifs, dbmotifs=None, match="partial", metric="wic",combine="mean", parallel=True, ncpus=None):
        """Find, for every motif, the highest-scoring motif in a database.

        Parameters
        ----------
        motifs : list or str
            Motifs, or the name of a file holding them; handed to
            ``parse_motifs``.

        dbmotifs : list or str, optional
            Database motifs in the same forms. When omitted, the default
            ``motif_db`` inside the configured motif directory is used.

        match : str, optional
            Forwarded to ``get_all_scores`` and ``compare_motifs``.

        metric : str, optional
            Forwarded to ``get_all_scores`` and ``compare_motifs``.

        combine : str, optional
            Forwarded to ``get_all_scores`` and ``compare_motifs``.

        parallel : bool, optional
            Forwarded to ``get_all_scores``.

        ncpus : int, optional
            Forwarded to ``get_all_scores``.

        Returns
        -------
        closest_match : dict
            Maps each motif id to ``[db_motif_id, score_values + [pval]]``.
        """
        if dbmotifs is None:
            default_db = self.config.get_default_params()["motif_db"]
            dbmotifs = os.path.join(self.config.get_motif_dir(), default_db)

        motifs = parse_motifs(motifs)
        dbmotifs = parse_motifs(dbmotifs)

        by_id = {m.id: m for m in dbmotifs}

        scores = self.get_all_scores(motifs, dbmotifs, match, metric, combine, parallel=parallel, ncpus=ncpus)

        # Keep only the (db id, score) pair whose first score value ranks last
        # after an ascending sort, i.e. the best match.
        for motif_id in scores:
            ranked = sorted(scores[motif_id].items(), key=lambda item: item[1][0])
            scores[motif_id] = ranked[-1]

        # Attach the p-value of each motif against its best database match.
        for motif in motifs:
            best_id, best_score = scores[motif.id]
            pval, _pos, _orient = self.compare_motifs(
                motif, by_id[best_id], match, metric, combine, True)
            scores[motif.id] = [best_id, list(best_score) + [pval]]

        return scores

    def generate_score_dist(self, motifs, match, metric, combine):
        """Write mean and standard deviation of scores per motif-length pair.

        Motifs are grouped by length. For every ordered pair of lengths the
        scores from ``get_all_scores`` between the two groups are collected
        and one tab-separated line ``l1, l2, mean, std`` is written to
        ``<match>_<metric>_<combine>_score_dist.txt`` in the configured score
        directory.

        Parameters
        ----------
        motifs : list
            Motifs supporting ``len()``.
        match, metric, combine : str
            Forwarded to ``get_all_scores`` and used in the output file name.
        """
        score_file = os.path.join(self.config.get_score_dir(), "%s_%s_%s_score_dist.txt" % (match, metric, combine))

        # Group motifs by length, keeping lengths in first-seen order.
        motifs_by_len = {}
        for motif in motifs:
            motifs_by_len.setdefault(len(motif), []).append(motif)

        # The context manager closes the file even when scoring raises.
        with open(score_file, "w") as f:
            for l1, group1 in motifs_by_len.items():
                for l2, group2 in motifs_by_len.items():
                    scores = self.get_all_scores(group1, group2, match, metric, combine)
                    # Flatten directly: rows may differ in length once empty
                    # entries are filtered out, which np.array cannot handle.
                    flat = [y[0] for x in scores.values() for y in x.values() if y]
                    f.write("%s\t%s\t%s\t%s\n" % (l1, l2, np.mean(flat), np.std(flat)))
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:104,代码来源:comparison.py

示例6: _create_graphical_report

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def _create_graphical_report(inputfile, pwm, background, closest_match, outdir, stats, best_id=None):
    """Render the main gimme_motifs HTML report into ``outdir``.

    For every motif read from ``pwm`` a logo image is written to
    ``<outdir>/images``, its statistics are formatted per background, and a
    logo of its closest database match is written as well. The collected
    entries are rendered with the ``report_template.jinja.html`` template and
    saved as ``motif_report.html``.

    Parameters
    ----------
    inputfile : passed to the template as ``inputfile``.
    pwm : motif input handed to ``read_motifs`` with ``fmt="pwm"``.
    background : dict-like; iterated for background names, and its keys are
        passed to the template as ``bg_types``.
    closest_match : mapping of motif id to a sequence whose first item is the
        matching database motif id and whose second item ends with a p-value.
    outdir : output directory.
    stats : nested mapping indexed by ``str(motif)`` and background name.
    best_id : dict, optional; currently not used for the report content.
    """
    if best_id is None:
        best_id = {}

    logger.debug("Creating graphical report")

    class ReportMotif(object):
        """Plain attribute container handed to the report template."""
        pass

    config = MotifConfig()

    imgdir = os.path.join(outdir, "images")
    if not os.path.exists(imgdir):
        os.mkdir(imgdir)

    motifs = read_motifs(pwm, fmt="pwm")

    roc_img_file = "%s_roc.%s"

    default_db = config.get_default_params()["motif_db"]
    db_path = os.path.join(config.get_motif_dir(), default_db)
    dbmotifs = read_motifs(db_path, as_dict=True)

    report_motifs = []
    for motif in motifs:
        entry = ReportMotif()
        entry.id = motif.id
        entry.id_href = {"href": "#%s" % motif.id}
        entry.id_name = {"name": motif.id}
        entry.img = {"src":  os.path.join("images", "%s.png" % motif.id)}
        motif.to_img(os.path.join(outdir, "images/{}.png".format(motif.id)), fmt="PNG")

        # TODO: fix best ID
        entry.best = "Gimme"

        entry.consensus = motif.to_consensus()
        # Average star rating over all backgrounds, rounded half up.
        star_values = [stats[str(motif)][bg].get("stars", 0) for bg in background]
        entry.stars = int(np.mean(star_values) + 0.5)

        entry.bg = {}
        for bg in background:
            bg_stats = stats.get(str(motif), {}).get(bg)
            roc_png = "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"
            # TODO: fix these stats
            entry.bg[bg] = {
                "e": "%0.2f" % bg_stats.get("enr_at_fpr", 1.0),
                "p": "%0.2f" % bg_stats.get("phyper_at_fpr", 1.0),
                "auc": "%0.3f" % bg_stats.get("roc_auc", 0.5),
                "mncp": "%0.3f" % bg_stats.get("mncp", 1.0),
                "roc_img": {"src": roc_png},
                u"roc_img_link": {u"href": roc_png},
            }

        histogram_svg = "images/%s_histogram.svg" % motif.id
        entry.histogram_img = {"data": histogram_svg}
        entry.histogram_link = {"href": histogram_svg}

        match_id = closest_match[motif.id][0]
        dbmotifs[match_id].to_img(os.path.join(outdir, "images/{}.png".format(match_id)), fmt="PNG")

        entry.match_img = {"src":  "images/{}.png".format(match_id)}
        entry.match_id = match_id
        entry.match_pval = "%0.2e" % closest_match[motif.id][1][-1]

        report_motifs.append(entry)

    total_report = os.path.join(outdir, "motif_report.html")

    template_dir = config.get_template_dir()
    shutil.copyfile(
        os.path.join(template_dir, "star.png"),
        os.path.join(outdir, "images", "star.png"),
    )

    env = jinja2.Environment(loader=jinja2.FileSystemLoader([config.get_template_dir()]))
    template = env.get_template("report_template.jinja.html")
    # TODO: title
    result = template.render(
        motifs=report_motifs,
        inputfile=inputfile,
        date=datetime.today().strftime("%d/%m/%Y"),
        version=__version__,
        bg_types=list(background.keys()),
    )

    with open(total_report, "wb") as f:
        f.write(result.encode('utf-8'))

示例7: GimmeMotifs

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]

#.........这里部分代码省略.........
            rm.stars = stats["%s_%s" % (motif.id, motif.to_consensus())]["stars"]

            rm.bg = {}
            for bg in background:
                rm.bg[bg] = {}
                rm.bg[bg]["e"] = "%0.2f" % self.e[bg].setdefault(motif.id, 0.0)
                rm.bg[bg]["p"] = "%0.2f" % self.p[bg].setdefault(motif.id, 1.0)
                rm.bg[bg]["auc"] = "%0.3f" % self.auc[bg][motif.id]
                rm.bg[bg]["mncp"] = "%0.3f" % self.mncp[bg][motif.id]
                rm.bg[bg]["roc_img"] = {"src": "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"}
                rm.bg[bg]["roc_img_link"] = {"href": "images/" + os.path.basename(roc_img_file % (motif.id, bg)) + ".png"}

            rm.histogram_img = {"data":"images/%s_histogram.svg" % motif.id}
            rm.histogram_link= {"href":"images/%s_histogram.svg" % motif.id}
            rm.match_img = {"src":  "images/%s.png" % self.closest_match[motif.id][0].id}
            rm.match_id = self.closest_match[motif.id][0].id
            rm.match_pval = "%0.2e" % self.closest_match[motif.id][1]

            report_motifs.append(rm)

        total_report = self.motif_report
        
        env = jinja2.Environment(loader=jinja2.FileSystemLoader([self.config.get_template_dir()]))
        template = env.get_template("report_template.jinja.html")
        result = template.render(expname=self.basename, motifs=report_motifs, inputfile=self.inputfile, date=datetime.today().strftime("%d/%m/%Y"), version=GM_VERSION)
        
        f = open(total_report, "w")
        f.write(result.encode('utf-8'))
        f.close()

    def determine_closest_match(self, motifs):
        """Find the closest default-database motif for each given motif.

        The default ``motif_db`` is read from the configured motif directory
        (as pwm format for names ending in ``pwm``/``pfm``, as transfac for
        names ending in ``transfac``; otherwise no database motifs are
        loaded).

        Parameters
        ----------
        motifs : iterable of motifs with an ``id`` attribute.

        Returns
        -------
        dict
            Maps motif id to ``[closest_db_motif, pval]``.
        """
        self.logger.debug("Determining closest matching motifs in database")
        motif_db = self.config.get_default_params()["motif_db"]
        db = os.path.join(self.config.get_motif_dir(), motif_db)
        db_motifs = []
        if db.endswith(("pwm", "pfm")):
            # Close the database file once it has been read.
            with open(db) as handle:
                db_motifs = read_motifs(handle, fmt="pwm")
        elif db.endswith("transfac"):
            db_motifs = read_motifs(db, fmt="transfac")

        closest_match = {}
        mc = MotifComparer()
        db_motif_lookup = {m.id: m for m in db_motifs}
        match = mc.get_closest_match(motifs, db_motifs, "partial", "wic", "mean", parallel=False)
        for motif in motifs:
            best = db_motif_lookup[match[motif.id][0]]
            # Calculate p-value
            pval, _pos, _orient = mc.compare_motifs(motif, best, "partial", "wic", "mean", pval=True)
            closest_match[motif.id] = [best, pval]
        return closest_match

    def _determine_best_motif_in_cluster(self, clusters, pwm, sample_fa, bg_fa, imgdir=None):
        num_cluster = {}
        best_id = {}
        out = open(pwm, "w")
        for i, (clus, singles) in enumerate(clusters):
            motifs = [clus] + singles
            tmp = NamedTemporaryFile(dir=mytmpdir())
            tmp2 = NamedTemporaryFile(dir=mytmpdir())
            for m in motifs:
                tmp.write("%s\n" % m.to_pwm())
            tmp.flush()
            auc,mncp = self._roc_metrics(tmp.name, sample_fa, bg_fa, tmp2.name)
            bla = sorted(motifs, cmp=lambda x,y: cmp(mncp[x.id], mncp[y.id]))
            for m in bla:
                self.logger.debug("sorted: %s %s %s", 
                        str(m), mncp[m.id], auc[m.id])
开发者ID:YichaoOU,项目名称:gimmemotifs,代码行数:70,代码来源:core.py

示例8: scan_to_table

# 需要导入模块: from gimmemotifs.config import MotifConfig [as 别名]
# 或者: from gimmemotifs.config.MotifConfig import get_motif_dir [as 别名]
def scan_to_table(input_table, genome, scoring, pwmfile=None, ncpus=None):
    """Build a region-by-motif table of counts or scores.

    Parameters
    ----------
    input_table : str
        Name of the input table. Names ending in "feather" are read with
        ``pd.read_feather`` and the first column supplies the regions; any
        other name is read as a text table whose first column is the index
        and in which "#" starts a comment.

    genome : str
        Genome handed to the scanner, both as genome and as background.

    scoring : str
        "count" produces motif counts; any other value produces normalized
        best scores.

    pwmfile : str, optional
        Motif file to scan with. Defaults to the configured ``motif_db`` in
        the configured motif directory.

    ncpus : int, optional
        Passed to ``Scanner``.

    Returns
    -------
    table : pandas.DataFrame
        Regions as index, motif ids as columns, counts or scores as values.

    Raises
    ------
    ValueError
        If no ``pwmfile`` is given and no default database is configured.
    """
    config = MotifConfig()

    if pwmfile is None:
        default_db = config.get_default_params().get("motif_db", None)
        if default_db is None:
            raise ValueError("no pwmfile given and no default database specified")
        pwmfile = os.path.join(config.get_motif_dir(), default_db)

    logger.info("reading table")
    if input_table.endswith("feather"):
        df = pd.read_feather(input_table)
        idx = df.iloc[:,0].values
    else:
        df = pd.read_table(input_table, index_col=0, comment="#")
        idx = df.index

    regions = list(idx)
    s = Scanner(ncpus=ncpus)
    s.set_motifs(pwmfile)
    s.set_genome(genome)
    s.set_background(genome=genome)

    if scoring == "count":
        logger.info("setting threshold")
        s.set_threshold(fpr=FPR)
        logger.info("creating count table")
        rows = s.count(regions)
    else:
        s.set_threshold(threshold=0.0)
        logger.info("creating score table")
        rows = s.best_score(regions, normalize=True)
    # Collect all rows before reporting completion.
    scores = list(rows)
    logger.info("done")

    motif_names = [m.id for m in read_motifs(pwmfile)]
    logger.info("creating dataframe")
    return pd.DataFrame(scores, index=idx, columns=motif_names)
开发者ID:simonvh,项目名称:gimmemotifs,代码行数:74,代码来源:maelstrom.py


注:本文中的gimmemotifs.config.MotifConfig.get_motif_dir方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。