当前位置: 首页>>代码示例>>Python>>正文


Python Database.executewait方法代码示例

本文整理汇总了Python中CGAT.Database.executewait方法的典型用法代码示例。如果您正苦于以下问题:Python Database.executewait方法的具体用法?Python Database.executewait怎么用?Python Database.executewait使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CGAT.Database的用法示例。


在下文中一共展示了Database.executewait方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loadHypergeometricAnalysis

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadHypergeometricAnalysis(infile, outfile):
    '''load GO results.'''

    track = P.toTable(outfile)
    tablename = 'hypergeometric_%s_summary' % track
    P.load(infile, outfile, tablename=tablename)

    dbh = connect()
    ontologies = [x[0] for x in Database.executewait(
        dbh,
        '''SELECT DISTINCT ontology FROM %s''' % tablename).fetchall()]

    genelists = [x[0] for x in Database.executewait(
        dbh,
        '''SELECT DISTINCT genelist FROM %s''' % tablename).fetchall()]

    # output files from runGO.py
    sections = ('results', 'parameters', 'withgenes')

    for section in sections:
        tablename = 'hypergeometric_%s_%s' % (track, section)
        statement = '''
        python %(scriptsdir)s/combine_tables.py
        --cat=track
        --regex-filename="hypergeometric.dir/%(track)s.tsv.dir/(\S+).%(section)s"
        hypergeometric.dir/%(track)s.tsv.dir/*.%(section)s
        | python %(scriptsdir)s/csv2db.py
        %(csv2db_options)s
        --table=%(tablename)s
        >> %(outfile)s'''
        P.run()

    for ontology in ontologies:

        fn = os.path.join(infile + ".dir", "all_alldesc.%s.l2fold" % ontology)

        if not os.path.exists(fn):
            E.warn("file %s does not exist" % fn)
            continue

        P.load(fn,
               outfile,
               tablename='hypergeometric_%s_%s_l2fold' % (track, ontology),
               options='--allow-empty')

        fn = os.path.join(
            infile + ".dir", "all_alldesc.%s.l10pvalue" % ontology)

        P.load(fn,
               outfile,
               tablename='hypergeometric_%s_%s_l10pvalue' % (track, ontology),
               options='--allow-empty')

        fn = os.path.join(
            infile + ".dir", "all_alldesc.%s.l10qvalue" % ontology)

        P.load(fn,
               outfile,
               tablename='hypergeometric_%s_%s_l10qvalue' % (track, ontology),
               options='--allow-empty')
开发者ID:Charlie-George,项目名称:cgat,代码行数:62,代码来源:pipeline_genesets.py

示例2: createViewMapping

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def createViewMapping(infile, outfile):
    """create view in database for alignment stats.

    This view aggregates all information on a per-track basis.

    The table is built from the following tracks:

    mapping_stats
    bam_stats
    """

    tablename = P.toTable(outfile)
    # can not create views across multiple database, so use table
    view_type = "TABLE"

    dbhandle = connect()
    Database.executewait(dbhandle, "DROP %(view_type)s IF EXISTS %(tablename)s" % locals())

    statement = """
    CREATE %(view_type)s %(tablename)s AS
    SELECT *
    FROM bam_stats AS b
    """

    Database.executewait(dbhandle, statement % locals())
开发者ID:hainm,项目名称:CGATPipelines,代码行数:27,代码来源:pipeline_benchmark_rnaseqmappers.py

示例3: ReadGene2GOFromDatabase

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def ReadGene2GOFromDatabase(dbhandle, go_type, database, species):
    """read go assignments from ensembl database.

    returns a dictionary of lists.
    (one to many mapping of genes to GO categories)
    and a dictionary of go-term to go information

    Note: assumes that external_db_id for GO is 1000
    """

    statement = GetGOStatement(go_type, database, species)
    result = Database.executewait(dbhandle, statement,
                                  retries=0).fetchall()

    gene2go = {}
    go2info = collections.defaultdict(GOInfo)
    for gene_id, goid, description, evidence in result:
        gm = GOMatch(goid, go_type, description, evidence)
        gi = GOInfo(goid, go_type, description)
        if gene_id not in gene2go:
            gene2go[gene_id] = []
        gene2go[gene_id].append(gm)
        go2info[goid] = gi

    return gene2go, go2info
开发者ID:CGATOxford,项目名称:cgat,代码行数:27,代码来源:GO.py

示例4: importCodingPotential

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def importCodingPotential( infile, outfile ):
    '''import annotations'''
    
    table = outfile[:-len(".import")]

    statement = '''
   python %(scriptsdir)s/csv2db.py %(csv2db_options)s 
              --allow-empty
              --index=gene_id 
              --map=gene_id:str 
              --table=%(table)s 
    < %(infile)s 
    > %(outfile)s'''

    P.run()

    # set the is_coding flag
    dbhandle = sqlite3.connect( PARAMS["database"] )
    Database.executewait( dbhandle, '''ALTER TABLE %(table)s ADD COLUMN is_coding INTEGER''' % locals())
    Database.executewait( dbhandle, '''UPDATE %(table)s SET is_coding = (f_iscoding == 'coding') OR (r_iscoding == 'coding') ''' % locals())
    dbhandle.commit()
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:23,代码来源:pipeline_kamilah.py

示例5: mergeEffectsPerGene

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def mergeEffectsPerGene( infile, outfile ):
    '''summarize effects on a per-gene level.'''
    
    tablename = outfile[:-len(".load")]

    dbhandle = connect()

    statement = '''
    CREATE TABLE %(tablename)s AS
    SELECT DISTINCT 
           track,
           gene_id, 
           COUNT(*) AS ntranscripts,
           MIN(e.nalleles) AS min_nalleles,
           MAX(e.nalleles) AS max_nalleles,
           MIN(e.stop_min) AS min_stop_min,
           MAX(e.stop_min) AS max_stop_min,
           MIN(e.stop_max) AS min_stop_max,
           MAX(e.stop_max) AS max_stop_max,
           SUM( CASE WHEN stop_min > 0 AND cds_len - stop_min * 3 < last_exon_start THEN 1  
                     ELSE 0 END) AS nmd_knockout,
           SUM( CASE WHEN stop_max > 0 AND cds_len - stop_max * 3 < last_exon_start THEN 1  
                     ELSE 0 END) AS nmd_affected
    FROM annotations.transcript_info as i, effects AS e
    WHERE i.transcript_id = e.transcript_id
    GROUP BY i.gene_id, track
    ''' % locals()
    
    Database.executewait( dbhandle, "DROP TABLE IF EXISTS %(tablename)s" % locals() )
    Database.executewait( dbhandle, statement )
    Database.executewait( dbhandle, "CREATE INDEX %(tablename)s_gene_id ON %(tablename)s (gene_id)" % locals())
    dbhandle.commit()

    P.touch(outfile)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:36,代码来源:pipeline_variant_annotation.py

示例6: loadCodingPotential

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadCodingPotential( infile, outfile ):
    '''load annotations'''
    
    table = P.toTable( outfile )

    statement = '''
    gunzip < %(infile)s 
    | python %(scriptsdir)s/csv2db.py 
              %(csv2db_options)s 
              --allow-empty
              --index=gene_id 
              --map=gene_id:str 
              --table=%(table)s 
    > %(outfile)s'''

    P.run()

    # set the is_coding flag
    dbhandle = sqlite3.connect( PARAMS["database"] )
    Database.executewait( dbhandle, '''ALTER TABLE %(table)s ADD COLUMN is_coding INTEGER''' % locals())
    Database.executewait( dbhandle, '''UPDATE %(table)s SET is_coding = (result == 'coding')''' % locals())
    dbhandle.commit()
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:24,代码来源:pipeline_transcriptome.py

示例7: DumpGOFromDatabase

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def DumpGOFromDatabase(outfile,
                       dbhandle,
                       options):
    """read go assignments from database.

    and dump them into a flatfile.
    (one to many mapping of genes to GO categories)
    and a dictionary of go-term to go information
    """

    E.info("category\ttotal\tgenes\tcategories")

    all_genes = collections.defaultdict(int)
    all_categories = collections.defaultdict(int)
    all_ntotal = 0

    outfile.write("go_type\tgene_id\tgo_id\tdescription\tevidence\n")

    for go_type in options.ontology:

        genes = collections.defaultdict(int)
        categories = collections.defaultdict(int)
        ntotal = 0
        statement = GetGOStatement(go_type, options.database_name,
                                   options.species)

        results = Database.executewait(
            dbhandle, statement, retries=0).fetchall()

        for result in results:
            outfile.write("\t".join(map(str, (go_type,) + result)) + "\n")
            gene_id, goid, description, evidence = result
            genes[gene_id] += 1
            categories[goid] += 1
            ntotal += 1
            all_genes[gene_id] += 1
            all_categories[goid] += 1
            all_ntotal += 1

        E.info("%s\t%i\t%i\t%i" % (go_type, ntotal,
                                   len(genes),
                                   len(categories)))

    E.info("%s\t%i\t%i\t%i" % ("all",
                               all_ntotal,
                               len(all_genes),
                               len(all_categories)))

    return
开发者ID:CGATOxford,项目名称:cgat,代码行数:51,代码来源:GO.py

示例8: buildCuffdiffPlots

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildCuffdiffPlots(infile, outfile):
    '''create summaries of cufflinks results (including some diagnostic plots)

    Plots are created in the <exportdir>/cuffdiff directory.

    Plots are:

    <geneset>_<method>_<level>_<track1>_vs_<track2>_significance.png
        fold change against expression level
    '''
    ###########################################
    ###########################################
    # create diagnostic plots
    ###########################################
    outdir = os.path.join(PARAMS["exportdir"], "cuffdiff")

    dbhandle = sqlite3.connect(PARAMS["database"])

    prefix = P.snip(infile, ".load")

    geneset, method = prefix.split("_")

    for level in CUFFDIFF_LEVELS:
        tablename_diff = prefix + "_%s_diff" % level
        tablename_levels = prefix + "_%s_levels" % level

        # note that the ordering of EXPERIMENTS and the _diff table
        # needs to be the same as only one triangle is stored of the
        # pairwise results.  do not plot "undefined" lfold values
        # (where treatment_mean or control_mean = 0) do not plot lfold
        # values where the confidence bounds contain 0.
        for track1, track2 in itertools.combinations(EXPERIMENTS, 2):
            statement = """
            SELECT CASE WHEN d.treatment_mean < d.control_mean
            THEN d.treatment_mean
            ELSE d.control_mean END,
            d.l2fold, d.significant
            FROM %(tablename_diff)s AS d
            WHERE treatment_name = '%(track1)s' AND
            control_name = '%(track2)s' AND
            status = 'OK' AND
            treatment_mean > 0 AND
            control_mean > 0
            """ % locals()

            data = zip(*Database.executewait(dbhandle, statement))

            pngfile = "%(outdir)s/%(geneset)s_%(method)s_%(level)s_%(track1)s_vs_%(track2)s_significance.png" % locals()

            # ian: Bug fix: moved R.png to after data check so that no
            #     plot is started if there is no data this was leading
            #     to R falling over from too many open devices

            if len(data) == 0:
                E.warn("no plot for %s - %s -%s vs %s" %
                       (pngfile, level, track1, track2))
                continue

            R.png(pngfile)
            R.plot(ro.FloatVector(data[0]),
                   ro.FloatVector(data[1]),
                   xlab='min(FPKM)',
                   ylab='log2fold',
                   log="x", pch=20, cex=.1,
                   col=R.ifelse(ro.IntVector(data[2]), "red", "black"))

            R['dev.off']()

    P.touch(outfile)
开发者ID:jmadzo,项目名称:cgat,代码行数:71,代码来源:pipeline_rnaseqdiffexpression.py

示例9: buildExpressionStats

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildExpressionStats(tables, method, outfile, outdir):
    '''build expression summary statistics.

    Creates also diagnostic plots in

    <exportdir>/<method> directory.
    '''
    dbhandle = sqlite3.connect(PARAMS["database"])

    def _split(tablename):
        # this would be much easier, if feature_counts/gene_counts/etc.
        # would not contain an underscore.
        try:
            design, geneset, counting_method = re.match(
                "([^_]+)_vs_([^_]+)_(.*)_%s" % method,
                tablename).groups()
        except AttributeError:
            try:
                design, geneset = re.match(
                    "([^_]+)_([^_]+)_%s" % method,
                    tablename).groups()
                counting_method = "na"
            except AttributeError:
                raise ValueError("can't parse tablename %s" % tablename)

        return design, geneset, counting_method

        # return re.match("([^_]+)_", tablename ).groups()[0]

    keys_status = "OK", "NOTEST", "FAIL", "NOCALL"

    outf = IOTools.openFile(outfile, "w")
    outf.write("\t".join(
        ("design",
         "geneset",
         "level",
         "treatment_name",
         "counting_method",
         "control_name",
         "tested",
         "\t".join(["status_%s" % x for x in keys_status]),
         "significant",
         "twofold")) + "\n")

    all_tables = set(Database.getTables(dbhandle))

    for level in CUFFDIFF_LEVELS:

        for tablename in tables:

            tablename_diff = "%s_%s_diff" % (tablename, level)
            tablename_levels = "%s_%s_diff" % (tablename, level)
            design, geneset, counting_method = _split(tablename_diff)
            if tablename_diff not in all_tables:
                continue

            def toDict(vals, l=2):
                return collections.defaultdict(
                    int,
                    [(tuple(x[:l]), x[l]) for x in vals])

            tested = toDict(
                Database.executewait(
                    dbhandle,
                    "SELECT treatment_name, control_name, "
                    "COUNT(*) FROM %(tablename_diff)s "
                    "GROUP BY treatment_name,control_name" % locals()
                    ).fetchall())
            status = toDict(Database.executewait(
                dbhandle,
                "SELECT treatment_name, control_name, status, "
                "COUNT(*) FROM %(tablename_diff)s "
                "GROUP BY treatment_name,control_name,status"
                % locals()).fetchall(), 3)
            signif = toDict(Database.executewait(
                dbhandle,
                "SELECT treatment_name, control_name, "
                "COUNT(*) FROM %(tablename_diff)s "
                "WHERE significant "
                "GROUP BY treatment_name,control_name" % locals()
                ).fetchall())

            fold2 = toDict(Database.executewait(
                dbhandle,
                "SELECT treatment_name, control_name, "
                "COUNT(*) FROM %(tablename_diff)s "
                "WHERE (l2fold >= 1 or l2fold <= -1) AND significant "
                "GROUP BY treatment_name,control_name,significant"
                % locals()).fetchall())

            for treatment_name, control_name in tested.keys():
                outf.write("\t".join(map(str, (
                    design,
                    geneset,
                    level,
                    counting_method,
                    treatment_name,
                    control_name,
                    tested[(treatment_name, control_name)],
                    "\t".join(
#.........这里部分代码省略.........
开发者ID:jmadzo,项目名称:cgat,代码行数:103,代码来源:pipeline_rnaseqdiffexpression.py

示例10: buildDMRStats

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildDMRStats(tables, method, outfile, dbhandle):
    """build dmr summary statistics.

    This method counts the number of up/down, 2fold up/down, etc.
    genes in output from (:mod:`scripts/runExpression`).

    This method also creates diagnostic plots in the
    <exportdir>/<method> directory.

    Tables should be labeled <tileset>_<design>_<method>.

    Arguments
    ---------
    tables ; list
        List of tables with DMR output
    method : string
        Method name
    outfile : string
        Output filename. Tab separated file summarizing

    """

    def togeneset(tablename):
        return re.match("([^_]+)_", tablename).groups()[0]

    keys_status = "OK", "NOTEST", "FAIL", "NOCALL"

    outf = IOTools.openFile(outfile, "w")
    outf.write(
        "\t".join(
            (
                "tileset",
                "design",
                "track1",
                "track2",
                "tested",
                "\t".join(["status_%s" % x for x in keys_status]),
                "significant",
                "up",
                "down",
                "twofold",
                "twofold_up",
                "twofold_down",
            )
        )
        + "\n"
    )

    all_tables = set(Database.getTables(dbhandle))
    outdir = os.path.join(PARAMS["exportdir"], "diff_methylation")

    for tablename in tables:

        prefix = P.snip(tablename, "_%s" % method)
        tileset, design = prefix.split("_")

        def toDict(vals, l=2):
            return collections.defaultdict(int, [(tuple(x[:l]), x[l]) for x in vals])

        E.info("collecting data from %s" % tablename)

        tested = toDict(
            Database.executewait(
                dbhandle,
                """SELECT treatment_name, control_name, COUNT(*)
            FROM %(tablename)s 
            GROUP BY treatment_name,control_name"""
                % locals(),
            ).fetchall()
        )
        status = toDict(
            Database.executewait(
                dbhandle,
                """SELECT treatment_name, control_name, status,
            COUNT(*) FROM %(tablename)s 
            GROUP BY treatment_name,control_name,status"""
                % locals(),
            ).fetchall(),
            3,
        )
        signif = toDict(
            Database.executewait(
                dbhandle,
                """SELECT treatment_name, control_name,
            COUNT(*) FROM %(tablename)s 
            WHERE significant
            GROUP BY treatment_name,control_name"""
                % locals(),
            ).fetchall()
        )
        fold2 = toDict(
            Database.executewait(
                dbhandle,
                """SELECT treatment_name, control_name,
            COUNT(*) FROM %(tablename)s
            WHERE (l2fold >= 1 or l2fold <= -1) AND significant
            GROUP BY treatment_name,control_name,significant"""
                % locals(),
            ).fetchall()
        )
#.........这里部分代码省略.........
开发者ID:hainm,项目名称:CGATPipelines,代码行数:103,代码来源:PipelineMedip.py

示例11: loadSummary

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadSummary( infile, outfile ):
    '''load several rates into a single convenience table.
    '''

    stmt_select = []
    stmt_from = []
    stmt_where = ["1"]

    track = infile[:-len(".gtf.gz")]

    tablename = "%s_evol" % track

    if os.path.exists( "%s_rates.load" % track ):
        stmt_select.append( "a.distance AS ks, a.aligned AS aligned" )
        stmt_from.append('''LEFT JOIN %(track)s_rates AS a
                     ON r.gene_id = a.gene_id AND 
                     a.aligned >= %(rates_min_aligned)i AND 
                     a.distance <= %(rates_max_rate)f''' )

    if os.path.exists( "%s_coverage.load" % track ):
        stmt_select.append("cov.nmatches AS nreads, cov.mean AS meancoverage" )
        stmt_from.append("LEFT JOIN %(track)s_coverage AS cov ON r.gene_id = cov.gene_id" )

    if os.path.exists( "%s_repeats_gc.load" % track ):
        stmt_select.append("ar_gc.exons_mean AS repeats_gc" )
        stmt_from.append("LEFT JOIN %(track)s_repeats_gc AS ar_gc ON r.gene_id = ar_gc.gene_id" )

    if os.path.exists( "%s_repeats_rates.load" % track ):
        stmt_select.append("ar.exons_length AS ar_aligned, ar.exons_median AS ka, a.distance/ar.exons_median AS kska" )
        stmt_from.append('''LEFT JOIN %(track)s_repeats_rates AS ar 
                     ON r.gene_id = ar.gene_id AND 
                     ar.exons_nval >= %(rates_min_repeats)i''' )

    if os.path.exists( "%s_introns_rates.load" % track ):
        stmt_select.append("ir.aligned AS ir_aligned, ir.distance AS ki, a.distance/ir.distance AS kski" )
        stmt_from.append('''LEFT JOIN %(track)s_introns_rates AS ir 
                            ON r.gene_id = ir.gene_id AND 
                            ir.aligned >= %(rates_min_aligned)i''' )

    x = locals()
    x.update( PARAMS )
    stmt_select = ", ".join( stmt_select ) % x 
    stmt_from = " ".join( stmt_from ) % x
    stmt_where = " AND ".join( stmt_where ) % x

    dbhandle = sqlite3.connect( PARAMS["database"] )

    Database.executewait( dbhandle, "DROP TABLE IF EXISTS %(tablename)s " % locals() )

    statement = '''
    CREATE TABLE %(tablename)s AS
    SELECT
         CAST(r.gene_id AS TEXT) AS gene_id, 
				r.exons_sum as length, 
				r.exons_pGC as pgc,
                                %(stmt_select)s
	FROM 
          %(track)s_annotation AS r 
	  %(stmt_from)s
        WHERE %(stmt_where)s
    ''' % locals()

    Database.executewait( dbhandle, statement)
    dbhandle.commit()
    P.touch(outfile)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:67,代码来源:pipeline_transcriptome.py

示例12: buildExpressionStats

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildExpressionStats(
        dbhandle,
        outfile,
        tablenames,
        outdir,
        regex_table="(?P<design>[^_]+)_"
        "(?P<geneset>[^_]+)_"
        "(?P<counting_method>[^_]+)_"
        "(?P<method>[^_]+)_"
        "(?P<level>[^_]+)_diff"):
    """compile expression summary statistics from database.

    This method outputs a table with the number of genes tested,
    failed, differentially expressed, etc. for a series of DE tests.

    Arguments
    ---------
    dbhandle : object
        Database handle.
    tables : list
        List of tables to process.
    outfile : string
        Output filename in :term:`tsv` format.
    outdir : string
        Output directory for diagnostic plots.
    regex : string
        Regular expression to extract experimental information
        from table name.
    """

    keys_status = "OK", "NOTEST", "FAIL", "NOCALL"

    outf = IOTools.openFile(outfile, "w")
    outf.write("\t".join(
        ("design",
         "geneset",
         "level",
         "counting_method",
         "treatment_name",
         "control_name",
         "tested",
         "\t".join(["status_%s" % x for x in keys_status]),
         "significant",
         "twofold")) + "\n")

    for tablename in tablenames:
        r = re.search(regex_table, tablename)
        if r is None:
            raise ValueError(
                "can't match tablename '%s' to regex" % tablename)
        geneset = r.group("geneset")
        design = r.group("design")
        level = r.group("level")
        counting_method = r.group("counting_method")
        geneset = r.group("geneset")

        def toDict(vals, l=2):
            return collections.defaultdict(
                int,
                [(tuple(x[:l]), x[l]) for x in vals])

        tested = toDict(Database.executewait(
            dbhandle,
            "SELECT treatment_name, control_name, "
            "COUNT(*) FROM %(tablename)s "
            "GROUP BY treatment_name,control_name" % locals()
            ).fetchall())
        status = toDict(Database.executewait(
            dbhandle,
            "SELECT treatment_name, control_name, status, "
            "COUNT(*) FROM %(tablename)s "
            "GROUP BY treatment_name,control_name,status"
            % locals()).fetchall(), 3)
        signif = toDict(Database.executewait(
            dbhandle,
            "SELECT treatment_name, control_name, "
            "COUNT(*) FROM %(tablename)s "
            "WHERE significant "
            "GROUP BY treatment_name,control_name" % locals()
            ).fetchall())

        fold2 = toDict(Database.executewait(
            dbhandle,
            "SELECT treatment_name, control_name, "
            "COUNT(*) FROM %(tablename)s "
            "WHERE (l2fold >= 1 or l2fold <= -1) AND significant "
            "GROUP BY treatment_name,control_name,significant"
            % locals()).fetchall())

        for treatment_name, control_name in tested.keys():
            outf.write("\t".join(map(str, (
                design,
                geneset,
                level,
                counting_method,
                treatment_name,
                control_name,
                tested[(treatment_name, control_name)],
                "\t".join(
                    [str(status[(treatment_name, control_name, x)])
#.........这里部分代码省略.........
开发者ID:gjaime,项目名称:CGATPipelines,代码行数:103,代码来源:PipelineRnaseq.py

示例13: createView

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def createView(dbhandle, tables, tablename, outfile,
               view_type="TABLE",
               ignore_duplicates=True):
    '''create a database view for a list of tables.

    This method performs a join across multiple tables and stores the
    result either as a view or a table in the database.

    Arguments
    ---------
    dbhandle :
        A database handle.
    tables : list of tuples
        Tables to merge. Each tuple contains the name of a table and
        the field to join with the first table. For example::

            tables = (
                "reads_summary", "track",
                "bam_stats", "track",
                "context_stats", "track",
                "picard_stats_alignment_summary_metrics", "track")

    tablename : string
        Name of the view or table to be created.
    outfile : string
        Output filename for status information.
    view_type : string
        Type of view, either ``VIEW`` or ``TABLE``.  If a view is to be
        created across multiple databases, use ``TABLE``.
    ignore_duplicates : bool
        If set to False, duplicate column names will be added with the
        tablename as prefix. The default is to ignore.

    '''

    Database.executewait(
        dbhandle,
        "DROP %(view_type)s IF EXISTS %(tablename)s" % locals())

    tracks, columns = [], []
    tablenames = [x[0] for x in tables]
    for table, track in tables:
        d = Database.executewait(
            dbhandle,
            "SELECT COUNT(DISTINCT %s) FROM %s" % (track, table))
        tracks.append(d.fetchone()[0])
        columns.append(
            [x.lower() for x in Database.getColumnNames(dbhandle, table)
             if x != track])

    E.info("creating %s from the following tables: %s" %
           (tablename, str(list(zip(tablenames, tracks)))))
    if min(tracks) != max(tracks):
        raise ValueError(
            "number of rows not identical - will not create view")

    from_statement = " , ".join(
        ["%s as t%i" % (y[0], x) for x, y in enumerate(tables)])
    f = tables[0][1]
    where_statement = " AND ".join(
        ["t0.%s = t%i.%s" % (f, x + 1, y[1])
         for x, y in enumerate(tables[1:])])

    all_columns, taken = [], set()
    for x, c in enumerate(columns):
        i = set(taken).intersection(set(c))
        if i:
            E.warn("duplicate column names: %s " % i)
            if not ignore_duplicates:
                table = tables[x][0]
                all_columns.extend(
                    ["t%i.%s AS %s_%s" % (x, y, table, y) for y in i])
                c = [y for y in c if y not in i]

        all_columns.extend(["t%i.%s" % (x, y) for y in c])
        taken.update(set(c))

    all_columns = ",".join(all_columns)
    statement = '''
    CREATE %(view_type)s %(tablename)s AS SELECT t0.track, %(all_columns)s
    FROM %(from_statement)s
    WHERE %(where_statement)s
    ''' % locals()

    Database.executewait(dbhandle, statement)

    nrows = Database.executewait(
        dbhandle, "SELECT COUNT(*) FROM view_mapping").fetchone()[0]

    if nrows == 0:
        raise ValueError(
            "empty view mapping, check statement = %s" %
            (statement % locals()))
    if nrows != min(tracks):
        E.warn("view creates duplicate rows, got %i, expected %i" %
               (nrows, min(tracks)))

    E.info("created view_mapping with %i rows" % nrows)
    touchFile(outfile)
开发者ID:CGATOxford,项目名称:CGATPipelines,代码行数:101,代码来源:Database.py

示例14: importLincRNA

# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def importLincRNA( infile, outfile ):
    '''build a linc RNA set.

        * no coding potential
        * unknown and intergenic transcripts
        * no overlap with ``linc_exclude`` (usually: human refseq)
        * at least ``linc_min_length`` bp in length
        * at least ``linc_min_reads`` reads in transcript

    '''

    table = outfile[:-len(".import")]
    track = table[:-len("Linc")]

    dbhandle = sqlite3.connect( PARAMS["database"] )

    Database.executewait( dbhandle, '''DROP TABLE IF EXISTS %(table)s''' % locals())
    Database.executewait( dbhandle, '''CREATE TABLE %(table)s (gene_id TEXT)''' % locals())
    Database.executewait( dbhandle, '''CREATE INDEX %(table)s_index1 ON %(table)s (gene_id)''' % locals())

    joins, wheres = [], ["1"]

    if PARAMS["linc_min_reads"] > 0:
        joins.append( ", %(track)s_coverage as cov" % locals() )
        wheres.append( "cov.gene_id = m.gene_id2 AND cov.nmatches >= %(i)" % PARAMS["linc_min_reads"] )

    if PARAMS["linc_exclude"] > 0:
        joins.append( "LEFT JOIN %s_vs_%s_ovl as ovl on ovl.gene_id2 = a.gene_id" %\
                      (PARAMS["linc_exclude"], track ) )
        wheres.append( "ovl.gene_id1 IS NULL" )

    wheres = " AND ".join( wheres )
    joins = " ".join( joins )

    statement = '''INSERT INTO %(table)s 
                   SELECT DISTINCT(a.gene_id) FROM 
                          %(track)s_annotation as a
                          %(joins)s
                          LEFT JOIN %(track)s_coding AS c on c.gene_id = a.gene_id 
                    WHERE is_unknown 
                          AND is_intergenic 
                          AND exons_sum >= %(linc_min_length)i
                          AND (c.is_coding IS NULL or not c.is_coding) 
                          AND %(wheres)s
                     ''' % dict( PARAMS.items() + locals().items() )

    E.debug( "statement to build lincRNA: %s" % statement)

    Database.executewait( dbhandle, statement % locals())

    dbhandle.commit()
    
    cc = dbhandle.cursor()
    result = cc.execute("SELECT COUNT(*) FROM %(table)s" % locals() ).fetchall()[0][0]
    E.info( "build lincRNA set for %s: %i entries" % ( track, result ))

    outgtf = "%s.gtf.gz" % table

    E.info( "creating gtf file `%s`" % outgtf )
    # output gtf file
    statement = '''%(cmd-sql)s %(database)s "SELECT g.* FROM %(track)s_gtf as g, %(table)s AS t
                          WHERE t.gene_id = g.gene_id" 
                | python %(scriptsdir)s/gtf2tsv.py --invert --log=%(outfile)s
                | gzip
                > %(outgtf)s'''
    
    P.run()
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:69,代码来源:pipeline_kamilah.py


注:本文中的CGAT.Database.executewait方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。