本文整理汇总了Python中CGAT.Database.executewait方法的典型用法代码示例。如果您正苦于以下问题:Python Database.executewait方法的具体用法?Python Database.executewait怎么用?Python Database.executewait使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CGAT.Database
的用法示例。
在下文中一共展示了Database.executewait方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: loadHypergeometricAnalysis
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadHypergeometricAnalysis(infile, outfile):
'''load GO results.'''
track = P.toTable(outfile)
tablename = 'hypergeometric_%s_summary' % track
P.load(infile, outfile, tablename=tablename)
dbh = connect()
ontologies = [x[0] for x in Database.executewait(
dbh,
'''SELECT DISTINCT ontology FROM %s''' % tablename).fetchall()]
genelists = [x[0] for x in Database.executewait(
dbh,
'''SELECT DISTINCT genelist FROM %s''' % tablename).fetchall()]
# output files from runGO.py
sections = ('results', 'parameters', 'withgenes')
for section in sections:
tablename = 'hypergeometric_%s_%s' % (track, section)
statement = '''
python %(scriptsdir)s/combine_tables.py
--cat=track
--regex-filename="hypergeometric.dir/%(track)s.tsv.dir/(\S+).%(section)s"
hypergeometric.dir/%(track)s.tsv.dir/*.%(section)s
| python %(scriptsdir)s/csv2db.py
%(csv2db_options)s
--table=%(tablename)s
>> %(outfile)s'''
P.run()
for ontology in ontologies:
fn = os.path.join(infile + ".dir", "all_alldesc.%s.l2fold" % ontology)
if not os.path.exists(fn):
E.warn("file %s does not exist" % fn)
continue
P.load(fn,
outfile,
tablename='hypergeometric_%s_%s_l2fold' % (track, ontology),
options='--allow-empty')
fn = os.path.join(
infile + ".dir", "all_alldesc.%s.l10pvalue" % ontology)
P.load(fn,
outfile,
tablename='hypergeometric_%s_%s_l10pvalue' % (track, ontology),
options='--allow-empty')
fn = os.path.join(
infile + ".dir", "all_alldesc.%s.l10qvalue" % ontology)
P.load(fn,
outfile,
tablename='hypergeometric_%s_%s_l10qvalue' % (track, ontology),
options='--allow-empty')
示例2: createViewMapping
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def createViewMapping(infile, outfile):
"""create view in database for alignment stats.
This view aggregates all information on a per-track basis.
The table is built from the following tracks:
mapping_stats
bam_stats
"""
tablename = P.toTable(outfile)
# can not create views across multiple database, so use table
view_type = "TABLE"
dbhandle = connect()
Database.executewait(dbhandle, "DROP %(view_type)s IF EXISTS %(tablename)s" % locals())
statement = """
CREATE %(view_type)s %(tablename)s AS
SELECT *
FROM bam_stats AS b
"""
Database.executewait(dbhandle, statement % locals())
示例3: ReadGene2GOFromDatabase
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def ReadGene2GOFromDatabase(dbhandle, go_type, database, species):
"""read go assignments from ensembl database.
returns a dictionary of lists.
(one to many mapping of genes to GO categories)
and a dictionary of go-term to go information
Note: assumes that external_db_id for GO is 1000
"""
statement = GetGOStatement(go_type, database, species)
result = Database.executewait(dbhandle, statement,
retries=0).fetchall()
gene2go = {}
go2info = collections.defaultdict(GOInfo)
for gene_id, goid, description, evidence in result:
gm = GOMatch(goid, go_type, description, evidence)
gi = GOInfo(goid, go_type, description)
if gene_id not in gene2go:
gene2go[gene_id] = []
gene2go[gene_id].append(gm)
go2info[goid] = gi
return gene2go, go2info
示例4: importCodingPotential
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def importCodingPotential( infile, outfile ):
'''import annotations'''
table = outfile[:-len(".import")]
statement = '''
python %(scriptsdir)s/csv2db.py %(csv2db_options)s
--allow-empty
--index=gene_id
--map=gene_id:str
--table=%(table)s
< %(infile)s
> %(outfile)s'''
P.run()
# set the is_coding flag
dbhandle = sqlite3.connect( PARAMS["database"] )
Database.executewait( dbhandle, '''ALTER TABLE %(table)s ADD COLUMN is_coding INTEGER''' % locals())
Database.executewait( dbhandle, '''UPDATE %(table)s SET is_coding = (f_iscoding == 'coding') OR (r_iscoding == 'coding') ''' % locals())
dbhandle.commit()
示例5: mergeEffectsPerGene
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def mergeEffectsPerGene( infile, outfile ):
'''summarize effects on a per-gene level.'''
tablename = outfile[:-len(".load")]
dbhandle = connect()
statement = '''
CREATE TABLE %(tablename)s AS
SELECT DISTINCT
track,
gene_id,
COUNT(*) AS ntranscripts,
MIN(e.nalleles) AS min_nalleles,
MAX(e.nalleles) AS max_nalleles,
MIN(e.stop_min) AS min_stop_min,
MAX(e.stop_min) AS max_stop_min,
MIN(e.stop_max) AS min_stop_max,
MAX(e.stop_max) AS max_stop_max,
SUM( CASE WHEN stop_min > 0 AND cds_len - stop_min * 3 < last_exon_start THEN 1
ELSE 0 END) AS nmd_knockout,
SUM( CASE WHEN stop_max > 0 AND cds_len - stop_max * 3 < last_exon_start THEN 1
ELSE 0 END) AS nmd_affected
FROM annotations.transcript_info as i, effects AS e
WHERE i.transcript_id = e.transcript_id
GROUP BY i.gene_id, track
''' % locals()
Database.executewait( dbhandle, "DROP TABLE IF EXISTS %(tablename)s" % locals() )
Database.executewait( dbhandle, statement )
Database.executewait( dbhandle, "CREATE INDEX %(tablename)s_gene_id ON %(tablename)s (gene_id)" % locals())
dbhandle.commit()
P.touch(outfile)
示例6: loadCodingPotential
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadCodingPotential( infile, outfile ):
'''load annotations'''
table = P.toTable( outfile )
statement = '''
gunzip < %(infile)s
| python %(scriptsdir)s/csv2db.py
%(csv2db_options)s
--allow-empty
--index=gene_id
--map=gene_id:str
--table=%(table)s
> %(outfile)s'''
P.run()
# set the is_coding flag
dbhandle = sqlite3.connect( PARAMS["database"] )
Database.executewait( dbhandle, '''ALTER TABLE %(table)s ADD COLUMN is_coding INTEGER''' % locals())
Database.executewait( dbhandle, '''UPDATE %(table)s SET is_coding = (result == 'coding')''' % locals())
dbhandle.commit()
示例7: DumpGOFromDatabase
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def DumpGOFromDatabase(outfile,
dbhandle,
options):
"""read go assignments from database.
and dump them into a flatfile.
(one to many mapping of genes to GO categories)
and a dictionary of go-term to go information
"""
E.info("category\ttotal\tgenes\tcategories")
all_genes = collections.defaultdict(int)
all_categories = collections.defaultdict(int)
all_ntotal = 0
outfile.write("go_type\tgene_id\tgo_id\tdescription\tevidence\n")
for go_type in options.ontology:
genes = collections.defaultdict(int)
categories = collections.defaultdict(int)
ntotal = 0
statement = GetGOStatement(go_type, options.database_name,
options.species)
results = Database.executewait(
dbhandle, statement, retries=0).fetchall()
for result in results:
outfile.write("\t".join(map(str, (go_type,) + result)) + "\n")
gene_id, goid, description, evidence = result
genes[gene_id] += 1
categories[goid] += 1
ntotal += 1
all_genes[gene_id] += 1
all_categories[goid] += 1
all_ntotal += 1
E.info("%s\t%i\t%i\t%i" % (go_type, ntotal,
len(genes),
len(categories)))
E.info("%s\t%i\t%i\t%i" % ("all",
all_ntotal,
len(all_genes),
len(all_categories)))
return
示例8: buildCuffdiffPlots
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildCuffdiffPlots(infile, outfile):
'''create summaries of cufflinks results (including some diagnostic plots)
Plots are created in the <exportdir>/cuffdiff directory.
Plots are:
<geneset>_<method>_<level>_<track1>_vs_<track2>_significance.png
fold change against expression level
'''
###########################################
###########################################
# create diagnostic plots
###########################################
outdir = os.path.join(PARAMS["exportdir"], "cuffdiff")
dbhandle = sqlite3.connect(PARAMS["database"])
prefix = P.snip(infile, ".load")
geneset, method = prefix.split("_")
for level in CUFFDIFF_LEVELS:
tablename_diff = prefix + "_%s_diff" % level
tablename_levels = prefix + "_%s_levels" % level
# note that the ordering of EXPERIMENTS and the _diff table
# needs to be the same as only one triangle is stored of the
# pairwise results. do not plot "undefined" lfold values
# (where treatment_mean or control_mean = 0) do not plot lfold
# values where the confidence bounds contain 0.
for track1, track2 in itertools.combinations(EXPERIMENTS, 2):
statement = """
SELECT CASE WHEN d.treatment_mean < d.control_mean
THEN d.treatment_mean
ELSE d.control_mean END,
d.l2fold, d.significant
FROM %(tablename_diff)s AS d
WHERE treatment_name = '%(track1)s' AND
control_name = '%(track2)s' AND
status = 'OK' AND
treatment_mean > 0 AND
control_mean > 0
""" % locals()
data = zip(*Database.executewait(dbhandle, statement))
pngfile = "%(outdir)s/%(geneset)s_%(method)s_%(level)s_%(track1)s_vs_%(track2)s_significance.png" % locals()
# ian: Bug fix: moved R.png to after data check so that no
# plot is started if there is no data this was leading
# to R falling over from too many open devices
if len(data) == 0:
E.warn("no plot for %s - %s -%s vs %s" %
(pngfile, level, track1, track2))
continue
R.png(pngfile)
R.plot(ro.FloatVector(data[0]),
ro.FloatVector(data[1]),
xlab='min(FPKM)',
ylab='log2fold',
log="x", pch=20, cex=.1,
col=R.ifelse(ro.IntVector(data[2]), "red", "black"))
R['dev.off']()
P.touch(outfile)
示例9: buildExpressionStats
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildExpressionStats(tables, method, outfile, outdir):
'''build expression summary statistics.
Creates also diagnostic plots in
<exportdir>/<method> directory.
'''
dbhandle = sqlite3.connect(PARAMS["database"])
def _split(tablename):
# this would be much easier, if feature_counts/gene_counts/etc.
# would not contain an underscore.
try:
design, geneset, counting_method = re.match(
"([^_]+)_vs_([^_]+)_(.*)_%s" % method,
tablename).groups()
except AttributeError:
try:
design, geneset = re.match(
"([^_]+)_([^_]+)_%s" % method,
tablename).groups()
counting_method = "na"
except AttributeError:
raise ValueError("can't parse tablename %s" % tablename)
return design, geneset, counting_method
# return re.match("([^_]+)_", tablename ).groups()[0]
keys_status = "OK", "NOTEST", "FAIL", "NOCALL"
outf = IOTools.openFile(outfile, "w")
outf.write("\t".join(
("design",
"geneset",
"level",
"treatment_name",
"counting_method",
"control_name",
"tested",
"\t".join(["status_%s" % x for x in keys_status]),
"significant",
"twofold")) + "\n")
all_tables = set(Database.getTables(dbhandle))
for level in CUFFDIFF_LEVELS:
for tablename in tables:
tablename_diff = "%s_%s_diff" % (tablename, level)
tablename_levels = "%s_%s_diff" % (tablename, level)
design, geneset, counting_method = _split(tablename_diff)
if tablename_diff not in all_tables:
continue
def toDict(vals, l=2):
return collections.defaultdict(
int,
[(tuple(x[:l]), x[l]) for x in vals])
tested = toDict(
Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename_diff)s "
"GROUP BY treatment_name,control_name" % locals()
).fetchall())
status = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, status, "
"COUNT(*) FROM %(tablename_diff)s "
"GROUP BY treatment_name,control_name,status"
% locals()).fetchall(), 3)
signif = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename_diff)s "
"WHERE significant "
"GROUP BY treatment_name,control_name" % locals()
).fetchall())
fold2 = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename_diff)s "
"WHERE (l2fold >= 1 or l2fold <= -1) AND significant "
"GROUP BY treatment_name,control_name,significant"
% locals()).fetchall())
for treatment_name, control_name in tested.keys():
outf.write("\t".join(map(str, (
design,
geneset,
level,
counting_method,
treatment_name,
control_name,
tested[(treatment_name, control_name)],
"\t".join(
#.........这里部分代码省略.........
示例10: buildDMRStats
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildDMRStats(tables, method, outfile, dbhandle):
"""build dmr summary statistics.
This method counts the number of up/down, 2fold up/down, etc.
genes in output from (:mod:`scripts/runExpression`).
This method also creates diagnostic plots in the
<exportdir>/<method> directory.
Tables should be labeled <tileset>_<design>_<method>.
Arguments
---------
tables ; list
List of tables with DMR output
method : string
Method name
outfile : string
Output filename. Tab separated file summarizing
"""
def togeneset(tablename):
return re.match("([^_]+)_", tablename).groups()[0]
keys_status = "OK", "NOTEST", "FAIL", "NOCALL"
outf = IOTools.openFile(outfile, "w")
outf.write(
"\t".join(
(
"tileset",
"design",
"track1",
"track2",
"tested",
"\t".join(["status_%s" % x for x in keys_status]),
"significant",
"up",
"down",
"twofold",
"twofold_up",
"twofold_down",
)
)
+ "\n"
)
all_tables = set(Database.getTables(dbhandle))
outdir = os.path.join(PARAMS["exportdir"], "diff_methylation")
for tablename in tables:
prefix = P.snip(tablename, "_%s" % method)
tileset, design = prefix.split("_")
def toDict(vals, l=2):
return collections.defaultdict(int, [(tuple(x[:l]), x[l]) for x in vals])
E.info("collecting data from %s" % tablename)
tested = toDict(
Database.executewait(
dbhandle,
"""SELECT treatment_name, control_name, COUNT(*)
FROM %(tablename)s
GROUP BY treatment_name,control_name"""
% locals(),
).fetchall()
)
status = toDict(
Database.executewait(
dbhandle,
"""SELECT treatment_name, control_name, status,
COUNT(*) FROM %(tablename)s
GROUP BY treatment_name,control_name,status"""
% locals(),
).fetchall(),
3,
)
signif = toDict(
Database.executewait(
dbhandle,
"""SELECT treatment_name, control_name,
COUNT(*) FROM %(tablename)s
WHERE significant
GROUP BY treatment_name,control_name"""
% locals(),
).fetchall()
)
fold2 = toDict(
Database.executewait(
dbhandle,
"""SELECT treatment_name, control_name,
COUNT(*) FROM %(tablename)s
WHERE (l2fold >= 1 or l2fold <= -1) AND significant
GROUP BY treatment_name,control_name,significant"""
% locals(),
).fetchall()
)
#.........这里部分代码省略.........
示例11: loadSummary
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def loadSummary( infile, outfile ):
'''load several rates into a single convenience table.
'''
stmt_select = []
stmt_from = []
stmt_where = ["1"]
track = infile[:-len(".gtf.gz")]
tablename = "%s_evol" % track
if os.path.exists( "%s_rates.load" % track ):
stmt_select.append( "a.distance AS ks, a.aligned AS aligned" )
stmt_from.append('''LEFT JOIN %(track)s_rates AS a
ON r.gene_id = a.gene_id AND
a.aligned >= %(rates_min_aligned)i AND
a.distance <= %(rates_max_rate)f''' )
if os.path.exists( "%s_coverage.load" % track ):
stmt_select.append("cov.nmatches AS nreads, cov.mean AS meancoverage" )
stmt_from.append("LEFT JOIN %(track)s_coverage AS cov ON r.gene_id = cov.gene_id" )
if os.path.exists( "%s_repeats_gc.load" % track ):
stmt_select.append("ar_gc.exons_mean AS repeats_gc" )
stmt_from.append("LEFT JOIN %(track)s_repeats_gc AS ar_gc ON r.gene_id = ar_gc.gene_id" )
if os.path.exists( "%s_repeats_rates.load" % track ):
stmt_select.append("ar.exons_length AS ar_aligned, ar.exons_median AS ka, a.distance/ar.exons_median AS kska" )
stmt_from.append('''LEFT JOIN %(track)s_repeats_rates AS ar
ON r.gene_id = ar.gene_id AND
ar.exons_nval >= %(rates_min_repeats)i''' )
if os.path.exists( "%s_introns_rates.load" % track ):
stmt_select.append("ir.aligned AS ir_aligned, ir.distance AS ki, a.distance/ir.distance AS kski" )
stmt_from.append('''LEFT JOIN %(track)s_introns_rates AS ir
ON r.gene_id = ir.gene_id AND
ir.aligned >= %(rates_min_aligned)i''' )
x = locals()
x.update( PARAMS )
stmt_select = ", ".join( stmt_select ) % x
stmt_from = " ".join( stmt_from ) % x
stmt_where = " AND ".join( stmt_where ) % x
dbhandle = sqlite3.connect( PARAMS["database"] )
Database.executewait( dbhandle, "DROP TABLE IF EXISTS %(tablename)s " % locals() )
statement = '''
CREATE TABLE %(tablename)s AS
SELECT
CAST(r.gene_id AS TEXT) AS gene_id,
r.exons_sum as length,
r.exons_pGC as pgc,
%(stmt_select)s
FROM
%(track)s_annotation AS r
%(stmt_from)s
WHERE %(stmt_where)s
''' % locals()
Database.executewait( dbhandle, statement)
dbhandle.commit()
P.touch(outfile)
示例12: buildExpressionStats
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def buildExpressionStats(
dbhandle,
outfile,
tablenames,
outdir,
regex_table="(?P<design>[^_]+)_"
"(?P<geneset>[^_]+)_"
"(?P<counting_method>[^_]+)_"
"(?P<method>[^_]+)_"
"(?P<level>[^_]+)_diff"):
"""compile expression summary statistics from database.
This method outputs a table with the number of genes tested,
failed, differentially expressed, etc. for a series of DE tests.
Arguments
---------
dbhandle : object
Database handle.
tables : list
List of tables to process.
outfile : string
Output filename in :term:`tsv` format.
outdir : string
Output directory for diagnostic plots.
regex : string
Regular expression to extract experimental information
from table name.
"""
keys_status = "OK", "NOTEST", "FAIL", "NOCALL"
outf = IOTools.openFile(outfile, "w")
outf.write("\t".join(
("design",
"geneset",
"level",
"counting_method",
"treatment_name",
"control_name",
"tested",
"\t".join(["status_%s" % x for x in keys_status]),
"significant",
"twofold")) + "\n")
for tablename in tablenames:
r = re.search(regex_table, tablename)
if r is None:
raise ValueError(
"can't match tablename '%s' to regex" % tablename)
geneset = r.group("geneset")
design = r.group("design")
level = r.group("level")
counting_method = r.group("counting_method")
geneset = r.group("geneset")
def toDict(vals, l=2):
return collections.defaultdict(
int,
[(tuple(x[:l]), x[l]) for x in vals])
tested = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename)s "
"GROUP BY treatment_name,control_name" % locals()
).fetchall())
status = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, status, "
"COUNT(*) FROM %(tablename)s "
"GROUP BY treatment_name,control_name,status"
% locals()).fetchall(), 3)
signif = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename)s "
"WHERE significant "
"GROUP BY treatment_name,control_name" % locals()
).fetchall())
fold2 = toDict(Database.executewait(
dbhandle,
"SELECT treatment_name, control_name, "
"COUNT(*) FROM %(tablename)s "
"WHERE (l2fold >= 1 or l2fold <= -1) AND significant "
"GROUP BY treatment_name,control_name,significant"
% locals()).fetchall())
for treatment_name, control_name in tested.keys():
outf.write("\t".join(map(str, (
design,
geneset,
level,
counting_method,
treatment_name,
control_name,
tested[(treatment_name, control_name)],
"\t".join(
[str(status[(treatment_name, control_name, x)])
#.........这里部分代码省略.........
示例13: createView
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def createView(dbhandle, tables, tablename, outfile,
view_type="TABLE",
ignore_duplicates=True):
'''create a database view for a list of tables.
This method performs a join across multiple tables and stores the
result either as a view or a table in the database.
Arguments
---------
dbhandle :
A database handle.
tables : list of tuples
Tables to merge. Each tuple contains the name of a table and
the field to join with the first table. For example::
tables = (
"reads_summary", "track",
"bam_stats", "track",
"context_stats", "track",
"picard_stats_alignment_summary_metrics", "track")
tablename : string
Name of the view or table to be created.
outfile : string
Output filename for status information.
view_type : string
Type of view, either ``VIEW`` or ``TABLE``. If a view is to be
created across multiple databases, use ``TABLE``.
ignore_duplicates : bool
If set to False, duplicate column names will be added with the
tablename as prefix. The default is to ignore.
'''
Database.executewait(
dbhandle,
"DROP %(view_type)s IF EXISTS %(tablename)s" % locals())
tracks, columns = [], []
tablenames = [x[0] for x in tables]
for table, track in tables:
d = Database.executewait(
dbhandle,
"SELECT COUNT(DISTINCT %s) FROM %s" % (track, table))
tracks.append(d.fetchone()[0])
columns.append(
[x.lower() for x in Database.getColumnNames(dbhandle, table)
if x != track])
E.info("creating %s from the following tables: %s" %
(tablename, str(list(zip(tablenames, tracks)))))
if min(tracks) != max(tracks):
raise ValueError(
"number of rows not identical - will not create view")
from_statement = " , ".join(
["%s as t%i" % (y[0], x) for x, y in enumerate(tables)])
f = tables[0][1]
where_statement = " AND ".join(
["t0.%s = t%i.%s" % (f, x + 1, y[1])
for x, y in enumerate(tables[1:])])
all_columns, taken = [], set()
for x, c in enumerate(columns):
i = set(taken).intersection(set(c))
if i:
E.warn("duplicate column names: %s " % i)
if not ignore_duplicates:
table = tables[x][0]
all_columns.extend(
["t%i.%s AS %s_%s" % (x, y, table, y) for y in i])
c = [y for y in c if y not in i]
all_columns.extend(["t%i.%s" % (x, y) for y in c])
taken.update(set(c))
all_columns = ",".join(all_columns)
statement = '''
CREATE %(view_type)s %(tablename)s AS SELECT t0.track, %(all_columns)s
FROM %(from_statement)s
WHERE %(where_statement)s
''' % locals()
Database.executewait(dbhandle, statement)
nrows = Database.executewait(
dbhandle, "SELECT COUNT(*) FROM view_mapping").fetchone()[0]
if nrows == 0:
raise ValueError(
"empty view mapping, check statement = %s" %
(statement % locals()))
if nrows != min(tracks):
E.warn("view creates duplicate rows, got %i, expected %i" %
(nrows, min(tracks)))
E.info("created view_mapping with %i rows" % nrows)
touchFile(outfile)
示例14: importLincRNA
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import executewait [as 别名]
def importLincRNA( infile, outfile ):
'''build a linc RNA set.
* no coding potential
* unknown and intergenic transcripts
* no overlap with ``linc_exclude`` (usually: human refseq)
* at least ``linc_min_length`` bp in length
* at least ``linc_min_reads`` reads in transcript
'''
table = outfile[:-len(".import")]
track = table[:-len("Linc")]
dbhandle = sqlite3.connect( PARAMS["database"] )
Database.executewait( dbhandle, '''DROP TABLE IF EXISTS %(table)s''' % locals())
Database.executewait( dbhandle, '''CREATE TABLE %(table)s (gene_id TEXT)''' % locals())
Database.executewait( dbhandle, '''CREATE INDEX %(table)s_index1 ON %(table)s (gene_id)''' % locals())
joins, wheres = [], ["1"]
if PARAMS["linc_min_reads"] > 0:
joins.append( ", %(track)s_coverage as cov" % locals() )
wheres.append( "cov.gene_id = m.gene_id2 AND cov.nmatches >= %(i)" % PARAMS["linc_min_reads"] )
if PARAMS["linc_exclude"] > 0:
joins.append( "LEFT JOIN %s_vs_%s_ovl as ovl on ovl.gene_id2 = a.gene_id" %\
(PARAMS["linc_exclude"], track ) )
wheres.append( "ovl.gene_id1 IS NULL" )
wheres = " AND ".join( wheres )
joins = " ".join( joins )
statement = '''INSERT INTO %(table)s
SELECT DISTINCT(a.gene_id) FROM
%(track)s_annotation as a
%(joins)s
LEFT JOIN %(track)s_coding AS c on c.gene_id = a.gene_id
WHERE is_unknown
AND is_intergenic
AND exons_sum >= %(linc_min_length)i
AND (c.is_coding IS NULL or not c.is_coding)
AND %(wheres)s
''' % dict( PARAMS.items() + locals().items() )
E.debug( "statement to build lincRNA: %s" % statement)
Database.executewait( dbhandle, statement % locals())
dbhandle.commit()
cc = dbhandle.cursor()
result = cc.execute("SELECT COUNT(*) FROM %(table)s" % locals() ).fetchall()[0][0]
E.info( "build lincRNA set for %s: %i entries" % ( track, result ))
outgtf = "%s.gtf.gz" % table
E.info( "creating gtf file `%s`" % outgtf )
# output gtf file
statement = '''%(cmd-sql)s %(database)s "SELECT g.* FROM %(track)s_gtf as g, %(table)s AS t
WHERE t.gene_id = g.gene_id"
| python %(scriptsdir)s/gtf2tsv.py --invert --log=%(outfile)s
| gzip
> %(outgtf)s'''
P.run()