本文整理汇总了Python中CGAT.Database.getColumnNames方法的典型用法代码示例。如果您正苦于以下问题：Python Database.getColumnNames方法的具体用法？Python Database.getColumnNames怎么用？Python Database.getColumnNames使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CGAT.Database的用法示例。
在下文中一共展示了Database.getColumnNames方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: createView
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import getColumnNames [as 别名]
def createView(dbhandle, tables, tablename, outfile,
               view_type="TABLE",
               ignore_duplicates=True):
    '''create a database view for a list of tables.

    This method performs a join across multiple tables and stores the
    result either as a view or a table in the database.

    Arguments
    ---------
    dbhandle :
        A database handle.
    tables : list of tuples
        Tables to merge. Each tuple contains the name of a table and
        the field to join with the first table. For example::

            tables = (
                ("reads_summary", "track"),
                ("bam_stats", "track"),
                ("context_stats", "track"),
                ("picard_stats_alignment_summary_metrics", "track"))

    tablename : string
        Name of the view or table to be created.
    outfile : string
        Output filename for status information.
    view_type : string
        Type of view, either ``VIEW`` or ``TABLE``. If a view is to be
        created across multiple databases, use ``TABLE``.
    ignore_duplicates : bool
        If set to False, duplicate column names will be added with the
        tablename as prefix. The default is to ignore.

    Raises
    ------
    ValueError
        If `tables` is empty, if the tables do not all contain the
        same number of distinct join-field values, or if the created
        view/table contains no rows.
    '''
    if not tables:
        raise ValueError("no tables given - will not create view")

    Database.executewait(
        dbhandle,
        "DROP %s IF EXISTS %s" % (view_type, tablename))

    # For every table collect the number of distinct join keys and the
    # (lower-cased) names of all columns other than the join field.
    tracks, columns = [], []
    tablenames = [x[0] for x in tables]
    for table, track in tables:
        d = Database.executewait(
            dbhandle,
            "SELECT COUNT(DISTINCT %s) FROM %s" % (track, table))
        tracks.append(d.fetchone()[0])
        # Compare case-insensitively: the column names are lower-cased
        # below, so the join field has to be matched the same way.
        join_field = track.lower()
        columns.append(
            [x.lower() for x in Database.getColumnNames(dbhandle, table)
             if x.lower() != join_field])

    E.info("creating %s from the following tables: %s" %
           (tablename, str(list(zip(tablenames, tracks)))))

    expected_rows = min(tracks)
    if expected_rows != max(tracks):
        raise ValueError(
            "number of rows not identical - will not create view")

    from_statement = " , ".join(
        ["%s as t%i" % (y[0], x) for x, y in enumerate(tables)])

    # Every table after the first is joined to the first table (t0).
    first_field = tables[0][1]
    where_statement = " AND ".join(
        ["t0.%s = t%i.%s" % (first_field, x + 1, y[1])
         for x, y in enumerate(tables[1:])])

    all_columns, taken = [], set()
    for x, c in enumerate(columns):
        # Keep the table's column order so the generated statement is
        # deterministic (a set would iterate in arbitrary order).
        duplicates = [y for y in c if y in taken]
        if duplicates:
            E.warn("duplicate column names: %s " % duplicates)
            if not ignore_duplicates:
                table = tables[x][0]
                all_columns.extend(
                    ["t%i.%s AS %s_%s" % (x, y, table, y)
                     for y in duplicates])
            c = [y for y in c if y not in taken]

        all_columns.extend(["t%i.%s" % (x, y) for y in c])
        taken.update(c)

    # NOTE(review): the key column is hard-coded as ``track``; this
    # assumes the first table exposes its join field under that name.
    select_statement = ", ".join(["t0.track"] + all_columns)

    statement = "CREATE %s %s AS SELECT %s FROM %s" % (
        view_type, tablename, select_statement, from_statement)
    # With a single table there is nothing to join on; an empty WHERE
    # clause would be a syntax error.
    if where_statement:
        statement += " WHERE %s" % where_statement

    Database.executewait(dbhandle, statement)

    # Count the rows of the object that was just created, not of a
    # fixed table name.
    nrows = Database.executewait(
        dbhandle, "SELECT COUNT(*) FROM %s" % tablename).fetchone()[0]

    if nrows == 0:
        # `statement` is already fully formatted; formatting it again
        # would fail on any literal '%' it contains.
        raise ValueError(
            "empty view mapping, check statement = %s" % statement)
    if nrows != expected_rows:
        E.warn("view creates duplicate rows, got %i, expected %i" %
               (nrows, expected_rows))

    E.info("created %s with %i rows" % (tablename, nrows))
    touchFile(outfile)
示例2: loadCuffdiff
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import getColumnNames [as 别名]
#.........这里部分代码省略.........
sample_lookup[cuffdiff_sample_name] = our_sample_name
inf.close()
for fn, level in (("cds.read_group_tracking.gz", "cds"),
("genes.read_group_tracking.gz", "gene"),
("isoforms.read_group_tracking.gz", "isoform"),
("tss_groups.read_group_tracking.gz", "tss")):
tablename = prefix + "_" + level + "sample_fpkms"
tmpf = P.getTempFilename(".")
inf = IOTools.openFile(os.path.join(indir, fn)).readlines()
outf = IOTools.openFile(tmpf, "w")
samples = []
genes = {}
is_first = True
for line in inf:
if is_first:
is_first = False
continue
line = line.split()
gene_id = line[0]
condition = line[1]
replicate = line[2]
fpkm = line[6]
status = line[8]
sample_id = condition + "_" + replicate
if sample_id not in samples:
samples.append(sample_id)
# IMS: The following block keeps getting its indenting messed
# up. It is not part of the 'if sample_id not in samples' block
# please make sure it does not get made part of it
if gene_id not in genes:
genes[gene_id] = {}
genes[gene_id][sample_id] = fpkm
else:
if sample_id in genes[gene_id]:
raise ValueError(
'sample_id %s appears twice in file for gene_id %s'
% (sample_id, gene_id))
else:
if status != "OK":
genes[gene_id][sample_id] = status
else:
genes[gene_id][sample_id] = fpkm
samples = sorted(samples)
# IMS - CDS files might be empty if no CDS has been
# calculated for the genes. In the long term we need to add CDS
# annotation to de novo predicted genesets; in the meantime just
# skip if the CDS tracking file is empty
if len(samples) == 0:
continue
headers = "gene_id\t" + "\t".join([sample_lookup[x] for x in samples])
outf.write(headers + "\n")
for gene in genes.iterkeys():
outf.write(gene + "\t")
s = 0
while x < len(samples) - 1:
outf.write(genes[gene][samples[s]] + "\t")
s += 1
# IMS: Please be careful with this line. It keeps getting moved
# into the above while block where it does not belong
outf.write(genes[gene][samples[len(samples) - 1]] + "\n")
outf.close()
P.load(tmpf,
outfile,
tablename=tablename,
options="--allow-empty-file "
" --add-index=gene_id")
os.unlink(tmpf)
# build convenience table with tracks
tablename = prefix + "_isoform_levels"
tracks = Database.getColumnNames(dbhandle, tablename)
tracks = [x[:-len("_FPKM")] for x in tracks if x.endswith("_FPKM")]
tmpfile = P.getTempFile(dir=".")
tmpfile.write("track\n")
tmpfile.write("\n".join(tracks) + "\n")
tmpfile.close()
P.load(tmpfile.name, outfile)
os.unlink(tmpfile.name)
示例3: loadCuffdiff
# 需要导入模块: from CGAT import Database [as 别名]
# 或者: from CGAT.Database import getColumnNames [as 别名]
#.........这里部分代码省略.........
inf.close()
for fn, level in (("cds.read_group_tracking.gz", "cds"),
("genes.read_group_tracking.gz", "gene"),
("isoforms.read_group_tracking.gz", "isoform"),
("tss_groups.read_group_tracking.gz", "tss")):
tablename = prefix + "_" + level + "sample_fpkms"
tmpf = P.getTempFilename(".")
inf = IOTools.openFile(os.path.join(indir, fn)).readlines()
outf = IOTools.openFile(tmpf, "w")
samples = []
genes = {}
x = 0
for line in inf:
if x == 0:
x += 1
continue
line = line.split()
gene_id = line[0]
condition = line[1]
replicate = line[2]
fpkm = line[6]
status = line[8]
sample_id = condition + "_" + replicate
if sample_id not in samples:
samples.append(sample_id)
# IMS: The following block keeps getting its indenting messed
# up. It is not part of the 'if sample_id not in samples' block
# please make sure it does not get made part of it
if gene_id not in genes:
genes[gene_id] = {}
genes[gene_id][sample_id] = fpkm
else:
if sample_id in genes[gene_id]:
raise ValueError(
'sample_id %s appears twice in file for gene_id %s'
% (sample_id, gene_id))
else:
if status != "OK":
genes[gene_id][sample_id] = status
else:
genes[gene_id][sample_id] = fpkm
samples = sorted(samples)
# IMS - CDS files might be empty if no CDS has been
# calculated for the genes. In the long term we need to add CDS
# annotation to de novo predicted genesets; in the meantime just
# skip if the CDS tracking file is empty
if len(samples) == 0:
continue
headers = "gene_id\t" + "\t".join([sample_lookup[x] for x in samples])
outf.write(headers + "\n")
for gene in genes.iterkeys():
outf.write(gene + "\t")
x = 0
while x < len(samples) - 1:
outf.write(genes[gene][samples[x]] + "\t")
x += 1
# IMS: Please be careful with this line. It keeps getting moved
# into the above while block where it does not belong
outf.write(genes[gene][samples[len(samples) - 1]] + "\n")
outf.close()
statement = ("cat %(tmpf)s |"
" python %(scriptsdir)s/csv2db.py "
" %(csv2db_options)s"
" --allow-empty-file"
" --add-index=gene_id"
" --table=%(tablename)s"
" >> %(outfile)s.log")
P.run()
os.unlink(tmpf)
# build convenience table with tracks
tablename = prefix + "_isoform_levels"
tracks = Database.getColumnNames(dbhandle, tablename)
tracks = [x[:-len("_FPKM")] for x in tracks if x.endswith("_FPKM")]
tmpfile = P.getTempFile(dir=".")
tmpfile.write("track\n")
tmpfile.write("\n".join(tracks) + "\n")
tmpfile.close()
P.load(tmpfile.name, outfile)
os.unlink(tmpfile.name)