本文整理汇总了 Python 中 scilifelab.db.statusdb.SampleRunMetricsConnection.get_samples 方法的典型用法代码示例。如果您正苦于以下问题：Python SampleRunMetricsConnection.get_samples 方法的具体用法？Python SampleRunMetricsConnection.get_samples 怎么用？Python SampleRunMetricsConnection.get_samples 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 scilifelab.db.statusdb.SampleRunMetricsConnection 的用法示例。
在下文中一共展示了SampleRunMetricsConnection.get_samples方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_4_get_samples
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def test_4_get_samples(self):
    """Print the sample count for the example flowcell, then for flowcell plus project.

    Makes no assertions; the two counts are only reported on stdout.
    """
    connection = SampleRunMetricsConnection(url=self.url, username=self.user, password=self.pw)
    flowcell = self.examples["flowcell"]

    on_flowcell = connection.get_samples(fc_id=flowcell)
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the print function.
    print("Number of samples before subsetting: " + str(len(on_flowcell)))

    in_project = connection.get_samples(sample_prj=self.examples["project"], fc_id=flowcell)
    print("Number of samples after subsetting: " + str(len(in_project)))
示例2: fastq_screen
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def fastq_screen(project_name=None, flowcell=None,
                 username=None, password=None, url=None, dbname="samples", **kw):
    """Perform application specific qc on a project.

    Fetches the sample runs matching ``project_name`` and ``flowcell`` and
    writes, for every run, its barcode name followed by a table of its
    ``fastq_scr`` entry (when the entry is present and non-empty) to the
    ``stdout`` buffer of the returned dict.

    :param project_name: project name
    :param flowcell: flowcell identifier
    :param username: database username
    :param password: database password
    :param url: database url
    :param dbname: samples database name
    :param kw: additional keyword arguments; accepted but not used here

    :returns: dict with ``stdout`` and ``stderr`` StringIO buffers; nothing
        in this function writes to ``stderr``
    :raises KeyError: if a sample run has no ``barcode_name``
    """
    # The original message had two arguments but only one placeholder, so
    # the flowcell was silently dropped from the log line.
    LOG.debug("Running fastq screen summary on project {}, flowcell {}".format(project_name, flowcell))
    output_data = {'stdout': StringIO(), 'stderr': StringIO()}
    out = output_data["stdout"]
    s_con = SampleRunMetricsConnection(dbname=dbname, username=username, password=password, url=url)
    samples = s_con.get_samples(fc_id=flowcell, sample_prj=project_name)
    for s in samples:
        LOG.debug("Checking fastq_screen data for sample {}, id {}, project {}".format(
            s.get("name", None), s.get("_id", None), s.get("sample_prj", None)))
        fqscreen_data = s.get("fastq_scr", {})
        out.write(s["barcode_name"] + "\n")
        if not fqscreen_data:
            continue
        # Materialise once so the header and the rows are derived from the
        # same iteration order.
        rows = list(fqscreen_data.items())
        # Column titles are taken from the first entry only; presumably all
        # entries share the same keys -- verify against the database schema.
        columns = rows[0][1].keys()
        out.write("\t\t" + "".join("{:>27}".format(c) for c in columns) + "\n")
        for label, values in rows:
            cells = "".join("{:>27}".format(x) for x in values.values())
            out.write("{:>12}\t{}\n".format(label, cells))
    return output_data
示例3: test_2_make_note
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def test_2_make_note(self):
    """Make a note subset by example flowcell and project.

    For every sample run mapped to the example project and flowcell, collects
    the note parameters from the sample, flowcell and project databases and
    renders one PDF named after the run's barcode name.
    """
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    # NOTE(review): this result is replaced by the name mapping two lines
    # below; the call is kept so that get_samples is still exercised.
    samples = s_con.get_samples(self.examples["flowcell"], self.examples["project"])
    project = p_con.get_entry(self.examples["project"])
    samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
    for k, v in samples.items():
        # Start from a fresh copy for every sample: binding the shared
        # `parameters` object directly let values set for one sample (and the
        # "N/A" substitutions below) leak into the next one.
        s_param = dict(parameters)
        s = s_con.get_entry(k)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s["date"], s["flowcell"])
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
        s_param['rounded_read_count'] = round(float(s_param['rounded_read_count'])/1e6, 1) if s_param['rounded_read_count'] else None
        # `project` was dereferenced before the `if project:` guard; a missing
        # project entry now yields None here (reported as "N/A" below).
        s_param['customer_name'] = None
        if project:
            s_param['customer_name'] = project['samples'][v["sample"]].get('customer_name', None)
            s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
            s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
            s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
        s_param['success'] = sequencing_success(s_param, cutoffs)
        # Replace missing values so the note shows "N/A" instead of None.
        s_param.update({name: "N/A" for name in s_param.keys() if s_param[name] is None})
        make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)
示例4: test_get_samples_wrong_info
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def test_get_samples_wrong_info(self):
    """A non-existent project combined with the example flowcell must match no samples."""
    connection = SampleRunMetricsConnection(
        dbname="samples-test",
        url=self.url,
        username=self.user,
        password=self.pw,
    )
    matched = connection.get_samples(fc_id=self.examples["flowcell"], sample_prj="bogusproject")
    n_matched = len(matched)
    LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(n_matched))
    self.assertEqual(n_matched, 0)
示例5: test_get_samples
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def test_get_samples(self):
    """Check the sample counts returned for each flowcell/project selection.

    Runs four queries against the ``samples-test`` database -- flowcell only,
    flowcell and project, project only, project and flowcell -- and asserts
    the expected number of samples for each.
    """
    connection = SampleRunMetricsConnection(
        dbname="samples-test", username=self.user, password=self.pw, url=self.url)
    # Maps a key of self.examples to the get_samples keyword it is passed as.
    keyword_for = {"flowcell": "fc_id", "project": "sample_prj"}
    # (log prefix, example keys used for the query, expected sample count)
    cases = (
        ("Selecting on flowcell: ", ("flowcell",), 5),
        ("Selecting on flowcell, subsetting on project: ", ("flowcell", "project"), 2),
        ("Selecting on project: ", ("project",), 3),
        ("Selecting on project, subsetting on flowcell: ", ("project", "flowcell"), 2),
    )
    for prefix, example_keys, expected in cases:
        # Example values are looked up per case, right before the query.
        query = dict((keyword_for[key], self.examples[key]) for key in example_keys)
        found = connection.get_samples(**query)
        LOG.info(prefix + str(len(found)))
        self.assertEqual(len(found), expected)
示例6: update
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def update(self):
    """Update 'project_id' and/or 'project_sample_name' on the sample runs of a project.

    Requires the ``sample_prj`` argument and a database url (from the command
    line or the ``db`` config section); returns early without changes when
    either is missing.

    * With ``project_id`` set, every sample run of the project gets that id.
    * With ``names`` set (a path to a JSON file, or a Python literal mapping
      barcode names to sample names), matching runs get the mapped
      ``project_sample_name``.
    * Without ``names``, the name is looked up in the project database using
      extensive matching on the barcode name.

    Existing values are only overwritten after confirmation via
    ``query_yes_no`` (which is passed the ``force`` argument).
    """
    if not self._check_pargs(["sample_prj"]):
        return
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

    if self.pargs.project_id:
        self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
        for run in samples:
            current_id = run.get("project_id", None)
            # Ask before replacing an id that is already set.
            if current_id is not None and not query_yes_no("'project_id':{} for sample {}; are you sure you want to overwrite?".format(current_id, run["name"]), force=self.pargs.force):
                continue
            run["project_id"] = self.pargs.project_id
            s_con.save(run)

    if self.pargs.names:
        self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
        # `names` is either a path to a JSON file or an inline Python literal.
        if os.path.exists(self.pargs.names):
            with open(self.pargs.names) as fh:
                names_d = json.load(fh)
        else:
            names_d = ast.literal_eval(self.pargs.names)

        # Bucket the sample runs by barcode name, keeping their original order.
        runs_by_barcode = {}
        for run in samples:
            runs_by_barcode.setdefault(run["barcode_name"], []).append(run)

        for barcode_name in names_d:
            matching_runs = runs_by_barcode.get(barcode_name, None)
            if not matching_runs:
                continue
            for run in matching_runs:
                current_name = run.get("project_sample_name", None)
                # Ask before replacing a name that is already set.
                if current_name is not None and not query_yes_no("'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(current_name, run["name"]), force=self.pargs.force):
                    continue
                run["project_sample_name"] = names_d[barcode_name]
                s_con.save(run)
    else:
        self.app.log.info("Trying to use extensive matching...")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        # A project alias, when given, takes precedence over sample_prj.
        project_name = self.pargs.project_alias or self.pargs.sample_prj
        for run in samples:
            project_sample = p_con.get_project_sample(project_name, run["barcode_name"], extensive_matching=True)
            if not project_sample:
                continue
            self.app.log.info("using mapping '{} : {}'...".format(run["barcode_name"], project_sample["sample_name"]))
            run["project_sample_name"] = project_sample["sample_name"]
            s_con.save(run)
示例7: data_delivery_note
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def data_delivery_note(**kw):
    """Create an easily parseable information file with information about the data delivery.

    Collects, for every sample run of a project (optionally restricted to one
    flowcell), the delivered files recorded under ``raw_data_delivery`` and
    writes them both to a CSV file in the current working directory
    (``<project>[_<flowcell>]_data_delivery.csv``) and, as a text table, to
    the ``stdout`` buffer of the returned dict.

    A sample run without a ``raw_data_delivery`` entry yields one row holding
    only its two IDs; a run whose entry lists no files yields no row.

    :param kw: keyword arguments; ``project_name`` and ``flowcell`` are read
        here and the complete set is forwarded to both database connections

    :returns: dict with ``stdout``, ``stderr`` and ``debug`` StringIO buffers
    :raises AssertionError: if either database connection object is falsy
    """
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}

    project_name = kw.get('project_name', None)
    flowcell = kw.get('flowcell', None)
    # The conditional used to sit inside .format(), so a missing flowcell was
    # logged as "and flowcell ." instead of dropping the clause.
    flowcell_clause = ' and flowcell {}'.format(flowcell) if flowcell else ''
    LOG.debug("Generating data delivery note for project {}{}.".format(project_name, flowcell_clause))

    # Get a connection to the project and sample databases
    p_con = ProjectSummaryConnection(**kw)
    assert p_con, "Could not connect to project database"
    s_con = SampleRunMetricsConnection(**kw)
    assert s_con, "Could not connect to sample database"

    # Get the entry for the project and samples from the database
    LOG.debug("Fetching samples from sample database")
    samples = s_con.get_samples(sample_prj=project_name, fc_id=flowcell)
    LOG.debug("Got {} samples from database".format(len(samples)))

    # Get the customer sample names from the project database
    LOG.debug("Fetching samples from project database")
    project_samples = p_con.get_entry(project_name, "samples")
    customer_names = {sample_name: sample.get('customer_name', 'N/A')
                      for sample_name, sample in project_samples.items()}

    data = [['SciLifeLab ID', 'Submitted ID', 'Flowcell', 'Lane', 'Barcode', 'Read', 'Path', 'MD5', 'Size (bytes)', 'Timestamp']]
    for sample in samples:
        sname = sample.get('project_sample_name', 'N/A')
        cname = customer_names.get(sname, 'N/A')
        fc = sample.get('flowcell', 'N/A')
        lane = sample.get('lane', 'N/A')
        barcode = sample.get('sequence', 'N/A')
        if 'raw_data_delivery' not in sample:
            # Nothing delivered for this run: emit the IDs with blank columns.
            data.append([sname, cname, '', '', '', '', '', '', '', ''])
            continue
        delivery = sample['raw_data_delivery']
        tstamp = delivery.get('timestamp', 'N/A')
        # `file_info` instead of `file`, which shadowed the Python 2 builtin.
        for read, file_info in delivery.get('files', {}).items():
            data.append([sname,
                         cname,
                         fc,
                         lane,
                         barcode,
                         read,
                         file_info.get('path', 'N/A'),
                         file_info.get('md5', 'N/A'),
                         file_info.get('size_in_bytes', 'N/A'),
                         tstamp])

    # Write the data to a csv file
    outfile = "{}{}_data_delivery.csv".format(project_name, '_{}'.format(flowcell) if flowcell else '')
    LOG.debug("Writing delivery data to {}".format(outfile))
    with open(outfile, "w") as outh:
        csv.writer(outh).writerows(data)

    # Write Texttable formatted output to stdout
    tt = texttable.Texttable(180)
    tt.add_rows(data)
    output_data['stdout'].write(tt.draw())
    return output_data
示例8: raw_data
# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def raw_data(self):
if not self._check_pargs(["project"]):
return
# if necessary, reformat flowcell identifier
if self.pargs.flowcell:
self.pargs.flowcell = self.pargs.flowcell.split("_")[-1]
# get the uid and gid to use for destination files
uid = os.getuid()
gid = os.getgid()
if self.pargs.group is not None and len(self.pargs.group) > 0:
gid = grp.getgrnam(group).gr_gid
self.log.debug("Connecting to project database")
p_con = ProjectSummaryConnection(**vars(self.pargs))
assert p_con, "Could not get connection to project databse"
self.log.debug("Connecting to samples database")
s_con = SampleRunMetricsConnection(**vars(self.pargs))
assert s_con, "Could not get connection to samples databse"
# Fetch the Uppnex project to deliver to
if not self.pargs.uppmax_project:
self.pargs.uppmax_project = p_con.get_entry(self.pargs.project, "uppnex_id")
if not self.pargs.uppmax_project:
self.log.error("Uppmax project was not specified and could not be fetched from project database")
return
# Extract the list of samples and runs associated with the project and sort them
samples = sorted(s_con.get_samples(fc_id=self.pargs.flowcell, sample_prj=self.pargs.project), key=lambda k: (k.get('project_sample_name','NA'), k.get('flowcell','NA'), k.get('lane','NA')))
# Setup paths and verify parameters
self._meta.production_root = self.app.config.get("production", "root")
self._meta.root_path = self._meta.production_root
proj_base_dir = os.path.join(self._meta.root_path, self.pargs.project)
assert os.path.exists(self._meta.production_root), "No such directory {}; check your production config".format(self._meta.production_root)
assert os.path.exists(proj_base_dir), "No project {} in production path {}".format(self.pargs.project,self._meta.root_path)
try:
self._meta.uppnex_project_root = self.app.config.get("deliver", "uppnex_project_root")
except Exception as e:
self.log.warn("{}, will use '/proj' as uppnext_project_root".format(e))
self._meta.uppnex_project_root = '/proj'
try:
self._meta.uppnex_delivery_dir = self.app.config.get("deliver", "uppnex_project_delivery_path")
except Exception as e:
self.log.warn("{}, will use 'INBOX' as uppnext_project_delivery_path".format(e))
self._meta.uppnex_delivery_dir = 'INBOX'
destination_root = os.path.join(self._meta.uppnex_project_root,self.pargs.uppmax_project,self._meta.uppnex_delivery_dir)
assert os.path.exists(destination_root), "Delivery destination folder {} does not exist".format(destination_root)
destination_root = os.path.join(destination_root,self.pargs.project)
# If interactively select, build a list of samples to skip
if self.pargs.interactive:
to_process = []
for sample in samples:
sname = sample.get("project_sample_name")
index = sample.get("sequence")
fcid = sample.get("flowcell")
lane = sample.get("lane")
date = sample.get("date")
self.log.info("Sample: {}, Barcode: {}, Flowcell: {}, Lane: {}, Started on: {}".format(sname,
index,
fcid,
lane,
date))
if query_yes_no("Deliver sample?", default="no"):
to_process.append(sample)
samples = to_process
# Find uncompressed fastq
uncompressed = self._find_uncompressed_fastq_files(proj_base_dir,samples)
if len(uncompressed) > 0:
self.log.warn("The following samples have uncompressed *.fastq files that cannot be delivered: {}".format(",".join(uncompressed)))
if not query_yes_no("Continue anyway?", default="no"):
return
self.log.info("Will deliver data for {} samples from project {} to {}".format(len(samples),self.pargs.project,destination_root))
if not query_yes_no("Continue?"):
return
# Get the list of files to transfer and the destination
self.log.debug("Gathering list of files to copy")
to_copy = self.get_file_copy_list(proj_base_dir,
destination_root,
samples)
# Make sure that transfer will be with rsync
if not self.pargs.rsync:
self.log.warn("Files must be transferred using rsync")
if not query_yes_no("Do you wish to continue delivering using rsync?", default="yes"):
return
self.pargs.rsync = True
# Process each sample run
for id, files in to_copy.items():
# get the sample database object
[sample] = [s for s in samples if s.get('_id') == id]
#.........这里部分代码省略.........