本文整理匯總了Python中scilifelab.db.statusdb.ProjectSummaryConnection.get_entry方法的典型用法代碼示例。如果您正苦於以下問題:Python ProjectSummaryConnection.get_entry方法的具體用法?Python ProjectSummaryConnection.get_entry怎麽用?Python ProjectSummaryConnection.get_entry使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類scilifelab.db.statusdb.ProjectSummaryConnection的用法示例。
在下文中一共展示了ProjectSummaryConnection.get_entry方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_dbcon
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def test_dbcon(self):
    """Test database connection and that we get expected values.

    Every document listed in ``name_view`` of the samples, flowcells and
    projects test databases is fetched with ``get_entry`` and selected
    fields are compared against the expected fixture values.
    """
    # Sample run metrics
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="u", password="p")
    samples = [s_con.get_entry(x) for x in s_con.name_view]
    samples_d = {x["name"]: x for x in samples}
    self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
    self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
    self.assertEqual(samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
    self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
    self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
    self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")

    # Flowcell run metrics
    fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="u", password="p")
    flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
    flowcells_d = {x["name"]: x for x in flowcells}
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")

    # Project summaries
    p_con = ProjectSummaryConnection(dbname="projects-test", username="u", password="p")
    projects = [p_con.get_entry(x) for x in p_con.name_view]
    projects_d = {x["project_name"]: x for x in projects}
    self.assertEqual(projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
    self.assertEqual(
        set(projects_d["J.Doe_00_01"]["samples"].keys()), set(["P001_101_index3", "P001_102", "P001_103"])
    )
    self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"], "GnuGenome")
    self.assertEqual(projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    # Materialise the keys: a dict view never compares equal to a list on
    # Python 3, while list(...) gives the same result on Python 2 and 3.
    self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()), ["3_index6"])
    self.assertIn("A", projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
示例2: test_2_make_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def test_2_make_note(self):
    """Make a note subset by example flowcell and project.

    For every sample run mapped to the example project and flowcell, the
    report parameters are collected from the three databases and one PDF
    note named after the sample's barcode name is produced.
    """
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    # The result of this query is replaced by the mapping fetched below;
    # the call is kept so that get_samples is still exercised.
    samples = s_con.get_samples(self.examples["flowcell"], self.examples["project"])
    project = p_con.get_entry(self.examples["project"])
    samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
    for k, v in samples.items():
        # Start from a fresh copy for every sample run: updating the shared
        # `parameters` dict in place would carry values from one sample
        # over to the next one.
        s_param = dict(parameters)
        s = s_con.get_entry(k)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter})
        fc = "{}_{}".format(s["date"], s["flowcell"])
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
        # Read count is reported in millions, rounded to one decimal
        s_param['rounded_read_count'] = round(float(s_param['rounded_read_count'])/1e6,1) if s_param['rounded_read_count'] else None
        # Only read from the project document once it is known to exist
        s_param['customer_name'] = None
        if project:
            s_param['customer_name'] = project['samples'][v["sample"]].get('customer_name', None)
            s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
            s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
            s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
        s_param['success'] = sequencing_success(s_param, cutoffs)
        # Missing values are shown as "N/A" in the note
        s_param.update({key: "N/A" for key in s_param if s_param[key] is None})
        make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)
示例3: list_projects
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def list_projects(self):
    """Write the projects on a flowcell, with their applications, to stdout.

    Requires the ``flowcell`` argument. The first output row holds the
    flowcell, its instrument type and run mode; every following row holds a
    project name and its application ("N/A" when unknown). Rows are
    tab-separated and joined by newlines. If a precondition fails, a warning
    is logged and the method returns without writing anything.
    """
    if not self._check_pargs(["flowcell"]):
        return

    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    if not validate_fc_directory_format(self.pargs.flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
        )
        return

    out_data = [[self.pargs.flowcell]]

    # The flowcell id is built from the first and the last
    # underscore-separated field of the flowcell directory name
    fields = self.pargs.flowcell.split("_")
    fcid = "_".join((fields[0], fields[-1]))

    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc = fc_con.get_entry(fcid)
    if fc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return

    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc)
    if len(ssheet_data) == 0:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return

    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc)
    if len(run_data) == 0:
        # Not fatal: the defaults below are used instead
        self.log.warn("No runParameter data for flowcell {}".format(fcid))
    instrument_type = run_data.get("InstrumentType", "HiSeq2000")
    run_mode = run_data.get("RunMode", "High Output")
    out_data = [[self.pargs.flowcell, instrument_type, run_mode]]

    # Unique project names; "__" in the samplesheet stands for "."
    projects = {
        proj[0].replace("__", ".")
        for data in ssheet_data.values()
        for proj in data.values()
    }

    # Look up the application of each project
    for project in projects:
        self.log.debug("Fetching project data document for project {}".format(project))
        pdoc = p_con.get_entry(project)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project))
            application = "N/A"
        else:
            application = pdoc.get("application", "N/A")
        out_data.append([project, application])

    rows = ["\t".join([str(cell) for cell in row]) for row in out_data]
    self.app._output_data["stdout"].write("\n".join(rows))
示例4: bcbb_configuration_from_samplesheet
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def bcbb_configuration_from_samplesheet(csv_samplesheet, couch_credentials):
    """Parse an illumina csv-samplesheet and return a dictionary suitable for the bcbb-pipeline

    The samplesheet is converted to yaml through a temporary file, loaded,
    and every multiplexed sample gets the analysis settings that belong to
    the application registered for its project in the project database.
    Settings for the 'default' application are used when the database
    cannot be reached, the project is not found or the lookup fails.

    :param csv_samplesheet: path to the csv samplesheet
    :param couch_credentials: keyword arguments for ProjectSummaryConnection

    :returns: the loaded configuration with the per-sample settings applied
    """
    tfh, tmp_file = tempfile.mkstemp('.yaml','samplesheet')
    os.close(tfh)
    yaml_file = tmp_file
    try:
        yaml_file = bcbio.solexa.samplesheet.csv2yaml(csv_samplesheet,yaml_file)
        with open(yaml_file) as fh:
            # NOTE(review): yaml.load can construct arbitrary objects; the
            # input here is the file just written by csv2yaml. Consider
            # yaml.safe_load once it is confirmed that csv2yaml only emits
            # plain yaml types.
            config = yaml.load(fh)
    finally:
        # Remove the yaml file(s), we will write a new one later. Doing it
        # here also cleans up when the conversion or the parsing fails.
        for path in set([tmp_file, yaml_file]):
            if path and os.path.exists(path):
                os.remove(path)

    application_setup = {
        'Amplicon': {'analysis': 'Align_standard'},
        'ChIP-seq': {'analysis': 'RNA-seq'},
        'Custom capture': {'analysis': 'Align_standard_seqcap'},
        'de novo': {'analysis': 'Align_standard',
                    'genome_build': 'unknown'},
        'Exome capture': {'analysis': 'Align_standard_seqcap'},
        'Finished library': {'analysis': 'Align_standard',
                             'genome_build': 'unknown'},
        'Mate-pair': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'Metagenome': {'analysis': 'Align_standard',
                       'genome_build': 'unknown'},
        'miRNA-seq': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'RNA-seq (mRNA)': {'analysis': 'RNA-seq'},
        'RNA-seq (total RNA)': {'analysis': 'RNA-seq'},
        'WG re-seq': {'analysis': 'Align_standard'},
        'default': {'analysis': 'Align_standard'},
    }

    # Connect to maggie to get project application. Best effort: without a
    # connection every sample falls back to the default setup.
    try:
        p_con = ProjectSummaryConnection(**couch_credentials)
    except Exception:
        print("Can't connect to maggie to get application")
        p_con = None

    # Replace the default analysis
    ## TODO: This is an ugly hack, should be replaced by a custom config
    for lane in config:
        for plex in lane.get('multiplex',[]):
            application = 'default'
            if p_con is not None:
                try:
                    project = p_con.get_entry(plex.get('sample_prj',''))
                    if project is not None:
                        application = project.get("application", 'default').strip()
                except Exception:
                    # Any lookup problem (including a non-string application
                    # value) falls back to the default setup
                    application = 'default'
            setup = application_setup.get(application, application_setup['default'])
            for key, val in setup.items():
                plex[key] = val

    return config
示例5: test_2_make_project_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def test_2_make_project_note(self):
    """Make a project note subset by flowcell and project.

    Builds the sample table for the example project from the project and
    sample run databases and renders it to "<project>.pdf". Nothing is
    rendered when the project cannot be found.
    """
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Work on a copy so that the shared `parameters` dict is not modified
    param = dict(parameters)
    project = p_con.get_entry(self.examples["project"])
    if not project:
        print("No project named {}".format(self.examples["project"]))
        return
    # The project exists from here on, so no second check is needed
    ordered_amount = p_con.get_ordered_amount(self.examples["project"])

    ## Start collecting the data
    sample_table = []
    sample_list = project['samples']
    param.update({key: project.get(ps_to_parameter[key], None) for key in ps_to_parameter})
    samples = p_con.map_name_to_srm(self.examples["project"], check_consistency=True, use_bc_map=True)
    all_passed = True
    for k, v in samples.items():
        if k == "Unexpected":
            continue
        project_sample = sample_list[k]
        vals = {x: project_sample.get(prjs_to_table[x], None) for x in prjs_to_table}
        vals['MOrdered'] = ordered_amount
        # The barcode sequence is taken from the first sample run of the
        # sample; next(iter(...)) works on Python 2 and 3 alike
        vals['BarcodeSeq'] = s_con.get_entry(next(iter(v)), "sequence")
        ## Set status
        if vals['Status'] is None:
            vals['Status'] = set_status(vals)
        vals.update({key: "N/A" for key in vals if vals[key] is None})
        if vals['Status'] in ("N/A", "NP"):
            all_passed = False
        sample_table.append([vals[key] for key in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'

    # Sort, then drop consecutive duplicate rows
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}.pdf".format(self.examples["project"]), headers, paragraphs, **param)
示例6: application_qc
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def application_qc(project_name=None, flowcell=None, application=None,
                   username=None, password=None, url=None,
                   sampledb="samples", projectdb="projects", **kw):
    """Perform application specific qc on a project.

    The application is taken from the project summary when it is listed in
    APPLICATION_MAP; otherwise the ``application`` option is used. Without a
    usable application a warning is logged and the empty output is returned.

    :param project_name: project name
    :param flowcell: flowcell identifier
    :param application: application for which to perform qc
    :param username: database username
    :param password: database password
    :param url: database url
    :param sampledb: samples database name
    :param projectdb: project database name

    :returns: dict with 'stdout' and 'stderr' StringIO buffers
    """
    LOG.debug("Doing application qc for project {}, flowcell {}".format(project_name, flowcell))

    output_data = {'stdout':StringIO(), 'stderr':StringIO()}
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    s_con = SampleRunMetricsConnection(dbname=sampledb, username=username, password=password, url=url)
    prj_summary = p_con.get_entry(project_name)

    if prj_summary is not None:
        # Fetched once, and only when the project summary exists
        qc_data = get_qc_data(project_name, p_con, s_con, flowcell)
        prj_application = prj_summary.get("application")
        if prj_application in APPLICATION_MAP:
            application = APPLICATION_MAP[prj_application]
        elif not application:
            # Report the application found in the project summary; the
            # `application` option is empty on this path
            LOG.warn("No such application {}. Please use the application option (available choices {})".format(prj_application, ",".join(QC_CUTOFF.keys())))
            return output_data
    else:
        LOG.info("No such project {} in project summary. Trying to get qc data anyway.".format(project_name))
        if not application:
            LOG.warn("No application provided. Please use the application option (available choices {})".format(",".join(QC_CUTOFF.keys())))
            return output_data
        qc_data = _get_sample_qc_data(project_name, application, s_con, flowcell)

    output_data = _qc_info_header(project_name, application, output_data)
    # items() instead of iteritems() so the loop runs on Python 2 and 3
    for _, v in sorted(qc_data.items()):
        y = [str(x) for x in assess_qc(v, application)]
        output_data["stdout"].write("".join(y) + "\n")
    return output_data
示例7: sample_status_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
project_alias=[], projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
phix=None, is_paired=True, **kw):
"""Make a sample status note. Used keywords:
:param project_name: project name
:param flowcell: flowcell id
:param username: db username
:param password: db password
:param url: db url
:param ordered_million_reads: number of ordered reads in millions
:param uppnex_id: the uppnex id
:param customer_reference: customer project name
:param project_alias: project alias name
:param phix: phix error rate
:param is_paired: True if run is paired-end, False for single-end
"""
# Cutoffs
cutoffs = {
"phix_err_cutoff" : 2.0,
"qv_cutoff" : 30,
}
instrument = _parse_instrument_config(os.path.expanduser(kw.get("instrument_config","")))
instrument_dict = {i['instrument_id']: i for i in instrument}
# parameters
parameters = {
"project_name" : None,
"start_date" : None,
"FC_id" : None,
"scilifelab_name" : None,
"rounded_read_count" : None,
"phix_error_rate" : None,
"avg_quality_score" : None,
"pct_q30_bases" : None,
"success" : None,
"run_mode":None,
"is_paired":True
}
# key mapping from sample_run_metrics to parameter keys
srm_to_parameter = {"project_name":"sample_prj", "FC_id":"flowcell",
"scilifelab_name":"barcode_name", "start_date":"date",
"rounded_read_count":"bc_count", "lane": "lane"}
LOG.debug("got parameters {}".format(parameters))
output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
if not _assert_flowcell_format(flowcell):
LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")".format(flowcell) )
return output_data
output_data = _update_sample_output_data(output_data, cutoffs)
# Connect and run
s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
# Set up paragraphs
paragraphs = sample_note_paragraphs()
headers = sample_note_headers()
# Get project
project = p_con.get_entry(project_name)
source = p_con.get_info_source(project_name)
if not project:
LOG.warn("No such project '{}'".format(project_name))
return output_data
# Set samples list
sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
if len(sample_run_list) == 0:
LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
return output_data
# Set options
ordered_million_reads = _literal_eval_option(ordered_million_reads)
bc_count = _literal_eval_option(bc_count)
phix = _literal_eval_option(phix)
# Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
sample_count = Counter([x.get("barcode_name") for x in sample_run_list])
# Loop samples and collect information
s_param_out = []
fcdoc = None
for s in sample_run_list:
s_param = {}
LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
s_param.update(parameters)
s_param.update({key:s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
# Get instrument
try:
s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
except:
LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
s_param.update(instrument_dict['default'])
# Get run mode
if not fcdoc or fcdoc.get("name") != fc:
#.........這裏部分代碼省略.........
示例8: raw_data
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def raw_data(self):
if not self._check_pargs(["project"]):
return
# if necessary, reformat flowcell identifier
if self.pargs.flowcell:
self.pargs.flowcell = self.pargs.flowcell.split("_")[-1]
# get the uid and gid to use for destination files
uid = os.getuid()
gid = os.getgid()
if self.pargs.group is not None and len(self.pargs.group) > 0:
gid = grp.getgrnam(group).gr_gid
self.log.debug("Connecting to project database")
p_con = ProjectSummaryConnection(**vars(self.pargs))
assert p_con, "Could not get connection to project databse"
self.log.debug("Connecting to samples database")
s_con = SampleRunMetricsConnection(**vars(self.pargs))
assert s_con, "Could not get connection to samples databse"
# Fetch the Uppnex project to deliver to
if not self.pargs.uppmax_project:
self.pargs.uppmax_project = p_con.get_entry(self.pargs.project, "uppnex_id")
if not self.pargs.uppmax_project:
self.log.error("Uppmax project was not specified and could not be fetched from project database")
return
# Extract the list of samples and runs associated with the project and sort them
samples = sorted(s_con.get_samples(fc_id=self.pargs.flowcell, sample_prj=self.pargs.project), key=lambda k: (k.get('project_sample_name','NA'), k.get('flowcell','NA'), k.get('lane','NA')))
# Setup paths and verify parameters
self._meta.production_root = self.app.config.get("production", "root")
self._meta.root_path = self._meta.production_root
proj_base_dir = os.path.join(self._meta.root_path, self.pargs.project)
assert os.path.exists(self._meta.production_root), "No such directory {}; check your production config".format(self._meta.production_root)
assert os.path.exists(proj_base_dir), "No project {} in production path {}".format(self.pargs.project,self._meta.root_path)
try:
self._meta.uppnex_project_root = self.app.config.get("deliver", "uppnex_project_root")
except Exception as e:
self.log.warn("{}, will use '/proj' as uppnext_project_root".format(e))
self._meta.uppnex_project_root = '/proj'
try:
self._meta.uppnex_delivery_dir = self.app.config.get("deliver", "uppnex_project_delivery_path")
except Exception as e:
self.log.warn("{}, will use 'INBOX' as uppnext_project_delivery_path".format(e))
self._meta.uppnex_delivery_dir = 'INBOX'
destination_root = os.path.join(self._meta.uppnex_project_root,self.pargs.uppmax_project,self._meta.uppnex_delivery_dir)
assert os.path.exists(destination_root), "Delivery destination folder {} does not exist".format(destination_root)
destination_root = os.path.join(destination_root,self.pargs.project)
# If interactively select, build a list of samples to skip
if self.pargs.interactive:
to_process = []
for sample in samples:
sname = sample.get("project_sample_name")
index = sample.get("sequence")
fcid = sample.get("flowcell")
lane = sample.get("lane")
date = sample.get("date")
self.log.info("Sample: {}, Barcode: {}, Flowcell: {}, Lane: {}, Started on: {}".format(sname,
index,
fcid,
lane,
date))
if query_yes_no("Deliver sample?", default="no"):
to_process.append(sample)
samples = to_process
# Find uncompressed fastq
uncompressed = self._find_uncompressed_fastq_files(proj_base_dir,samples)
if len(uncompressed) > 0:
self.log.warn("The following samples have uncompressed *.fastq files that cannot be delivered: {}".format(",".join(uncompressed)))
if not query_yes_no("Continue anyway?", default="no"):
return
self.log.info("Will deliver data for {} samples from project {} to {}".format(len(samples),self.pargs.project,destination_root))
if not query_yes_no("Continue?"):
return
# Get the list of files to transfer and the destination
self.log.debug("Gathering list of files to copy")
to_copy = self.get_file_copy_list(proj_base_dir,
destination_root,
samples)
# Make sure that transfer will be with rsync
if not self.pargs.rsync:
self.log.warn("Files must be transferred using rsync")
if not query_yes_no("Do you wish to continue delivering using rsync?", default="yes"):
return
self.pargs.rsync = True
# Process each sample run
for id, files in to_copy.items():
# get the sample database object
[sample] = [s for s in samples if s.get('_id') == id]
#.........這裏部分代碼省略.........
示例9: GDocsUpdater
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
class GDocsUpdater(rm.RunMonitor):
def __init__(self, config):
    """Open the QC checklist on Google Docs and connect to StatusDB.

    Reads the "gdocs" and "statusdb" sections of the configuration.
    Missing credentials, checklist or worksheets raise AssertionError.
    A ConnectionError from the database leaves ``self.pcon`` set to None.
    """
    super(GDocsUpdater, self).__init__(config)

    # Connect to the Google Docs api
    gdocs_conf = self.config.get("gdocs",{})
    creds_path = os.path.expanduser(gdocs_conf.get("credentials_file",""))
    assert os.path.exists(creds_path), "Supplied GDocs credentials file does not exist"
    self.gdcon = SpreadSheet(get_credentials(creds_path))
    assert self.gdcon, "Could not get a SpreadSheet object, please verify gdocs credentials"

    checklist = gdocs_conf.get("qc_checklist",None)
    assert checklist, "No QC checklist specified in configuration, please specify"
    spreadsheet = self.gdcon.get_spreadsheet(checklist)
    assert spreadsheet, "Could not locate QC checklist '{}' on Google Docs. Please make sure it exists".format(checklist)
    self.gdcon.ssheet = spreadsheet

    # Get the Ongoing, Coming and Finished worksheets (fetched in that order)
    for attribute, title in (("ongoing", "Ongoing"), ("coming", "Coming"), ("finished", "Finished")):
        setattr(self, attribute, self.gdcon.get_worksheet(title))
    assert self.ongoing and self.coming and self.finished, "Could not get 'Ongoing', 'Finished' and 'Coming' worksheets from '{}'. Please make sure that they exist".format(checklist)

    # Get a connection to the StatusDB project database
    statusdb_conf = self.config.get("statusdb",{})
    connection_args = {
        "url": statusdb_conf.get("url","localhost"),
        "username": statusdb_conf.get("user","user"),
        "password": statusdb_conf.get("password","pass"),
    }
    try:
        self.pcon = ProjectSummaryConnection(**connection_args)
    except ConnectionError:
        self.pcon = None
def _list_runs(self, lists):
    """Collect the cards of the given Trello lists.

    :param lists: list identifiers passed to ``self.trello.get_list``

    :returns: dict mapping each card name to its description converted
              with ``self.description_to_dict``; lists that cannot be
              fetched are skipped
    """
    runs = {}
    for list_name in lists:
        trello_list = self.trello.get_list(self.trello_board,list_name,True)
        if trello_list:
            # One entry per card, keyed on the card name
            for card in trello_list.list_cards():
                description = self.description_to_dict(card.description)
                runs[card.name] = description
    return runs
def coming_runs(self):
    """Return a dictionary with runs that are currently in process, i.e. not yet handed
    over to the processing pipeline on Uppmax. Keys are run ids, values are metadata
    dictionaries.
    """
    # Cards in any of these lists count as "coming"
    coming_lists = [
        rm.FIRSTREAD,
        rm.INDEXREAD,
        rm.SECONDREAD,
        rm.PROCESSING,
        rm.UPPMAX,
        rm.STALLED,
    ]
    runs = self._list_runs(coming_lists)
    return runs
def ongoing_runs(self):
    """Return a dictionary with runs that have finished and have been handed over to
    the processing pipeline on Uppmax. Keys are run ids, values are metadata
    dictionaries.
    """
    # Cards in the completed list count as "ongoing"
    return self._list_runs([rm.COMPLETED])
def reshape_run_info(self, runs, skiplist=None):
    """Convert the dictionary of runs to a list of checklist rows.

    One row is produced per run and project. No sorting is done here; rows
    follow the iteration order of ``runs``.

    :param runs: dict mapping run id to a metadata dict; the 'Projects'
                 entry may be a single name or a list of names and the
                 first element of the 'Run mode' entry is used
    :param skiplist: "<run id>_<project>" keys to leave out (default: none)

    :returns: list of [run id, project, application, type, '', run mode]
    """
    if skiplist is None:
        skiplist = []
    run_projects = []
    for run_id, data in runs.items():
        projects = data.get('Projects',[''])
        if not isinstance(projects, list):
            projects = [projects]
        for project in projects:
            if len(project) == 0:
                project = 'Unknown, please check!'
            if "{}_{}".format(run_id,project) not in skiplist:
                # Application and type are left blank here
                application, tp = '',''#self.lookup_project(project)
                run_projects.append([run_id,project,application,tp,'',data.get('Run mode',[''])[0]])
    return run_projects
def lookup_project(self, project):
"""Lookup project application and type in StatusDB"""
application = ""
type = ""
if self.pcon:
pdoc = self.pcon.get_entry(project)
#.........這裏部分代碼省略.........
示例10: ProjectSummaryConnection
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
import os

from scilifelab.db.statusdb import ProjectSummaryConnection

# Print the scilife name and customer name of every sample in a project.
# The credentials are read from the environment so that no secret is stored
# in the source; STATUSDB_USER and STATUSDB_PASSWORD must be set.
pcon = ProjectSummaryConnection(url=os.environ.get("STATUSDB_URL", "tools.scilifelab.se"),
                                username=os.environ["STATUSDB_USER"],
                                password=os.environ["STATUSDB_PASSWORD"])
project = pcon.get_entry('C.Dixelius_13_01')
# Nothing is printed when the project document is missing
samples = project.get("samples",{}) if project else {}
for sample in samples.values():
    # A missing name is shown as "N/A" so that the join cannot fail on None
    print("\t".join([sample.get('scilife_name') or "N/A", sample.get('customer_name') or "N/A"]))
示例11: initiate_survey
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def initiate_survey(report, project, **kw):
    """Send the user survey for a closed project, unless it was sent before.

    :param report: object providing ``log``, ``pargs.dry_run`` and
                   ``_meta.date_format``
    :param project: name of the project in the project database
    :param kw: keyword arguments for ProjectSummaryConnection; ``sender``,
               ``smtphost`` and ``smtpport`` are also passed to send_survey

    :returns: False when a precondition fails (no connection, unknown
              project, project not closed, survey already sent, no usable
              email address); otherwise the result of send_survey
    """
    # Get a connection to the database
    pcon = ProjectSummaryConnection(**kw)
    if not pcon:
        report.log.error("Could not get connection to database")
        return False

    # Get the document for the project
    pdoc = pcon.get_entry(project)
    if not pdoc:
        report.log.error("No such project: {} in database".format(project))
        return False

    # get a project instance from lims
    lproj = lims_project(report, pdoc.get("project_id"))
    if not lproj:
        report.log.error("Could not initiate LIMS object for project {}".format(project))
        return False

    # check if project is closed
    closed = project_closed(lproj)
    if closed is None:
        report.log.warn("Project {} is not closed".format(project))
        return False
    report.log.debug("Project {} closed on {}".format(project,datetime.datetime.strptime(closed,report._meta.date_format)))

    # check if a user survey has already been sent
    if survey_sent(lproj):
        report.log.info("Survey already sent for project {}".format(project))
        return False
    report.log.debug("No previous survey sent for {}".format(project))

    # get email addresses for persons connected to the project
    emails = project_email(report,lproj)
    if len(emails) == 0:
        report.log.warn("No email addresses found associated with project {}".format(project))
        return False

    # verify the format of the email address
    # NOTE(review): the pattern only accepts lower-case addresses with a 2-3
    # letter top-level domain - confirm that this strictness is intended
    email_format = re.compile(r'^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,3})$')
    recipients = []
    for email in emails:
        if email is None or not email_format.match(email):
            report.log.warn("Illegal email format: {}".format(email))
            continue
        recipients.append(email)

    # Without a single valid address there is nobody to send to, and the
    # project must not be marked as surveyed
    if not recipients:
        report.log.warn("No email addresses found associated with project {}".format(project))
        return False

    # send the survey email to each recipient
    sent = send_survey(report,
                       project,
                       recipients,
                       sender = kw.get("sender"),
                       smtphost=kw.get("smtphost"),
                       smtpport=kw.get("smtpport"),
                       dryrun=report.pargs.dry_run)

    # update the project udf to indicate that we have sent out the survey
    if sent:
        report.log.info("Survey sent to recipients {} successfully".format(",".join(recipients)))
        lproj.udf['Survey sent'] = datetime.datetime.now().date()
        # The udf is only stored in LIMS for a real run
        if not report.pargs.dry_run:
            lproj.put()
    else:
        report.log.warn("Sending survey to recipients {} failed".format(",".join(recipients)))
    return sent
示例12: project_status_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def project_status_note(project_name=None, username=None, password=None, url=None,
use_ps_map=True, use_bc_map=False, check_consistency=False,
ordered_million_reads=None, uppnex_id=None, customer_reference=None,
exclude_sample_ids={}, project_alias=None, sample_aliases={},
projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
include_all_samples=False, **kw):
"""Make a project status note. Used keywords:
:param project_name: project name
:param user: db user name
:param password: db password
:param url: db url
:param use_ps_map: use project summary mapping
:param use_bc_map: use project to barcode name mapping
:param check_consistency: check consistency between mappings
:param ordered_million_reads: number of ordered reads in millions
:param uppnex_id: the uppnex id
:param customer_reference: customer project name
:param exclude_sample_ids: exclude some sample ids from project note
:param project_alias: project alias name
:param sample_aliases: sample alias names
:param projectdb: project db name
:param samplesdb: samples db name
:param flowcelldb: flowcells db name
:param include_all_samples: include all samples in report
"""
# parameters
parameters = {
"project_name" : project_name,
"finished" : "Not finished, or cannot yet assess if finished.",
}
# mapping project_summary to parameter keys
ps_to_parameter = {"scilife_name":"scilife_name", "customer_name":"customer_name", "project_name":"project_name"}
# mapping project sample to table
table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']
output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
# Connect and run
s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
# Set report paragraphs
paragraphs = project_note_paragraphs()
headers = project_note_headers()
# Set local param variable
param = parameters
# Get project summary from project database
sample_aliases = _literal_eval_option(sample_aliases, default={})
prj_summary = p_con.get_entry(project_name)
if not prj_summary:
LOG.warn("No such project '{}'".format(project_name))
return
LOG.debug("Working on project '{}'.".format(project_name))
# Get sample run list and loop samples to make mapping sample -> {sampleruns}
sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
samples = {}
for s in sample_run_list:
prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
if prj_sample:
sample_name = prj_sample['project_sample'].get("scilife_name", None)
s_d = {s["name"] : {'sample':sample_name, 'id':s["_id"]}}
samples.update(s_d)
else:
if s["barcode_name"] in sample_aliases:
s_d = {sample_aliases[s["barcode_name"]] : {'sample':sample_aliases[s["barcode_name"]], 'id':s["_id"]}}
samples.update(s_d)
else:
s_d = {s["name"]:{'sample':s["name"], 'id':s["_id"], 'barcode_name':s["barcode_name"]}}
LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
# Convert to mapping from desired sample name to list of aliases
# Less important for the moment; one solution is to update the
# Google docs summary table to use the P names
sample_dict = prj_summary['samples']
param.update({key:prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
# Override database values if options passed at command line
if uppnex_id:
param["uppnex_project_id"] = uppnex_id
if customer_reference:
param["customer_reference"] = customer_reference
# Process options
ordered_million_reads = _literal_eval_option(ordered_million_reads)
exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
## Start collecting the data
sample_table = []
samples_excluded = []
all_passed = True
last_library_preps = p_con.get_latest_library_prep(project_name)
last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
for k,v in samples.items():
#.........這裏部分代碼省略.........
示例13: sample_status_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
                       ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
                       project_alias=None, projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                       phix=None, **kw):
    """Make a sample status note.

    Only the first part of this function is visible here; the tail of the
    per-sample loop and the final return are elided in the listing.

    Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id; must satisfy ``_assert_flowcell_format``
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
        (parsed with ``_literal_eval_option``)
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param bc_count: parsed with ``_literal_eval_option``; consumed by the
        elided part of the function
    :param project_alias: project alias name; forwarded to
        ``_set_sample_run_list``. Defaults to an empty list.
    :param projectdb: name of the project summary database
    :param samplesdb: name of the sample run metrics database
    :param flowcelldb: name of the flowcell run metrics database
    :param phix: phix error rate; when truthy it overrides the value read
        from the flowcell database

    :returns: on the visible early exits, a dict with ``'stdout'``,
        ``'stderr'`` and ``'debug'`` ``StringIO`` streams
    """
    # Avoid a shared mutable default; callers passing nothing still get a list.
    if project_alias is None:
        project_alias = []

    # Cutoffs
    cutoffs = {
        "phix_err_cutoff": 2.0,
        "qv_cutoff": 30,
    }
    # Template of per-sample parameters; copied into every sample's dict below
    parameters = {
        "project_name": None,
        "start_date": None,
        "FC_id": None,
        "scilifelab_name": None,
        "rounded_read_count": None,
        "phix_error_rate": None,
        "avg_quality_score": None,
        "success": None,
        "run_mode": None,
    }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {
        "project_name": "sample_prj",
        "FC_id": "flowcell",
        "scilifelab_name": "barcode_name",
        "start_date": "date",
        "rounded_read_count": "bc_count",
    }

    LOG.debug("got parameters {}".format(parameters))
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    if not _assert_flowcell_format(flowcell):
        LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9]+XX\")".format(flowcell) )
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)

    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()

    # Get project
    project = p_con.get_entry(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data

    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
    if not sample_run_list:
        LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
        return output_data

    # Set options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)

    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter(x.get("barcode_name") for x in sample_run_list)

    # Loop samples and collect information
    s_param_out = []
    for s in sample_run_list:
        s_param = {}
        LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
        s_param.update(parameters)
        s_param.update({key: s[srm_key] for key, srm_key in srm_to_parameter.items()})
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument; `instrument` is a lookup defined outside this block.
        try:
            s_param.update(instrument[fc_con.get_instrument(str(fc))])
        except Exception:
            # Best effort: fall back to the default instrument entry, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
            s_param.update(instrument['default'])
        # Get run mode
        s_param["run_mode"] = fc_con.get_run_mode(str(fc))
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        if phix:
            # A phix value given by the caller takes precedence over the database value
            s_param["phix_error_rate"] = _get_phix_error_rate(s["lane"], phix)
        s_param['avg_quality_score'] = calc_avg_qv(s)
        if not s_param['avg_quality_score']:
            LOG.warn("Calculation of average quality failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        # Compare phix error and qv to cutoffs
        # ......... (part of the code is omitted here) .........
示例14: TestDbConnection
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
class TestDbConnection(unittest.TestCase):
    """Tests for the statusdb connection classes against the ``*-test`` databases."""

    def setUp(self):
        """Set credentials, example identifiers and a project summary connection."""
        self.user = "user"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {
            "sample": "1_120924_AC003CCCXX_TGACCA",
            "flowcell": "AC003CCCXX",
            "project": "J.Doe_00_01",
        }
        self.p_con = ProjectSummaryConnection(
            dbname="projects-test", username=self.user, password=self.pw, url=self.url
        )

    def _sample_con(self):
        """Return a fresh connection to the ``samples-test`` database."""
        return SampleRunMetricsConnection(
            dbname="samples-test", username=self.user, password=self.pw, url=self.url
        )

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_con()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_con()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 4)
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 4)
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 2)
        samples = sample_con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 3)
        samples = sample_con.get_samples(sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(
            set(sample_names),
            {"1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG", "1_121015_BB002BBBXX_TGACCA"},
        )

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        project_name = self.examples["project"]
        # Temporarily add a "B" prep to sample P001_102 in the project document.
        prj = self.p_con.get_entry(project_name)
        prj["samples"]["P001_102"]["library_prep"]["B"] = {"sample_run_metrics": {"2_120924_AC003CCCXX_TTGGAA": None}}
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=project_name)
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn("2_120924_AC003CCCXX_ACAGTG", srm)
            # Make sure B prep in list
            self.assertIn("2_120924_AC003CCCXX_TTGGAA", srm)
        finally:
            # Reset data even when an assertion fails, so the shared test
            # database is not left modified for the other tests.
            prj = self.p_con.get_entry(project_name)
            del prj["samples"]["P001_102"]["library_prep"]["B"]
            self.p_con.save(prj)
示例15: data_delivery_note
# 需要導入模塊: from scilifelab.db.statusdb import ProjectSummaryConnection [as 別名]
# 或者: from scilifelab.db.statusdb.ProjectSummaryConnection import get_entry [as 別名]
def data_delivery_note(**kw):
    """Create an easily parseable information file with information about the data delivery.

    Writes one row per delivered file to ``<project>[_<flowcell>]_data_delivery.csv``
    in the current working directory and a Texttable rendering of the same
    rows to the returned ``stdout`` stream.

    Used keywords:

    :param project_name: project name; used to look up samples and to name the csv file
    :param flowcell: optional flowcell id used to restrict the samples

    All keywords are also forwarded unchanged to ``ProjectSummaryConnection``
    and ``SampleRunMetricsConnection``.

    :returns: dict with ``'stdout'``, ``'stderr'`` and ``'debug'`` ``StringIO`` streams
    :raises AssertionError: if either connection object is falsy
    """
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}

    project_name = kw.get('project_name', None)
    flowcell = kw.get('flowcell', None)
    # Only mention the flowcell when one was actually given.
    flowcell_msg = ' and flowcell {}'.format(flowcell) if flowcell else ''
    LOG.debug("Generating data delivery note for project {}{}.".format(project_name, flowcell_msg))

    # Get a connection to the project and sample databases.
    # Explicit raises (same exception type as the former asserts) so the
    # checks are not stripped when running with -O.
    p_con = ProjectSummaryConnection(**kw)
    if not p_con:
        raise AssertionError("Could not connect to project database")
    s_con = SampleRunMetricsConnection(**kw)
    if not s_con:
        raise AssertionError("Could not connect to sample database")

    # Get the entry for the project and samples from the database
    LOG.debug("Fetching samples from sample database")
    samples = s_con.get_samples(sample_prj=project_name, fc_id=flowcell)
    LOG.debug("Got {} samples from database".format(len(samples)))

    # Get the customer sample names from the project database
    LOG.debug("Fetching samples from project database")
    # get_entry may return a falsy value for an unknown project; fall back to
    # an empty mapping so every sample simply gets 'N/A' as submitted id.
    project_samples = p_con.get_entry(project_name, "samples") or {}
    customer_names = {sample_name: sample.get('customer_name', 'N/A')
                      for sample_name, sample in project_samples.items()}

    data = [['SciLifeLab ID', 'Submitted ID', 'Flowcell', 'Lane', 'Barcode', 'Read', 'Path', 'MD5', 'Size (bytes)', 'Timestamp']]
    for sample in samples:
        sname = sample.get('project_sample_name', 'N/A')
        cname = customer_names.get(sname, 'N/A')
        fc = sample.get('flowcell', 'N/A')
        lane = sample.get('lane', 'N/A')
        barcode = sample.get('sequence', 'N/A')

        if 'raw_data_delivery' not in sample:
            # Nothing delivered for this sample run: emit a placeholder row.
            data.append([sname, cname, '', '', '', '', '', '', '', ''])
            continue

        delivery = sample['raw_data_delivery']
        tstamp = delivery.get('timestamp', 'N/A')
        for read, file_info in delivery.get('files', {}).items():
            data.append([sname,
                         cname,
                         fc,
                         lane,
                         barcode,
                         read,
                         file_info.get('path', 'N/A'),
                         file_info.get('md5', 'N/A'),
                         file_info.get('size_in_bytes', 'N/A'),
                         tstamp])

    # Write the data to a csv file
    outfile = "{}{}_data_delivery.csv".format(project_name, '_{}'.format(flowcell) if flowcell else '')
    LOG.debug("Writing delivery data to {}".format(outfile))
    with open(outfile, "w") as outh:
        csv.writer(outh).writerows(data)

    # Write Texttable formatted output to stdout
    tt = texttable.Texttable(180)
    tt.add_rows(data)
    output_data['stdout'].write(tt.draw())
    return output_data