本文整理汇总了 Python 中 scilifelab.db.statusdb.ProjectSummaryConnection.get_info_source 方法的典型用法代码示例。如果您正苦于以下问题:Python ProjectSummaryConnection.get_info_source 方法的具体用法?Python ProjectSummaryConnection.get_info_source 怎么用?Python ProjectSummaryConnection.get_info_source 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 scilifelab.db.statusdb.ProjectSummaryConnection 的用法示例。
在下文中一共展示了 ProjectSummaryConnection.get_info_source 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _project_status_note_table
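# Required import: from scilifelab.db.statusdb import ProjectSummaryConnection [as alias]
# Or (as listed by the example site; note that ProjectSummaryConnection is a class, so get_info_source is called on an instance rather than imported from this path): from scilifelab.db.statusdb.ProjectSummaryConnection import get_info_source [as alias]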
def _project_status_note_table(project_name=None, username=None, password=None, url=None,
use_ps_map=True, use_bc_map=False, check_consistency=False,
ordered_million_reads=None, uppnex_id=None, customer_reference=None,
exclude_sample_ids={}, project_alias=None, sample_aliases={},
projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
include_all_samples=False, param={}, **kw):
# Collect the data for a project status note table.
#
# NOTE(review): this excerpt has lost its leading indentation and its tail is
# elided (see the last line), so the lines are kept at column 0 exactly as
# found and the comments below describe only what the visible statements do.
#
# Visible behaviour:
#   * opens connections to the sample, flowcell and project databases;
#   * returns None (after a warning) when p_con.get_entry() yields nothing;
#   * builds `samples`, keyed by sample run name (or by alias), with values
#     {'sample': <project sample name>, 'id': <sample run "_id">};
#   * fills `param` with project-level values, then applies the overrides
#     given as arguments;
#   * loops over `samples`, skipping runs that are not the latest library
#     prep and runs whose name contains "Unexpected".
#
# Arguments used in the visible code:
#   project_name                    key for every project-level lookup
#   username, password, url         passed to all three database connections
#   projectdb, samplesdb, flowcelldb  database names for those connections
#   project_alias                   forwarded to _set_sample_run_list
#   sample_aliases                  indexed by a run's "barcode_name"; the value
#                                   is used as the sample name
#   ordered_million_reads,
#   exclude_sample_ids              passed through _literal_eval_option
#   uppnex_id, customer_reference   when truthy, override the stored values
#   include_all_samples             when truthy, the latest-library-prep
#                                   filter is skipped
#   param                           dict that is updated in place
# use_ps_map, use_bc_map, check_consistency and **kw are not referenced in the
# visible portion.
#
# NOTE(review): `param={}` is a mutable default and is mutated below via
# param.update(...) and item assignment, so values written during one call
# are still present on the next call that omits `param`. `exclude_sample_ids`
# and `sample_aliases` are also mutable defaults but are rebound, not mutated,
# in the visible code.
# mapping project_summary to parameter keys
ps_to_parameter = {"scilife_name":"scilife_name", "customer_name":"customer_name", "project_name":"project_name"}
# mapping project sample to table
# (table_keys and output_data are not used in the visible portion)
table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered']
output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
# Connect and run
# (fc_con is not used in the visible portion)
s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
#Get the information source for this project
# (`source` is not read again in the visible portion)
source = p_con.get_info_source(project_name)
# Get project summary from project database
sample_aliases = _literal_eval_option(sample_aliases, default={})
prj_summary = p_con.get_entry(project_name)
# Unknown project: warn and return None.
if not prj_summary:
LOG.warn("No such project '{}'".format(project_name))
return
LOG.debug("Working on project '{}'.".format(project_name))
# Determine if project is finished by getting all samples sequenced date
# Falls back to False when the nested keys are missing or a level is not subscriptable.
try:
all_samples_sequenced = prj_summary['project_summary']['all_samples_sequenced']
except (TypeError,KeyError):
all_samples_sequenced = False
# Get sample run list and loop samples to make mapping sample -> {sampleruns}
sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
samples = {}
for s in sample_run_list:
# Look up the project sample that this sample run refers to.
prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
if prj_sample:
# Mapped run: key by the run's "name", record the project sample's "scilife_name".
sample_name = prj_sample['project_sample'].get("scilife_name", None)
s_d = {s["name"] : {'sample':sample_name, 'id':s["_id"]}}
samples.update(s_d)
else:
# No project sample found: fall back to the alias registered for the run's barcode_name.
if s["barcode_name"] in sample_aliases:
s_d = {sample_aliases[s["barcode_name"]] : {'sample':sample_aliases[s["barcode_name"]], 'id':s["_id"]}}
samples.update(s_d)
else:
# No alias either: s_d is built only for the warning; there is no
# samples.update() call in this branch.
s_d = {s["name"]:{'sample':s["name"], 'id':s["_id"], 'barcode_name':s["barcode_name"]}}
LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
# Convert to mapping from desired sample name to list of aliases
# Less important for the moment; one solution is to update the
# Google docs summary table to use the P names
# Direct indexing: raises KeyError if the project summary has no 'samples' key.
sample_dict = prj_summary['samples']
# Copy the project-level fields listed in ps_to_parameter into param (None when absent).
param.update({key:prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
# Keep an "ordered_amount" already present in param; note that the default
# expression p_con.get_ordered_amount(...) is evaluated even when the key exists.
param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name, samples=sample_dict))
# Fill customer_reference only when param has no truthy value for it:
# prefer details.customer_project_reference, else the top-level customer_reference.
if not param.get('customer_reference') :
try:
param['customer_reference'] = prj_summary['details']['customer_project_reference']
except (TypeError,KeyError):
param['customer_reference'] = prj_summary.get('customer_reference')
# NOTE(review): whether the next line sits inside or after the `if` above
# cannot be told from this excerpt (indentation lost) — confirm against the original.
param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
# Override database values if options passed at command line
if uppnex_id:
param["uppnex_project_id"] = uppnex_id
if customer_reference:
param["customer_reference"] = customer_reference
# Process options
ordered_million_reads = _literal_eval_option(ordered_million_reads)
exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
## Start collecting the data
sample_table = []
samples_excluded = []
last_library_preps = p_con.get_latest_library_prep(project_name)
# Flatten the per-sample entries into one list. Iterating each value `l`
# yields its keys if it is a dict (it is used with .values() below) —
# presumably sample run names, since `k` is tested against this list; TODO confirm.
last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
for k,v in samples.items():
LOG.debug("project sample '{}' maps to '{}'".format(k, v))
# Unless include_all_samples is set, drop runs that are not listed as the
# latest library prep; samples without any library prep info are kept.
if not include_all_samples:
if v['sample'] not in last_library_preps.keys():
LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
else:
if k not in last_library_preps_srm:
LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], ",".join(list(set(last_library_preps[v['sample']].values()))), v['sample']))
continue
# NOTE(review): which `if` this `else: pass` belongs to cannot be told from
# the excerpt (indentation lost); it does nothing either way.
else:
pass
# Skip sample runs whose name contains "Unexpected".
if re.search("Unexpected", k):
continue
barcode_seq = s_con.get_entry(k, "sequence")
# Exclude sample id?
if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
samples_excluded.append(v['sample'])
#......... part of the code is omitted here .........
示例2: sample_status_note
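# Required import: from scilifelab.db.statusdb import ProjectSummaryConnection [as alias]
# Or (as listed by the example site; note that ProjectSummaryConnection is a class, so get_info_source is called on an instance rather than imported from this path): from scilifelab.db.statusdb.ProjectSummaryConnection import get_info_source [as alias]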
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
project_alias=[], projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
phix=None, is_paired=True, **kw):
"""Make a sample status note. Used keywords:
:param project_name: project name
:param flowcell: flowcell id
:param username: db username
:param password: db password
:param url: db url
:param ordered_million_reads: number of ordered reads in millions
:param uppnex_id: the uppnex id
:param customer_reference: customer project name
:param project_alias: project alias name
:param phix: phix error rate
:param is_paired: True if run is paired-end, False for single-end
"""
# NOTE(review): this excerpt has lost its leading indentation and its tail is
# elided (see the last line), so the lines are kept at column 0 exactly as
# found and the comments describe only what the visible statements do.
#
# Additional arguments not covered by the docstring above:
#   bc_count                          passed through _literal_eval_option
#   projectdb, samplesdb, flowcelldb  database names for the three connections
#   kw["instrument_config"]           path (user-expanded) handed to
#                                     _parse_instrument_config; "" when absent
#
# Returns `output_data` (a dict with 'stdout', 'stderr' and 'debug' StringIO
# buffers) early when the flowcell format check fails, when the project is
# not found, or when no sample runs are found. The normal return value is
# in the elided part.
#
# NOTE(review): `project_alias=[]` is a mutable default; it is only forwarded
# to _set_sample_run_list in the visible code. The `is_paired` argument is not
# read in the visible portion — the `parameters` dict hard-codes
# "is_paired": True.
# Cutoffs
cutoffs = {
"phix_err_cutoff" : 2.0,
"qv_cutoff" : 30,
}
# Index the parsed instrument entries by their 'instrument_id'.
instrument = _parse_instrument_config(os.path.expanduser(kw.get("instrument_config","")))
instrument_dict = {i['instrument_id']: i for i in instrument}
# parameters
# Template of per-sample report fields; copied into each sample's dict in the loop below.
parameters = {
"project_name" : None,
"start_date" : None,
"FC_id" : None,
"scilifelab_name" : None,
"rounded_read_count" : None,
"phix_error_rate" : None,
"avg_quality_score" : None,
"pct_q30_bases" : None,
"success" : None,
"run_mode":None,
"is_paired":True
}
# key mapping from sample_run_metrics to parameter keys
# (parameter key -> field name in the sample run document)
srm_to_parameter = {"project_name":"sample_prj", "FC_id":"flowcell",
"scilifelab_name":"barcode_name", "start_date":"date",
"rounded_read_count":"bc_count", "lane": "lane"}
# At this point `parameters` still holds only the template defaults defined above.
LOG.debug("got parameters {}".format(parameters))
output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
# Bail out with empty buffers when the flowcell id does not pass the format check.
# NOTE(review): the message below contains `\-` in a non-raw string literal,
# which newer Python versions report as an invalid escape sequence.
if not _assert_flowcell_format(flowcell):
LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")".format(flowcell) )
return output_data
output_data = _update_sample_output_data(output_data, cutoffs)
# Connect and run
s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
# Set up paragraphs
paragraphs = sample_note_paragraphs()
headers = sample_note_headers()
# Get project
project = p_con.get_entry(project_name)
# The info source is fetched before `project` is checked; `source` is not
# read again in the visible portion.
source = p_con.get_info_source(project_name)
if not project:
LOG.warn("No such project '{}'".format(project_name))
return output_data
# Set samples list
sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
if len(sample_run_list) == 0:
LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
return output_data
# Set options
ordered_million_reads = _literal_eval_option(ordered_million_reads)
bc_count = _literal_eval_option(bc_count)
phix = _literal_eval_option(phix)
# Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
sample_count = Counter([x.get("barcode_name") for x in sample_run_list])
# Loop samples and collect information
s_param_out = []
# fcdoc starts as None and is compared by name against each sample's flowcell below.
fcdoc = None
for s in sample_run_list:
s_param = {}
LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
# Start from the template, then overlay the fields copied from the sample run.
s_param.update(parameters)
# Direct indexing: a sample run lacking any mapped field raises here
# (KeyError if `s` is a plain dict).
s_param.update({key:s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
# Flowcell identifier in the form "<date>_<flowcell>".
fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
# Get instrument
# NOTE(review): the bare `except:` below catches every exception, and the
# fallback itself requires an entry with instrument_id 'default' in the
# instrument config, otherwise it raises KeyError.
try:
s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
except:
LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
s_param.update(instrument_dict['default'])
# Get run mode
# True when fcdoc is still unset (or falsy) or its "name" differs from `fc`;
# the branch body is not visible in this excerpt.
if not fcdoc or fcdoc.get("name") != fc:
#......... part of the code is omitted here .........