当前位置: 首页>>代码示例>>Python>>正文


Python SampleRunMetricsConnection.get_samples方法代码示例

本文整理汇总了Python中scilifelab.db.statusdb.SampleRunMetricsConnection.get_samples方法的典型用法代码示例。如果您正苦于以下问题:Python SampleRunMetricsConnection.get_samples方法的具体用法?Python SampleRunMetricsConnection.get_samples怎么用?Python SampleRunMetricsConnection.get_samples使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scilifelab.db.statusdb.SampleRunMetricsConnection的用法示例。


在下文中一共展示了SampleRunMetricsConnection.get_samples方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_4_get_samples

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
 def test_4_get_samples(self):
     """Test getting samples given flowcell and sample_prj.

     Queries the sample database twice, first by flowcell only and then by
     flowcell and project, and prints the number of samples returned each
     time. No assertions are made, so the test only fails if the connection
     or one of the queries raises.
     """
     sample_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
     samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
     # Parenthesised single-argument print behaves identically on Python 2
     # (print statement) and Python 3 (print function).
     print("Number of samples before subsetting: " + str(len(samples)))
     samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
     print("Number of samples after subsetting: " + str(len(samples)))
开发者ID:hussius,项目名称:scilifelab,代码行数:9,代码来源:test_db.py

示例2: fastq_screen

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def fastq_screen(project_name=None, flowcell=None,
                 username=None, password=None, url=None, dbname="samples", **kw):
    """Perform application specific qc on a project.

    For every sample run returned for the project/flowcell, the run's
    ``barcode_name`` is written to the ``stdout`` buffer. If the run has a
    non-empty ``fastq_scr`` entry, a header line and one formatted line per
    item of that entry follow.

    :param project_name: project name
    :param flowcell: flowcell identifier
    :param username: database username
    :param password: database password
    :param url: database url
    :param dbname: samples database name
    :param kw: additional keyword arguments; accepted but not used here

    :returns: dict with ``stdout`` and ``stderr`` StringIO buffers; only
        ``stdout`` is written to by this function
    """
    LOG.debug("Running fastq screen summary on project {}, flowcell {}".format(project_name, flowcell))
    output_data = {'stdout':StringIO(), 'stderr':StringIO()}
    s_con = SampleRunMetricsConnection(dbname=dbname, username=username, password=password, url=url)
    samples = s_con.get_samples(fc_id=flowcell, sample_prj=project_name)
    for s in samples:
        LOG.debug("Checking fastq_screen data for sample {}, id {}, project {}".format(s.get("name", None), s.get("_id", None), s.get("sample_prj", None)))
        fqscreen_data = s.get("fastq_scr", {})
        output_data["stdout"].write(s["barcode_name"] + "\n")
        if not fqscreen_data:
            continue
        # items() is available on both Python 2 and Python 3 mappings.
        rows = list(fqscreen_data.items())
        # Column titles come from the first entry only.
        # NOTE(review): presumes every entry has the same keys in the same
        # order -- confirm against the fastq_screen document layout.
        header = list(rows[0][1].keys())
        output_data["stdout"].write("\t\t" + "".join("{:>27}".format(x) for x in header) + "\n")
        for label, row in rows:
            output_data["stdout"].write("{:>12}\t{}\n".format(label, "".join("{:>27}".format(x) for x in row.values())))
    return output_data
开发者ID:Galithil,项目名称:scilifelab,代码行数:28,代码来源:qc.py

示例3: test_2_make_note

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
    def test_2_make_note(self):
        """Make a note subset by example flowcell and project.

        For every sample run mapped by ``map_srm_to_name`` for the example
        project and flowcell, a parameter dict is assembled from the shared
        defaults, the sample run document, the flowcell metrics and the
        project document, and then passed to ``make_note`` with a
        ``<barcode_name>.pdf`` target.
        """
        s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
        fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
        p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
        paragraphs = sample_note_paragraphs()
        headers = sample_note_headers()
        # The result of this query is replaced by map_srm_to_name() below;
        # the call is kept so that get_samples is still exercised.
        samples = s_con.get_samples(self.examples["flowcell"], self.examples["project"])
        project = p_con.get_entry(self.examples["project"])
        samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
        for k, v in samples.items():
            # Work on a copy: updating the shared defaults in place would leak
            # one sample's values (and its "N/A" substitutions) into the next.
            s_param = dict(parameters)
            s = s_con.get_entry(k)
            s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
            fc = "{}_{}".format(s["date"], s["flowcell"])
            s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
            s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
            # Convert the raw read count to millions with one decimal.
            s_param['rounded_read_count'] = round(float(s_param['rounded_read_count'])/1e6,1) if s_param['rounded_read_count'] else None

            if project:
                # Only dereference the project document once it is known to exist.
                s_param['customer_name'] = project['samples'][v["sample"]].get('customer_name', None)
                s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
                s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
                s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
            else:
                # Becomes "N/A" in the substitution below.
                s_param['customer_name'] = None
            s_param['success'] = sequencing_success(s_param, cutoffs)
            # Replace every missing value with a printable placeholder.
            s_param.update({name: "N/A" for name in s_param.keys() if s_param[name] is None})
            make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)

示例4: test_get_samples_wrong_info

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
    def test_get_samples_wrong_info(self):
        """Check that a non-existent project combined with the example flowcell yields no samples."""
        connection = SampleRunMetricsConnection(
            dbname="samples-test",
            username=self.user,
            password=self.pw,
            url=self.url,
        )

        matched = connection.get_samples(
            sample_prj="bogusproject",
            fc_id=self.examples["flowcell"],
        )
        message = "Selecting on bogus project, subsetting on flowcell: " + str(len(matched))
        LOG.info(message)
        self.assertEqual(len(matched), 0)
开发者ID:Galithil,项目名称:scilifelab,代码行数:9,代码来源:test_db.py

示例5: test_get_samples

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
    def test_get_samples(self):
        """Check the number of samples returned for flowcell and/or project selections."""
        connection = SampleRunMetricsConnection(
            dbname="samples-test",
            username=self.user,
            password=self.pw,
            url=self.url,
        )

        def expect(label, count, **query):
            # Run one query, log how many samples came back and verify the count.
            found = connection.get_samples(**query)
            LOG.info(label + str(len(found)))
            self.assertEqual(len(found), count)

        # Flowcell first, then narrowed down by project.
        expect("Selecting on flowcell: ", 5,
               fc_id=self.examples["flowcell"])
        expect("Selecting on flowcell, subsetting on project: ", 2,
               fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])

        # Project first, then narrowed down by flowcell.
        expect("Selecting on project: ", 3,
               sample_prj=self.examples["project"])
        expect("Selecting on project, subsetting on flowcell: ", 2,
               sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
开发者ID:Galithil,项目名称:scilifelab,代码行数:19,代码来源:test_db.py

示例6: update

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
    def update(self):
        """Update 'project_id' and/or 'project_sample_name' on the sample runs of a project.

        Requires the ``sample_prj`` argument and a database url (from the
        arguments or the "db" config section). Depending on the arguments:

        * ``project_id``: set on every sample run of the project, asking for
          confirmation before overwriting an existing value.
        * ``names``: a path to a JSON file, or a Python literal, mapping
          barcode names to project sample names; applied to the matching
          sample runs, again asking before overwriting.
        * no ``names``: the project database is queried with extensive
          matching to find the project sample name for each sample run.
        """
        if not self._check_pargs(["sample_prj"]):
            return
        url = self.pargs.url or self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

        if self.pargs.project_id:
            self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
            for run in samples:
                # An existing value is only replaced after confirmation.
                if run.get("project_id", None) is not None:
                    question = "'project_id':{} for sample {}; are you sure you want to overwrite?".format(run["project_id"], run["name"])
                    if not query_yes_no(question, force=self.pargs.force):
                        continue
                run["project_id"] = self.pargs.project_id
                s_con.save(run)

        if self.pargs.names:
            self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
            # The names argument is either a JSON file or an inline literal.
            if not os.path.exists(self.pargs.names):
                names_d = ast.literal_eval(self.pargs.names)
            else:
                with open(self.pargs.names) as fh:
                    names_d = json.load(fh)

            def barcode_of(run):
                return run["barcode_name"]

            # groupby only merges adjacent items, hence the sort on the same key.
            ordered = sorted(samples, key=barcode_of)
            groups = {barcode: list(members) for barcode, members in itertools.groupby(ordered, key=barcode_of)}

            for barcode_name in names_d:
                for run in groups.get(barcode_name, None) or []:
                    if run.get("project_sample_name", None) is not None:
                        question = "'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(run["project_sample_name"], run["name"])
                        if not query_yes_no(question, force=self.pargs.force):
                            continue
                    run["project_sample_name"] = names_d[barcode_name]
                    s_con.save(run)
        else:
            self.app.log.info("Trying to use extensive matching...")
            p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
            # A project alias, when given, takes precedence over the sample_prj name.
            project_name = self.pargs.project_alias if self.pargs.project_alias else self.pargs.sample_prj
            for run in samples:
                match = p_con.get_project_sample(project_name, run["barcode_name"], extensive_matching=True)
                if not match:
                    continue
                self.app.log.info("using mapping '{} : {}'...".format(run["barcode_name"], match["sample_name"]))
                run["project_sample_name"] = match["sample_name"]
                s_con.save(run)
开发者ID:emmser,项目名称:scilifelab,代码行数:54,代码来源:ext_qc.py

示例7: data_delivery_note

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
def data_delivery_note(**kw):
    """Create an easily parseable information file with information about the data delivery

    Collects one row per delivered file for every sample run of the project
    (optionally restricted to one flowcell), writes the rows to
    ``<project_name>[_<flowcell>]_data_delivery.csv`` in the current working
    directory and renders the same rows as a text table.

    :param kw: keyword arguments; ``project_name`` and ``flowcell`` are read
        here, and the complete dict is also passed on to both database
        connection constructors

    :returns: dict with ``stdout``, ``stderr`` and ``debug`` StringIO
        buffers; only ``stdout`` (the text table) is written to here
    """
    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}

    project_name = kw.get('project_name',None)
    flowcell = kw.get('flowcell',None)
    # The flowcell suffix is only added when a flowcell was actually given.
    LOG.debug("Generating data delivery note for project {}{}.".format(project_name,' and flowcell {}'.format(flowcell) if flowcell else ''))

    # Get a connection to the project and sample databases
    p_con = ProjectSummaryConnection(**kw)
    assert p_con, "Could not connect to project database"
    s_con = SampleRunMetricsConnection(**kw)
    assert s_con, "Could not connect to sample database"

    # Get the entry for the project and samples from the database
    LOG.debug("Fetching samples from sample database")
    samples = s_con.get_samples(sample_prj=project_name, fc_id=flowcell)
    LOG.debug("Got {} samples from database".format(len(samples)))

    # Get the customer sample names from the project database
    LOG.debug("Fetching samples from project database")
    project_samples = p_con.get_entry(project_name, "samples")
    customer_names = {sample_name:sample.get('customer_name','N/A') for sample_name, sample in project_samples.items()}

    # First row is the column header
    data = [['SciLifeLab ID','Submitted ID','Flowcell','Lane','Barcode','Read','Path','MD5','Size (bytes)','Timestamp']]
    for sample in samples:
        sname = sample.get('project_sample_name','N/A')
        cname = customer_names.get(sname,'N/A')
        fc = sample.get('flowcell','N/A')
        lane = sample.get('lane','N/A')
        barcode = sample.get('sequence','N/A')
        if 'raw_data_delivery' not in sample:
            # Nothing delivered for this run: emit a row with the names only
            data.append([sname,cname,'','','','','','','',''])
            continue
        delivery = sample['raw_data_delivery']
        tstamp = delivery.get('timestamp','N/A')
        # One row per delivered file, keyed by read
        for read, file_info in delivery.get('files',{}).items():
            data.append([sname,
                         cname,
                         fc,
                         lane,
                         barcode,
                         read,
                         file_info.get('path','N/A'),
                         file_info.get('md5','N/A'),
                         file_info.get('size_in_bytes','N/A'),
                         tstamp,])

    # Write the data to a csv file
    outfile = "{}{}_data_delivery.csv".format(project_name,'_{}'.format(flowcell) if flowcell else '')
    LOG.debug("Writing delivery data to {}".format(outfile))
    with open(outfile,"w") as outh:
        csvw = csv.writer(outh)
        csvw.writerows(data)

    # Write Texttable formatted output to stdout
    tt = texttable.Texttable(180)
    tt.add_rows(data)
    output_data['stdout'].write(tt.draw())

    return output_data
开发者ID:guillermo-carrasco,项目名称:scilifelab,代码行数:65,代码来源:delivery_notes.py

示例8: raw_data

# 需要导入模块: from scilifelab.db.statusdb import SampleRunMetricsConnection [as 别名]
# 或者: from scilifelab.db.statusdb.SampleRunMetricsConnection import get_samples [as 别名]
    def raw_data(self):
        if not self._check_pargs(["project"]):
            return

        # if necessary, reformat flowcell identifier
        if self.pargs.flowcell:
            self.pargs.flowcell = self.pargs.flowcell.split("_")[-1]

        # get the uid and gid to use for destination files
        uid = os.getuid()
        gid = os.getgid()
        if self.pargs.group is not None and len(self.pargs.group) > 0:
            gid = grp.getgrnam(group).gr_gid

        self.log.debug("Connecting to project database")
        p_con = ProjectSummaryConnection(**vars(self.pargs))
        assert p_con, "Could not get connection to project databse"
        self.log.debug("Connecting to samples database")
        s_con = SampleRunMetricsConnection(**vars(self.pargs))
        assert s_con, "Could not get connection to samples databse"

        # Fetch the Uppnex project to deliver to
        if not self.pargs.uppmax_project:
            self.pargs.uppmax_project = p_con.get_entry(self.pargs.project, "uppnex_id")
            if not self.pargs.uppmax_project:
                self.log.error("Uppmax project was not specified and could not be fetched from project database")
                return

        # Extract the list of samples and runs associated with the project and sort them
        samples = sorted(s_con.get_samples(fc_id=self.pargs.flowcell, sample_prj=self.pargs.project), key=lambda k: (k.get('project_sample_name','NA'), k.get('flowcell','NA'), k.get('lane','NA')))

        # Setup paths and verify parameters
        self._meta.production_root = self.app.config.get("production", "root")
        self._meta.root_path = self._meta.production_root
        proj_base_dir = os.path.join(self._meta.root_path, self.pargs.project)
        assert os.path.exists(self._meta.production_root), "No such directory {}; check your production config".format(self._meta.production_root)
        assert os.path.exists(proj_base_dir), "No project {} in production path {}".format(self.pargs.project,self._meta.root_path)

        try:
            self._meta.uppnex_project_root = self.app.config.get("deliver", "uppnex_project_root")
        except Exception as e:
            self.log.warn("{}, will use '/proj' as uppnext_project_root".format(e))
            self._meta.uppnex_project_root = '/proj'

        try:
            self._meta.uppnex_delivery_dir = self.app.config.get("deliver", "uppnex_project_delivery_path")
        except Exception as e:
            self.log.warn("{}, will use 'INBOX' as uppnext_project_delivery_path".format(e))
            self._meta.uppnex_delivery_dir = 'INBOX'

        destination_root = os.path.join(self._meta.uppnex_project_root,self.pargs.uppmax_project,self._meta.uppnex_delivery_dir)
        assert os.path.exists(destination_root), "Delivery destination folder {} does not exist".format(destination_root)
        destination_root = os.path.join(destination_root,self.pargs.project)

        # If interactively select, build a list of samples to skip
        if self.pargs.interactive:
            to_process = []
            for sample in samples:
                sname = sample.get("project_sample_name")
                index = sample.get("sequence")
                fcid = sample.get("flowcell")
                lane = sample.get("lane")
                date = sample.get("date")
                self.log.info("Sample: {}, Barcode: {}, Flowcell: {}, Lane: {}, Started on: {}".format(sname,
                                                                                                           index,
                                                                                                           fcid,
                                                                                                           lane,
                                                                                                           date))
                if query_yes_no("Deliver sample?", default="no"):
                    to_process.append(sample)
            samples = to_process

        # Find uncompressed fastq
        uncompressed = self._find_uncompressed_fastq_files(proj_base_dir,samples)
        if len(uncompressed) > 0:
            self.log.warn("The following samples have uncompressed *.fastq files that cannot be delivered: {}".format(",".join(uncompressed)))
            if not query_yes_no("Continue anyway?", default="no"):
                return

        self.log.info("Will deliver data for {} samples from project {} to {}".format(len(samples),self.pargs.project,destination_root))
        if not query_yes_no("Continue?"):
            return

        # Get the list of files to transfer and the destination
        self.log.debug("Gathering list of files to copy")
        to_copy = self.get_file_copy_list(proj_base_dir,
                                          destination_root,
                                          samples)

        # Make sure that transfer will be with rsync
        if not self.pargs.rsync:
            self.log.warn("Files must be transferred using rsync")
            if not query_yes_no("Do you wish to continue delivering using rsync?", default="yes"):
                return
            self.pargs.rsync = True

        # Process each sample run
        for id, files in to_copy.items():
            # get the sample database object
            [sample] = [s for s in samples if s.get('_id') == id]
#.........这里部分代码省略.........
开发者ID:guillermo-carrasco,项目名称:scilifelab,代码行数:103,代码来源:deliver.py


注:本文中的scilifelab.db.statusdb.SampleRunMetricsConnection.get_samples方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。