

Python dask.compute Method Code Examples

This article collects typical usage examples of the Python method dask.compute. If you are wondering what dask.compute does, how to call it, and what real-world usage looks like, the curated examples below should help. You can also explore further usage examples from the dask module.


The following presents 15 code examples of dask.compute, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
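
Before diving in, here is a minimal, self-contained sketch (function names are illustrative, not from any of the projects below) of the delayed/compute pattern that most of the examples rely on: dask.compute takes one or more lazy objects, executes them, and always returns a tuple of results.

import dask
from dask import delayed

@delayed
def add(x, y):
    # Nothing runs here yet; calling add() only builds a task graph.
    return x + y

total = add(1, 2)
doubled = add(total, total)

# dask.compute executes the graph and returns a tuple of results.
(result,) = dask.compute(doubled)
print(result)  # 6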

Example 1: detect_outliers

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def detect_outliers(request):
    """
    Detect outliers end point
    """
    # Fetch the raw value first: calling int() on a missing parameter
    # would raise a TypeError before the None check below could run.
    dataset_id = request.GET.get("dataset_id")

    if dataset_id is None:
        return JsonResponse({"status": "failure", "message": "Dataset id is not provided"})

    dataset = Dataset.objects.get(pk=int(dataset_id))
    file_path = dataset.path
    delete_features = json.loads(dataset.deleted_features)

    # Create a detection experiment and start outlier detection
    process = Process.objects.get(name='Detection')
    process_status = ProcessStatus.objects.get(name='Running')
    experiment = Experiment(dataset=dataset, process=process, process_status=process_status)
    experiment.save()
    results = delayed(detect_all)(os.path.join(settings.MEDIA_ROOT, file_path), experiment.id, settings.RESULTS_ROOT,
                                  delete_features)
    dask.compute(results)

    return JsonResponse(
        {'status': 'success', 'message': 'Detection started successfully', 'experiment_id': experiment.id}) 
Developer: MateLabs | Project: AutoOut | Lines: 26 | Source: views.py
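
A brief aside on the call above: dask.compute(results) on a single delayed object is equivalent to results.compute(), except that dask.compute always wraps the outcome in a tuple. Note also that compute blocks until the work finishes, so this view returns only after detection completes. A sketch with an illustrative stand-in for detect_all:

import dask
from dask import delayed

@delayed
def detect(path):  # illustrative stand-in for detect_all
    return "done"

task = detect("/tmp/data.csv")

# .compute() returns the bare value; dask.compute wraps it in a 1-tuple.
assert task.compute() == "done"
assert dask.compute(task) == ("done",)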

Example 2: _load

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def _load(self, files_in, files_out, urlpath, meta=True):
        """Download a set of files"""
        import dask
        out = []
        outnames = []
        for file_in, file_out in zip(files_in, files_out):
            cache_path = file_out.path
            outnames.append(cache_path)

            # If `_munge_path` did not find a match we want to avoid
            # writing to the urlpath.
            if cache_path == urlpath:
                continue

            if not os.path.isfile(cache_path):
                logger.debug("Caching file: {}".format(file_in.path))
                logger.debug("Original path: {}".format(urlpath))
                logger.debug("Cached at: {}".format(cache_path))
                if meta:
                    self._log_metadata(urlpath, file_in.path, cache_path)
                ddown = dask.delayed(_download)
                out.append(ddown(file_in, file_out, self.blocksize,
                                 self.output))
        dask.compute(*out)
        return outnames 
Developer: intake | Project: intake | Lines: 27 | Source: cache.py
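
The star-unpacking in dask.compute(*out) above hands each delayed download to compute as a separate argument, so the results come back as a tuple in the same order, with the tasks eligible to run in parallel. A minimal sketch with an illustrative stand-in for _download:

import dask

def download(src):  # illustrative stand-in for _download
    return src.upper()

tasks = [dask.delayed(download)(u) for u in ["a", "b", "c"]]

# Each task becomes its own argument; results form a tuple in order.
results = dask.compute(*tasks)
print(results)  # ('A', 'B', 'C')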

Example 3: _data_to_source

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def _data_to_source(b, path, encoder=None, storage_options=None, **kwargs):
        import dask.bag as db
        import posixpath
        from fsspec import open_files
        import dask
        import pickle
        import json
        from intake.source.textfiles import TextFilesSource
        encoder = {None: str, 'str': str, 'json': json.dumps,
                   'pickle': pickle.dumps}.get(encoder, encoder)

        if not hasattr(b, 'to_textfiles'):
            try:
                b = db.from_sequence(b, npartitions=1)
            except TypeError:
                raise NotImplementedError

        files = open_files(posixpath.join(path, 'part.*'), mode='wt',
                           num=b.npartitions, **(storage_options or {}))
        dwrite = dask.delayed(write_file)
        out = [dwrite(part, f, encoder)
               for part, f in zip(b.to_delayed(), files)]
        dask.compute(out)
        s = TextFilesSource(posixpath.join(path, 'part.*'), storage_options=storage_options)
        return s 
Developer: intake | Project: intake | Lines: 27 | Source: semistructured.py
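
Unlike the previous example, dask.compute(out) here receives the list itself rather than unpacking it. compute traverses nested lists and dicts of dask objects, so this also works; it returns a one-element tuple holding the same structure. A small sketch:

import dask

lazy = [dask.delayed(len)(s) for s in ["aa", "bbb"]]

# Passing the list (no *) still computes every element; the result is
# a 1-tuple containing a plain list of the same shape.
(lengths,) = dask.compute(lazy)
print(lengths)  # [2, 3]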

Example 4: persist

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def persist(self, columns=None):
        """
        Return a CatalogSource, where the selected columns are
        computed and persist in memory.
        """

        import dask.array as da
        if columns is None:
            columns = self.columns

        r = {}
        for key in columns:
            r[key] = self[key]

        r = da.compute(r)[0]  # compute returns a 1-tuple; unwrap it

        from nbodykit.source.catalog.array import ArrayCatalog
        c = ArrayCatalog(r, comm=self.comm)
        c.attrs.update(self.attrs)

        return c 
Developer: bccp | Project: nbodykit | Lines: 23 | Source: catalog.py
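
The da.compute(r)[0] line above is the same tuple-unwrapping idiom: compute walks the dict of dask arrays and returns a 1-tuple holding a dict of materialized numpy arrays. A minimal sketch:

import dask.array as da

r = {"x": da.ones(4, chunks=2), "y": da.arange(4, chunks=2)}

# compute traverses the dict; [0] unwraps the surrounding 1-tuple.
materialized = da.compute(r)[0]
print(materialized["x"])  # [1. 1. 1. 1.]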

Example 5: calculate_stats

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def calculate_stats(cls, df, target_var):
        """Calculates descriptive stats of the dataframe required for cleaning.

        Arguments:
                df : dask dataframe, The dataframe at hand
                target_var : string, Dependent variable for the analysis

        Returns:
                mean : dask series, mean of each column
                median : dask series, median of each column
                dict(zip(categorical_cols, mode)) : dict, Dictionary containing
                        categorical column as keys and their modes as values
                std : dask series, standard deviation of each column
        """
        categorical_columns = [
            col for col in df.columns if col != target_var and df[col].dtype == 'object']
        mean_op = df.mean()
        std_op = df.std()
        median_op = df.quantile(0.5)
        mode_op = [df[col].value_counts().idxmax()
                   for col in categorical_columns]
        mean, median, mode, std = dask.compute(
            mean_op, median_op, mode_op, std_op)
        return mean, median, dict(zip(categorical_columns, mode)), std 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 26 | Source: input_pipeline_dask.py
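
Bundling the four aggregations into a single dask.compute call, as above, lets dask share the underlying data pass between them instead of re-scanning the dataframe once per statistic. A sketch with a toy dataframe (note that dask's quantile is approximate by default):

import pandas as pd
import dask
import dask.dataframe as dd

ddf = dd.from_pandas(
    pd.DataFrame({"a": [1.0, 2.0, 3.0]}), npartitions=2)

# One compute call evaluates all three graphs together, sharing work.
mean, std, median = dask.compute(
    ddf["a"].mean(), ddf["a"].std(), ddf["a"].quantile(0.5))
print(mean, std, median)  # e.g. 2.0 1.0 2.0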

Example 6: impute

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def impute(cls, df, target_var, median, mode):
        """Imputing missing values using median for continuous columns and mode
        for categorical columns.

        Arguments:
                df : dask dataframe, The dataframe at hand
                target_var : string, Dependent variable for the analysis
                median : list, median of all columns in data
                mode : list, mode of all columns in data
        Returns:
                df : dask dataframe, Dataframe without missing values
        """
        missing_stats = df.isna().sum().compute()
        cols = [col for col in df.columns if col != target_var]
        for col in cols:
            if missing_stats[col] > 0 and df[col].dtype == 'object':
                df[col] = df[col].fillna(mode[col])
            elif missing_stats[col] > 0:
                df[col] = df[col].fillna(median[col])
        return df 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 22 | Source: input_pipeline_dask.py

Example 7: kmeans_input_fn

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def kmeans_input_fn(self, name, csv_path=None):
        """Input function for kmeans

        Arguments:
                name : string, Name of the data [Train or Eval]
                csv_path : The path of the csv on any storage system

        Returns:
                A batch of features
        """
        pattern = self._get_pattern(name, csv_path)
        tf.logging.info('The Pattern of files is : %s', pattern)
        df = dd.read_csv(pattern)
        vectors = dask.compute(df.values)
        return tf.train.limit_epochs(
            tf.convert_to_tensor(vectors[0], dtype=tf.float32), num_epochs=1) 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 18 | Source: input_pipeline_dask.py
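
.values on a dask dataframe is itself lazy (a dask array), which is why it still needs the dask.compute step above; the vectors[0] indexing afterwards unwraps compute's result tuple. A small sketch:

import pandas as pd
import dask
import dask.dataframe as dd

ddf = dd.from_pandas(
    pd.DataFrame({"a": [1, 2], "b": [3, 4]}), npartitions=1)

# ddf.values is a lazy dask array; compute returns a 1-tuple.
vectors = dask.compute(ddf.values)
print(vectors[0])  # [[1 3]
                   #  [2 4]]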

Example 8: test_clean_data

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def test_clean_data(self):
        """
        Testing function clean_csv
        """
        copyfile(CSV_PATH, '/tmp/data.csv')
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        data, mean, std_dev, csv_defaults = stats.clean_data(
            df=ddf,
            task_type=TASK_TYPE,
            target_var=TARGET_VAR,
            name=NAME
        )

        self_computed_mean = dask.compute(ddf.mean())
        self.assertListEqual(list(mean), list(self_computed_mean[0]))
        self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
        self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
        self.assertIsInstance(data, dask.dataframe.core.DataFrame)
        self.assertIsInstance(mean, pd.core.series.Series)
        self.assertIsInstance(std_dev, pd.core.series.Series)
        self.assertIsInstance(csv_defaults, list) 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 25 | Source: tests_input_dask.py

Example 9: test_calculate_stats

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def test_calculate_stats(self):
        """
        Testing function calculate_stats
        """
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        mean, median, mode_dict, std_dev = stats.calculate_stats(
            df=ddf,
            target_var=TARGET_VAR
        )
        self_computed_mean = dask.compute(ddf.mean())
        self.assertListEqual(list(mean), list(self_computed_mean[0]))
        self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
        self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
        self_computed_median = dask.compute(ddf.quantile(0.5))
        self.assertListEqual(list(median), list(self_computed_median[0]))
        self.assertIsInstance(mean, pd.core.series.Series)
        self.assertIsInstance(std_dev, pd.core.series.Series)
        self.assertIsInstance(median, pd.core.series.Series)
        self.assertIsInstance(mode_dict, dict) 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 23 | Source: tests_input_dask.py

Example 10: test_impute

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def test_impute(self):
        """
        Testing function impute
        """
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        _, median, _, _ = stats.calculate_stats(
            df=ddf,
            target_var=TARGET_VAR
        )
        data = stats.impute(
            df=ddf,
            target_var=TARGET_VAR,
            median=median,
            mode=MODE
        )
        imputed_data = dask.compute(data.isnull().sum())
        for col in ddf.columns:
            missing_count = imputed_data[0][col]
            self.assertEqual(missing_count, 0)
        self.assertIsInstance(data, dask.dataframe.core.DataFrame) 
Developer: GoogleCloudPlatform | Project: professional-services | Lines: 25 | Source: tests_input_dask.py

Example 11: calculate_centroids_old

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def calculate_centroids_old(cnmds, window, grp_dim=['animal', 'session']):
    print("computing centroids")
    cnt_list = []
    for anm, cur_anm in cnmds.groupby('animal'):
        for ss, cur_ss in cur_anm.groupby('session'):
            # cnt = centroids(cur_ss['A_shifted'], window.sel(animal=anm))
            cnt = da.delayed(centroids)(
                cur_ss['A_shifted'], window.sel(animal=anm))
            cnt_list.append(cnt)
    with ProgressBar():
        cnt_list, = da.compute(cnt_list)
    cnts_ds = pd.concat(cnt_list, ignore_index=True)
    cnts_ds.height = cnts_ds.height.astype(float)
    cnts_ds.width = cnts_ds.width.astype(float)
    cnts_ds.unit_id = cnts_ds.unit_id.astype(int)
    cnts_ds.animal = cnts_ds.animal.astype(str)
    cnts_ds.session = cnts_ds.session.astype(str)
    cnts_ds.session_id = cnts_ds.session_id.astype(str)
    return cnts_ds 
Developer: DeniseCaiLab | Project: minian | Lines: 21 | Source: cross_registration.py
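
The `cnt_list, = da.compute(cnt_list)` line shows two idioms at once: trailing-comma unpacking of compute's 1-tuple, and wrapping compute in a ProgressBar context manager to get a textual progress bar while the graph runs. A minimal sketch (assuming ProgressBar comes from dask.diagnostics, as is usual with the local scheduler):

import dask
from dask.diagnostics import ProgressBar

tasks = [dask.delayed(sum)(range(n)) for n in (10, 100, 1000)]

# ProgressBar prints progress while the local scheduler executes.
with ProgressBar():
    totals, = dask.compute(tasks)
print(totals)  # [45, 4950, 499500]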

Example 12: centroids_distance_old

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def centroids_distance_old(cents,
                           A,
                           window,
                           shift,
                           hamming,
                           corr,
                           tile=(50, 50)):
    sessions = cents['session'].unique()
    dim_h = (np.min(cents['height']), np.max(cents['height']))
    dim_w = (np.min(cents['width']), np.max(cents['width']))
    dist_list = []
    for ssA, ssB in itt.combinations(sessions, 2):
        # dist = _calc_cent_dist(ssA, ssB, cents, cnmds, window, tile, dim_h, dim_w)
        dist = da.delayed(_calc_cent_dist)(ssA, ssB, cents, A, window,
                                           tile, dim_h, dim_w, shift, hamming,
                                           corr)
        dist_list.append(dist)
    with ProgressBar():
        dist_list, = da.compute(dist_list)
    dists = pd.concat(dist_list, ignore_index=True)
    return dists 
Developer: DeniseCaiLab | Project: minian | Lines: 23 | Source: cross_registration.py

Example 13: get_noise_welch

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def get_noise_welch(varr,
                    noise_range=(0.25, 0.5),
                    noise_method='logmexp',
                    compute=True):
    print("estimating noise")
    sn = xr.apply_ufunc(
        noise_welch,
        varr.chunk(dict(frame=-1)),
        input_core_dims=[['frame']],
        dask='parallelized',
        vectorize=True,
        kwargs=dict(noise_range=noise_range, noise_method=noise_method),
        output_dtypes=[varr.dtype])
    if compute:
        sn = sn.compute()
    return sn 
Developer: DeniseCaiLab | Project: minian | Lines: 18 | Source: cnmf.py
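
For a single dask-backed xarray object, calling .compute() on it (as above) is equivalent to passing it to dask.compute and unwrapping the tuple. A hedged sketch of the same apply_ufunc pattern, substituting np.std for the project-specific noise_welch:

import numpy as np
import xarray as xr

varr = xr.DataArray(
    np.random.rand(3, 100), dims=["trace", "frame"]).chunk({"frame": -1})

# dask='parallelized' keeps the result lazy until .compute() is called.
sn = xr.apply_ufunc(
    np.std, varr,
    input_core_dims=[["frame"]],
    kwargs={"axis": -1},
    dask="parallelized",
    output_dtypes=[varr.dtype])
print(sn.compute())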

Example 14: run

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def run(self):
        self._validate_setup()
        write_locks = {}
        for times in self._times:
            filename = self._get_output_filename(times)
            self.setup_netcdf_output(filename, times)
            write_locks[filename] = combine_locks([NETCDFC_LOCK, get_write_lock(filename)])
        self.logger.info('Starting {} chunks...'.format(len(self.slices)))

        delayed_objs = [wrap_run_slice(self.params, write_locks, dslice)
                        for dslice in self.slices]
        persisted = dask.persist(delayed_objs, num_workers=self.params['num_workers'])
        self.progress_bar(persisted)
        dask.compute(persisted)
        self.logger.info('Cleaning up...')
        try:
            self._client.cluster.close()
            self._client.close()
            if self.params['verbose'] == logging.DEBUG:
                print()
                print('closed dask cluster/client')
        except Exception:
            pass 
Developer: UW-Hydro | Project: MetSim | Lines: 25 | Source: metsim.py
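
dask.persist above differs from dask.compute: with the distributed scheduler it kicks off execution in the background and returns lazy handles to the (eventually) finished results, so the later dask.compute only gathers values instead of rebuilding the graph; with the local schedulers, persist simply evaluates eagerly. A small sketch:

import dask

tasks = [dask.delayed(pow)(i, 2) for i in range(4)]

# persist triggers execution and keeps results in memory; compute on
# the persisted objects then just collects the finished values.
persisted = dask.persist(*tasks)
results = dask.compute(*persisted)
print(results)  # (0, 1, 4, 9)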

Example 15: benchmark

# Required module: import dask [as alias]
# Alternatively: from dask import compute [as alias]
def benchmark(datasets=None, datasets_path=None, distributed=True, timeout=None):
    if datasets is None:
        if datasets_path is None:
            datasets = get_available_demos().name
        else:
            datasets = os.listdir(datasets_path)

    if distributed:
        import dask

        global score_dataset
        score_dataset = dask.delayed(score_dataset)

    scores = list()
    for dataset in datasets:
        scores.append(score_dataset(dataset, datasets_path, timeout))

    if distributed:
        scores = dask.compute(*scores)

    return pd.DataFrame(scores) 
Developer: sdv-dev | Project: SDV | Lines: 23 | Source: benchmark.py
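
The rebinding trick above, swapping score_dataset for dask.delayed(score_dataset) when distributed=True, lets the same loop body build lazy tasks instead of running eagerly, with one dask.compute at the end. A condensed sketch of the pattern (the score function is illustrative):

import dask

def score(x):
    return x * x

distributed = True
if distributed:
    score = dask.delayed(score)  # same name now builds lazy tasks

scores = [score(i) for i in range(3)]
if distributed:
    scores = dask.compute(*scores)
print(list(scores))  # [0, 1, 4]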


Note: the dask.compute examples in this article were curated by 纯净天空 from open-source projects hosted on GitHub, MSDocs, and similar platforms. The snippets remain the copyright of their original authors; consult each project's License before using or redistributing the code. Do not republish without permission.