当前位置: 首页>>代码示例>>Python>>正文


Python feather.read_dataframe方法代码示例

本文整理汇总了Python中feather.read_dataframe方法的典型用法代码示例。如果您正苦于以下问题:Python feather.read_dataframe方法的具体用法?Python feather.read_dataframe怎么用?Python feather.read_dataframe使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在feather的用法示例。


在下文中一共展示了feather.read_dataframe方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load(filename):
    """Load an object from a pickle or feather file.

    Loads data saved with save() (or just normally saved with pickle).
    The format is chosen from the (case-insensitive) file extension:

    * ``.feather`` / ``.fthr``       -- read with ``feather.read_dataframe``
    * ``.feather.gz`` / ``.fthr.gz`` -- rejected, compressed feather is not supported
    * any other ``.gz``              -- gzip-compressed pickle
    * anything else                  -- plain pickle

    Keyword arguments:
    filename -- String with the relative filename of the pickle/feather to load.

    Returns the unpickled object, or whatever ``feather.read_dataframe`` returns.

    Raises NotImplementedError for gzip-compressed feather files.
    """
    fl = filename.lower()
    is_gzip = fl.endswith('.gz')

    if is_gzip and fl.endswith(('.feather.gz', '.fthr.gz')):
        raise NotImplementedError('Compressed feather is not supported.')

    if fl.endswith(('.feather', '.fthr')):
        # Imported lazily so feather is only required when actually used.
        import feather
        return feather.read_dataframe(filename)

    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on files from a trusted source.
    opener = gzip.open if is_gzip else open
    # The context manager guarantees the handle is closed even if
    # unpickling raises.
    with opener(filename, 'rb') as fp:
        return pickle.load(fp)
开发者ID:mxbi,项目名称:mlcrate,代码行数:23,代码来源:__init__.py

示例2: _load_table

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def _load_table(self, src, fmt, dst=None, post=None, *args, **kwargs):
        """ Load a data frame from table formats: csv, hdf5, feather

        Parameters
        ----------
        src
            Source handed to the reader for the chosen format.
        fmt : str
            One of 'csv', 'feather' or 'hdf5'.
        dst
            Forwarded unchanged to ``post`` and to ``self.load``.
        post : callable, optional
            If callable, it is invoked as
            ``post(data, src=src, fmt=fmt, dst=dst, **kwargs)`` and its
            return value replaces the data.
        *args, **kwargs
            Forwarded to the underlying reader (``kwargs`` also to ``post``).

        Raises
        ------
        ValueError
            If ``fmt`` is not one of the supported formats.
        """
        if fmt == 'csv':
            _data = pd.read_csv(src, *args, **kwargs)
        elif fmt == 'feather':
            _data = feather.read_dataframe(src, *args, **kwargs)
        elif fmt == 'hdf5':
            _data = pd.read_hdf(src, *args, **kwargs)
        else:
            # Fail here with a clear message instead of hitting an unbound
            # `_data` a few lines below.
            raise ValueError('Unknown format %s' % fmt)

        # Put into this batch only part of it (defined by index)
        if isinstance(_data, pd.DataFrame):
            _data = _data.loc[self.indices]
        elif isinstance(_data, dd.DataFrame):
            # dask.DataFrame.loc supports advanced indexing only with lists
            _data = _data.loc[list(self.indices)].compute()

        if callable(post):
            _data = post(_data, src=src, fmt=fmt, dst=dst, **kwargs)

        self.load(src=_data, dst=dst)
开发者ID:analysiscenter,项目名称:batchflow,代码行数:22,代码来源:batch.py

示例3: __init__

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
        """Initialise the loader from the ``<path>.meta`` feather file.

        The meta file must exist; its ``ohlcv`` column and the first two
        entries of its ``adjustments`` column are handed to the parent
        constructor together with ``path``.

        :param path: Base path of the data file; ``'.meta'`` is appended to
            locate the metadata.
        :param keep_in_memory: Stored on the instance as ``keep_in_memory``.
        :raises FileNotFoundError: if ``<path>.meta`` does not exist (the
            absolute path is carried in the exception).
        """
        meta_path = path + '.meta'
        if not os.path.exists(meta_path):
            raise FileNotFoundError(os.path.abspath(meta_path))

        # Original author's note: pandas 0.22 has the fastest MultiIndex,
        # so under that version the standalone feather package is used.
        on_pandas_022 = pd.__version__.startswith('0.22')
        if on_pandas_022:
            import feather
            meta = feather.read_dataframe(meta_path)
        else:
            meta = pd.read_feather(meta_path)

        ohlcv = meta.ohlcv.values
        adjustments = meta.adjustments.values[:2]
        # A leading None means "no adjustments" for the parent class.
        adjustments = None if adjustments[0] is None else adjustments

        super().__init__(path, ohlcv, adjustments)
        self.keep_in_memory = keep_in_memory
        self._cache = None
开发者ID:Heerozh,项目名称:spectre,代码行数:20,代码来源:arrow.py

示例4: read_feather

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_feather(path, nthreads=1):
    """
    Read a feather-format file and return the object stored in it.

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        Only forwarded to feather when the installed feather version is
        at least 0.4.0.

        .. versionadded:: 0.21.0

    Returns
    -------
    type of object stored in file

    """
    feather = _try_import()
    path = _stringify_path(path)

    # Releases older than 0.4.0 are called without `nthreads`
    # (presumably because they do not accept that keyword).
    is_legacy = LooseVersion(feather.__version__) < LooseVersion('0.4.0')
    reader_kwargs = {} if is_legacy else {'nthreads': nthreads}
    return feather.read_dataframe(path, **reader_kwargs)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:29,代码来源:feather_format.py

示例5: load_full_dataset

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_full_dataset(self):
        """Read the whole dataset named by ``self.params.dataset_key``.

        The file type is chosen from the path suffix: ``.csv`` is read with
        ``pd.read_csv(..., index_col=False)`` and ``.feather`` with
        ``feather.read_dataframe``.

        Returns:
            DataFrame with the contents of the dataset file.

        Raises:
            Exception: if the file does not exist, has an unrecognized
                suffix, is a feather file while feather support is
                unavailable, failed to load, or is empty.
        """
        path = self.params.dataset_key

        if not os.path.exists(path):
            raise Exception("Dataset file %s does not exist" % path)

        if path.endswith('.csv'):
            frame = pd.read_csv(path, index_col=False)
        elif path.endswith('.feather'):
            if not feather_supported:
                raise Exception("feather package not installed in current environment")
            frame = feather.read_dataframe(path)
        else:
            raise Exception('Dataset %s is not a recognized format (csv or feather)' % path)

        # Sanity checks on what the reader handed back.
        if frame is None:
            raise Exception("Failed to load dataset %s" % path)
        if frame.empty:
            raise Exception("Dataset %s is empty" % path)
        return frame

    # **************************************************************************************** 
开发者ID:ATOMconsortium,项目名称:AMPL,代码行数:31,代码来源:model_datasets.py

示例6: _load

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def _load(self) -> pd.DataFrame:
        """Return the data frame stored at ``self._path``, indexed by (date, asset).

        A previously cached frame (``self._cache``) is returned as-is. After
        a fresh read the frame is stored in the cache only when
        ``self.keep_in_memory`` is truthy.
        """
        cached = self._cache
        if cached is not None:
            return cached

        # Under pandas 0.22 the standalone feather package does the reading;
        # otherwise pandas' own feather reader is used.
        if pd.__version__.startswith('0.22'):
            import feather
            frame = feather.read_dataframe(self._path)
        else:
            frame = pd.read_feather(self._path)
        frame = frame.set_index(['date', 'asset'])

        if self.keep_in_memory:
            self._cache = frame
        return frame
开发者ID:Heerozh,项目名称:spectre,代码行数:16,代码来源:arrow.py

示例7: read_feather

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_feather(path, nthreads=1):
    """
    Load a feather-format object from the file path

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        Only passed on when the installed feather version is >= 0.4.0.

        .. versionadded:: 0.21.0

    Returns
    -------
    type of object stored in file

    """

    feather = _try_import()
    path = _stringify_path(path)

    # Parse the installed version before comparing, so that both operands
    # are LooseVersion objects rather than a raw str against a LooseVersion.
    if LooseVersion(feather.__version__) < LooseVersion('0.4.0'):
        return feather.read_dataframe(path)

    return feather.read_dataframe(path, nthreads=nthreads)
开发者ID:nccgroup,项目名称:Splunking-Crime,代码行数:29,代码来源:feather_format.py

示例8: load_pair_data

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_pair_data(path):
    """Load the nominal p-value of every pair, indexed by ``pair_id``.

    Accepts a tab-separated ``.txt.gz`` file or a feather ``.ft`` file and
    reads only the ``pair_id`` and ``pval_nominal`` columns.

    Raises ValueError for any other file suffix.
    """
    wanted = ['pair_id', 'pval_nominal']

    if path.endswith('.ft'):
        return feather.read_dataframe(path, columns=wanted).set_index('pair_id')

    if path.endswith('.txt.gz'):
        column_types = {'pair_id': str, 'pval_nominal': np.float64}
        return pd.read_csv(path, sep='\t', usecols=wanted, index_col=0, dtype=column_types)

    raise ValueError('Input format not recognized.')
开发者ID:broadinstitute,项目名称:gtex-pipeline,代码行数:11,代码来源:metasoft_postprocess.py

示例9: read_gct

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_gct(gct_file, sample_ids=None, dtype=None):
    """
    Load GCT as DataFrame. The first two columns must be 'Name' and 'Description'.

    Supported inputs are chosen by suffix: '.gct' / '.gct.gz' (tab-separated
    text with two leading lines that are skipped), '.parquet', and '.ft'
    (feather).

    Parameters
    ----------
    gct_file : str
        Path to the input file.
    sample_ids : iterable of str, optional
        Sample columns to load. 'Name' is always added. If None, all
        columns are loaded.
    dtype : optional
        Data type applied to the sample columns of a GCT text file; 'Name'
        and 'Description' are always read as str. Ignored for parquet and
        feather inputs.

    Returns
    -------
    DataFrame whose index is named 'gene_id', without the 'Description' column.

    Raises
    ------
    ValueError
        If the file suffix is not one of the supported formats.
    """
    if sample_ids is not None:
        sample_ids = ['Name'] + list(sample_ids)

    if gct_file.endswith(('.gct.gz', '.gct')):
        read_kwargs = dict(sep='\t', skiprows=2, usecols=sample_ids, index_col=0)
        if dtype is not None:
            # Only gzip-compressed files go through gzip.open; a plain
            # '.gct' file must be opened as ordinary text.
            opener = gzip.open if gct_file.endswith('.gz') else open
            with opener(gct_file, 'rt') as gct:
                gct.readline()
                gct.readline()
                # Split on tabs, matching the separator used by read_csv.
                header = gct.readline().strip().split('\t')
            # The header is used only to build the dtype map; the caller's
            # column selection (usecols) is left untouched.
            keep = set(header if sample_ids is None else sample_ids)
            dtypes = {col: dtype for col in header[2:] if col in keep}
            dtypes['Name'] = str
            if 'Description' in keep:
                dtypes['Description'] = str
            read_kwargs['dtype'] = dtypes
        df = pd.read_csv(gct_file, **read_kwargs)
    elif gct_file.endswith('.parquet'):
        df = pd.read_parquet(gct_file, columns=sample_ids)
    elif gct_file.endswith('.ft'):  # feather format
        df = feather.read_dataframe(gct_file, columns=sample_ids)
        df = df.set_index('Name')
    else:
        raise ValueError('Unsupported input format.')

    df.index.name = 'gene_id'
    if 'Description' in df.columns:
        df = df.drop('Description', axis=1)
    return df
开发者ID:broadinstitute,项目名称:gtex-pipeline,代码行数:32,代码来源:eqtl_prepare_expression.py

示例10: load_pair_data

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_pair_data(path):
    """Load slope and slope standard error of every pair, indexed by ``pair_id``.

    Accepts a tab-separated ``.txt.gz`` file or a feather ``.ft`` file and
    reads only the ``pair_id``, ``slope`` and ``slope_se`` columns.

    Raises ValueError for any other file suffix.
    """
    wanted = ['pair_id', 'slope', 'slope_se']

    if path.endswith('.ft'):
        return feather.read_dataframe(path, columns=wanted).set_index('pair_id')

    if path.endswith('.txt.gz'):
        column_types = {'pair_id': str, 'slope': np.float32, 'slope_se': np.float32}
        return pd.read_csv(path, sep='\t', usecols=wanted, index_col=0, dtype=column_types)

    raise ValueError('Input format not recognized.')
开发者ID:broadinstitute,项目名称:gtex-pipeline,代码行数:11,代码来源:metasoft_prepare_input.py

示例11: load_df

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_df(path):
    """Load a data frame from ``path``.

    Anything that ``file_format`` does not report as 'feather' goes to the
    default CSV loader. Feather files are read through ``featherpmm`` when
    it is available alongside ``feather`` (returning the ``df`` attribute
    of what it reads), and through ``feather`` alone otherwise.

    Raises Exception if the file is a feather file and the feather module
    is not available.
    """
    if file_format(path) != 'feather':
        return default_csv_loader(path)

    if not feather:
        raise Exception('The Python feather module is not installed.\n'
                        'Use:\n    pip install feather-format\n'
                        'to add capability.\n')

    if featherpmm:
        return featherpmm.read_dataframe(path).df
    return feather.read_dataframe(path)
开发者ID:tdda,项目名称:tdda,代码行数:14,代码来源:constraints.py

示例12: fun

# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def fun(nGPUs=1, nFolds=1, nLambdas=100, nAlphas=8, validFraction=0.2, verbose=0,family="elasticnet", print_all_errors=False, tolerance=.001):
    """Fit a GLM on ./data/bnp.feather via ``run_glm`` and check its logloss.

    The last column of the data frame is used as the target and all other
    columns as features (both converted to C-ordered float32 arrays). All
    keyword arguments are forwarded to ``run_glm``; the name of the calling
    function is passed along as ``name``.

    Raises AssertionError when a training logloss exceeds its threshold.
    Thresholds are only checked when ``validFraction > 0`` or when
    ``validFraction == 0`` and ``nFolds > 0``.
    """
    # Name of the *calling* function (one frame up), used to label the run.
    name = sys._getframe(1).f_code.co_name

    t = time.time()

    print("cwd: %s" % (os.getcwd()))
    sys.stdout.flush()

    print("Reading Data")
    df = feather.read_dataframe("./data/bnp.feather")
    print(df.shape)
    # All columns but the last are features; the last column is the target.
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    print("Y")
    print(y)

    # Start of the fit itself, excluding the time spent loading data.
    t1 = time.time()

    logloss_train, logloss_test = run_glm(X, y, nGPUs=nGPUs, nlambda=nLambdas, nfolds=nFolds, nalpha=nAlphas,
                validFraction=validFraction, verbose=verbose,family=family,print_all_errors=print_all_errors,tolerance=tolerance, name=name)

    # check logloss
    print(logloss_train[0, 0])
    print(logloss_train[0, 1])
    print(logloss_train[0, 2])
    print(logloss_test[0, 2])
    sys.stdout.flush()

    #Always checking the first 3 alphas with specific logloss scores (.48,.44)
    if validFraction==0.0 and nFolds > 0:
        assert logloss_train[0, 0] < .49
        assert logloss_train[0, 1] < .49
        assert logloss_train[1, 0] < .52
        assert logloss_train[1, 1] < .52
        assert logloss_train[2, 0] < .49
        assert logloss_train[2, 1] < .49
    if validFraction > 0.0:
        assert logloss_train[0, 0] < .49
        assert logloss_train[0, 1] < .49
        assert logloss_train[0, 2] < .49
        assert logloss_train[1, 0] < .50
        assert logloss_train[1, 1] < .51
        assert logloss_train[1, 2] < .51
        assert logloss_train[2, 0] < .49
        assert logloss_train[2, 1] < .49
        assert logloss_train[2, 2] < .49

    sys.stdout.flush()

    print('\n Total execution time:%d' % (time.time() - t1))

    print("TEST PASSED")
    sys.stdout.flush()

    print("Time taken: {}".format(time.time() - t))

    print("DONE.")
    sys.stdout.flush()
开发者ID:h2oai,项目名称:h2o4gpu,代码行数:61,代码来源:test_glm_paribas.py


注:本文中的feather.read_dataframe方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。