本文整理汇总了Python中pylearn2.utils.string_utils.preprocess函数的典型用法代码示例。如果您正苦于以下问题：Python preprocess函数的具体用法？Python preprocess怎么用？Python preprocess使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了preprocess函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_preprocess
def test_preprocess():
    """
    Tests that `preprocess` fills in environment variables using various
    interfaces and raises a ValueError if a needed environment variable
    definition is missing.
    """
    # Build the names before entering the `try` so the cleanup in `finally`
    # can always refer to `keys`. Randomised suffixes keep the test from
    # touching variables that are really set in the environment.
    keys = ["PYLEARN2_" + str(uuid.uuid1())[:8] for _ in range(3)]
    strs = ["${%s}" % k for k in keys]
    try:
        os.environ[keys[0]] = keys[1]
        # Test with os.environ only.
        assert preprocess(strs[0]) == keys[1]
        # Test with provided dict only.
        assert preprocess(strs[1], environ={keys[1]: keys[2]}) == keys[2]
        # Provided overrides os.environ.
        assert preprocess(strs[0], environ={keys[0]: keys[2]}) == keys[2]
        # keys[2] is defined neither in os.environ nor in the provided dict,
        # so substitution must fail.
        try:
            preprocess(strs[2], environ={keys[1]: keys[0]})
        except ValueError:
            pass
        else:
            raise AssertionError("preprocess did not raise ValueError for "
                                 "an undefined environment variable")
    finally:
        # Only keys[0] is ever exported, but remove all of them defensively.
        for key in keys:
            os.environ.pop(key, None)
示例2: get_key
def get_key(config_file='${HOME}/.key_chain'):
    """
    Read and return the auth key stored in a config file.

    Parameters
    ----------
    config_file : str
        Path to the config file. It is passed through `preprocess` before
        being opened.

    Returns
    -------
    key : str
        Value of the `key` option in the `mashape` section.
    """
    config_file = preprocess(config_file)
    # RawConfigParser.read() silently skips files it cannot open, so open the
    # file explicitly first to keep surfacing an I/O error for a missing or
    # unreadable file. The handle is closed immediately instead of leaking.
    with open(config_file):
        pass
    config = ConfigParser.RawConfigParser()
    # The path is already expanded; do not run it through preprocess again.
    config.read(config_file)
    return config.get('mashape', 'key')
示例3: __init__
def __init__(self):
    """
    Record the current process id and resolve the remote and local dataset
    directories from their environment-variable templates.

    If either template cannot be resolved, both directories are set to the
    empty string.
    """
    self.pid = os.getpid()
    try:
        remote_dir = string_utils.preprocess("${PYLEARN2_DATA_PATH}")
        local_dir = string_utils.preprocess("${PYLEARN2_LOCAL_DATA_PATH}")
    except (ValueError, string_utils.NoDataPathError,
            string_utils.EnvironmentVariableError):
        # Local cache seems to be deactivated
        remote_dir = local_dir = ""
    self.dataset_remote_dir = remote_dir
    self.dataset_local_dir = local_dir
示例4: __init__
def __init__(self, whichset, path=None):
    """
    Open (or build on first use) the HDF5 array file for one subset and
    initialise the parent dataset from it.

    Parameters
    ----------
    whichset : str
        One of 'train', 'test', 'valid', 'final' or 'bigtrain'. 'final'
        refers to the unlabeled images from which predictions should be
        made (images_test_rev1); 'train'/'test'/'valid' come from
        images_training_rev1; 'bigtrain' is the whole unsplit
        images_training_rev1.
    path : str, optional
        Directory holding the data. Defaults to
        '${PYLEARN2_DATA_PATH}/galaxy-data/'. It is passed through
        `preprocess` before use.
    """
    assert whichset in ['train', 'test', 'valid', 'final', 'bigtrain']
    self.whichset = whichset
    # this is the final desired shape
    # the original shape is 424, 424
    self.img_shape = (100, 100, 3)
    self.target_shape = (37,)
    if path is None:
        path = '${PYLEARN2_DATA_PATH}/galaxy-data/'
    # load data
    path = preprocess(path)
    file_n = "{}_arrays.h5".format(os.path.join(path, "h5", whichset))
    if os.path.isfile(file_n):
        # just open file
        self.h5file = tables.openFile(file_n, mode='r')
    else:
        # create file and fill with data
        # NOTE(review): presumably first_time() also sets self.h5file --
        # confirm, since it is read right below.
        self.first_time(whichset, path, file_n)
    # NOTE(review): `root` and `axes` were referenced but never defined in
    # this method. `axes` is restored from the line that had been commented
    # out; `root` is assumed to be the root group of the opened HDF5 file --
    # confirm against the layout written by first_time().
    root = self.h5file.root
    axes = ('b', 0, 1, 'c')
    super(galaxy_zoo_dataset, self).__init__(X=root.images, y=root.targets,
                                             axes=axes)
示例5: __init__
def __init__(self, path, n_labels=2, start=None, stop=None, del_raw=True, x_only=False):
    """
    Load a CSV file through `CSVDataset._load_data` and initialise the
    parent dataset with an optional row window.

    Parameters
    ----------
    path : str
        Location of the data; passed through `preprocess` first.
    n_labels : int, optional
        Forwarded to the parent as `y_labels` (replaced by None when
        `x_only` is set).
    start, stop : int, optional
        When `start` is given, only rows `start:stop` are kept; a missing
        `stop` means "up to the last row".
    del_raw : bool, optional
        Stored on the instance and forwarded to `_load_data`.
    x_only : bool, optional
        If true, targets and label count are dropped.
    """
    self.del_raw = del_raw
    features, targets = CSVDataset._load_data(preprocess(path), del_raw=del_raw)

    # A NaN minimum is treated as "no usable targets".
    targets = (None if np.isnan(np.min(targets))
               else targets.astype(int).reshape(-1, 1))

    if start is not None:
        if stop is None:
            stop = features.shape[0]
        assert start >= 0
        assert start < stop
        if not (stop <= features.shape[0]):
            message = ''.join(["stop must be less than the # of examples but ",
                               "stop is ", str(stop), " and there are ",
                               str(features.shape[0]), " examples."])
            raise ValueError(message)
        window = slice(start, stop)
        features = features[window, :]
        if targets is not None:
            targets = targets[window, :]

    if x_only:
        targets, n_labels = None, None

    super(CSVDataset, self).__init__(X=features, y=targets, y_labels=n_labels)
示例6: load
def load(filepath, recurse_depth=0, retry=True):
    """
    Load the object stored in a NumPy or Matlab file.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Paths ending in ".npy"/".npz" are read
        with `np.load`; paths ending in ".mat" are read with
        `scipy.io.loadmat`, falling back to `h5py.File` when scipy reports
        that an HDF reader is required.
    recurse_depth : int
        End users should not use this argument. The path is only passed
        through `preprocess` when this is 0.
    retry : bool
        Accepted for interface compatibility.
        NOTE(review): not consulted by the code in this function -- the
        retry logic appears to be missing here; confirm.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file. Paths with any other
        extension fall through and yield None.

    ..todo
        Refactor to hide recurse_depth from end users
    """
    # Lazily-imported modules are cached in module-level globals.
    global io, hdf_reader

    # NOTE(review): joblib availability is probed but the flag is not used
    # in this function as written; kept so the probe is not lost.
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False

    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith((".npy", ".npz")):
        return np.load(filepath)

    if filepath.endswith(".mat"):
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            # scipy signals an HDF-based .mat file with this message; any
            # other NotImplementedError is propagated unchanged.
            if "HDF reader" in str(nei):
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath)
            else:
                raise
        # this code should never be reached
        assert False
示例7: __init__
def __init__(self, dataset, model, algorithm=None, save_path=None,
             save_freq=0, extensions=None, allow_overwrite=True):
    """
    Construct a Train instance.

    Parameters
    ----------
    dataset : `pylearn2.datasets.dataset.Dataset`
    model : `pylearn2.models.model.Model`
    algorithm : <Optional>
        `pylearn2.training_algorithms.training_algorithm.TrainingAlgorithm`
    save_path : <Optional> str
        Path to save (with pickle / joblib) the model. When omitted and
        `save_freq > 0`, the path is derived from the
        PYLEARN2_TRAIN_FILE_FULL_STEM (and, if set, PYLEARN2_TRAIN_PHASE)
        environment variables.
    save_freq : <Optional> int
        Frequency of saves, in epochs. A frequency of zero disables
        automatic saving altogether. A frequency of 1 saves every
        epoch. A frequency of 2 saves every other epoch, etc.
        (default=0, i.e. never save). Note: when automatic saving is
        enabled (eg save_freq > 0), the model is always saved after
        learning, even when the final epoch is not a multiple of
        `save_freq`.
    extensions : <Optional> iterable
        A collection of `TrainExtension` objects whose callbacks are
        triggered at various points in learning.
    allow_overwrite : <Optional> bool
        If `True`, will save the model to save_path even if there is already
        something there. Otherwise, will raise an error if the `save_path`
        is already occupied.
    """
    self.allow_overwrite = allow_overwrite
    self.first_save = True
    self.dataset = dataset
    self.model = model
    self.algorithm = algorithm
    if save_path is not None:
        if save_freq == 0:
            warnings.warn('save_path specified but save_freq is 0 '
                          '(never save). Is this intentional?')
        self.save_path = preprocess(save_path)
    elif save_freq > 0:
        phase_variable = 'PYLEARN2_TRAIN_PHASE'
        if phase_variable in os.environ:
            # Environment values are strings; convert before using %d,
            # which would otherwise raise TypeError.
            phase = 'phase%d' % int(os.environ[phase_variable])
            tokens = [os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM'],
                      phase, 'pkl']
        else:
            tokens = [os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM'], 'pkl']
        self.save_path = '.'.join(tokens)
    else:
        # No path given and saving disabled: define the attribute anyway so
        # later reads do not fail with AttributeError.
        self.save_path = None
    self.save_freq = save_freq
    if hasattr(self.dataset, 'yaml_src'):
        self.model.dataset_yaml_src = self.dataset.yaml_src
    else:
        warnings.warn("dataset has no yaml src, model won't know what " +
                      "data it was trained on")
    self.extensions = extensions if extensions is not None else []
    self.training_seconds = sharedX(value=0, name='training_seconds_this_epoch')
    self.total_seconds = sharedX(value=0, name='total_seconds_last_epoch')
示例8: __enter__
def __enter__(self):
    """
    Return the handle to work with.

    When the wrapped object is a string it is treated as a path: it is
    passed through `preprocess`, stored back on the instance, and opened
    with the stored mode and buffering. Any other object is used as the
    handle directly.
    """
    target = self._f
    if not isinstance(target, basestring):
        self._handle = target
        return self._handle
    self._f = preprocess(target)
    self._handle = open(self._f, self._mode, self._buffering)
    return self._handle
示例9: main
def main():
    """
    Count how often each label occurs across the ESP Game label files and
    write the 4000 most frequent ones to 'wordlist.txt', one per line,
    most frequent first.
    """
    base = preprocess('${PYLEARN2_DATA_PATH}/esp_game/ESPGame100k/labels/')
    paths = sorted(os.listdir(base))
    assert len(paths) == 100000
    words = {}
    for i, path in enumerate(paths):
        # Progress indicator.
        if i % 1000 == 0:
            print(i)
        # `base` ends with a slash, so plain concatenation yields the path.
        # The `with` block closes each of the many input files promptly.
        with open(base + path, 'r') as label_file:
            for line in label_file:
                # Strip only the line terminator, so a final line without a
                # trailing newline does not lose its last character.
                word = line.rstrip('\n')
                words[word] = words.get(word, 0) + 1
    ranked_words = sorted(words, key=lambda w: -words[w])
    with open('wordlist.txt', 'w') as out:
        out.writelines(word_ + '\n' for word_ in ranked_words[0:4000])
示例10: load
def load(filepath, recurse_depth=0):
    """
    Load the object stored in a NumPy (".npy") or Matlab (".mat") file.

    Parameters
    ----------
    filepath : str
        Path of the file to load. ".npy" paths are read with `np.load`;
        ".mat" paths are read with `scipy.io.loadmat`, falling back to
        `h5py.File` when scipy reports that an HDF reader is required.
    recurse_depth : int
        The path is only passed through `preprocess` when this is 0.

    Returns
    -------
    loaded_object : object
        The loaded object. Paths with any other extension fall through and
        yield None.
    """
    # Lazily-imported modules are cached in module-level globals.
    global io, hdf_reader

    # NOTE(review): the joblib flag is not used in this function as written;
    # the probe is kept so it is not lost.
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False

    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith('.npy'):
        return np.load(filepath)

    if filepath.endswith('.mat'):
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            # Only the "HDF reader" case is handled here; anything else is
            # propagated unchanged.
            if 'HDF reader' in str(nei):
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath)
            else:
                raise
        # this code should never be reached
        assert False
示例11: load_ndarray_label
def load_ndarray_label(name):
    """
    Load the train, valid and test label data for the dataset `name`.

    This is only available for the toy dataset ule.

    Parameters
    ----------
    name : 'ule'
        Must be 'ule'

    Returns
    -------
    train_l, valid_l, test_l : ndarray
        Label data loaded, in that order.
    """
    assert name in ['ule']
    data_root = preprocess('${PYLEARN2_DATA_PATH}')
    # Every label file shares the prefix "<data>/UTLC/filetensor/<name>_".
    prefix = os.path.join(data_root, 'UTLC', 'filetensor', name + '_')
    filenames = []
    for subset in ('trainl', 'validl', 'testl'):
        filenames.append(prefix + subset + '.tf')
    return tuple(load_filetensor(filename) for filename in filenames)
示例12: __init__
def __init__(self, dataset, model, algorithm=None, save_path=None,
             save_freq=0, extensions=None, allow_overwrite=True):
    """
    Store the training components and work out where the model is saved.

    Parameters
    ----------
    dataset : object
        Dataset to train on. If it has a `yaml_src` attribute, that value
        is copied onto the model as `dataset_yaml_src`.
    model : object
        Model to train.
    algorithm : object, optional
        Training algorithm; stored as given.
    save_path : str, optional
        Where to save the model; passed through `preprocess`. When omitted
        and `save_freq > 0`, the path is derived from the
        PYLEARN2_TRAIN_FILE_FULL_STEM (and, if set, PYLEARN2_TRAIN_PHASE)
        environment variables.
    save_freq : int, optional
        Stored as given; 0 is treated as "never save".
    extensions : iterable, optional
        Stored as given, or an empty list when omitted.
    allow_overwrite : bool, optional
        Stored as given.
    """
    self.allow_overwrite = allow_overwrite
    self.first_save = True
    self.dataset = dataset
    self.model = model
    self.algorithm = algorithm
    if save_path is not None:
        if save_freq == 0:
            warnings.warn('save_path specified but save_freq is 0 '
                          '(never save). Is this intentional?')
        self.save_path = preprocess(save_path)
    elif save_freq > 0:
        stem = os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM']
        phase_variable = 'PYLEARN2_TRAIN_PHASE'
        if phase_variable in os.environ:
            # os.environ holds strings, and '%d' rejects a string with
            # TypeError, so convert to int first.
            phase = 'phase%d' % int(os.environ[phase_variable])
            tokens = [stem, phase, 'pkl']
        else:
            tokens = [stem, 'pkl']
        self.save_path = '.'.join(tokens)
    else:
        # Saving is disabled and no path was supplied; still define the
        # attribute so reading it never raises AttributeError.
        self.save_path = None
    self.save_freq = save_freq
    if hasattr(self.dataset, 'yaml_src'):
        self.model.dataset_yaml_src = self.dataset.yaml_src
    else:
        warnings.warn("dataset has no yaml src, model won't know what " +
                      "data it was trained on")
    self.extensions = extensions if extensions is not None else []
    self.training_seconds = sharedX(value=0,
                                    name='training_seconds_this_epoch')
    self.total_seconds = sharedX(value=0, name='total_seconds_last_epoch')
示例13: Transform
def Transform():
    """Test smaller version of convolutional_network.ipynb"""
    which_experiment = "S100"
    skip.skip_if_no_data()
    yaml_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    data_dir = string_utils.preprocess("${PYLEARN2_DATA_PATH}")
    base_save_path = os.path.join(data_dir, "cifar10")
    # str.lower() replaces string.lower(), which only exists in Python 2.
    save_path = os.path.join(base_save_path,
                             "experiment_" + which_experiment.lower())
    # NOTE: backslashes in Windows filenames are not escaped here, although
    # the YAML parser will process them when it reads the value as a string.
    template_path = "{0}/experiment_base_transform.yaml".format(yaml_file_path)
    # Read the template inside a `with` block so the handle is closed.
    with open(template_path, "r") as template_file:
        yaml_template = template_file.read()
    hyper_params = {
        "batch_size": 64,
        "output_channels_h1": 64,
        "output_channels_h2": 128,
        "output_channels_h3": 600,
        "max_epochs": 100,
        "save_path": save_path,
        "base_save_path": base_save_path,
    }
    # Fill the %(name)s placeholders of the template, then build and run the
    # resulting object.
    train = yaml_parse.load(yaml_template % hyper_params)
    train.main_loop()
示例14: __init__
def __init__(self,
             path='train.csv',
             one_hot=False,
             expect_labels=True,
             expect_headers=True,
             delimiter=',',
             col_number=10):
    """
    Store the loading options, expand the path and load the data.

    Every argument is kept on the instance under the same name; `path` is
    additionally passed through `preprocess` before `_load_data` is
    called. The arrays returned by `_load_data` initialise the parent
    class.
    """
    self.view_converter = None
    self.col_number = col_number
    self.delimiter = delimiter
    self.expect_headers = expect_headers
    self.expect_labels = expect_labels
    self.one_hot = one_hot
    self.path = path
    # and go
    self.path = preprocess(self.path)
    features, targets = self._load_data()
    super(CSVModified, self).__init__(X=features, y=targets)
示例15: __init__
def __init__(self, save_dir):
    """
    Copy the training yaml file into `save_dir`, creating the directory
    when it does not exist yet.

    The source file is located through the PYLEARN2_TRAIN_DIR and
    PYLEARN2_TRAIN_BASE_NAME variables. IOError is raised when `save_dir`
    exists but is not a directory, or when it is not writable.
    """
    train_dir = preprocess('${PYLEARN2_TRAIN_DIR}')
    base_name = preprocess('${PYLEARN2_TRAIN_BASE_NAME}')
    src = os.path.join(train_dir, base_name)
    dst = os.path.join(save_dir, base_name)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    occupied_by_non_dir = (os.path.exists(save_dir)
                           and not os.path.isdir(save_dir))
    if occupied_by_non_dir:
        raise IOError("save path %s exists, not a directory" % save_dir)
    if not os.access(save_dir, os.W_OK):
        raise IOError("permission error creating %s" % dst)

    message = 'copying yaml from {} to {}'.format(src, dst)
    with log_timing(log, message):
        copyfile(src, dst)