本文整理汇总了Python中thunder.rdds.fileio.seriesloader.SeriesLoader类的典型用法代码示例。如果您正苦于以下问题:Python SeriesLoader类的具体用法?Python SeriesLoader怎么用?Python SeriesLoader使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SeriesLoader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_fromArrays
def test_fromArrays(self):
    """Round-trip a single 2d array through SeriesLoader.fromArrays.

    Checks that keys come back in column-major order, that dims is the
    reversed numpy shape, that values preserve the original order, and
    that pack() returns the transpose of the input.
    """
    arr = arange(8, dtype=dtype('int16')).reshape((2, 4))
    series = SeriesLoader(self.sc).fromArrays(arr)
    collected = series.collect()
    packed = series.pack()
    # keys are emitted in column-major (x varies fastest) order
    expectedKeys = [(0, 0), (1, 0), (2, 0), (3, 0),
                    (0, 1), (1, 1), (2, 1), (3, 1)]
    for pos, key in enumerate(expectedKeys):
        assert_equals(key, collected[pos][0])
    # dims tuple is the numpy shape reversed
    assert_equals(arr.shape[::-1], series.dims.count)
    # values come back in the array's original (row-major) order
    valuesInOrder = array([kv[1] for kv in collected], dtype=dtype('int16')).ravel()
    assert_true(array_equal(arr.ravel(), valuesInOrder))
    # packing yields the transpose of the original array
    assert_true(array_equal(arr.T, packed))
示例2: loadSeriesFromArray
def loadSeriesFromArray(self, values, index=None, npartitions=None):
    """
    Load Series data from a local array.

    Parameters
    ----------
    values : list or ndarray
        A list of 1d numpy arrays, or a single 2d numpy array.

    index : array-like, optional, default = None
        Index to set for Series object; if None will use linear indices.

    npartitions : positive int, optional, default = None
        Number of partitions for RDD; if unspecified will use
        default parallelism.
    """
    from numpy import ndarray, asarray
    from thunder.rdds.fileio.seriesloader import SeriesLoader

    loader = SeriesLoader(self._sc)

    if not npartitions:
        npartitions = self._sc.defaultParallelism

    # normalize input: a list becomes an ndarray, and a 2d ndarray is
    # split into a list of its 1d rows for fromArrays
    if isinstance(values, list):
        values = asarray(values)
    if isinstance(values, ndarray) and values.ndim > 1:
        values = list(values)

    data = loader.fromArrays(values, npartitions=npartitions)

    # BUGFIX: `if index:` raised ValueError for ndarray indexes (ambiguous
    # truth value) and silently ignored empty array-like indexes; compare
    # against None explicitly instead.
    if index is not None:
        data.index = index

    return data
示例3: test_fromMultipleArrays
def test_fromMultipleArrays(self):
    """fromArrays with two arrays: each record concatenates one value per
    input array, and pack() stacks the transposed inputs with time first.
    """
    first = arange(8, dtype=dtype('int16')).reshape((2, 4))
    second = arange(8, 16, dtype=dtype('int16')).reshape((2, 4))
    series = SeriesLoader(self.sc).fromArrays([first, second])
    collected = series.collect()
    packed = series.pack()
    # spot-check key ordering (column-major)
    for pos, key in [(0, (0, 0)), (1, (1, 0)), (3, (3, 0)), (4, (0, 1)), (7, (3, 1))]:
        assert_equals(key, collected[pos][0])
    # dims tuple is the numpy shape reversed
    assert_equals(first.shape[::-1], series.dims.count)
    # each record holds one value per input array, in input order
    stackedvals = array([kv[1] for kv in collected], dtype=dtype('int16'))
    assert_true(array_equal(first.ravel(), stackedvals[:, 0]))
    assert_true(array_equal(second.ravel(), stackedvals[:, 1]))
    # packing puts time first; each slab is the transpose of its input
    assert_true(array_equal(first.T, packed[0]))
    assert_true(array_equal(second.T, packed[1]))
示例4: _run_roundtrip_tst
def _run_roundtrip_tst(self, testCount, arrays, blockSize):
    """Round-trip test: write `arrays` to disk as raw binary stacks, convert
    them to binary series files via saveFromStack, read them back with
    fromBinary, and check every read value against the source arrays.

    NOTE: Python 2 code (print statement, `except E, e` syntax).
    testCount is only used to build unique subdirectory names and failure
    messages; blockSize is forwarded to saveFromStack.
    """
    print "Running TestSeriesBinaryWriteFromStack roundtrip test #%d" % testCount
    # separate per-test input/output dirs under the suite's output directory
    insubdir = os.path.join(self.outputdir, 'input%d' % testCount)
    os.mkdir(insubdir)
    outsubdir = os.path.join(self.outputdir, 'output%d' % testCount)
    #os.mkdir(outsubdir)
    # NOTE(review): loop variable `array` shadows numpy's `array` within this loop
    for aryCount, array in enumerate(arrays):
        # array.tofile always writes in column-major order...
        array.tofile(os.path.join(insubdir, "img%02d.stack" % aryCount))
    # ... but we will read and interpret these as though they are in row-major order,
    # so pass the reversed shape as the stack dimensions
    dims = list(arrays[0].shape)
    dims.reverse()
    underTest = SeriesLoader(self.sc)
    underTest.saveFromStack(insubdir, outsubdir, dims, blockSize=blockSize, datatype=str(arrays[0].dtype))
    roundtripped = underTest.fromBinary(outsubdir).collect()
    # compare each series value against the corresponding source-array element
    for serieskeys, seriesvalues in roundtripped:
        for seriesidx, seriesval in enumerate(seriesvalues):
            #print "seriesidx: %d; serieskeys: %s; seriesval: %g" % (seriesidx, serieskeys, seriesval)
            # flip indices again for row vs col-major insanity
            arykeys = list(serieskeys)
            arykeys.reverse()
            msg = "Failure on test #%d, time point %d, indices %s" % (testCount, seriesidx, str(tuple(arykeys)))
            try:
                assert_almost_equal(arrays[seriesidx][tuple(arykeys)], seriesval, places=4)
            except AssertionError, e:
                # re-raise with the test/index context attached
                raise AssertionError(msg, e)
示例5: loadSeries
def loadSeries(self, datapath, nkeys=None, nvalues=None, inputformat='binary', minPartitions=None,
               conffile='conf.json', keytype=None, valuetype=None):
    """
    Loads a Series object from data stored as text or binary files.

    Supports single files or multiple files on a local file system, a
    networked file system mounted on all cluster nodes, Amazon S3, or HDFS.

    Parameters
    ----------
    datapath: string
        Path to data files or directory, as a local filesystem path or a
        URI-like string including scheme. May contain a single '*' wildcard
        in the filename, e.g. 'a/local/relative/directory/*.stack',
        's3n:///my-s3-bucket/data/mydatafile.tif',
        '/mnt/my/absolute/data/directory/', or
        'file:///mnt/another/data/directory/'.

    nkeys: int, optional (but required if `inputformat` is 'text')
        Dimensionality of data keys (e.g. (x,y,z) for 3d image timeseries).
        Required for text data; for binary data it may instead come from the
        configuration file named by `conffile`.

    nvalues: int, optional (but required if `inputformat` is 'text')
        Number of values expected to be read. For binary data it may instead
        come from the configuration file named by `conffile`.

    inputformat: {'text', 'binary'}. optional, default 'binary'
        Format of data to be read.

    minPartitions: int, optional
        Minimum number of Spark partitions to generate. Used only for text
        data; defaults to the Spark context's default parallelism.

    conffile: string, optional, default 'conf.json'
        Path to a JSON file with 'nkeys', 'nvalues', 'keytype', and
        'valuetype' options. If not found at the given path, the base
        directory of `datapath` is also checked. Explicit `nkeys`/`nvalues`
        arguments take priority over values found in this file.

    Returns
    -------
    data: thunder.rdds.Series
        A newly-created Series object wrapping an RDD of series data, keyed
        by n-tuples of int (n given by `nkeys` or the configuration file),
        with numpy-array values of length `nvalues`.
    """
    from thunder.rdds.fileio.seriesloader import SeriesLoader

    # reject anything other than the two supported input formats up front
    checkparams(inputformat, ['text', 'binary'])

    loader = SeriesLoader(self._sc, minPartitions=minPartitions)

    if inputformat.lower() == 'text':
        return loader.fromText(datapath, nkeys=nkeys)
    # checkparams guarantees the only remaining possibility is 'binary'
    return loader.fromBinary(datapath, conffilename=conffile, nkeys=nkeys, nvalues=nvalues,
                             keytype=keytype, valuetype=valuetype)
示例6: _run_fromFishTif
def _run_fromFishTif(self, blocksize="150M"):
    """Load the fish multipage-tif test data as a Series and verify the
    reported dims and the shapes of pack() with and without transposition.
    """
    datadir = TestSeriesLoader._findSourceTreeDir("utils/data/fish/tif-stack")
    series = SeriesLoader(self.sc).fromMultipageTif(datadir, blockSize=blocksize)
    packed = series.pack()
    packedTransposed = series.pack(transpose=True)
    assert_equals((76, 87, 2), series.dims.count)
    assert_equals((20, 76, 87, 2), packed.shape)
    assert_equals((20, 2, 87, 76), packedTransposed.shape)
示例7: _run_tst_fromBinary
def _run_tst_fromBinary(self, useConfJson=False):
    """Exercise SeriesLoader.fromBinary over several key/value dtype combos.

    When useConfJson is False, key/value counts and types are passed as
    explicit fromBinary arguments; when True, they are written to a
    conf.json file in the input directory and fromBinary reads them from
    there. Values are expected back converted to the smallest adequate
    float type (smallestFloatType).
    """
    # run this as a single big test so as to avoid repeated setUp and tearDown of the spark context
    # data will be a sequence of test data
    # all keys and all values in a test data item must be of the same length
    # keys get converted to ints regardless of raw input format
    DATA = [
        SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int16', 'int16'),
        SeriesBinaryTestData.fromArrays([[1, 2, 3], [5, 6, 7]], [[11], [12]], 'int16', 'int16'),
        SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int16', 'int32'),
        SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int32', 'int16'),
        SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11.0, 12.0, 13.0]], 'int16', 'float32'),
        SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11.0, 12.0, 13.0]], 'float32', 'float32'),
        SeriesBinaryTestData.fromArrays([[2, 3, 4]], [[11.0, 12.0, 13.0]], 'float32', 'float32'),
    ]
    for itemidx, item in enumerate(DATA):
        # each item gets its own subdirectory with a single binary input file
        outSubdir = os.path.join(self.outputdir, 'input%d' % itemidx)
        os.mkdir(outSubdir)
        fname = os.path.join(outSubdir, 'inputfile%d.bin' % itemidx)
        with open(fname, 'wb') as f:
            item.writeToFile(f)
        loader = SeriesLoader(self.sc)
        if not useConfJson:
            # pass record layout and dtypes explicitly
            series = loader.fromBinary(outSubdir, nkeys=item.nkeys, nvalues=item.nvals, keyType=str(item.keyDtype),
                                       valueType=str(item.valDtype))
        else:
            # write configuration file and let fromBinary discover the layout
            conf = {'input': outSubdir,
                    'nkeys': item.nkeys, 'nvalues': item.nvals,
                    'valuetype': str(item.valDtype), 'keytype': str(item.keyDtype)}
            # NOTE: 'wb' mode for json.dump is a Python 2 idiom
            with open(os.path.join(outSubdir, "conf.json"), 'wb') as f:
                json.dump(conf, f, indent=2)
            series = loader.fromBinary(outSubdir)
        seriesData = series.rdd.collect()

        expectedData = item.data
        assert_equals(len(expectedData), len(seriesData),
                      "Differing numbers of k/v pairs in item %d; expected %d, got %d" %
                      (itemidx, len(expectedData), len(seriesData)))

        for expected, actual in zip(expectedData, seriesData):
            expectedKeys = tuple(expected[0])
            # values are expected to come back cast to the smallest sufficient float type
            expectedType = smallestFloatType(item.valDtype)
            expectedVals = array(expected[1], dtype=expectedType)
            assert_equals(expectedKeys, actual[0],
                          "Key mismatch in item %d; expected %s, got %s" %
                          (itemidx, str(expectedKeys), str(actual[0])))
            assert_true(allclose(expectedVals, actual[1]),
                        "Value mismatch in item %d; expected %s, got %s" %
                        (itemidx, str(expectedVals), str(actual[1])))
            assert_equals(expectedType, str(actual[1].dtype),
                          "Value type mismatch in item %d; expected %s, got %s" %
                          (itemidx, expectedType, str(actual[1].dtype)))
示例8: _run_fromFishTif
def _run_fromFishTif(self, blocksize):
    """Load fish tif images via fromTif and verify the series dtype is
    'float16' along with dims and pack() shapes (plain and transposed).
    """
    datadir = TestSeriesLoader._findSourceTreeDir("utils/data/fish/images")
    series = SeriesLoader(self.sc).fromTif(datadir, blockSize=blocksize)
    assert_equals('float16', series._dtype)
    packed = series.pack()
    packedTransposed = series.pack(transpose=True)
    assert_equals('float16', str(packed.dtype))
    assert_equals((76, 87, 2), series.dims.count)
    assert_equals((20, 76, 87, 2), packed.shape)
    assert_equals((20, 2, 87, 76), packedTransposed.shape)
示例9: test_loadStacksAsSeries
def test_loadStacksAsSeries(self):
    """Write a flat binary stack to disk, load it with fromStack using the
    reversed dims, and check that pack() returns the transposed data.
    """
    source = arange(64 * 128, dtype=dtype('int16')).reshape((64, 128))
    stackpath = os.path.join(self.outputdir, "rangeary.stack")
    source.tofile(stackpath)
    series = SeriesLoader(self.sc).fromStack(stackpath, dims=(128, 64))
    packed = series.pack()
    assert_equals((128, 64), series.dims.count)
    assert_equals((128, 64), packed.shape)
    assert_true(array_equal(source.T, packed))
示例10: test_castToFloat
def test_castToFloat(self):
    """astype('smallfloat') on uint8 series data should produce float16."""
    from numpy import arange
    from thunder.rdds.fileio.seriesloader import SeriesLoader
    shape = (3, 2, 2)
    nelems = 3 * 2 * 2
    base = arange(nelems, dtype=dtypeFunc('uint8')).reshape(shape)
    shifted = base + nelems
    series = SeriesLoader(self.sc).fromArrays([base, shifted])
    cast = series.astype("smallfloat")
    # dtype is reported both on the Series and on collected values
    assert_equals('float16', str(cast.dtype))
    assert_equals('float16', str(cast.first()[1].dtype))
示例11: test_maxProject
def test_maxProject(self):
    """maxProject along each axis should match numpy amax on the transposed input."""
    from thunder.rdds.fileio.seriesloader import SeriesLoader
    source = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    series = SeriesLoader(self.sc).fromArrays(source)
    packedAxis0 = series.maxProject(axis=0).pack()
    packedAxis1 = series.maxProject(axis=1).pack(sorting=True)
    # series data is the transpose of the input, so compare against amax of source.T
    assert_true(array_equal(amax(source.T, 0), packedAxis0))
    assert_true(array_equal(amax(source.T, 1), packedAxis1))
示例12: _run_roundtrip_tst
def _run_roundtrip_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
    """Series -> Blocks -> Series should be the identity on packed data."""
    sourceArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    original = SeriesLoader(self.sc).fromArrays(sourceArrays)
    # chunk into blocks of the requested size, then reassemble as a Series
    restored = original.toBlocks(sizeSpec).toSeries(newDType=original.dtype)
    assert_true(array_equal(original.pack(), restored.pack()))
示例13: _run_roundtrip_tst
def _run_roundtrip_tst(self, testCount, arrays, blockSize):
    """Round-trip test: write `arrays` as raw binary stacks, convert them to
    binary series files via saveFromStack, then compare three things:
    (1) the round-tripped series against a series loaded directly from the
    stacks, (2) both dtypes against the smallest adequate float type, and
    (3) conf.json's recorded value type against the raw input dtype.

    NOTE: Python 2 code (print statement, `except E, e` syntax).
    """
    print "Running TestSeriesBinaryWriteFromStack roundtrip test #%d" % testCount
    # separate per-test input/output dirs under the suite's output directory
    insubdir = os.path.join(self.outputdir, 'input%d' % testCount)
    os.mkdir(insubdir)
    outsubdir = os.path.join(self.outputdir, 'output%d' % testCount)
    #os.mkdir(outsubdir)
    # NOTE(review): loop variable `array` shadows numpy's `array` within this loop
    for aryCount, array in enumerate(arrays):
        # array.tofile always writes in column-major order...
        array.tofile(os.path.join(insubdir, "img%02d.stack" % aryCount))
    # ... but we will read and interpret these as though they are in row-major order,
    # so pass the reversed shape as the stack dimensions
    dims = list(arrays[0].shape)
    dims.reverse()
    underTest = SeriesLoader(self.sc)
    underTest.saveFromStack(insubdir, outsubdir, dims, blockSize=blockSize, datatype=str(arrays[0].dtype))
    # load the same stacks directly, and the converted binary series files
    series = underTest.fromStack(insubdir, dims, datatype=str(arrays[0].dtype))
    roundtripped_series = underTest.fromBinary(outsubdir)
    roundtripped = roundtripped_series.collect()
    direct = series.collect()
    # both load paths should report values cast to the smallest adequate float type
    expecteddtype = str(smallest_float_type(arrays[0].dtype))
    assert_equals(expecteddtype, roundtripped_series.dtype)
    assert_equals(expecteddtype, series.dtype)
    assert_equals(expecteddtype, str(roundtripped[0][1].dtype))
    assert_equals(expecteddtype, str(direct[0][1].dtype))
    with open(os.path.join(outsubdir, "conf.json"), 'r') as fp:
        # check that binary series file data type *matches* input stack data type (not yet converted to float)
        # at least according to conf.json
        conf = json.load(fp)
        assert_equals(str(arrays[0].dtype), conf["valuetype"])
    # the two load paths must agree record-for-record, and each value must
    # match the corresponding element of the source arrays
    for ((serieskeys, seriesvalues), (directkeys, directvalues)) in zip(roundtripped, direct):
        assert_equals(directkeys, serieskeys)
        assert_equals(directvalues, seriesvalues)
        for seriesidx, seriesval in enumerate(seriesvalues):
            #print "seriesidx: %d; serieskeys: %s; seriesval: %g" % (seriesidx, serieskeys, seriesval)
            # flip indices again for row vs col-major insanity
            arykeys = list(serieskeys)
            arykeys.reverse()
            msg = "Failure on test #%d, time point %d, indices %s" % (testCount, seriesidx, str(tuple(arykeys)))
            try:
                assert_almost_equal(arrays[seriesidx][tuple(arykeys)], seriesval, places=4)
            except AssertionError, e:
                # re-raise with the test/index context attached
                raise AssertionError(msg, e)
示例14: test_roundtripConvertToSeries
def test_roundtripConvertToSeries(self):
    """Images.toSeries() and saveAsBinarySeries()+fromBinary() should agree."""
    imagepath = TestImagesUsingOutputDir._findSourceTreeDir("utils/data/fish/tif-stack")
    seriesdir = os.path.join(self.outputdir, "fish-series-dir")
    images = ImagesLoader(self.sc).fromMultipageTif(imagepath)
    # direct in-memory conversion
    direct = images.toSeries(blockSize=76*20)
    directPacked = direct.pack()
    # conversion via binary series files on disk
    images.saveAsBinarySeries(seriesdir, blockSize=76*20)
    reloadedPacked = SeriesLoader(self.sc).fromBinary(seriesdir).pack()
    assert_equals((76, 87, 2), direct.dims.count)
    assert_equals((20, 76, 87, 2), directPacked.shape)
    assert_true(array_equal(directPacked, reloadedPacked))
示例15: _run_tst_roundtripConvertToSeries
def _run_tst_roundtripConvertToSeries(self, images, strategy):
    """Blocks.toSeries() and saveAsBinarySeries()+fromBinary() should agree."""
    seriesdir = os.path.join(self.outputdir, "fish-series-dir")
    blocks = images.toBlocks(strategy)
    # direct in-memory conversion
    direct = blocks.toSeries()
    directPacked = direct.pack()
    # conversion via binary series files on disk
    blocks.saveAsBinarySeries(seriesdir)
    reloadedPacked = SeriesLoader(self.sc).fromBinary(seriesdir).pack()
    assert_equals(images.dims.count, direct.dims.count)
    # packed shape is (nrecords, *dims)
    expectedShape = tuple([images.nrecords] + list(images.dims.count))
    assert_equals(expectedShape, directPacked.shape)
    assert_true(array_equal(directPacked, reloadedPacked))