This page collects typical usage examples of the Python method pySPACE.resources.dataset_defs.base.BaseDataset.store_meta_data. If you are unsure what BaseDataset.store_meta_data does, how to call it, or simply want to see it in context, the curated examples below should help. You can also explore the containing class pySPACE.resources.dataset_defs.base.BaseDataset
for further usage examples.
The following shows 13 code examples of BaseDataset.store_meta_data, ordered by popularity by default.
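Before the individual examples, here is a minimal sketch of the calling pattern they all share. The directory and the dictionary contents are invented for illustration, and it is assumed that store_meta_data accepts a plain meta-data dictionary, as the examples below suggest (Example 6 also shows the optional file_name argument).

import os
from pySPACE.resources.dataset_defs.base import BaseDataset

result_dir = "/tmp/pyspace_result"            # hypothetical result directory
if not os.path.exists(result_dir):
    os.mkdir(result_dir)

meta_data = {"type": "feature_vector",        # invented values, for illustration only
             "storage_format": "pickle",
             "author": "unknown"}

# Class-level call used throughout the examples below; it writes the meta
# data into result_dir (the examples treat the default output file as
# metadata.yaml; Example 6 passes file_name="input_metadata.yaml" instead).
BaseDataset.store_meta_data(result_dir, meta_data)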
Example 1: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def store(self, result_dir, s_format = "None"):
if not s_format == "None":
self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
return
# Update the meta data
author = get_author()
self.update_meta_data({"type": "only output of individual nodes stored",
"storage_format": s_format,
"author" : author,
"data_pattern": "no data stored"})
# Store meta data
BaseDataset.store_meta_data(result_dir,self.meta_data)
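As a hedged counterpart to Example 1, the stored meta data can be read back with BaseDataset.load_meta_data, which Examples 6, 10 and 13 below use on dataset directories; the path here is hypothetical.

from pySPACE.resources.dataset_defs.base import BaseDataset

result_dir = "/tmp/pyspace_result"   # hypothetical directory written by a store() call like the one above

meta_data = BaseDataset.load_meta_data(result_dir)   # same call as in Examples 6, 10 and 13
print(meta_data["storage_format"])                   # "None" for Example 1
print(meta_data["author"])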
Example 2: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def store(self, result_dir, s_format = "None"):
if not s_format == "None":
self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
return
# Update the meta data
try:
author = pwd.getpwuid(os.getuid())[4]
except:
author = "unknown"
self._log("Author could not be resolved.",level=logging.WARNING)
self.update_meta_data({"type": "only output of individual nodes stored",
"storage_format": s_format,
"author" : author,
"data_pattern": "no data stored"})
# Store meta data
BaseDataset.store_meta_data(result_dir,self.meta_data)
Example 3: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
#......... part of the code omitted here .........
Examples: ["arff", "real"], ["arff", "{0,1}"]
.. todo:: Someone could implement the format ["fasta"] for sax features
To store the data in comma separated values, use ["csv", "real"].
(*optional, default: ["pickle", "real"]*)
.. todo:: Adapt storing of csv file to external library instead of
doing it manually.
"""
name = "features"
# Update the meta data
try:
author = pwd.getpwuid(os.getuid())[4]
except:
author = "unknown"
self._log("Author could not be resolved.",level=logging.WARNING)
self.update_meta_data({"type": "feature_vector",
"storage_format": s_format,
"author": author,
"data_pattern": "data_run" + os.sep
+ name + "_sp_tt." + s_format[0]})
if type(s_format) == list:
s_type = s_format[1]
s_format = s_format[0]
else:
s_type = "real"
if not s_format in ["csv", "arff", "pickle"]:
self._log("Storage format not supported! Using default.",
level=logging.ERROR)
s_format = "pickle"
# Iterate through splits and runs in this dataset
for key, feature_vectors in self.data.iteritems():
# Construct result directory
result_path = result_dir + os.sep + "data" \
+ "_run%s" % key[0]
if not os.path.exists(result_path):
os.mkdir(result_path)
key_str = "_sp%s_%s" % key[1:]
# Store data depending on the desired format
if s_format == "pickle":
result_file = open(os.path.join(result_path,
name + key_str + ".pickle"),
"w")
cPickle.dump(feature_vectors, result_file, cPickle.HIGHEST_PROTOCOL)
elif s_format == "arff": # Write as ARFF
result_file = open(os.path.join(result_path,
name + key_str + ".arff"),"w")
# Create the arff file header
relation_name = result_dir.split(os.sep)[-1]
result_file.write('@relation "%s"\n' % relation_name)
# Write the type of all features
for feature_name in self.meta_data["feature_names"]:
result_file.write("@attribute %s %s\n" % (feature_name, s_type))
classString = "" + ",".join(sorted(self.meta_data["classes_names"])) + ""
result_file.write("@attribute class {%s}\n" % classString)
result_file.write("@data\n")
# Write all given training data into the ARFF file
fv = feature_vectors[0][0]
if numpy.issubdtype(fv.dtype, numpy.string_):
feature_format = "%s,"
elif numpy.issubdtype(fv.dtype, numpy.floating):
feature_format = "%f,"
elif numpy.issubdtype(fv.dtype, numpy.integer):
feature_format = "%d,"
for features, class_name in feature_vectors:
for feature in features[0]:
result_file.write(feature_format % feature)
result_file.write("%s\n" % str(class_name))
elif s_format == "csv": # Write as Comma Separated Value
result_file = open(os.path.join(result_path,
name + key_str + ".csv"),"w")
for feature_name in self.meta_data["feature_names"]:
result_file.write("%s," % (feature_name))
result_file.write("\n")
fv = feature_vectors[0][0]
if numpy.issubdtype(fv.dtype, numpy.floating):
feature_format = "%f,"
elif numpy.issubdtype(fv.dtype, numpy.integer):
feature_format = "%d,"
else:
feature_format = "%s,"
for features, class_name in feature_vectors:
f = features.view(numpy.ndarray)
for feature in f[0]:
result_file.write(feature_format % feature)
result_file.write("%s\n" % str(class_name))
result_file.close()
#Store meta data
BaseDataset.store_meta_data(result_dir,self.meta_data)
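The docstring fragment in Example 3 describes the list form of the s_format argument; as a usage sketch under assumed, purely illustrative paths (BaseDataset.load is used the same way in Example 9), storing a feature-vector dataset as CSV could look like this:

import os
from pySPACE.resources.dataset_defs.base import BaseDataset

# Hypothetical storage and result paths, for illustration only.
input_dataset_dir = "/path/to/storage/my_feature_collection"
result_dir = "/path/to/results/my_feature_collection_csv"
if not os.path.exists(result_dir):
    os.makedirs(result_dir)

feature_dataset = BaseDataset.load(input_dataset_dir)        # as in Example 9
# Comma separated values with real-valued features, per the docstring above:
feature_dataset.store(result_dir, s_format=["csv", "real"])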
Example 4: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def store(self, result_dir, s_format="BrainVision"):
# Keep original file name, depends on the AnalyserSinkNode, see its documentation.
if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] is not None:
name = self.meta_data["eeg_src_file_name"]
# or use default name from this collection
else:
name = "Analyzer"
if not s_format == "BrainVision":
self._log("The format %s is not supported!" % s_format, level=logging.CRITICAL)
return
# Update the meta data
try:
author = pwd.getpwuid(os.getuid())[4]
except:
author = "unknown"
self._log("Author could not be resolved.", level=logging.WARNING)
self.update_meta_data(
{
"type": "only output of individual nodes stored",
"storage_format": s_format,
"author": author,
"data_pattern": "Multiplexed",
}
)
# Store meta data
BaseDataset.store_meta_data(result_dir, self.meta_data)
# self._log("EEG data file %s" % self.collection.data_file)
slices = []
slices.append(0)
channel_names = []
for key, time_series in self.data.iteritems():
# Sort the Time-Series Array
def cmp_start(a, b):
return cmp(a[0].start_time, b[0].start_time)
time_series.sort(cmp_start)
# Check for overlapping Windows and remove them if existent
i = 0
while i < len(time_series):
ts = time_series[i]
# print ts[0].start_time, ts[0].end_time
# print len(time_series)
if ts[0].start_time >= slices[-1]:
slices.append(ts[0].end_time)
else:
warnings.warn("Ignoring at least one overlapping window!", UserWarning)
i = i + 1
# STORE ACTUAL EEG DATA AND WRITE MARKERFILE
result_path = result_dir + os.sep + "data_analyzer" + "_run%s" % key[0]
if not os.path.exists(result_path):
os.mkdir(result_path)
key_str = "_sp%s_%s" % key[1:]
# Keep original name
if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
result_file_eeg = open(os.path.join(result_path, name + ".eeg"), "wb")
result_file_mrk = open(os.path.join(result_path, name + ".vmrk"), "w")
# or use default name from this collection
else:
result_file_eeg = open(os.path.join(result_path, name + key_str + ".eeg"), "wb")
result_file_mrk = open(os.path.join(result_path, name + key_str + ".vmrk"), "w")
# Write Marker header
if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
result_file_mrk.write(header_mrk % (name))
else:
result_file_mrk.write(header_mrk % (name + key_str))
result_file_ms = 0
# Data for padding
padding = None
count_mrk = 2
num_ch = 0
sampling_int = 0
for ts in time_series:
if padding == None:
padding = numpy.zeros(len(ts[0].channel_names), dtype="int16")
num_ch = len(ts[0].channel_names)
channel_names = ts[0].channel_names
sampling_int = 1000000 / ts[0].sampling_frequency
# print "writing %d channels.." % len(ts[0].channel_names)
# Write Padding (zeros)
while result_file_ms < ts[0].start_time:
result_file_eeg.write(padding.tostring())
result_file_ms += ts[0]._samples_to_ms(1)
# Write window
ts[0].tofile(result_file_eeg)
result_file_ms += ts[0].end_time - ts[0].start_time
# Write Marker
result_file_mrk.write(
"Mk%d=Label,%s,%d,1,0\n" % (count_mrk, ts[1], ts[0]._ms_to_samples(ts[0].start_time))
)
count_mrk += 1
# WRITE HEADERFILE
# Keep original name
if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
#......... part of the code omitted here .........
Example 5: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
#......... part of the code omitted here .........
s_type = "%i"
if s_format == "csv" and s_type == "real":
s_type = "%.18e"
# Update the meta data
try:
author = pwd.getpwuid(os.getuid())[4]
except Exception:
author = "unknown"
self._log("Author could not be resolved.", level=logging.WARNING)
self.update_meta_data({"type": "time_series",
"storage_format": s_format,
"author": author,
"data_pattern": "data_run" + os.sep
+ name + "_sp_tt." + s_format})
# Iterate through splits and runs in this dataset
for key, time_series in self.data.iteritems():
# load data, if necessary
# (due to the lazy loading, the data might be not loaded already)
if isinstance(time_series, basestring):
time_series = self.get_data(key[0], key[1], key[2])
if self.sort_string is not None:
time_series.sort(key=eval(self.sort_string))
# Construct result directory
result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
if not os.path.exists(result_path):
os.mkdir(result_path)
key_str = "_sp%s_%s" % key[1:]
# Store data depending on the desired format
if s_format in ["pickle", "cpickle", "cPickle"]:
result_file = open(os.path.join(result_path,
name+key_str+".pickle"), "w")
cPickle.dump(time_series, result_file, cPickle.HIGHEST_PROTOCOL)
elif s_format in ["text","csv"]:
self.update_meta_data({
"type": "stream",
"marker_column": "marker"})
result_file = open(os.path.join(result_path,
name + key_str + ".csv"), "w")
csvwriter = csv.writer(result_file)
channel_names = copy.deepcopy(time_series[0][0].channel_names)
if s_format == "csv":
channel_names.append("marker")
csvwriter.writerow(channel_names)
for (data, key) in time_series:
if s_format == "text":
numpy.savetxt(result_file, data, delimiter=",", fmt=s_type)
if not key is None:
result_file.write(str(key))
result_file.flush()
elif data.marker_name is not None \
and len(data.marker_name) > 0:
result_file.write(str(data.marker_name))
result_file.flush()
else:
first_line = True
marker = ""
if not key is None:
marker = str(key)
elif data.marker_name is not None \
and len(data.marker_name) > 0:
marker = str(data.marker_name)
for line in data:
l = list(line)
l.append(marker)
csvwriter.writerow(list(l))
if first_line:
first_line = False
marker = ""
result_file.flush()
elif s_format in ["mat"]:
result_file = open(os.path.join(result_path,
name + key_str + ".mat"),"w")
# extract a first time series object to get meta data
merged_time_series = time_series.pop(0)[0]
# collect all important information in the collection_object
collection_object = {
"sampling_frequency": merged_time_series.sampling_frequency,
"channel_names": merged_time_series.channel_names}
# merge all data
for (data,key) in time_series:
merged_time_series = numpy.vstack((merged_time_series,
data))
collection_object["data"] = merged_time_series
mdict = dict()
mdict[name + key_str] = collection_object
import scipy.io
scipy.io.savemat(result_file, mdict=mdict)
else:
NotImplementedError("Using unavailable storage format:%s!"
% s_format)
result_file.close()
self.update_meta_data({
"channel_names": copy.deepcopy(time_series[0][0].channel_names),
"sampling_frequency": time_series[0][0].sampling_frequency
})
#Store meta data
BaseDataset.store_meta_data(result_dir, self.meta_data)
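When Example 5 uses the "mat" format, each split is saved under a single key that holds "sampling_frequency", "channel_names" and the merged "data" array. The sketch below shows how such a file could be read back with scipy.io.loadmat; the file and key names follow the data_pattern above but are assumptions here (name is assumed to be "time_series").

import scipy.io

# Hypothetical output of the "mat" branch above, e.g. run 0, split 0, test data.
mat_file = "data_run0/time_series_sp0_test.mat"
mat = scipy.io.loadmat(mat_file, squeeze_me=True, struct_as_record=False)

# The key mirrors the name + key_str combination used when saving.
collection = mat["time_series_sp0_test"]
print(collection.sampling_frequency)   # scalar meta data stored next to the data
print(collection.channel_names)        # channel names of the merged time series
print(collection.data.shape)           # vstacked samples of all windows in this split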
Example 6: consolidate
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def consolidate(self):
""" Consolidates the results obtained by the single processes into a consistent structure
of collections that are stored on the file system.
"""
# Consolidate the results
directory_pattern = os.sep.join([self.result_directory, "{*",])
dataset_pathes = glob.glob(directory_pattern)
# For all collections found
for dataset_path in dataset_pathes:
# Load their meta_data
meta_data = BaseDataset.load_meta_data(dataset_path)
# Determine author and date
try:
author = pwd.getpwuid(os.getuid())[4]
except:
author = "unknown"
self._log("Author could not be resolved.",level=logging.WARNING)
date = time.strftime("%Y%m%d_%H_%M_%S")
# Update meta data and store it
meta_data.update({"author" : author, "date" : date})
BaseDataset.store_meta_data(dataset_path, meta_data)
# Copy the input dataset specification file to the result
# directory in order to make later analysis of
# the results more easy
input_meta_path = os.sep.join([pySPACE.configuration.storage,
meta_data["input_collection_name"]])
input_meta = BaseDataset.load_meta_data(input_meta_path)
BaseDataset.store_meta_data(dataset_path,input_meta,
file_name="input_metadata.yaml")
# Check if some results consist of several runs
# and update the meta data in this case
# TODO: This is not a clean solution
for dataset_dir in glob.glob(os.sep.join([self.result_directory,
"*"])):
if not os.path.isdir(dataset_dir): continue
# There can be either run dirs, persistency dirs, or both of them.
# Check of whichever there are more. If both exist, their numbers
# are supposed to be equal.
nr_run_dirs = len(glob.glob(os.sep.join([dataset_dir,
"data_run*"])))
nr_per_dirs = len(glob.glob(os.sep.join([dataset_dir,
"persistency_run*"])))
nr_runs = max(nr_run_dirs, nr_per_dirs)
if nr_runs > 1:
collection_meta = BaseDataset.load_meta_data(dataset_dir)
collection_meta["runs"] = nr_runs
BaseDataset.store_meta_data(dataset_dir,collection_meta)
# If we don't create a feature vector or time series collection,
# we evaluated our classification using a classification performance sink.
# The resulting files should be merged to one csv tabular.
pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
if len(pathlist)>0:
# Do the consolidation the same way as for WekaClassificationOperation
self._log("Consolidating results ...")
# We load and store the results once into a PerformanceResultSummary
# This does the necessary consolidation...
self._log("Reading intermediate results...")
result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
self._log("done")
self._log("Storing result collection")
result_collection.store(self.result_directory)
self._log("done")
PerformanceResultSummary.merge_traces(self.result_directory)
if not(self.compression == False):
# Since we get one result summary,
# we don't need the numerous folders.
# So we zip them to make the whole folder easier to browse.
import zipfile
cwd=os.getcwd()
os.chdir(self.result_directory)
# If there are too many or too large folders, problems may occur.
# In this case we want to log it, try 64 bit mode, and then skip the zipping.
try:
pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
if not self.compression == "delete":
save_file=zipfile.ZipFile(self.result_directory+'/result_folders.zip',mode="w",compression=self.compression)
# we want to have the zipped file relative to the result directory
for path in pathlist:
for node in os.walk(path):
rel_path=os.path.relpath(node[0],self.result_directory)
save_file.write(rel_path)
for data in node[2]:
save_file.write(os.path.join(rel_path,data))
save_file.close()
# To still have an easy access to the history of the processing,
# we keep one folder.
pathlist.pop()
for path in pathlist:
shutil.rmtree(path)
except:
self._log("Result files could not be compressed with 32 bit mode, switching to 64 bit mode.", level=logging.CRITICAL)
# nearly total code copy, only difference with 64 bit mode
try:
#......... part of the code omitted here .........
Example 7: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
#......... part of the code omitted here .........
import scipy.io
result_file_name = os.path.join(result_path,
name + key_str + ".mat")
# extract a first time series object to get meta data
ts1 = time_series[0][0]
# collect all important information in the collection_object
dataset_dict = {
"sampling_frequency": ts1.sampling_frequency,
"channel_names": ts1.channel_names}
# we have to extract the data and labels separatly
if 'channelXtime' in s_format:
dataset_dict["data"] = [data.T for data, _ in time_series]
else:
dataset_dict["data"] = [data for data, _ in time_series]
dataset_dict["labels"] = [label for _, label in time_series]
# construct numpy 3d array (e.g., channelXtimeXtrials)
dataset_dict["data"] = numpy.rollaxis(numpy.array(
dataset_dict["data"]), 0, 3)
scipy.io.savemat(result_file_name, mdict=dataset_dict)
elif f_format in ["bp_eeg"]:
result_file = open(os.path.join(result_path,
name + key_str + ".eeg"),"a+")
result_file_mrk = open(os.path.join(result_path,
name + key_str + ".vmrk"),"w")
result_file_mrk.write("Brain Vision Data Exchange Marker File, "
"Version 1.0\n")
result_file_mrk.write("; Data stored by pySPACE\n")
result_file_mrk.write("[Common Infos]\n")
result_file_mrk.write("Codepage=UTF-8\n")
result_file_mrk.write("DataFile=%s\n" %
str(name + key_str + ".eeg"))
result_file_mrk.write("\n[Marker Infos]\n")
markerno = 1
datapoint = 1
sf = None
channel_names = None
for t in time_series:
if sf is None:
sf = t[0].sampling_frequency
if channel_names is None:
channel_names = t[0].get_channel_names()
for mrk in t[0].marker_name.keys():
for tm in t[0].marker_name[mrk]:
result_file_mrk.write(str("Mk%d=Stimulus,%s,%d,1,0\n" %
(markerno, mrk, datapoint+(tm*sf/1000.0))))
markerno += 1
data_ = t[0].astype(numpy.int16)
data_.tofile(result_file)
datapoint += data_.shape[0]
result_hdr = open(os.path.join(result_path,
name + key_str + ".vhdr"),"w")
result_hdr.write("Brain Vision Data Exchange Header "
"File Version 1.0\n")
result_hdr.write("; Data stored by pySPACE\n\n")
result_hdr.write("[Common Infos]\n")
result_hdr.write("Codepage=UTF-8\n")
result_hdr.write("DataFile=%s\n" %
str(name + key_str + ".eeg"))
result_hdr.write("MarkerFile=%s\n" %
str(name + key_str + ".vmrk"))
result_hdr.write("DataFormat=BINARY\n")
result_hdr.write("DataOrientation=MULTIPLEXED\n")
result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
result_hdr.write("SamplingInterval=%d\n\n" % (1000000/sf))
result_hdr.write("[Binary Infos]\n")
result_hdr.write("BinaryFormat=INT_16\n\n")
result_hdr.write("[Channel Infos]\n")
# TODO: Add Resolutions to time_series
# 0 = 0.1 [micro]V,
# 1 = 0.5 [micro]V,
# 2 = 10 [micro]V,
# 3 = 152.6 [micro]V (seems to be unused!)
resolutions_str = [unicode("0.1,%sV" % unicode(u"\u03BC")),
unicode("0.5,%sV" % unicode(u"\u03BC")),
unicode("10,%sV" % unicode(u"\u03BC")),
unicode("152.6,%sV" % unicode(u"\u03BC"))]
for i in range(len(channel_names)):
result_hdr.write(unicode("Ch%d=%s,,%s\n" %
(i+1,channel_names[i],
unicode(resolutions_str[0]))).encode('utf-8'))
result_file.close()
else:
NotImplementedError("Using unavailable storage format:%s!"
% f_format)
self.update_meta_data({
"channel_names": copy.deepcopy(time_series[0][0].channel_names),
"sampling_frequency": time_series[0][0].sampling_frequency
})
#Store meta data
BaseDataset.store_meta_data(result_dir, self.meta_data)
Example 8: __call__
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
#......... part of the code omitted here .........
# Determine names of the original data sets the input
# datasets are based on
base_dataset1 = dataset_name1.strip("}{").split("}{")[0]
base_dataset2 = dataset_name2.strip("}{").split("}{")[0]
# Determine target dataset name and create directory
# for it
mixed_base_dataset = "%s_vs_%s" % (base_dataset1,
base_dataset2)
target_dataset_name = dataset_name1.replace(base_dataset1,
mixed_base_dataset)
target_dataset_dir = os.sep.join([self.result_directory,
target_dataset_name])
create_directory(os.sep.join([target_dataset_dir, "data_run0"]))
if splitted:
# For each split, copy the train data from dataset 1 and
# the test data from dataset 2 to the target dataset
for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
"data_run0",
"*_sp*_train.*"])):
# TODO: We have $n$ train sets and $n$ test sets, we
# could use all $n*n$ combinations
target_train_file_name = source_train_file_name.replace(dataset_dir1,
target_dataset_dir)
if source_train_file_name.endswith("arff"):
self._copy_arff_file(source_train_file_name,
target_train_file_name,
base_dataset1,
mixed_base_dataset)
else:
os.symlink(source_train_file_name,
target_train_file_name)
source_test_file_name = source_train_file_name.replace(dataset_dir1,
dataset_dir2)
source_test_file_name = source_test_file_name.replace("train.",
"test.")
target_test_file_name = target_train_file_name.replace("train.",
"test.")
if source_train_file_name.endswith("arff"):
self._copy_arff_file(source_test_file_name,
target_test_file_name,
base_dataset2,
mixed_base_dataset)
else:
os.symlink(source_test_file_name,
target_test_file_name)
else:
# Use the data set from dataset 1 as training set and
# the data set from dataset 2 as test data
for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
"data_run0",
"*_sp*_test.*"])):
target_train_file_name = source_train_file_name.replace("test.",
"train.")
target_train_file_name = target_train_file_name.replace(dataset_dir1,
target_dataset_dir)
if source_train_file_name.endswith("arff"):
self._copy_arff_file(source_train_file_name,
target_train_file_name,
base_dataset1,
mixed_base_dataset)
else:
os.symlink(source_train_file_name,
target_train_file_name)
source_test_file_name = source_train_file_name.replace(dataset_dir1,
dataset_dir2)
target_test_file_name = target_train_file_name.replace("train.",
"test.")
if source_train_file_name.endswith("arff"):
self._copy_arff_file(source_test_file_name,
target_test_file_name,
base_dataset2,
mixed_base_dataset)
else:
os.symlink(source_test_file_name,
target_test_file_name)
# Write metadata.yaml based on input meta data
input_dataset1_meta = BaseDataset.load_meta_data(dataset_dir1)
output_dataset_meta = dict(input_dataset1_meta)
output_dataset_meta['train_test'] = True
output_dataset_meta['date'] = time.strftime("%Y%m%d_%H_%M_%S")
try:
output_dataset_meta['author'] = pwd.getpwuid(os.getuid())[4]
except :
self._log("Author could not be resolved.",level=logging.WARNING)
output_dataset_meta['author'] = "unknown"
BaseDataset.store_meta_data(target_dataset_dir,output_dataset_meta)
############## Clean up after benchmarking ##############
super(ShuffleProcess, self).post_benchmarking()
Example 9: __call__
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
#......... part of the code omitted here .........
target_test_file_path = os.sep.join([target_collection_path,
"data_run0","features_sp0_"+key+".arff"])
elif source_file_name.endswith("pickle"):
file_ending = "pickle"
source_test_file_path = source_test_collection_path
target_test_file_path = target_collection_path
else:
raise NotImplementedError("File type not supported in " \
"MergeOperation")
source_train_pathes = []
for source_train_collection_path in self.input_collections:
source_train_collection_name = \
source_train_collection_path.split(os.sep)[-2]
# We must not use data originating from the same input
# collection both in train and test files
if source_test_collection_name == source_train_collection_name:
continue
# Check that all constraints are fulfilled for this pair of
# input collections
if not all(eval(constraint_template % \
{'source_train_collection_name': source_train_collection_name,
'source_test_collection_name': source_test_collection_name})
for constraint_template in self.collection_constraints):
continue
# check if all parameters are stored in the target path
source_collection = \
BaseDataset.load(source_train_collection_path)
source_collection_params = \
source_collection.meta_data["parameter_setting"]
remaining_params = \
[param for param in source_collection_params.items() \
if param not in target_collection_params.items() and \
param[0] not in ["__INPUT_DATASET__",
"__RESULT_DIRECTORY__", "__OUTPUT_BUNDLE__",
"__INPUT_COLLECTION__" ]] # for old data
if remaining_params != []:
for k,v in remaining_params:
target_collection_path += "{%s#%s}" % (k,str(v))
target_collection_params[k]=v
if "arff" == file_ending:
source_train_file_path = \
os.sep.join([source_train_collection_path,
"data_run0", "features_sp0_" + \
train_set_name_suffix + ".arff"])
elif "pickle" == file_ending:
source_train_file_path = source_train_collection_path
else:
raise NotImplementedError("File type not supported in " \
"MergeOperation!")
source_train_pathes.append(source_train_file_path)
if "arff" == file_ending:
target_train_file_path = os.sep.join([target_collection_path,
"data_run0","features_sp0_"+key+".arff"])
elif "pickle" == file_ending:
target_train_file_path = target_collection_path
else:
raise NotImplementedError("File type not supported in "
"MergeOperation!")
if len(source_train_pathes) == 0:
continue
create_directory(os.sep.join([target_collection_path,
"data_run0"]))
if "arff" == file_ending:
self._copy_arff_file(source_test_file_path,
target_test_file_path,
source_test_collection_name,
target_collection_name)
self._merge_arff_files(target_train_file_path,
source_train_pathes,
target_collection_name)
# Copy metadata.yaml
# TODO: Adapt to new collection
input_meta = BaseDataset.load_meta_data(source_test_collection_path)
BaseDataset.store_meta_data(target_collection_path,input_meta)
elif "pickle" == file_ending:
self._copy_pickle_file(source_test_collection_path,
target_collection_path,
train_set_name_suffix)
self._merge_pickle_files(target_train_file_path,
source_train_pathes,
train_set_name_suffix,
target_collection_params)
else:
raise NotImplementedError("File type not supported in merge_operation")
############## Clean up after benchmarking ##############
super(MergeProcess, self).post_benchmarking()
Example 10: consolidate
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def consolidate(self):
"""
Consolidates the results obtained by the single WEKA filter
processes into a consistent summary of datasets that is stored on
the file system.
.. todo:: Some of the contents of this method should go into the
:class:`~pySPACE.resources.dataset_defs.feature_vector.FeatureVectorDataset`
"""
# Iterate over all collections and store the collection meta data etc.
for entries in os.listdir(self.result_directory):
fullpath = os.path.join(self.result_directory, entries)
# For each collection
if os.path.isdir(fullpath):
if entries.startswith("{"):
# Extract the parameters from the collection name in order to
# adjust the relation name
if self.num_parameters > 0:
parameter_strings = entries.strip("}{").split("}{")[-self.num_parameters:]
parameter_postfix = "{" + "}{".join(parameter_strings) + "}"
else:
parameter_strings = ""
parameter_postfix = ""
# Postprocessing of the arff files of this collection
for train_arff_file in glob.glob(fullpath + os.sep + "data_run*"
+ os.sep + "*train.arff"):
# Adjust the relation name of the train file
content = open(train_arff_file, 'r').readlines()
# We strip everything after the last "}"
endindex = content[0].rfind("}")
content[0] = content[0][:endindex+1]
content[0] += parameter_postfix + "'"
open(train_arff_file, 'w').writelines(content)
# Use relation name of train data for test data
test_arff_file = train_arff_file.replace("train.arff", "test.arff")
test_content = open(test_arff_file, 'r').readlines()
test_content[0] = content[0] + "\n"
open(test_arff_file, 'w').writelines(test_content)
# Check which features are contained in the arff file
feature_names = []
for line in content:
if line.startswith("@attribute"):
attribute = line.split()[1]
if attribute != "class":
feature_names.append(attribute)
# Store the collection meta data etc.
if self.num_parameters > 0:
input_collection_name = \
"{" + "}{".join(entries.strip("}{").split("}{")[:-self.num_parameters]) + "}"
else:
input_collection_name = entries
input_collection_path = os.path.join(self.operation_spec["input_path"],
input_collection_name)
input_collection_meta = BaseDataset.load_meta_data(
pySPACE.configuration.storage
+ os.sep
+ input_collection_path)
# Store the input collection
BaseDataset.store_meta_data(fullpath, input_collection_meta,
file_name="input_metadata.yaml")
# Adjust collection metadata for the new collection
input_collection_meta["feature_names"] = feature_names
input_collection_meta["num_features"] = len(feature_names)
input_collection_meta["author"] = get_author()
input_collection_meta["date"] = time.strftime("%Y%m%d")
input_collection_meta["input_collection_name"] = input_collection_name
# Write the collection meta information into the folder
BaseDataset.store_meta_data(fullpath,input_collection_meta)
# Store the command_template
command_template_file = open(os.path.join(fullpath,
"command_template"), 'w')
command_template_file.write(self.command_template)
command_template_file.close()
else:
# training and test arff need the same relation name
# otherwise Weka can't relate it to each other; the collection
# name and the parameters in {}{}-optic must be the relation
# name for further processing
self._log("WARNING: Collection name doesn't begin with '{'. Further processing may be collapse!", level= logging.WARNING)
# Write the specification of this operation
# to the result directory in order to make later
# analysis of results more easy
source_operation_file = open(os.path.join(self.result_directory,
"source_operation.yaml"), 'w')
yaml.dump(self.operation_spec, source_operation_file)
source_operation_file.close()
Example 11: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def store(self, result_dir, s_format=["pickle", "real"]):
""" store the collection in *result_dir*"""
name = "predictions"
# Update the meta data
author = get_author()
self.update_meta_data({"type": "prediction_vector",
"storage_format": s_format,
"author": author,
"data_pattern": "data_run" + os.sep
+ name + "_sp_tt." + s_format[0]})
if not s_format in ["csv", "arff", "pickle"]:
self._log("Storage format not supported! Using default.",
level=logging.ERROR)
s_format = "pickle"
for key, prediction_vectors in self.data.iteritems():
# Construct result directory
result_path = result_dir + os.sep + "data" \
+ "_run%s" % key[0]
if not os.path.exists(result_path):
os.mkdir(result_path)
key_str = "_sp%s_%s" % key[1:]
# Store data depending on the desired format
if s_format == "pickle":
result_file = open(os.path.join(result_path,
name + key_str + ".pickle"),
"w")
cPickle.dump(prediction_vectors, result_file, cPickle.HIGHEST_PROTOCOL)
elif s_format == "csv": # Write as Comma Separated Value
result_file = open(os.path.join(result_path,
name + key_str + ".csv"),"w")
if self.meta_data["num_predictors"] == 1:
result_file.write("Predicted Label, Prediction Score, True Label \n")
for pv in prediction_vectors:
result_file.write("%s, %s, %s\n" % (pv[0].label[0], pv[0].prediction[0], pv[1]))
else:
# we begin by dealing with the header of the csv file
base_header = "Predicted %(index)d Label, Prediction %(index)d Score, "
base_result = "%(label)s, %(score)s,"
header = ""
for i in range(self.meta_data["num_predictors"]):
header+= base_header % dict(index=i+1)
header += "True Label\n"
result_file.write(header)
# and now we can write each of the prediction vectors in turn
for pv in prediction_vectors:
result = ""
for i in range(self.meta_data["num_predictors"]):
result += base_result % dict(label=pv[0].label[i],
score=pv[0].prediction[i])
result += str(pv[1]) + "\n"
result_file.write(result)
#Store meta data
BaseDataset.store_meta_data(result_dir,self.meta_data)
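To make the multi-predictor CSV layout of Example 11 concrete, the following self-contained sketch expands the same header and row templates for two predictors; the labels and scores are invented.

# Reconstruction of the CSV layout written by Example 11 for
# meta_data["num_predictors"] == 2; all values below are invented.
num_predictors = 2
base_header = "Predicted %(index)d Label, Prediction %(index)d Score, "
base_result = "%(label)s, %(score)s,"

header = ""
for i in range(num_predictors):
    header += base_header % dict(index=i + 1)
header += "True Label\n"

row = base_result % dict(label="Target", score=0.87)
row += base_result % dict(label="Standard", score=-0.42)
row += "Target\n"

print(header + row)
# Predicted 1 Label, Prediction 1 Score, Predicted 2 Label, Prediction 2 Score, True Label
# Target, 0.87,Standard, -0.42,Target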
Example 12: store
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def store(self, result_dir, s_format = "bp_eeg"):
self.merged = False
scale = 10.0 # is used to scale up the eeg sample values. The data samples are converted to int16
# when saving, so scaling is necessary to maintain the resolution.
# Keep original file name, depends on the AnalyserSinkNode, see its documentation.
if self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] is not None:
name = self.meta_data['eeg_src_file_name']
# or use default name from this collection
else:
name = "Analyzer"
if not s_format == "bp_eeg":
self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
return
# Update the meta data
author = get_author()
self.update_meta_data({"type": "only output of individual nodes stored",
"storage_format": s_format,
"author" : author,
"data_pattern": "Multiplexed"})
# Store meta data
BaseDataset.store_meta_data(result_dir,self.meta_data)
#self._log("EEG data file %s" % self.collection.data_file)
slices = []
slices.append(0)
channel_names = []
for key, time_series in self.data.iteritems():
# Sort the Time-Series Array
def cmp_start(a, b):
return cmp(a[0].start_time, b[0].start_time)
time_series.sort(cmp_start)
# Check for overlapping Windows and remove them if existent
i = 0
while i < len(time_series):
ts = time_series[i]
#print ts[0].start_time, ts[0].end_time
#print len(time_series)
if ts[0].start_time >= slices[-1]:
slices.append(ts[0].end_time)
else:
warnings.warn("Ignoring at least one overlapping window!", UserWarning)
i = i+1
# STORE ACTUAL EEG DATA AND WRITE MARKERFILE
result_path = result_dir + os.sep + "data_analyzer" \
+ "_run%s" % key[0]
if not os.path.exists(result_path):
os.mkdir(result_path)
key_str = "_sp%s_%s" % key[1:]
# Keep original name
if (self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] != None):
result_file_eeg = open(os.path.join(result_path, name + ".eeg"), "wb")
result_file_mrk = open(os.path.join(result_path, name + ".vmrk"), "w")
# or use default name from this collection
else:
result_file_eeg = open(os.path.join(result_path, name + key_str + ".eeg"), "wb")
result_file_mrk = open(os.path.join(result_path, name + key_str + ".vmrk"), "w")
# Write Marker header
if (self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] != None):
result_file_mrk.write(header_mrk % (name))
else:
result_file_mrk.write(header_mrk % (name + key_str))
result_file_ms = 0
# Data for padding
padding = None
count_mrk = 2
num_ch = 0
sampling_int = 0
for ts in time_series:
ts0 = ts[0] * scale
ts0 = ts0.astype(numpy.int16)
if padding == None:
padding = numpy.zeros(len(ts[0].channel_names), dtype='int16')
num_ch = len(ts[0].channel_names)
channel_names = ts[0].channel_names
sampling_int = 1000000/ts[0].sampling_frequency
#print "writing %d channels.." % len(ts[0].channel_names)
# Write Padding (zeros)
while result_file_ms < ts[0].start_time - sampling_int/1000.0:
result_file_eeg.write(padding.tostring())
result_file_ms += ts[0]._samples_to_ms(1)
# Write window
ts0.tofile(result_file_eeg)
result_file_ms += ts[0].end_time - (ts[0].start_time - sampling_int/1000.0)
# Write Marker
markers = []
if(len(ts[0].marker_name) > 0):
mk_keys = ts[0].marker_name.keys()
mk_values = ts[0].marker_name.values()
for mk in range(len(mk_keys)):
#......... part of the code omitted here .........
Example 13: consolidate
# Required import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
def consolidate(self, _=None):
""" Consolidates the results obtained by the single processes into a consistent structure
of collections that are stored on the file system.
"""
# Consolidate the results
directory_pattern = os.sep.join([self.result_directory, "{*",])
dataset_pathes = glob.glob(directory_pattern)
# For all collections found
for dataset_path in dataset_pathes:
try:
# Load their meta_data
meta_data = BaseDataset.load_meta_data(dataset_path)
# Determine author and date
author = get_author()
date = time.strftime("%Y%m%d_%H_%M_%S")
# Update meta data and store it
meta_data.update({"author": author, "date": date})
# There can be either run dirs, persistency dirs, or both of them.
# Check of whichever there are more. If both exist, their numbers
# are supposed to be equal.
nr_run_dirs = len(glob.glob(os.path.join(dataset_path, "data_run*")))
nr_per_dirs = len(glob.glob(os.path.join(dataset_path, "persistency_run*")))
nr_runs = max(nr_run_dirs, nr_per_dirs)
if nr_runs > 1:
meta_data["runs"] = nr_runs
# Store the metadata
BaseDataset.store_meta_data(dataset_path, meta_data)
# Copy the input dataset specification file to the result
# directory in order to make later analysis of
# the results more easy
# THA: Split the first "/" from the input collection name, because otherwise it will be treated
# as an absolute path
input_collection_name = meta_data["input_dataset_name"][1:] if \
meta_data["input_dataset_name"][0] == os.sep else meta_data["input_dataset_name"]
input_meta_path = os.path.join(pySPACE.configuration.storage, input_collection_name)
try:
input_meta = BaseDataset.load_meta_data(input_meta_path)
BaseDataset.store_meta_data(dataset_path, input_meta, file_name="input_metadata.yaml")
except (IOError, OSError) as e:
self._log("Error copying the input_metadata.yaml: {error}".format(error=e.message),
level=logging.CRITICAL)
except Exception as e:
logging.getLogger("%s" % self).exception("Error updating the metadata: {error!s}".format(error=e))
raise e
# If we don't create a feature vector or time series collection,
# we evaluated our classification using a classification performance sink.
# The resulting files should be merged to one csv tabular.
pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
if len(pathlist)>0:
# Do the consolidation the same way as for WekaClassificationOperation
self._log("Consolidating results ...")
# We load and store the results once into a PerformanceResultSummary
# This does the necessary consolidation...
self._log("Reading intermediate results...")
try:
result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
self._log("done")
self._log("Storing result collection")
result_collection.store(self.result_directory)
self._log("done")
PerformanceResultSummary.merge_traces(self.result_directory)
except Exception as e:
logging.getLogger("%s" % self).exception("Error merging the result collection: {error!s}".format(
error=e))
if self.compression:
# Since we get one result summary,
# we don't need the numerous folders.
# So we zip them to make the whole folder easier to browse.
import zipfile
cwd = os.getcwd()
os.chdir(self.result_directory)
# If there are too many or too large folders, problems may occur.
# In this case we want to log it, try 64 bit mode,
# and then skip the zipping.
try:
pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
if not self.compression == "delete":
save_file = zipfile.ZipFile(
self.result_directory+'/result_folders.zip',
mode="w", compression=self.compression)
# we want to have the zipped file relative to the
# result directory
for path in pathlist:
for node in os.walk(path):
rel_path=os.path.relpath(node[0],
self.result_directory)
save_file.write(rel_path)
for data in node[2]:
save_file.write(os.path.join(rel_path,
data))
save_file.close()
#......... part of the code omitted here .........