

Python BaseDataset.store_meta_data Method Code Examples

This article collects typical usage examples of the Python method pySPACE.resources.dataset_defs.base.BaseDataset.store_meta_data. If you are wondering what BaseDataset.store_meta_data does, how to call it, or want to see it used in real code, the curated examples below should help. You can also explore further usage examples of the class it belongs to, pySPACE.resources.dataset_defs.base.BaseDataset.


Thirteen code examples of the BaseDataset.store_meta_data method are shown below, ordered by popularity by default.
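All of the examples share the same call pattern: build or update a meta-data dictionary on the dataset and then hand it to BaseDataset.store_meta_data together with the result directory. The following minimal sketch illustrates that pattern; it assumes pySPACE is installed, and the path and dictionary values are placeholders for illustration only, not output of any real pySPACE run.

# Minimal sketch (assumptions: pySPACE is installed; path and values are placeholders).
import os
from pySPACE.resources.dataset_defs.base import BaseDataset

result_dir = os.path.join("/tmp", "example_collection")
if not os.path.exists(result_dir):
    os.makedirs(result_dir)
meta_data = {"type": "feature_vector",      # kind of data kept in the collection
             "storage_format": "pickle",    # how the actual data is serialized
             "author": "unknown",
             "data_pattern": "no data stored"}
# Writes the dictionary into result_dir; the examples below rely on this call
# to create the collection's meta-data file (typically metadata.yaml).
BaseDataset.store_meta_data(result_dir, meta_data)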

Example 1: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def store(self, result_dir, s_format = "None"):
        if not s_format == "None":
            self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
            return
        # Update the meta data
        author = get_author()
        self.update_meta_data({"type": "only output of individual nodes stored",
                                      "storage_format": s_format,
                                      "author" : author,
                                      "data_pattern": "no data stored"})

        # Store meta data
        BaseDataset.store_meta_data(result_dir,self.meta_data)
Developer: MMKrell, Project: pyspace, Lines of code: 15, Source file: dummy.py
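For reference, the dictionary stored in Example 1 roughly corresponds to the following YAML once serialized. This is only an illustration produced with PyYAML; the actual file name and layout are handled internally by BaseDataset.store_meta_data.

# Illustration only: serialize Example 1's meta data with PyYAML to see the
# approximate on-disk shape (the real file is written by store_meta_data itself).
import yaml

meta_data = {"type": "only output of individual nodes stored",
             "storage_format": "None",
             "author": "unknown",
             "data_pattern": "no data stored"}
print(yaml.dump(meta_data, default_flow_style=False))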

Example 2: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def store(self, result_dir, s_format = "None"):
        if not s_format == "None":
            self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
            return
        # Update the meta data
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except:
            author = "unknown"
            self._log("Author could not be resolved.",level=logging.WARNING)
        self.update_meta_data({"type": "only output of individual nodes stored",
                                      "storage_format": s_format,
                                      "author" : author,
                                      "data_pattern": "no data stored"})

        # Store meta data
        BaseDataset.store_meta_data(result_dir,self.meta_data)
Developer: AlexanderFabisch, Project: pyspace, Lines of code: 19, Source file: dummy.py
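Example 1 calls the helper get_author(), while Example 2 inlines the lookup. A small standalone sketch of that lookup pattern follows; get_author() presumably wraps something similar, so treat this as an illustration rather than the pySPACE implementation.

# Standalone sketch of the author-resolution pattern used in these examples
# (Unix only: pwd reads the password database; field 4 is the GECOS/full-name field).
import os
import pwd

def resolve_author():
    try:
        return pwd.getpwuid(os.getuid())[4]
    except Exception:
        return "unknown"

print(resolve_author())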

Example 3: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]

#......... some code omitted here .........
                    Examples: ["arff", "real"], ["arff", "{0,1}"]
                    
                    .. todo:: Someone could implement the format ["fasta"] for sax features
                    
                    To store the data in comma separated values, use ["csv", "real"].
                    
                    (*optional, default: ["pickle", "real"]*)

        .. todo:: Adapt storing of csv file to external library instead of
                  doing it manually.

        """
        name = "features"
        # Update the meta data
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except:
            author = "unknown"
            self._log("Author could not be resolved.",level=logging.WARNING)
        self.update_meta_data({"type": "feature_vector",
                               "storage_format": s_format,
                               "author": author,
                               "data_pattern": "data_run" + os.sep 
                                                 + name + "_sp_tt." + s_format[0]})
        
        if type(s_format) == list:
            s_type = s_format[1]
            s_format = s_format[0]
        else:
            s_type = "real"
            
        if not s_format in ["csv", "arff", "pickle"]:
            self._log("Storage format not supported! Using default.", 
                      level=logging.ERROR)
            s_format = "pickle"
        
        # Iterate through splits and runs in this dataset
        for key, feature_vectors in self.data.iteritems():
            # Construct result directory
            result_path = result_dir + os.sep + "data" \
                            + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)
                
            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format == "pickle":
                result_file = open(os.path.join(result_path, 
                                                name + key_str + ".pickle"),
                                   "w")
         
                cPickle.dump(feature_vectors, result_file, cPickle.HIGHEST_PROTOCOL)
            elif s_format == "arff": # Write as ARFF
                result_file = open(os.path.join(result_path, 
                                                name + key_str + ".arff"),"w")
                # Create the arff file header
                relation_name = result_dir.split(os.sep)[-1]
                result_file.write('@relation "%s"\n' % relation_name)
                # Write the type of all features
                for feature_name in self.meta_data["feature_names"]:
                    result_file.write("@attribute %s %s\n" % (feature_name,  s_type))
                classString = "" + ",".join(sorted(self.meta_data["classes_names"])) + ""

                result_file.write("@attribute class {%s}\n" % classString)
                
                result_file.write("@data\n")
                # Write all given training data into the ARFF file
                fv = feature_vectors[0][0]
                if numpy.issubdtype(fv.dtype, numpy.string_):
                    feature_format = "%s,"
                elif numpy.issubdtype(fv.dtype, numpy.floating):
                    feature_format = "%f,"
                elif numpy.issubdtype(fv.dtype, numpy.integer):
                    feature_format = "%d,"
                else:
                    # fall back to string formatting for unexpected dtypes
                    feature_format = "%s,"
                for features, class_name in feature_vectors:
                    for feature in features[0]:
                        result_file.write(feature_format % feature)
                    result_file.write("%s\n" % str(class_name))
            elif s_format == "csv": # Write as Comma Separated Value
                result_file = open(os.path.join(result_path, 
                                                name + key_str + ".csv"),"w")
                for feature_name in self.meta_data["feature_names"]:
                    result_file.write("%s," % (feature_name))
                result_file.write("\n")
                fv = feature_vectors[0][0]
                if numpy.issubdtype(fv.dtype, numpy.floating):
                    feature_format = "%f,"
                elif numpy.issubdtype(fv.dtype, numpy.integer):
                    feature_format = "%d,"
                else:
                    feature_format = "%s,"
                for features, class_name in feature_vectors:
                    f = features.view(numpy.ndarray)
                    for feature in f[0]:
                        result_file.write(feature_format % feature)
                    result_file.write("%s\n" % str(class_name))
            result_file.close()

        #Store meta data
        BaseDataset.store_meta_data(result_dir,self.meta_data)
Developer: BioinformaticsArchive, Project: pyspace, Lines of code: 104, Source file: feature_vector.py
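To check what the "pickle" branch above produced, the feature vectors can be read back directly. The following is a hedged sketch: the file path merely follows the data_run / name + key_str pattern used in the code and is otherwise an assumption.

# Sketch: read back feature vectors written by the "pickle" branch above.
# The path is assumed to follow the data_run*/features_sp*_*.pickle pattern.
import cPickle

with open("data_run0/features_sp0_test.pickle", "rb") as result_file:
    feature_vectors = cPickle.load(result_file)
# each entry is a (feature_vector, class_label) pair, as iterated when storing
for features, class_name in feature_vectors:
    print("%s -> %s" % (features[0], class_name))
    break  # just show the first pair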

Example 4: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def store(self, result_dir, s_format="BrainVision"):
        # Keep the original file name; depends on the AnalyserSinkNode, see its documentation.
        if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] is not None:
            name = self.meta_data["eeg_src_file_name"]
        # or use default name from this collection
        else:
            name = "Analyzer"
        if not s_format == "BrainVision":
            self._log("The format %s is not supported!" % s_format, level=logging.CRITICAL)
            return
        # Update the meta data
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except:
            author = "unknown"
            self._log("Author could not be resolved.", level=logging.WARNING)
        self.update_meta_data(
            {
                "type": "only output of individual nodes stored",
                "storage_format": s_format,
                "author": author,
                "data_pattern": "Multiplexed",
            }
        )
        # Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)
        # self._log("EEG data file %s" % self.collection.data_file)
        slices = []
        slices.append(0)
        channel_names = []

        for key, time_series in self.data.iteritems():
            # Sort the Times-Series Array
            def cmp_start(a, b):
                return cmp(a[0].start_time, b[0].start_time)

            time_series.sort(cmp_start)
            # Check for overlapping Windows and remove them if existent
            i = 0
            while i < len(time_series):
                ts = time_series[i]
                # print ts[0].start_time, ts[0].end_time
                # print len(time_series)
                if ts[0].start_time >= slices[-1]:
                    slices.append(ts[0].end_time)
                else:
                    warnings.warn("Ignoring at least one overlapping window!", UserWarning)
                i = i + 1
            # STORE ACTUAL EEG DATA AND WRITE MARKERFILE
            result_path = result_dir + os.sep + "data_analyzer" + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)

            key_str = "_sp%s_%s" % key[1:]
            # Keep original name
            if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
                result_file_eeg = open(os.path.join(result_path, name + ".eeg"), "wb")
                result_file_mrk = open(os.path.join(result_path, name + ".vmrk"), "w")
            # or use default name from this collection
            else:
                result_file_eeg = open(os.path.join(result_path, name + key_str + ".eeg"), "wb")
                result_file_mrk = open(os.path.join(result_path, name + key_str + ".vmrk"), "w")

            # Write Marker header
            if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
                result_file_mrk.write(header_mrk % (name))
            else:
                result_file_mrk.write(header_mrk % (name + key_str))

            result_file_ms = 0

            # Data for padding
            padding = None

            count_mrk = 2
            num_ch = 0
            sampling_int = 0

            for ts in time_series:
                if padding == None:
                    padding = numpy.zeros(len(ts[0].channel_names), dtype="int16")
                    num_ch = len(ts[0].channel_names)
                    channel_names = ts[0].channel_names
                    sampling_int = 1000000 / ts[0].sampling_frequency
                    # print "writing %d channels.." % len(ts[0].channel_names)
                # Write Padding (zeros)
                while result_file_ms < ts[0].start_time:
                    result_file_eeg.write(padding.tostring())
                    result_file_ms += ts[0]._samples_to_ms(1)
                # Write window
                ts[0].tofile(result_file_eeg)
                result_file_ms += ts[0].end_time - ts[0].start_time
                # Write Marker
                result_file_mrk.write(
                    "Mk%d=Label,%s,%d,1,0\n" % (count_mrk, ts[1], ts[0]._ms_to_samples(ts[0].start_time))
                )
                count_mrk += 1
            # WRITE HEADERFILE
            # Keep original name
            if self.meta_data.has_key("eeg_src_file_name") and self.meta_data["eeg_src_file_name"] != None:
#......... some code omitted here .........
Developer: BioinformaticsArchive, Project: pyspace, Lines of code: 103, Source file: analyzer_sink.py

Example 5: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]

#......... some code omitted here .........
            s_type = "%i"
        if s_format == "csv" and s_type == "real":
            s_type = "%.18e"
        # Update the meta data
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except Exception:
            author = "unknown"
            self._log("Author could not be resolved.", level=logging.WARNING)
        self.update_meta_data({"type": "time_series",
                               "storage_format": s_format,
                               "author": author,
                               "data_pattern": "data_run" + os.sep 
                                               + name + "_sp_tt." + s_format})

        # Iterate through splits and runs in this dataset
        for key, time_series in self.data.iteritems():
            # load data, if necessary
            # (due to lazy loading, the data might not be loaded yet)
            if isinstance(time_series, basestring):
                time_series = self.get_data(key[0], key[1], key[2])
            if self.sort_string is not None:
                time_series.sort(key=eval(self.sort_string))
            # Construct result directory
            result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)
            
            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format in ["pickle", "cpickle", "cPickle"]:
                result_file = open(os.path.join(result_path,
                                                name+key_str+".pickle"), "w")
                cPickle.dump(time_series, result_file, cPickle.HIGHEST_PROTOCOL)
            elif s_format in ["text","csv"]:
                self.update_meta_data({
                    "type": "stream",
                    "marker_column": "marker"})
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".csv"), "w")
                csvwriter = csv.writer(result_file)
                channel_names = copy.deepcopy(time_series[0][0].channel_names)
                if s_format == "csv":
                    channel_names.append("marker")
                csvwriter.writerow(channel_names)
                for (data, key) in time_series:
                    if s_format == "text":
                        numpy.savetxt(result_file, data, delimiter=",", fmt=s_type)
                        if not key is None:
                            result_file.write(str(key))
                            result_file.flush()
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            result_file.write(str(data.marker_name))
                            result_file.flush()
                    else:
                        first_line = True
                        marker = ""
                        if not key is None:
                            marker = str(key)
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            marker = str(data.marker_name)
                        for line in data:
                            l = list(line)
                            l.append(marker)
                            csvwriter.writerow(list(l))
                            if first_line:
                                first_line = False
                                marker = ""
                        result_file.flush()
            elif s_format in ["mat"]:
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".mat"),"w")
                # extract a first time series object to get meta data 
                merged_time_series = time_series.pop(0)[0]
                # collect all important information in the collection_object
                collection_object = {
                    "sampling_frequency": merged_time_series.sampling_frequency,
                    "channel_names": merged_time_series.channel_names}

                # merge all data 
                for (data,key) in time_series:
                    merged_time_series = numpy.vstack((merged_time_series,
                                                       data))
                collection_object["data"] = merged_time_series 
                mdict = dict()
                mdict[name + key_str] = collection_object 
                import scipy.io
                scipy.io.savemat(result_file, mdict=mdict)
            else:
                raise NotImplementedError("Using unavailable storage format: %s!"
                                          % s_format)
            result_file.close()
        self.update_meta_data({
            "channel_names": copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency
        })
        #Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)
Developer: BioinformaticsArchive, Project: pyspace, Lines of code: 104, Source file: time_series.py
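The "mat" branch above stores one struct per split containing sampling_frequency, channel_names and the stacked data, which can be inspected with scipy. The sketch below assumes the file and variable names follow the name + key_str pattern used when saving; both names are guesses for illustration.

# Sketch: inspect a .mat file written by the "mat" branch above. File and
# variable names are assumed to follow the name + key_str pattern (guessed here).
import scipy.io

mat = scipy.io.loadmat("data_run0/time_series_sp0_test.mat",
                       squeeze_me=True, struct_as_record=False)
entry = mat["time_series_sp0_test"]      # the collection_object stored above
print(entry.sampling_frequency)          # scalar copied from the first window
print(entry.channel_names)               # channel names copied from the first window
print(entry.data.shape)                  # all windows stacked with numpy.vstack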

Example 6: consolidate

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def consolidate(self):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            # Load their meta_data
            meta_data = BaseDataset.load_meta_data(dataset_path)

            # Determine author and date
            try:
                author = pwd.getpwuid(os.getuid())[4]
            except:
                author = "unknown"
                self._log("Author could not be resolved.",level=logging.WARNING)
            date = time.strftime("%Y%m%d_%H_%M_%S")

            # Update meta data and store it
            meta_data.update({"author" : author, "date" : date})
            BaseDataset.store_meta_data(dataset_path, meta_data)

            # Copy the input dataset specification file to the result
            # directory in order to make later analysis of
            # the results easier
            input_meta_path = os.sep.join([pySPACE.configuration.storage,
                                          meta_data["input_collection_name"]])
            input_meta = BaseDataset.load_meta_data(input_meta_path)
            BaseDataset.store_meta_data(dataset_path,input_meta,
                                        file_name="input_metadata.yaml")
        # Check if some results consist of several runs
        # and update the meta data in this case
        # TODO: This is not a clean solution
        for dataset_dir in glob.glob(os.sep.join([self.result_directory,
                                                     "*"])):
            if not os.path.isdir(dataset_dir): continue
            # There can be run dirs, persistency dirs, or both of them.
            # Use whichever there are more of. If both exist, their numbers
            # are supposed to be equal.
            nr_run_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                              "data_run*"])))
            nr_per_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                              "persistency_run*"])))
            nr_runs = max(nr_run_dirs, nr_per_dirs)

            if nr_runs > 1:
                collection_meta = BaseDataset.load_meta_data(dataset_dir)
                collection_meta["runs"] = nr_runs
                BaseDataset.store_meta_data(dataset_dir,collection_meta)
        # If we don't create a feature vector or time series collection,
        # we evaluated our classification using a classification performance sink.
        # The resulting files should be merged to one csv tabular.
        pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
        if len(pathlist)>0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
            self._log("done")
            self._log("Storing result collection")
            result_collection.store(self.result_directory)
            self._log("done")
            PerformanceResultSummary.merge_traces(self.result_directory)

            if not(self.compression == False):
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole folder easier to browse.
                import zipfile
                cwd=os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode, and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
                    
                    if not self.compression == "delete":                        
                        save_file=zipfile.ZipFile(self.result_directory+'/result_folders.zip',mode="w",compression=self.compression)
                        # we want to have the zipped file relative to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path=os.path.relpath(node[0],self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,data))
                        save_file.close()
                    # To still have an easy access to the history of the processing,
                    # we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except:
                    self._log("Result files could not be compressed with 32 bit mode, switching to 64 bit mode.", level=logging.CRITICAL)
                    # nearly total code copy, only difference with 64 bit mode
                    try:
#......... some code omitted here .........
Developer: AlexanderFabisch, Project: pyspace, Lines of code: 103, Source file: node_chain.py
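The zipping step above archives every result folder relative to the result directory (the original code changes the working directory first). A standalone sketch of the same idea, using arcname instead of os.chdir, follows; all paths are placeholders.

# Standalone sketch of zipping result folders relative to a result directory,
# mirroring the pattern above; all paths are placeholders.
import glob
import os
import zipfile

result_directory = "/tmp/results"                       # placeholder
zip_path = os.path.join(result_directory, "result_folders.zip")
with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    for path in glob.glob(os.path.join(result_directory, "{*}")):
        for root, _, files in os.walk(path):
            rel_path = os.path.relpath(root, result_directory)
            zf.write(root, arcname=rel_path)            # directory entry
            for data in files:
                zf.write(os.path.join(root, data),
                         arcname=os.path.join(rel_path, data))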

Example 7: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]

#......... some code omitted here .........
                import scipy.io
                result_file_name = os.path.join(result_path, 
                                                name + key_str + ".mat")
                # extract a first time series object to get meta data 
                ts1 = time_series[0][0]
                
                # collect all important information in the collection_object
                dataset_dict = {
                    "sampling_frequency": ts1.sampling_frequency,
                    "channel_names": ts1.channel_names}
                
                # we have to extract the data and labels separatly
                if 'channelXtime' in s_format:
                    dataset_dict["data"] = [data.T for data, _ in time_series] 
                else:
                    dataset_dict["data"] = [data for data, _ in time_series]
                dataset_dict["labels"] = [label for _, label in time_series]
                # construct numpy 3d array (e.g., channelXtimeXtrials)
                dataset_dict["data"] = numpy.rollaxis(numpy.array(
                    dataset_dict["data"]), 0, 3)
                
                scipy.io.savemat(result_file_name, mdict=dataset_dict)
            elif f_format in ["bp_eeg"]:

                result_file = open(os.path.join(result_path,
                                                name + key_str + ".eeg"),"a+")
                result_file_mrk = open(os.path.join(result_path,
                                                name + key_str + ".vmrk"),"w")

                result_file_mrk.write("Brain Vision Data Exchange Marker File, "
                                      "Version 1.0\n")
                result_file_mrk.write("; Data stored by pySPACE\n")
                result_file_mrk.write("[Common Infos]\n")
                result_file_mrk.write("Codepage=UTF-8\n")
                result_file_mrk.write("DataFile=%s\n" %
                                      str(name + key_str + ".eeg"))
                result_file_mrk.write("\n[Marker Infos]\n")

                markerno = 1
                datapoint = 1
                sf = None
                channel_names = None

                for t in time_series:
                    if sf is None:
                        sf = t[0].sampling_frequency
                    if channel_names is None:
                        channel_names = t[0].get_channel_names()
                    for mrk in t[0].marker_name.keys():
                        for tm in t[0].marker_name[mrk]:
                            result_file_mrk.write(str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                (markerno, mrk, datapoint+(tm*sf/1000.0))))
                            markerno += 1
                    data_ = t[0].astype(numpy.int16)
                    data_.tofile(result_file)
                    datapoint += data_.shape[0]

                result_hdr = open(os.path.join(result_path,
                                                name + key_str + ".vhdr"),"w")

                result_hdr.write("Brain Vision Data Exchange Header "
                                 "File Version 1.0\n")
                result_hdr.write("; Data stored by pySPACE\n\n")
                result_hdr.write("[Common Infos]\n")
                result_hdr.write("Codepage=UTF-8\n")
                result_hdr.write("DataFile=%s\n" %
                                      str(name + key_str + ".eeg"))
                result_hdr.write("MarkerFile=%s\n" %
                                      str(name + key_str + ".vmrk"))
                result_hdr.write("DataFormat=BINARY\n")
                result_hdr.write("DataOrientation=MULTIPLEXED\n")
                result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
                result_hdr.write("SamplingInterval=%d\n\n" % (1000000/sf))
                result_hdr.write("[Binary Infos]\n")
                result_hdr.write("BinaryFormat=INT_16\n\n")
                result_hdr.write("[Channel Infos]\n")

                # TODO: Add Resolutions to time_series
                # 0 = 0.1 [micro]V,
                # 1 = 0.5 [micro]V,
                # 2 = 10 [micro]V,
                # 3 = 152.6 [micro]V (seems to be unused!)
                resolutions_str = [unicode("0.1,%sV" % unicode(u"\u03BC")),
                   unicode("0.5,%sV" % unicode(u"\u03BC")),
                   unicode("10,%sV" % unicode(u"\u03BC")),
                   unicode("152.6,%sV" % unicode(u"\u03BC"))]
                for i in range(len(channel_names)):
                    result_hdr.write(unicode("Ch%d=%s,,%s\n" %
                        (i+1,channel_names[i],
                        unicode(resolutions_str[0]))).encode('utf-8'))
                result_file.close()
            else:
                raise NotImplementedError("Using unavailable storage format: %s!"
                                          % f_format)
        self.update_meta_data({
            "channel_names": copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency
        })
        #Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)
Developer: MMKrell, Project: pyspace, Lines of code: 104, Source file: time_series.py

Example 8: __call__

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]

#......... some code omitted here .........
             
                # Determine names of the original data sets the input 
                # datasets are based on
                base_dataset1 = dataset_name1.strip("}{").split("}{")[0]
                base_dataset2 = dataset_name2.strip("}{").split("}{")[0]
                
                # Determine target dataset name and create directory
                # for it
                mixed_base_dataset = "%s_vs_%s" % (base_dataset1, 
                                                      base_dataset2)
                target_dataset_name = dataset_name1.replace(base_dataset1,
                                                                  mixed_base_dataset)
                
                target_dataset_dir = os.sep.join([self.result_directory,
                                                     target_dataset_name])
                
                create_directory(os.sep.join([target_dataset_dir, "data_run0"]))
                
                if splitted:
                    # For each split, copy the train data from dataset 1 and
                    # the test data from dataset 2 to the target dataset
                    for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
                                                                       "data_run0",
                                                                       "*_sp*_train.*"])):
                        # TODO: We have $n$ train sets and $n$ test sets, we
                        #       could use all $n*n$ combinations
                        target_train_file_name = source_train_file_name.replace(dataset_dir1,
                                                                                target_dataset_dir)
                        if source_train_file_name.endswith("arff"):
                            self._copy_arff_file(source_train_file_name, 
                                                 target_train_file_name,
                                                 base_dataset1,
                                                 mixed_base_dataset)
                        else:
                            os.symlink(source_train_file_name, 
                                       target_train_file_name)
                        
                        source_test_file_name = source_train_file_name.replace(dataset_dir1,
                                                                               dataset_dir2)
                        
                        source_test_file_name =  source_test_file_name.replace("train.",
                                                                                "test.")
                        target_test_file_name = target_train_file_name.replace("train.",
                                                                                "test.")
                        if source_train_file_name.endswith("arff"):
                            self._copy_arff_file(source_test_file_name, 
                                                 target_test_file_name,
                                                 base_dataset2,
                                                 mixed_base_dataset)
                        else:
                            os.symlink(source_test_file_name,
                                       target_test_file_name)
                else:
                    # Use the data set from dataset 1 as training set and 
                    # the data set from dataset 2 as test data
                    for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
                                                                         "data_run0",
                                                                         "*_sp*_test.*"])):
                        target_train_file_name = source_train_file_name.replace("test.",
                                                                                "train.")
                        target_train_file_name = target_train_file_name.replace(dataset_dir1,
                                                                                target_dataset_dir)
                        if source_train_file_name.endswith("arff"):
                            self._copy_arff_file(source_train_file_name, 
                                                 target_train_file_name,
                                                 base_dataset1,
                                                 mixed_base_dataset)
                        else:
                            os.symlink(source_train_file_name, 
                                       target_train_file_name)
                        
                        source_test_file_name = source_train_file_name.replace(dataset_dir1,
                                                                               dataset_dir2)
                        
                        target_test_file_name = target_train_file_name.replace("train.",
                                                                                "test.")
                        if source_train_file_name.endswith("arff"):
                            self._copy_arff_file(source_test_file_name, 
                                                 target_test_file_name,
                                                 base_dataset2,
                                                 mixed_base_dataset)
                        else:
                            os.symlink(source_test_file_name,
                                       target_test_file_name)
                # Write metadata.yaml based on input meta data
                input_dataset1_meta = BaseDataset.load_meta_data(dataset_dir1)

                output_dataset_meta = dict(input_dataset1_meta)
                output_dataset_meta['train_test'] = True
                output_dataset_meta['date'] = time.strftime("%Y%m%d_%H_%M_%S")
                try:
                    output_dataset_meta['author'] = pwd.getpwuid(os.getuid())[4]
                except :
                    self._log("Author could not be resolved.",level=logging.WARNING)
                    output_dataset_meta['author'] = "unknown"
                BaseDataset.store_meta_data(target_dataset_dir,output_dataset_meta)
        
        ############## Clean up after benchmarking ##############
        super(ShuffleProcess, self).post_benchmarking()
Developer: AlexanderFabisch, Project: pyspace, Lines of code: 104, Source file: shuffle.py

Example 9: __call__

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]

#......... some code omitted here .........
                target_test_file_path = os.sep.join([target_collection_path,
                                       "data_run0","features_sp0_"+key+".arff"])
            
            elif source_file_name.endswith("pickle"):
                file_ending = "pickle"
                source_test_file_path = source_test_collection_path
                target_test_file_path = target_collection_path
            else:
                raise NotImplementedError("File type not supported in " \
                                                               "MergeOperation")
            
            source_train_pathes = []
            for source_train_collection_path in self.input_collections:
                source_train_collection_name = \
                                  source_train_collection_path.split(os.sep)[-2]
                # We must not use data originating from the same input
                # collection both in train and test files
                if source_test_collection_name == source_train_collection_name:
                    continue
                
                # Check that all constraints are fulfilled for this pair of
                # input collections
                if not all(eval(constraint_template % \
                  {'source_train_collection_name': source_train_collection_name,
                   'source_test_collection_name': source_test_collection_name})
                        for constraint_template in self.collection_constraints):
                    continue
                
                # check if all parameters are stored in the target path
                source_collection = \
                                BaseDataset.load(source_train_collection_path)
                source_collection_params = \
                            source_collection.meta_data["parameter_setting"]
                remaining_params = \
                          [param for param in source_collection_params.items() \
                            if param not in target_collection_params.items() and \
                               param[0] not in ["__INPUT_DATASET__", 
                               "__RESULT_DIRECTORY__", "__OUTPUT_BUNDLE__",
                               "__INPUT_COLLECTION__" ]] # for old data
                if remaining_params != []:
                    for k,v in remaining_params:
                         target_collection_path += "{%s#%s}" % (k,str(v))
                         target_collection_params[k]=v
                   
                if "arff" == file_ending:
                    source_train_file_path = \
                                      os.sep.join([source_train_collection_path, 
                                                "data_run0", "features_sp0_" + \
                                               train_set_name_suffix + ".arff"])
                elif "pickle" == file_ending:
                    source_train_file_path = source_train_collection_path

                else:
                    raise NotImplementedError("File type not supported in " \
                                                              "MergeOperation!")     
                    
                source_train_pathes.append(source_train_file_path)
            
            if "arff" == file_ending:
                target_train_file_path = os.sep.join([target_collection_path,
                                       "data_run0","features_sp0_"+key+".arff"])
            elif "pickle" == file_ending:
                target_train_file_path = target_collection_path
            else:
                raise NotImplementedError("File type not supported in "
                                                              "MergeOperation!")     
            
            if len(source_train_pathes) == 0:
                continue
            
            create_directory(os.sep.join([target_collection_path,
                                          "data_run0"]))
            
            if "arff" == file_ending:
                self._copy_arff_file(source_test_file_path, 
                                     target_test_file_path,
                                     source_test_collection_name, 
                                     target_collection_name)
                                
                self._merge_arff_files(target_train_file_path, 
                                       source_train_pathes,
                                       target_collection_name)
                # Copy metadata.yaml
                # TODO: Adapt to new collection
                input_meta = BaseDataset.load_meta_data(source_test_collection_path)
                BaseDataset.store_meta_data(target_collection_path,input_meta)
            elif "pickle" == file_ending:
                self._copy_pickle_file(source_test_collection_path,
                                       target_collection_path,
                                       train_set_name_suffix)

                self._merge_pickle_files(target_train_file_path, 
                                         source_train_pathes, 
                                         train_set_name_suffix,
                                         target_collection_params)
            else:
                raise NotImplementedError("File type not supported in merge_operation")
            
        ############## Clean up after benchmarking ##############
        super(MergeProcess, self).post_benchmarking()
Developer: BioinformaticsArchive, Project: pyspace, Lines of code: 104, Source file: merge.py

Example 10: consolidate

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def consolidate(self):
        """
        Consolidates the results obtained by the single WEKA filter
        processes into a consistent summary of datasets that is stored on
        the file system.
        
        .. todo:: Some of the contents of this method should go into the
                  :class:`~pySPACE.resources.dataset_defs.feature_vector.FeatureVectorDataset`
        """

        # Iterate over all collections and store the collection meta data etc.
        for entries in os.listdir(self.result_directory):
            fullpath = os.path.join(self.result_directory, entries)
            # For each collection        
            if os.path.isdir(fullpath):
                if entries.startswith("{"):
                    # Extract the parameters from the collection name in order to
                    # adjust the relation name
                    if self.num_parameters > 0:
                        parameter_strings = entries.strip("}{").split("}{")[-self.num_parameters:]
                        parameter_postfix = "{" + "}{".join(parameter_strings) + "}"
                    else:
                        parameter_strings = ""
                        parameter_postfix = ""
                    # Postprocessing of the arff files of this collection
                    for train_arff_file in glob.glob(fullpath + os.sep + "data_run*" 
                                           + os.sep + "*train.arff"):
                        # Adjust the relation name of the train file
                        content = open(train_arff_file, 'r').readlines()             
                        # We strip everything after the last "}"
                        endindex = content[0].rfind("}")
                        content[0] = content[0][:endindex+1]
                        content[0] += parameter_postfix + "'"
                        open(train_arff_file, 'w').writelines(content)
                        # Use relation name of train data for test data
                        test_arff_file = train_arff_file.replace("train.arff", "test.arff") 
                        test_content = open(test_arff_file, 'r').readlines()
                        test_content[0] = content[0] + "\n"
                        open(test_arff_file, 'w').writelines(test_content)
                    
                        # Check which features are contained in the arff file
                        feature_names = []
                        for line in content:
                            if line.startswith("@attribute"):
                                attribute = line.split()[1]
                                if attribute != "class":
                                    feature_names.append(attribute)
                    # Store the collection meta data etc.
                    if self.num_parameters > 0:
                        input_collection_name = \
                            "{" + "}{".join(entries.strip("}{").split("}{")[:-self.num_parameters]) + "}"
                    else:
                        input_collection_name = entries
                        
                    input_collection_path = os.path.join(self.operation_spec["input_path"],
                                                     input_collection_name)

                    input_collection_meta = BaseDataset.load_meta_data(
                                            pySPACE.configuration.storage
                                            + os.sep
                                            + input_collection_path)
                    # Store the input collection
                    BaseDataset.store_meta_data(fullpath, input_collection_meta,
                                                file_name="input_metadata.yaml")
                    # Adjust collection metadata for the new collection
                    input_collection_meta["feature_names"] = feature_names
                    input_collection_meta["num_features"] = len(feature_names)
                    input_collection_meta["author"] = get_author()
                    input_collection_meta["date"] = time.strftime("%Y%m%d")
                    input_collection_meta["input_collection_name"] = input_collection_name
                    # Write the collection meta information into the folder
                    BaseDataset.store_meta_data(fullpath,input_collection_meta)
                    # Store the command_template
                    command_template_file = open(os.path.join(fullpath,
                                                          "command_template"), 'w')
                    command_template_file.write(self.command_template)
                    command_template_file.close()
                else:
                    # training and test arff need the same relation name
                    # otherwise Weka can't relate it to each other; the collection
                    # name and the parameters in {}{}-optic must be the relation 
                    # name for further processing    
                    self._log("WARNING: Collection name doesn't begin with '{'. Further processing may be collapse!", level= logging.WARNING)
        # Write the specification of this operation
        # to the result directory in order to make later 
        # analysis of results easier
        source_operation_file = open(os.path.join(self.result_directory,
                                                  "source_operation.yaml"), 'w')
        yaml.dump(self.operation_spec, source_operation_file)
        source_operation_file.close()
Developer: Crespo911, Project: pyspace, Lines of code: 92, Source file: weka_filter.py

Example 11: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def store(self, result_dir, s_format=["pickle", "real"]):
        """ store the collection in *result_dir*"""

        name = "predictions"
        # Update the meta data
        author = get_author()
        self.update_meta_data({"type": "prediction_vector",
                               "storage_format": s_format,
                               "author": author,
                               "data_pattern": "data_run" + os.sep
                                                 + name + "_sp_tt." + s_format[0]})

        # s_format may be passed as a list such as ["pickle", "real"]; use its first entry
        if type(s_format) == list:
            s_format = s_format[0]
        if not s_format in ["csv", "arff", "pickle"]:
            self._log("Storage format not supported! Using default.",
                      level=logging.ERROR)
            s_format = "pickle"

        for key, prediction_vectors in self.data.iteritems():
            # Construct result directory
            result_path = result_dir + os.sep + "data" \
                            + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)

            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format == "pickle":
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".pickle"),
                                   "w")
                cPickle.dump(prediction_vectors, result_file, cPickle.HIGHEST_PROTOCOL)

            elif s_format == "csv": # Write as Comma Separated Value
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".csv"),"w")
                if self.meta_data["num_predictors"] == 1:
                    result_file.write("Predicted Label, Prediction Score, True Label \n")
                    for pv in prediction_vectors:
                        result_file.write("%s, %s, %s\n" % (pv[0].label[0], pv[0].prediction[0], pv[1]))
                else:
                    # we begin by dealing with the header of the csv file
                    base_header = "Predicted %(index)d Label, Prediction %(index)d Score, "
                    base_result = "%(label)s, %(score)s,"
                    header = ""
                    for i in range(self.meta_data["num_predictors"]):
                        header+= base_header % dict(index=i+1)
                    header += "True Label\n"
                    result_file.write(header)

                    # and now we can write each of the prediction vectors in turn

                    for pv in prediction_vectors:
                        result = ""
                        for i in range(self.meta_data["num_predictors"]):
                            result += base_result % dict(label=pv[0].label[i],
                                                         score=pv[0].prediction[i])

                        result += str(pv[1]) + "\n"
                        result_file.write(result)

        #Store meta data
        BaseDataset.store_meta_data(result_dir,self.meta_data)
Developer: Crespo911, Project: pyspace, Lines of code: 64, Source file: prediction_vector.py
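The single-predictor CSV written above has a three-column layout (predicted label, prediction score, true label). The following sketch reads it back; the file path is an assumption based on the data_run / predictions + key_str naming used in store().

# Sketch: read a single-predictor prediction CSV written above. The path is an
# assumption based on the data_run*/predictions_sp*_*.csv naming used in store().
import csv

with open("data_run0/predictions_sp0_test.csv") as result_file:
    reader = csv.reader(result_file, skipinitialspace=True)
    header = next(reader)   # Predicted Label, Prediction Score, True Label
    for predicted_label, score, true_label in reader:
        print("%s scored %s (true label: %s)" % (predicted_label, score, true_label))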

Example 12: store

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def store(self, result_dir, s_format = "bp_eeg"):
        self.merged = False
        scale = 10.0  # used to scale up the EEG sample values; the data samples are converted
                      # to int16 when saving, so scaling is necessary to maintain the resolution.
        # Keep the original file name; depends on the AnalyserSinkNode, see its documentation.
        if self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] is not None:
            name = self.meta_data['eeg_src_file_name']
        # or use default name from this collection
        else:
            name = "Analyzer"
        if not s_format == "bp_eeg":
            self._log("The format %s is not supported!"%s_format, level=logging.CRITICAL)
            return
        # Update the meta data
        author = get_author()
        self.update_meta_data({"type": "only output of individual nodes stored",
                                      "storage_format": s_format,
                                      "author" : author,
                                      "data_pattern": "Multiplexed"})
        # Store meta data
        BaseDataset.store_meta_data(result_dir,self.meta_data)
        #self._log("EEG data file %s" % self.collection.data_file)
        slices = []
        slices.append(0)
        channel_names = []
        

        
        for key, time_series in self.data.iteritems():
            # Sort the Times-Series Array
            def cmp_start(a, b):
                return cmp(a[0].start_time, b[0].start_time)

            time_series.sort(cmp_start)
            # Check for overlapping Windows and remove them if existent
            i = 0
            while i < len(time_series):
                ts = time_series[i]
                #print ts[0].start_time, ts[0].end_time
                #print len(time_series)
                if ts[0].start_time >= slices[-1]:
                    slices.append(ts[0].end_time)
                else:
                    warnings.warn("Ignoring at least one overlapping window!", UserWarning)
                i = i+1
            # STORE ACTUAL EEG DATA AND WRITE MARKERFILE
            result_path = result_dir + os.sep + "data_analyzer" \
                            + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)
            
            key_str = "_sp%s_%s" % key[1:]
            # Keep original name
            if (self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] != None):
                result_file_eeg = open(os.path.join(result_path, name + ".eeg"), "wb")
                result_file_mrk = open(os.path.join(result_path, name + ".vmrk"), "w")
            # or use default name from this collection
            else:
                result_file_eeg = open(os.path.join(result_path, name + key_str + ".eeg"), "wb")
                result_file_mrk = open(os.path.join(result_path, name + key_str + ".vmrk"), "w")
        
            # Write Marker header
            if (self.meta_data.has_key('eeg_src_file_name') and self.meta_data['eeg_src_file_name'] != None):
                result_file_mrk.write(header_mrk % (name))
            else:
                result_file_mrk.write(header_mrk % (name + key_str))
        
            result_file_ms = 0
        
            # Data for padding
            padding = None
        
            count_mrk = 2
            num_ch = 0
            sampling_int = 0
            
            for ts in time_series:
                ts0 = ts[0] * scale 
                ts0 = ts0.astype(numpy.int16)
                
                if padding == None:
                    padding = numpy.zeros(len(ts[0].channel_names), dtype='int16')
                    num_ch = len(ts[0].channel_names)
                    channel_names = ts[0].channel_names
                    sampling_int = 1000000/ts[0].sampling_frequency
                    #print "writing %d channels.." % len(ts[0].channel_names)
                # Write Padding (zeros)
                while result_file_ms < ts[0].start_time - sampling_int/1000.0:
                    result_file_eeg.write(padding.tostring())
                    result_file_ms += ts[0]._samples_to_ms(1)
                # Write window
                ts0.tofile(result_file_eeg)
                result_file_ms += ts[0].end_time - (ts[0].start_time - sampling_int/1000.0)
                # Write Marker
                markers = []
                
                if(len(ts[0].marker_name) > 0):
                    mk_keys = ts[0].marker_name.keys()
                    mk_values = ts[0].marker_name.values()
                    for mk in range(len(mk_keys)):
#......... some code omitted here .........
Developer: Hansa064, Project: pyspace, Lines of code: 103, Source file: analyzer_sink.py

Example 13: consolidate

# Required module import: from pySPACE.resources.dataset_defs.base import BaseDataset [as alias]
# Or: from pySPACE.resources.dataset_defs.base.BaseDataset import store_meta_data [as alias]
    def consolidate(self, _=None):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            try:
                # Load their meta_data
                meta_data = BaseDataset.load_meta_data(dataset_path)

                # Determine author and date
                author = get_author()
                date = time.strftime("%Y%m%d_%H_%M_%S")

                # Update meta data and store it
                meta_data.update({"author": author, "date": date})

                # There can be run dirs, persistency dirs, or both of them.
                # Use whichever there are more of. If both exist, their numbers
                # are supposed to be equal.
                nr_run_dirs = len(glob.glob(os.path.join(dataset_path, "data_run*")))
                nr_per_dirs = len(glob.glob(os.path.join(dataset_path, "persistency_run*")))
                nr_runs = max(nr_run_dirs, nr_per_dirs)
                if nr_runs > 1:
                    meta_data["runs"] = nr_runs

                # Store the metadata
                BaseDataset.store_meta_data(dataset_path, meta_data)

                # Copy the input dataset specification file to the result
                # directory in order to make later analysis of
                # the results easier
                # THA: Split the first "/" from the input collection name, because otherwise it will be treated
                # as an absolute path
                input_collection_name = meta_data["input_dataset_name"][1:] if \
                    meta_data["input_dataset_name"][0] == os.sep else meta_data["input_dataset_name"]
                input_meta_path = os.path.join(pySPACE.configuration.storage, input_collection_name)
                try:
                    input_meta = BaseDataset.load_meta_data(input_meta_path)
                    BaseDataset.store_meta_data(dataset_path, input_meta, file_name="input_metadata.yaml")
                except (IOError, OSError) as e:
                    self._log("Error copying the input_metadata.yaml: {error}".format(error=e.message),
                              level=logging.CRITICAL)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error updating the metadata: {error!s}".format(error=e))
                raise e

        # If we don't create a feature vector or time series collection,
        # we evaluated our classification using a classification performance sink.
        # The resulting files should be merged to one csv tabular.
        pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
        if len(pathlist)>0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            try:
                result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
                self._log("done")
                self._log("Storing result collection")
                result_collection.store(self.result_directory)
                self._log("done")
                PerformanceResultSummary.merge_traces(self.result_directory)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error merging the result collection: {error!s}".format(
                    error=e))

            if self.compression:
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole folder easier to browse.
                import zipfile
                cwd = os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode,
                # and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))

                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory+'/result_folders.zip',
                            mode="w", compression=self.compression)
                        # we want to have the zipped file relative to the
                        # result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path=os.path.relpath(node[0],
                                                         self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,
                                                                 data))
                        save_file.close()
#......... some code omitted here .........
Developer: pyspace, Project: pyspace, Lines of code: 103, Source file: node_chain.py


Note: The pySPACE.resources.dataset_defs.base.BaseDataset.store_meta_data examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective developers; the source code copyright belongs to the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.