

Python arff.loadarff Function Code Examples

This article collects typical usage examples of the Python function scipy.io.arff.loadarff, gathered from open-source projects. If you are wondering what loadarff does, how to call it, or how it is used in real code, the curated examples below should help.


The following presents 15 code examples of the loadarff function, sorted by popularity by default.
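Before the examples, a minimal sketch of the basic call may help (the path 'iris.arff' below is only a placeholder): loadarff returns a (data, meta) pair, where data is a NumPy structured array with one field per ARFF attribute (nominal values come back as byte strings) and meta is a MetaData object exposing the attribute names and types.

from scipy.io import arff
import pandas as pd

# 'iris.arff' is a placeholder path; substitute any ARFF file
data, meta = arff.loadarff('iris.arff')
print(meta.names())   # attribute names from the ARFF header
print(meta.types())   # attribute types, e.g. 'numeric' or 'nominal'

# a structured array converts directly to a pandas DataFrame
df = pd.DataFrame(data)
print(df.head())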

Example 1: main

def main(k=3, normalize=False, distance=True, base='mt_', ks=[]):
    train, mtrain = loadarff(base + 'train.arff')
    train = DataFrame(train)
    test, mtest = loadarff(base + 'test.arff')
    test = DataFrame(test)

    cols = [col for col in mtrain.names() if mtrain[col][0] == 'numeric']

    if normalize:
        norms(test, train, cols)

    learner = NearestNeighbor(mtrain, train, mtrain.names()[-1], distance=distance)
    learner.calc(test)
    import time
    print('testing', [k])
    start = time.time()
    err = learner.validate(test, k)
    print('Err:', err, 'Acc:', 1 - err)
    print('Time', time.time() - start)
    if not ks: return err
    errs = {}
    errs[k] = err
    for ok in ks:
        print('testing', ok)
        start = time.time()
        err = learner.validate(test, ok)
        print('Err:', err, 'Acc:', 1 - err)
        print('Time', time.time() - start)
        errs[ok] = err
    return errs
Developer: jaredly, Project: kmeans, Lines of code: 30, Source: kNN.py

Example 2: compatibility_check

    def compatibility_check(self):
        c1_data, c1_meta = arff.loadarff(os.path.join(self.c1_folder, 'data', 'features.arff'))
        c2_data, c2_meta = arff.loadarff(os.path.join(self.c2_folder, 'data', 'features.arff'))

        testres = {}
        
        # check features
        if collections.Counter(c1_meta.names()) == collections.Counter(c2_meta.names()):
            testres['features'] = True
        else:
            testres['features'] = False

        # check classes
        classes_c1 = list(set([x[-1] for x in c1_data]))
        classes_c2 = list(set([x[-1] for x in c2_data]))
        if collections.Counter(classes_c1) == collections.Counter(classes_c2):
            testres['classes'] = True
        else:
            testres['classes'] = False

        print('Compatibility report:')
        print('features: ', testres['features'])
        print('classes: ', testres['classes'])
        
        return testres
Developer: ThomasWalter, Project: drug_screen, Lines of code: 25, Source: join_classifiers.py

Example 3: initial

def initial():
    global traindata,trainmeta,attr,row,col,testdata,testmeta,trow,tcol
    traindata, trainmeta = arff.loadarff(sys.argv[1])
    attr = trainmeta.names()  # public API, equivalent to the private _attrnames
    row = len(traindata)
    col = len(traindata[0])
    testdata, testmeta = arff.loadarff(sys.argv[2])
    trow = len(testdata)
    tcol = len(testdata[0])
    return sys.argv[3] == 'n'
Developer: youheiwojiang, Project: BayesNet, Lines of code: 10, Source: bayes_full.py

Example 4: main

def main():
    # create the training & test sets, skipping the header row with [1:]
    fnc_data, fnc_meta = loadarff(open('Train/train_FNC_attrSelected.arff', 'r'))
    sbm_data, sbm_meta = loadarff(open('Train/train_SBM_attrSelected.arff', 'r'))
    testf = genfromtxt(open('Test/test_FNC.csv', 'r'), delimiter=',', dtype='f8')[1:]
    tests = genfromtxt(open('Test/test_SBM.csv', 'r'), delimiter=',', dtype='f8')[1:]

    # the last ARFF column is the class label, the rest are features
    X_train = np.array([list(row)[:-1] for row in fnc_data], dtype='f8')
    y_train = np.array([row[-1] for row in fnc_data])

    # NOTE: the original snippet fitted on the sklearn iris dataset and an
    # undefined `test` variable; fitting on the loaded FNC data and predicting
    # on the FNC test set is assumed here
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    predicted_probs = [[index + 1, x[1]] for index, x in enumerate(gnb.predict_proba(testf))]

    savetxt('Data/submission.csv', predicted_probs, delimiter=',', fmt='%d,%f',
            header='MoleculeId,PredictedProbability', comments='')
Developer: steve3003, Project: MLSP-psychomy, Lines of code: 14, Source: NB_exampleSubmission.py

Example 5: main

def main(k=3, normalize=False, distance=True, base='mt_', ks=[], regress=False, recycle=False, maxerr=.1):
    train, mtrain = loadarff(base + 'train.arff')
    train = DataFrame(train)
    test, mtest = loadarff(base + 'test.arff')
    test = DataFrame(test)

    cols = [col for col in mtrain.names() if mtrain[col][0] == 'numeric']

    if normalize:
        norms(test, train, cols)

    target = mtrain.names()[-1]
    if recycle:
        print(len(train))
        if regress:
            removed = reduce_regress(target, train, k, True, maxerr=maxerr)
        else:
            removed = reuse_recycle(target, train, k, True)
        # print removed
        ixs = list(train.index)
        for n in removed:
            ixs.remove(n)
        train = train.loc[ixs]
        print(len(train))
        # print train.index

    learner = NearestNeighbor(mtrain, train, target, distance=distance)
    learner.calc(test)

    tester = learner.regress if regress else learner.validate

    import time
    print('testing', [k])
    start = time.time()
    err = tester(test, k)
    print('Err:', err, 'Acc:', 1 - err)
    print('Time', time.time() - start)
    if not ks: return err
    errs = {}
    errs[k] = err
    for ok in ks:
        print('testing', ok)
        start = time.time()
        err = tester(test, ok)
        print('Err:', err, 'Acc:', 1 - err)
        print('Time', time.time() - start)
        errs[ok] = err
    return errs
Developer: jaredly, Project: kNN, Lines of code: 48, Source: kNN.py

Example 6: load_data

def load_data(filename):
    """
    returns an array of floats givent the specified filename.
    requires scipy.io.arff.loadarff
    """
    raw = loadarff(filename)[0]
    return np.array([[float(i) for i in row] for row in raw])
Developer: sboysel, Project: ml, Lines of code: 7, Source: adaboost.py

Example 7: preprocess

    def preprocess(self):
        if not os.path.exists(self.outputFolder):
            try:
                os.makedirs(self.outputFolder)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise exc
        metadata = dict()
        if not self.parameters:
            self.parameters['parameter'] = 'default'
        metadata['preprocessing_params'] = self.parameters
        yaml.dump(metadata, open(self.outputFolder + '/PreProcessing.yaml', 'w'))
        if self.dataFile.split('.')[-1] == 'arff':
            data, meta = loadarff(self.dataFile)
            data = pd.DataFrame(data)
        else:
            data = pd.read_csv(self.dataFile)

        data = data.fillna(self.missingValue)

        if self.labelEncoding:
            data = self.labelEncode(data)

        data.to_csv(self.outputFolder + '/DataFile.csv', index=False)
Developer: pranavbahl2308, Project: WOLF, Lines of code: 25, Source: PreProcessing.py

Example 8: parse_arff

def parse_arff(name):

  # extract using the scipy.io.arff loader; text mode is required, since
  # loadarff decodes the file contents itself
  raw_data, metadata = arff.loadarff(open(name, 'r'))
  data = [[v if type(v) is np.string_ else round(v, 14) for v in l] for l in raw_data]
  return data, metadata
Developer: jnguyen92, Project: small_projects, Lines of code: 7, Source: run_knn.py

Example 9: load_features_from_arff

def load_features_from_arff(path):

    data, meta = loadarff(path)
    features = pd.DataFrame(data, columns=meta.names())
    # standardize every feature column; the class label (last column) is left untouched
    features[features.columns[:-1]] = StandardScaler().fit_transform(features[features.columns[:-1]])

    return features
Developer: tuwien-musicir, Project: mir_lecture, Lines of code: 7, Source: mir_lecture_utils.py

Example 10: load_data

def load_data(filename):
    """
    load numeric data from arff file using scipy.io.arff.loadarff
    returns a numpy array
    """
    data = loadarff(open(filename, 'r'))[0]
    return np.array([list(row) for row in data])
Developer: sboysel, Project: ml, Lines of code: 7, Source: kmeans_deprc.py

Example 11: test

def test():
    vec = DictVectorizer()
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    for filename in glob.glob(r'../dataset/UCI/*.arff'):
        basename = re.sub(r'(\..*?)$','',os.path.basename(filename))
        print(basename)
        if basename != DS:
            continue
        # cost_matrix = pickle.load(open('../dataset/UCI/'+basename+'_cost_matrix.pkl', 'rb'))
        data = arff.loadarff(filename)[0]
        X = vec.fit_transform(np.array([{str(i):value for i,value in enumerate(list(row)[:-1])} for row in data])).toarray()
        imp.fit(X)
        X = imp.transform(X)
        labels = np.array([row[-1] for row in data])
        y = np.array([{v:k for k,v in enumerate(list(set(labels)))}[label] for label in labels])
        random = np.random.permutation(range(len(X)))
        print('dataset ratio\t%s' % ('\t'.join([alg + " " * (12 - len(alg)) for alg in sorted(ALG.keys())])))
        for iteration in range(10):
            X, y, class_num, kf = X[random], y[random], set(labels), KFold(len(X), n_folds=10)
            for train, test in kf:
                length, train_size = len(train), 0.1
                X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
                X_label, X_unlabel, y_label, y_unlabel = train_test_split(X_train, y_train, test_size=1.0-train_size, random_state=0)
                for R in range(2, 10):
                    ones_matrix, cost_matrix = np.array([[1, 1], [1, 1]]), np.array([[1, 1], [R, R]])
                    cross_validation("%s R=%d" % (basename, R), X_label, X_unlabel, y_label, y_unlabel, ones_matrix, cost_matrix)
                exit()
Developer: qiangsiwei, Project: semi-supervied_learning, Lines of code: 28, Source: test_weight_KNN.py

Example 12: getPurityMissingValues

def getPurityMissingValues(filename):
    # clusters = int(filename.split('=')[1].split('.')[0])
    countdict = {}

    try:
        x = loadarff(filename)
        for row in x[0]:
            clusterid = row['Cluster']
            if clusterid not in countdict:
                countdict[clusterid] = {}
            # count occurrences of each 'f2' value within the cluster
            if row['f2'] not in countdict[clusterid]:
                countdict[clusterid][row['f2']] = 1
            else:
                countdict[clusterid][row['f2']] += 1

        maxtotal = 0
        alltotal = 0
        for cluster in countdict:
            if cluster != '?':
                maxtotal += max(countdict[cluster].values())
            alltotal += sum(countdict[cluster].values())
        purity = float(maxtotal) / alltotal
    except Exception:
        purity = -1
    return purity
Developer: abhisheknkar, Project: ML_PA, Lines of code: 29, Source: QA4.py

Example 13: split

def split(filename, train_size, reverse=False):
    data, meta = arff.loadarff(filename)
    orig_data = []
    for line in data:
        orig_data.append(list(line)[0:-1])
    if reverse:
        train_size = len(orig_data) - train_size
    return generateTrain(tuple(orig_data), train_size)
Developer: TexasRed, Project: scalable_transfer_learning, Lines of code: 8, Source: splitter.py

Example 14: read_dense_arff_dataset

def read_dense_arff_dataset(train_path, test_path, number_of_labels):

    train_dataset, meta_train = loadarff(open(train_path, 'r'))
    test_dataset, meta_test = loadarff(open(test_path, 'r'))

    meta_names = meta_train.names()

    attributes = meta_names[:-number_of_labels]
    classes = meta_names[-number_of_labels:]

    x_train = np.asarray(train_dataset[:][attributes].tolist(), dtype=np.float32)
    y_train = np.asarray(train_dataset[:][classes].tolist(), dtype=np.float32)

    x_test = np.asarray(test_dataset[:][attributes].tolist(), dtype=np.float32)
    y_test = np.asarray(test_dataset[:][classes].tolist(), dtype=np.float32)

    return x_train, y_train, x_test, y_test
Developer: sb-mlc, Project: Search-Based-Algorithms-for-Multi-Label-Classification, Lines of code: 17, Source: read_data.py

Example 15: RunMetrics

  def RunMetrics(self, options):
    Log.Info("Perform RANDOMFOREST.", self.verbose)
    opts = {}
    if "minimum_leaf_size" in options:
      opts["minimum_leaf_size"] = int(options.pop("minimum_leaf_size"));
    else:
      opts["minimum_leaf_size"] = 1
    if len(options) > 0:
      Log.Fatal("Unknown parameters: " + str(options))
      raise Exception("unknown parameters")

    if len(self.dataset) < 2:
      Log.Fatal("This method requires two or more datasets.")
      return -1

    # Split the command using shell-like syntax.
    cmd = shlex.split("java -classpath " + self.path + "/weka.jar" +
        ":methods/weka" + " RANDOMFOREST -t " + self.dataset[0] + " -T " +
        self.dataset[1] + " -M " + str(opts["minimum_leaf_size"]) )

    # Run command with the necessary arguments and return its output as a byte
    # string. We have untrusted input so we disable all shell based features.
    try:
      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
          timeout=self.timeout)
    except subprocess.TimeoutExpired as e:
      Log.Warn(str(e))
      return -2
    except Exception as e:
      Log.Fatal("Could not execute command: " + str(cmd))
      return -1

    # Datastructure to store the results.
    metrics = {}

    # Parse data: runtime.
    timer = self.parseTimer(s)

    if timer != -1:
      predictions = np.genfromtxt("weka_predicted.csv", delimiter=',')
      data, meta = arff.loadarff(self.dataset[2])
      truelabels = np.asarray(
        reduce(operator.concat, data.tolist()), dtype=np.float32)
      metrics['Runtime'] = timer.total_time
      try:
        confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)
        metrics['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
        metrics['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
        metrics['Precision'] = Metrics.AvgPrecision(confusionMatrix)
        metrics['Recall'] = Metrics.AvgRecall(confusionMatrix)
        metrics['MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictions)
      except Exception as e:
        # The confusion matrix can't mix binary and continuous data.
        pass

      Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)

    return metrics
Developer: zoq, Project: benchmarks, Lines of code: 58, Source: random_forest.py


Note: the scipy.io.arff.loadarff examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets are taken from open-source projects contributed by their respective authors. Copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not republish without permission.