当前位置: 首页>>代码示例>>Python>>正文


Python datasets.load_svmlight_file函数代码示例

本文整理汇总了Python中sklearn.datasets.load_svmlight_file函数的典型用法代码示例。如果您正苦于以下问题:Python load_svmlight_file函数的具体用法?Python load_svmlight_file怎么用?Python load_svmlight_file使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了load_svmlight_file函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: classification_subfeature

def classification_subfeature(train, test, outclss):
    fields = iot.read_fields()
    print len(fields)
    foi = ['liwc_anal.result.i',
           'liwc_anal.result.we',
           'liwc_anal.result.affect',
           'liwc_anal.result.posemo',
           'liwc_anal.result.negemo',
           'liwc_anal.result.bio',
           'liwc_anal.result.body',
           'liwc_anal.result.health',
           'liwc_anal.result.ingest']
    indeces = [np.where(fields==f)[0][0] for f in foi]
    print fields[indeces]

    '''Load Training data'''
    X_train, y_train = load_svmlight_file(train)
    X_train = X_train.toarray()[:, indeces]
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    print X_train.shape
    '''Load Test data'''
    X_test, y_test = load_svmlight_file(test)
    X_test = X_test.toarray()[:, indeces]
    X_test = scaler.transform(X_test)
    print X_test.shape

    svc_lin = SVC(kernel='linear', class_weight='balanced')
    y_lin = svc_lin.fit(X_train, y_train).predict(X_test)
    # pickle.dump(y_test, open(outid, 'w'))
    pickle.dump(y_lin, open(outclss, 'w'))
开发者ID:wtgme,项目名称:ohsn,代码行数:31,代码来源:classification.py

示例2: test_dump

def test_dump():
    """Round-trip sparse and dense matrices through dump/load_svmlight_file."""
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    for X in (X_sparse, X_dense):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                buf = BytesIO()
                # A comment must be passed explicitly: LibSVM cannot parse
                # comments, so dump_svmlight_file no longer writes one by
                # default; the version header rides along with the comment.
                dump_svmlight_file(X.astype(dtype), y, buf, comment="test",
                                   zero_based=zero_based)
                buf.seek(0)

                header = buf.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, header)
                header = buf.readline()
                expected_base = "zero" if zero_based else "one"
                assert_in(expected_base + "-based", header)

                X_back, y_back = load_svmlight_file(buf, dtype=dtype,
                                                    zero_based=zero_based)
                assert_equal(X_back.dtype, dtype)
                # Tolerate a rounding error at the last decimal place only.
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(X_dense.astype(dtype),
                                          X_back.toarray(), decimals)
                assert_array_equal(y, y_back)
开发者ID:yzhy,项目名称:scikit-learn,代码行数:32,代码来源:test_svmlight_format.py

示例3: train

 def train(self, examples, outDir, parameters, classifyExamples=None, dummy=False):
     """Train a Keras classifier on svmlight-format example files.

     examples -- training examples file; classifyExamples -- optional
     development set used for early stopping/evaluation. Returns nothing;
     training state lives on the copied classifier instance.
     """
     outDir = os.path.abspath(outDir)
     
     examples = self.getExampleFile(examples, dummy=dummy)
     classifyExamples = self.getExampleFile(classifyExamples, dummy=dummy)
     
     # Return a new classifier instance for following the training process and using the model
     classifier = copy.copy(self)
     classifier.parameters = parameters
     classifier._filesToRelease = [examples, classifyExamples]
     
     if not os.path.exists(outDir):
         os.makedirs(outDir)
     
     trainFeatures, trainClasses = datasets.load_svmlight_file(examples)
     # Bug fix: initialise the devel variables so the _defineModel/_fitModel
     # calls below do not raise NameError when no classifyExamples are given.
     develFeatures = develClasses = None
     if classifyExamples is not None:
         # Force the devel matrix to the training feature-space width.
         develFeatures, develClasses = datasets.load_svmlight_file(classifyExamples, trainFeatures.shape[1])
     binarizer = preprocessing.LabelBinarizer()
     binarizer.fit(trainClasses)
     trainClasses = binarizer.transform(trainClasses)
     if classifyExamples is not None:
         develClasses = binarizer.transform(develClasses)
     
     print >> sys.stderr, "Training Keras model with parameters:", parameters
     parameters = Parameters.get(parameters, {"TEES.classifier":"KerasClassifier", "layers":5, "lr":0.001, "epochs":1, "batch_size":64, "patience":10})
     np.random.seed(10)  # fixed seed for reproducible weight initialisation
     classifier.kerasModel = classifier._defineModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
     classifier._fitModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
开发者ID:jbjorne,项目名称:TEES,代码行数:28,代码来源:KerasClassifier.py

示例4: gridSearch

def gridSearch():
    """Grid-search SVC hyper-parameters on the svmlight train/test files.

    Runs one GridSearchCV per scoring metric and prints the best estimator
    and the per-candidate cross-validation scores.
    """
    X_train, y_train = load_svmlight_file(svmPath + "/" + trainFile)
    # Force the test matrix to the training feature-space width.
    X_test, y_test = load_svmlight_file(svmPath + "/" + testFile,
                                        n_features=X_train.shape[1])

    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]}]

    scores = ['precision', 'recall']

    # Bug fixes vs. the original: consistent indentation (the original mixed
    # tabs and spaces and would not parse), the inner loop variable no longer
    # shadows the outer `scores` list, and the report banner is printed once
    # per metric instead of once per grid candidate.
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5, scoring=score)
        clf.fit(X_train, y_train)
        print("Best parameters set found on development set:")
        print()
        print(clf.best_estimator_)
        print()
        print("Grid scores on development set:")
        print()
        for params, mean_score, cv_scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean_score, cv_scores.std() / 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
开发者ID:debanjanghosh,项目名称:argessay_ACL2016,代码行数:34,代码来源:scikit_expr_embedding.py

示例5: load

	def load(self, dataset = None, data_dir = "/home/drunkeneye/lab/data", verbose = None):
		"""Load an svmlight dataset into self.X (dense ndarray) and self.y.

		Tries the raw dataset file first, then the ".combined.scaled"
		variant; silently returns without setting anything if neither
		candidate loads (preserving the original best-effort behaviour).
		"""
		if verbose is None:
			verbose = self.verbose

		if dataset is None:
			dataset = self.name

		# Candidate file locations, tried in order.
		candidates = [
			os.path.join(data_dir, dataset, dataset),
			os.path.join(data_dir, dataset, dataset + ".combined.scaled"),
		]
		for filePath in candidates:
			try:
				if verbose:
					print("  Trying to load data set from {}". format(filePath))
				self.X, self.y = load_svmlight_file(filePath)
				# Densify: downstream code expects a plain ndarray.
				self.X = np.asarray(self.X.todense())
				if verbose:
					print ("    Loaded from {}". format( filePath))
				return
			except Exception:
				# Narrowed from a bare `except:`; still best-effort — fall
				# through to the next candidate path on any load failure.
				continue
开发者ID:aydindemircioglu,项目名称:MixMex,代码行数:31,代码来源:DataSet.py

示例6: run

def run(train_fp, test_fp, pred_fp, key_fp):

	keys = []
	load(key_fp, keys)

	X_train, y_train = load_svmlight_file(train_fp)
	X_test, y_test = load_svmlight_file(test_fp)

	#dtrain = xgb.DMatrix(train_fp)
	#dtest = xgb.DMatrix(test_fp)

	params = {}
	with open("lr_reg.params", 'r') as f:
		params = json.load(f)
	print "[%s] [INFO] params: %s\n" % (t_now(), str(params))

	model = linear_model.Ridge (alpha = params['alpha'])
	model.fit(X_train, y_train)
	pred = model.predict(X_test)
	#model = xgb.train( params, dtrain, params['n_round'])
	#model = xgb.train( params, dtrain, params['n_round'], obj = customed_obj_1)
	#pred = model.predict(dtest, ntree_limit=params['n_round'])
	#pred = model.predict(dtest)

	f = open(pred_fp, 'w')
	for i in range(len(keys)):
		f.write(keys[i] + "," + str(max(1.0, pred[i])) + "\n")
	f.close()

	return 0
开发者ID:HouJP,项目名称:di-tech-16,代码行数:30,代码来源:lr_reg.py

示例7: main

def main():
    """Train a random-sampling SVM on the census data and report its score."""
    # census configuration
    svm_para = {"C": 10.0, "kernel": "rbf", "gamma": 1.667, "verbose": False}
    X_train, y_train = datasets.load_svmlight_file(r"./dataset/census.train")

    # test random sampling
    RS_SVM = RandomSamplingSVM(svm_para)
    start_time = time.time()
    model = RS_SVM.train_one_half_v2(X_train, y_train)

    # Bug fix: check for a failed training run *before* touching model
    # attributes — the original dereferenced model.n_support_ first, so a
    # None result crashed with AttributeError before reaching this branch.
    if model is None:
        print("Can not train the dataset", flush=True)
        return

    print("Remain SVs: " + str(model.n_support_), flush=True)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)

    # NOTE(review): evaluation reloads census.train — it looks like a held-out
    # test split was intended here; confirm against the dataset layout.
    X_test, y_test = datasets.load_svmlight_file(r"./dataset/census.train")
    ratio = model.score(X_test, y_test)
    print(ratio)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)
开发者ID:viethoangcr,项目名称:thesis,代码行数:32,代码来源:RS_SVM_v2.py

示例8: train_predict

def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_fold=5):
    """Grid-search an XGBoost classifier, then refit on all data and save
    validation/test probability predictions to text files."""
    feature_name = os.path.basename(train_file)[:-10]
    logging.basicConfig(format='%(asctime)s   %(levelname)s   %(message)s',
                        level=logging.DEBUG,
                        filename='esb_xg_grid_colsub_{}.log'.format(feature_name))

    logging.info('Loading training and test data...')
    X_trn, y_trn = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    search_grid = {'learning_rate': [.01, .03, .05], 'max_depth': [4, 5, 6],
                   'n_estimators': [400, 600]}
    folds = StratifiedKFold(y_trn, n_folds=n_fold, shuffle=True,
                            random_state=2015)
    clf = GridSearchCV(xgb.XGBClassifier(), search_grid, scoring='log_loss',
                       verbose=1, cv=folds)

    logging.info('Cross validation for grid search...')
    clf.fit(X_trn, y_trn)
    # In-sample probabilities of the positive class for the validation file.
    p_val = clf.predict_proba(X_trn)[:, 1]

    logging.info('best model = {}'.format(clf.best_estimator_))
    logging.info('best score = {:.4f}'.format(clf.best_score_))

    logging.info('Retraining with 100% data...')
    best = clf.best_estimator_
    best.fit(X_trn, y_trn)
    p_tst = best.predict_proba(X_tst)[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
开发者ID:drivendata,项目名称:countable-care-3rd-place,代码行数:32,代码来源:train_predict_esb_xg_grid_colsub.py

示例9: scale_mnist8m

def scale_mnist8m():
    from sklearn.datasets import load_svmlight_file


    print "loading train",datetime.datetime.now()
    dd_train = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_train.libsvm")
    print "loading test", datetime.datetime.now()
    dd_test = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_test.libsvm")

    Xtrain = dd_train[0]
    Xtest = dd_test[0]
    Ytrain = dd_train[1]
    Ytest = dd_test[1]

    Xtrain = csr_matrix((Xtrain.data, Xtrain.indices, Xtrain.indptr), shape=(Xtrain.shape[0], 786))
    Xtest = csr_matrix((Xtest.data, Xtest.indices, Xtest.indptr), shape=(Xtest.shape[0], 786))
    from sklearn.externals import joblib


    print "densifying train",datetime.datetime.now()
    Xtrain = Xtrain.todense()
    print "densifying test",datetime.datetime.now()
    Xtest = Xtest.todense()

    print "dumping train",datetime.datetime.now()
    joblib.dump((np.asarray(Xtrain),Ytrain),base_folder_mnist + "mnist8m_6_8_train_reshaped")
    #joblib.load(base_folder + "mnist8m_6_8_train_touple_small")
    print "dumping test",datetime.datetime.now()
    joblib.dump((np.asarray(Xtest),Ytest),base_folder_mnist + "mnist8m_6_8_test_reshaped")
    print "finished",datetime.datetime.now()
开发者ID:nikste,项目名称:doubly_random_svm,代码行数:30,代码来源:dataio.py

示例10: svm

def svm():
    """Select the 9 best features by ANOVA F-score, train a one-vs-rest SVC
    on them, and write the test predictions to result.txt."""
    # Load data (kept sparse: SVC accepts CSR input; the original called
    # .todense() and discarded the result, a no-op that is removed here).
    x_train, y_train = load_svmlight_file("12trainset")
    x_test, y_test = load_svmlight_file("12testdata")

    # Univariate feature selection.
    sk = SelectKBest(f_classif, 9).fit(x_train, y_train)
    x_new = sk.transform(x_train)
    x_newtest = sk.transform(x_test)
    print(sk.scores_)
    print(x_new.shape)
    print(sk.get_support())

    # Classifier — bug fix: fit/predict on the *selected* features; the
    # original computed x_new/x_newtest but then trained on the full set.
    clf = SVC(C=2, gamma=2)
    ovrclf = OneVsRestClassifier(clf, n_jobs=-1)
    ovrclf.fit(x_new, y_train)
    y_pred = ovrclf.predict(x_newtest)

    # Write result (context manager closes the file; original leaked it).
    with open("result.txt", "w") as fw:
        for st in y_pred.tolist():
            fw.write(str(st) + '\n')
    print(np.array(y_pred).shape)

    target_names = ['0', '1', '2', '3']
    print(time.time() - start_time)
开发者ID:lkprof,项目名称:sema,代码行数:29,代码来源:svm.py

示例11: test_dump

def test_dump():
    """Round-trip sparse and dense matrices through dump/load_svmlight_file."""
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    for X in (X_sparse, X_dense):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                buf = BytesIO()
                dump_svmlight_file(X.astype(dtype), y, buf,
                                   zero_based=zero_based)
                buf.seek(0)

                line = buf.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, line)
                line = buf.readline()
                base = "zero" if zero_based else "one"
                assert_in(base + "-based", line)

                X_back, y_back = load_svmlight_file(buf, dtype=dtype,
                                                    zero_based=zero_based)
                assert_equal(X_back.dtype, dtype)
                # allow a rounding error at the last decimal place
                decimal = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(X_dense.astype(dtype),
                                          X_back.toarray(), decimal)
                assert_array_equal(y, y_back)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py

示例12: test_load_with_long_qid

def test_load_with_long_qid():
    # Round-trip svmlight data whose qid values need full 64-bit integers.
    data = b("""
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985""")
    X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

    expected_X = [[1, 2, 3],
                  [1440446648, 72048431380967004, 236784985],
                  [1440446648, 72048431380967004, 236784985],
                  [1440446648, 72048431380967004, 236784985]]
    expected_y = [1, 0, 0, 3]
    expected_qid = [0, 72048431380967004, -9223372036854775807,
                    9223372036854775807]

    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)
    assert_array_equal(qid, expected_qid)

    # Dump and reload with the qid column preserved.
    buf = BytesIO()
    dump_svmlight_file(X, y, buf, query_id=qid, zero_based=True)
    buf.seek(0)
    X, y, qid = load_svmlight_file(buf, query_id=True, zero_based=True)
    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)
    assert_array_equal(qid, expected_qid)

    # Reload once more, discarding the qid column this time.
    buf.seek(0)
    X, y = load_svmlight_file(buf, query_id=False, zero_based=True)
    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)

示例13: test_load_with_offsets

def test_load_with_offsets(sparsity, n_samples, n_features):
    # Loading a dumped matrix in three offset/length chunks must reproduce it.
    rng = np.random.RandomState(0)
    X = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        X[X < sparsity] = 0.0
    X = sp.csr_matrix(X)
    y = rng.randint(low=0, high=2, size=n_samples)

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    size = len(f.getvalue())

    # Pick two split marks that are likely to land in the middle of a row.
    first_mark = size // 3
    second_mark = 4 * size // 5

    # Load the original sparse matrix as 3 independent CSR matrices:
    # [0, first_mark), [first_mark, second_mark), [second_mark, end).
    X_a, y_a = load_svmlight_file(f, n_features=n_features,
                                  offset=0, length=first_mark - 0)
    X_b, y_b = load_svmlight_file(f, n_features=n_features,
                                  offset=first_mark,
                                  length=second_mark - first_mark)
    X_c, y_c = load_svmlight_file(f, n_features=n_features,
                                  offset=second_mark)

    assert_array_almost_equal(y, np.concatenate([y_a, y_b, y_c]))
    assert_array_almost_equal(X.toarray(),
                              sp.vstack([X_a, X_b, X_c]).toarray())
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py

示例14: loadData

def loadData():
    """Load the text and following feature sets, shuffle them in lockstep,
    and return (text_features, following_features, target)."""
    text_X, target = load_svmlight_file('dataset/text.scale')
    # Both files are assumed row-aligned; target is taken from the second load.
    follow_X, target = load_svmlight_file('dataset/following.scale')

    text_X, follow_X, target = shuffle(text_X, follow_X, target)
    return text_X, follow_X, target
开发者ID:pyongjoo,项目名称:twitter-research,代码行数:7,代码来源:ml_cotrain.py

示例15: check_data_compatibility

 def check_data_compatibility(self):
     try:
         load_svmlight_file(self.input_path)
         return True
     except Exception as ex:
         print ex.message
         return False
开发者ID:patrickyeh,项目名称:datalabsdk-python,代码行数:7,代码来源:DataType.py


注:本文中的sklearn.datasets.load_svmlight_file函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。