This article collects typical usage examples of the Python function sklearn.datasets.load_svmlight_file. If you have been wondering what load_svmlight_file does, or how to use it in practice, the curated examples below should help.
The following 15 code examples of load_svmlight_file are shown, sorted by popularity by default.
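Before the examples, a quick orientation: every snippet below parses a file in svmlight/libsvm format into a sparse CSR matrix plus a label vector. A minimal sketch of that shared pattern (the file names here are placeholders, not taken from any example):

from sklearn.datasets import load_svmlight_file, dump_svmlight_file

# "train.svm" and "test.svm" are placeholder paths
X_train, y_train = load_svmlight_file("train.svm")   # X_train: scipy CSR matrix, y_train: ndarray
X_test, y_test = load_svmlight_file("test.svm",
                                    n_features=X_train.shape[1])  # keep column counts aligned
dump_svmlight_file(X_train, y_train, "train_copy.svm", zero_based=True)  # round-trip back to disk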
Example 1: classification_subfeature
def classification_subfeature(train, test, outclss):
    fields = iot.read_fields()  # project-specific helper returning the feature-name array
    print(len(fields))
    foi = ['liwc_anal.result.i',
           'liwc_anal.result.we',
           'liwc_anal.result.affect',
           'liwc_anal.result.posemo',
           'liwc_anal.result.negemo',
           'liwc_anal.result.bio',
           'liwc_anal.result.body',
           'liwc_anal.result.health',
           'liwc_anal.result.ingest']
    indeces = [np.where(fields == f)[0][0] for f in foi]
    print(fields[indeces])
    '''Load training data'''
    X_train, y_train = load_svmlight_file(train)
    X_train = X_train.toarray()[:, indeces]
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    print(X_train.shape)
    '''Load test data'''
    X_test, y_test = load_svmlight_file(test)
    X_test = X_test.toarray()[:, indeces]
    X_test = scaler.transform(X_test)  # reuse the scaler fitted on the training set
    print(X_test.shape)
    svc_lin = SVC(kernel='linear', class_weight='balanced')
    y_lin = svc_lin.fit(X_train, y_train).predict(X_test)
    # pickle.dump(y_test, open(outid, 'wb'))
    pickle.dump(y_lin, open(outclss, 'wb'))  # pickle requires a binary-mode file
Example 2: test_dump
def test_dump():
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()
    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)
                comment = f.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline()
                assert_in(["one", "zero"][zero_based] + "-based", comment)
                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                if dtype == np.float32:
                    # allow a rounding error at the last decimal place
                    assert_array_almost_equal(Xd.astype(dtype), X2.toarray(), 4)
                else:
                    # allow a rounding error at the last decimal place
                    assert_array_almost_equal(Xd.astype(dtype), X2.toarray(), 15)
                assert_array_equal(y, y2)
Example 3: train
def train(self, examples, outDir, parameters, classifyExamples=None, dummy=False):
    outDir = os.path.abspath(outDir)
    examples = self.getExampleFile(examples, dummy=dummy)
    classifyExamples = self.getExampleFile(classifyExamples, dummy=dummy)
    # Return a new classifier instance for following the training process and using the model
    classifier = copy.copy(self)
    classifier.parameters = parameters
    classifier._filesToRelease = [examples, classifyExamples]
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    trainFeatures, trainClasses = datasets.load_svmlight_file(examples)
    if classifyExamples is not None:
        # pass n_features so the devel matrix has the same width as the training matrix
        develFeatures, develClasses = datasets.load_svmlight_file(classifyExamples, trainFeatures.shape[1])
    binarizer = preprocessing.LabelBinarizer()
    binarizer.fit(trainClasses)
    trainClasses = binarizer.transform(trainClasses)
    if classifyExamples is not None:
        develClasses = binarizer.transform(develClasses)
    print("Training Keras model with parameters:", parameters, file=sys.stderr)
    parameters = Parameters.get(parameters, {"TEES.classifier": "KerasClassifier", "layers": 5, "lr": 0.001, "epochs": 1, "batch_size": 64, "patience": 10})
    np.random.seed(10)
    classifier.kerasModel = classifier._defineModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
    classifier._fitModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
Example 4: gridSearch
def gridSearch():
    X_train, y_train = load_svmlight_file(svmPath + "/" + trainFile)
    X_test, y_test = load_svmlight_file(svmPath + "/" + testFile, n_features=X_train.shape[1])
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]}]
    # alternatively: [{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    # training
    # clf = svm.SVC(kernel='linear')
    # clf.fit(X_features, trainingLabels)
    scores = ['precision', 'recall']
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()
        clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5, scoring=score)
        clf.fit(X_train, y_train)
        print("Best parameters set found on development set:")
        print()
        print(clf.best_estimator_)
        print()
        print("Grid scores on development set:")
        print()
        # grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same information
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r" % (mean, std / 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        print(classification_report(y_test, clf.predict(X_test)))
Example 5: load
def load(self, dataset=None, data_dir="/home/drunkeneye/lab/data", verbose=None):
    if verbose is None:
        verbose = self.verbose
    if dataset is None:
        dataset = self.name
    # first try to load the data 'directly'
    try:
        filePath = os.path.join(data_dir, dataset, dataset)
        if verbose:
            print("  Trying to load data set from {}".format(filePath))
        self.X, self.y = load_svmlight_file(filePath)
        self.X = np.asarray(self.X.todense())
        if verbose:
            print("  Loaded from {}".format(filePath))
        return
    except Exception:  # fall through to the next candidate file
        pass
    # next try the combined, scaled variant
    try:
        filePath = os.path.join(data_dir, dataset, dataset + ".combined.scaled")
        if verbose:
            print("  Trying to load data set from {}".format(filePath))
        self.X, self.y = load_svmlight_file(filePath)
        self.X = np.asarray(self.X.todense())
        if verbose:
            print("  Loaded from {}".format(filePath))
        return
    except Exception:
        pass
Example 6: run
def run(train_fp, test_fp, pred_fp, key_fp):
    keys = []
    load(key_fp, keys)
    X_train, y_train = load_svmlight_file(train_fp)
    X_test, y_test = load_svmlight_file(test_fp)
    # dtrain = xgb.DMatrix(train_fp)
    # dtest = xgb.DMatrix(test_fp)
    with open("lr_reg.params", 'r') as f:
        params = json.load(f)
    print("[%s] [INFO] params: %s\n" % (t_now(), str(params)))
    model = linear_model.Ridge(alpha=params['alpha'])
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    # model = xgb.train(params, dtrain, params['n_round'])
    # model = xgb.train(params, dtrain, params['n_round'], obj=customed_obj_1)
    # pred = model.predict(dtest, ntree_limit=params['n_round'])
    # pred = model.predict(dtest)
    with open(pred_fp, 'w') as f:
        for i in range(len(keys)):
            # floor each prediction at 1.0 before writing
            f.write(keys[i] + "," + str(max(1.0, pred[i])) + "\n")
    return 0
Example 7: main
def main():
    # svm_para = {'C': 10.0, 'kernel': 'rbf', 'gamma': 1.667, 'verbose': False}
    # svm_para = {'kernel': 'linear', 'verbose': False}
    # loading data
    # X_train, y_train = datasets.load_svmlight_file(r'./dataset/mnist_train_784_poly_8vr.dat')
    # X_train, y_train = datasets.load_svmlight_file(r'./dataset/covtype_tr_2vr.data')
    # svm_para = {'C': 10.0, 'kernel': 'rbf', 'gamma': 0.00002, 'tol': 0.01, 'verbose': False}
    # census
    svm_para = {"C": 10.0, "kernel": "rbf", "gamma": 1.667, "verbose": False}
    X_train, y_train = datasets.load_svmlight_file(r"./dataset/census.train")
    # test random sampling
    RS_SVM = RandomSamplingSVM(svm_para)
    start_time = time.time()
    model = RS_SVM.train_one_half_v2(X_train, y_train)
    if model is None:  # check before touching model attributes
        print("Cannot train the dataset", flush=True)
        return
    print("Remain SVs: " + str(model.n_support_), flush=True)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)
    # X_test, y_test = datasets.load_svmlight_file(r'./dataset/mnist_test_784_poly_8vr.dat')
    # X_test, y_test = datasets.load_svmlight_file(r'./dataset/covtype_tst_2vr.data')
    X_test, y_test = datasets.load_svmlight_file(r"./dataset/census.train")  # scores on the training file
    ratio = model.score(X_test, y_test)
    print(ratio)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)
Example 8: train_predict
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_fold=5):
    feature_name = os.path.basename(train_file)[:-10]
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG,
                        filename='esb_xg_grid_colsub_{}.log'.format(feature_name))
    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)
    xg = xgb.XGBClassifier()
    param = {'learning_rate': [.01, .03, .05], 'max_depth': [4, 5, 6],
             'n_estimators': [400, 600]}
    # scikit-learn >= 0.18 API: pass a splitter object and the 'neg_log_loss' scorer
    cv = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=2015)
    clf = GridSearchCV(xg, param, scoring='neg_log_loss', verbose=1, cv=cv)
    logging.info('Cross validation for grid search...')
    clf.fit(X, y)
    p = clf.predict_proba(X)[:, 1]
    logging.info('best model = {}'.format(clf.best_estimator_))
    logging.info('best score = {:.4f}'.format(clf.best_score_))
    logging.info('Retraining with 100% data...')
    clf.best_estimator_.fit(X, y)
    p_tst = clf.best_estimator_.predict_proba(X_tst)[:, 1]
    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
Example 9: scale_mnist8m
def scale_mnist8m():
    from sklearn.datasets import load_svmlight_file
    print("loading train", datetime.datetime.now())
    dd_train = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_train.libsvm")
    print("loading test", datetime.datetime.now())
    dd_test = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_test.libsvm")
    Xtrain = dd_train[0]
    Xtest = dd_test[0]
    Ytrain = dd_train[1]
    Ytest = dd_test[1]
    # pad both matrices to the same fixed column count
    Xtrain = csr_matrix((Xtrain.data, Xtrain.indices, Xtrain.indptr), shape=(Xtrain.shape[0], 786))
    Xtest = csr_matrix((Xtest.data, Xtest.indices, Xtest.indptr), shape=(Xtest.shape[0], 786))
    import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23
    print("densifying train", datetime.datetime.now())
    Xtrain = Xtrain.todense()
    print("densifying test", datetime.datetime.now())
    Xtest = Xtest.todense()
    print("dumping train", datetime.datetime.now())
    joblib.dump((np.asarray(Xtrain), Ytrain), base_folder_mnist + "mnist8m_6_8_train_reshaped")
    # joblib.load(base_folder + "mnist8m_6_8_train_touple_small")
    print("dumping test", datetime.datetime.now())
    joblib.dump((np.asarray(Xtest), Ytest), base_folder_mnist + "mnist8m_6_8_test_reshaped")
    print("finished", datetime.datetime.now())
Example 10: svm
def svm():
    # load data; SelectKBest and SVC both accept scipy sparse matrices directly
    x_train, y_train = load_svmlight_file("12trainset")
    x_test, y_test = load_svmlight_file("12testdata")
    sk = SelectKBest(f_classif, k=9).fit(x_train, y_train)
    x_new = sk.transform(x_train)
    x_newtest = sk.transform(x_test)
    print(sk.scores_)
    print(x_new.shape)
    print(sk.get_support())
    # classifier
    clf = SVC(C=2, gamma=2)
    ovrclf = OneVsRestClassifier(clf, n_jobs=-1)
    ovrclf.fit(x_train, y_train)  # note: fits on the full feature set, not the selected x_new
    y_pred = ovrclf.predict(x_test)
    # write result
    with open("result.txt", "w") as fw:
        for st in y_pred.tolist():
            fw.write(str(st) + '\n')
    print(np.array(y_pred).shape)
    target_names = ['0', '1', '2', '3']
    # result
    # sum_y = np.sum((np.array(y_pred) - np.array(y_test))**2)
    # print(classification_report(y_test, y_pred, target_names=target_names))
    # print("sougouVal: ", float(sum_y)/y_pred.shape[0])
    print(time.time() - start_time)
Example 11: test_dump
def test_dump():
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()
    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                dump_svmlight_file(X.astype(dtype), y, f, zero_based=zero_based)
                f.seek(0)
                comment = f.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline()
                assert_in(["one", "zero"][zero_based] + "-based", comment)
                X2, y2 = load_svmlight_file(f, dtype=dtype, zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                if dtype == np.float32:
                    # allow a rounding error at the last decimal place
                    assert_array_almost_equal(Xd.astype(dtype), X2.toarray(), 4)
                else:
                    # allow a rounding error at the last decimal place
                    assert_array_almost_equal(Xd.astype(dtype), X2.toarray(), 15)
                assert_array_equal(y, y2)
Example 12: test_load_with_long_qid
def test_load_with_long_qid():
    # load svmfile with long-int qid attribute
    data = b("""
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""")
    X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

    true_X = [[1, 2, 3],
              [1440446648, 72048431380967004, 236784985],
              [1440446648, 72048431380967004, 236784985],
              [1440446648, 72048431380967004, 236784985]]
    true_y = [1, 0, 0, 3]
    trueQID = [0, 72048431380967004, -9223372036854775807, 9223372036854775807]
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
    assert_array_equal(qid, trueQID)

    f = BytesIO()
    dump_svmlight_file(X, y, f, query_id=qid, zero_based=True)
    f.seek(0)
    X, y, qid = load_svmlight_file(f, query_id=True, zero_based=True)
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
    assert_array_equal(qid, trueQID)

    f.seek(0)
    X, y = load_svmlight_file(f, query_id=False, zero_based=True)
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
Example 13: test_load_with_offsets
def test_load_with_offsets(sparsity, n_samples, n_features):
    rng = np.random.RandomState(0)
    X = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        X[X < sparsity] = 0.0
    X = sp.csr_matrix(X)
    y = rng.randint(low=0, high=2, size=n_samples)

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    size = len(f.getvalue())

    # put some marks that are likely to happen anywhere in a row
    mark_0 = 0
    mark_1 = size // 3
    length_0 = mark_1 - mark_0
    mark_2 = 4 * size // 5
    length_1 = mark_2 - mark_1

    # load the original sparse matrix into 3 independent CSR matrices
    X_0, y_0 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_0, length=length_0)
    X_1, y_1 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_1, length=length_1)
    X_2, y_2 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_2)

    y_concat = np.concatenate([y_0, y_1, y_2])
    X_concat = sp.vstack([X_0, X_1, X_2])
    assert_array_almost_equal(y, y_concat)
    assert_array_almost_equal(X.toarray(), X_concat.toarray())
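The offset and length arguments exercised above are what make it practical to read one large svmlight file in byte-ranged chunks, for example in parallel. A minimal sketch under stated assumptions: "big.svm" is a placeholder path, the feature count of 100 is made up, and joblib is used purely for illustration:

import os
import numpy as np
import scipy.sparse as sp
from joblib import Parallel, delayed
from sklearn.datasets import load_svmlight_file

size = os.path.getsize("big.svm")
# (offset, length) pairs; length=-1 reads to the end of the file
ranges = [(0, size // 2), (size // 2, -1)]

# zero_based is fixed explicitly because the "auto" heuristic is only
# guaranteed to be safe when no offset/length is passed
chunks = Parallel(n_jobs=2)(
    delayed(load_svmlight_file)("big.svm", n_features=100, zero_based=True,
                                offset=off, length=ln)
    for off, ln in ranges)

X = sp.vstack([c[0] for c in chunks])        # reassemble the full matrix
y = np.concatenate([c[1] for c in chunks])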
Example 14: loadData
def loadData():
    data1, target = load_svmlight_file('dataset/text.scale')
    data2, target = load_svmlight_file('dataset/following.scale')  # assumes both files carry identical labels
    # sklearn.utils.shuffle permutes all three arrays with the same index order
    data1, data2, target = shuffle(data1, data2, target)
    return (data1, data2, target)
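When several files must share one feature space, as the two loads above implicitly assume, scikit-learn also provides load_svmlight_files (plural), which parses a list of files together so every returned matrix has the same number of columns. A minimal sketch reusing the paths from Example 14:

from sklearn.datasets import load_svmlight_files

# returns X1, y1, X2, y2, ... in file order, with a common column count
X1, y1, X2, y2 = load_svmlight_files(
    ['dataset/text.scale', 'dataset/following.scale'])
assert X1.shape[1] == X2.shape[1]  # guaranteed by the joint parse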
Example 15: check_data_compatibility
def check_data_compatibility(self):
    try:
        load_svmlight_file(self.input_path)
        return True
    except Exception as ex:
        print(ex)  # Exception objects no longer have a .message attribute in Python 3
        return False
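One last detail several of the examples above depend on: zero_based tells the parser whether column indices in the file start at 0 or 1, and the default "auto" guesses from the file contents. A minimal sketch of the effect, with a made-up two-line data string:

from io import BytesIO
from sklearn.datasets import load_svmlight_file

data = b"1 1:0.5 3:1.0\n-1 2:2.0 3:0.25\n"  # illustrative only
X_auto, y = load_svmlight_file(BytesIO(data))                   # "auto": smallest index is 1, so treated as one-based
X_zero, y = load_svmlight_file(BytesIO(data), zero_based=True)  # indices taken literally
print(X_auto.shape, X_zero.shape)  # (2, 3) (2, 4)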