This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat. If you are wondering what CombineTextInputFormat is for, how to use it, or where to find usage examples, the curated class examples here may help.
The CombineTextInputFormat class belongs to the org.apache.hadoop.mapreduce.lib.input package. 15 code examples of the class are shown below, sorted by popularity by default.
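CombineTextInputFormat packs many small input files (or file blocks) into a single split, so a job over thousands of small files does not launch one map task per file. Three settings control the packing, as the examples below show: the maximum combined split size, plus per-node and per-rack minimum split sizes. A minimal sketch of enabling it, assuming Hadoop 2.x with the new mapreduce API (the byte sizes are illustrative):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class CombineSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Node-local blocks are packed together; leftovers smaller than this
    // minimum are handed on to the rack-level (then global) packing pass.
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, 32 * 1024 * 1024L);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, 32 * 1024 * 1024L);
    Job job = Job.getInstance(conf, "combine-sketch");
    job.setInputFormatClass(CombineTextInputFormat.class);
    // Upper bound on the size of one combined split.
    CombineTextInputFormat.setMaxInputSplitSize(job, 128 * 1024 * 1024L);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // ... mapper, reducer, and output setup as in Example 1 below ...
  }
}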
Example 1: main
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 4) {
System.err.println("Usage: wordcount <in> [<in>...] <out> <minSplitSize> <maxSplitSize>");
System.exit(2);
}
long maxInputSplitSize = Long.parseLong(otherArgs[otherArgs.length - 1]);
long minInputSplitSize = Long.parseLong(otherArgs[otherArgs.length - 2]);
//added by wei
conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, minInputSplitSize);
conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, minInputSplitSize);
//----
Job job = new Job(conf, "mulword count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//added by wei
job.setInputFormatClass(CombineTextInputFormat.class);
CombineTextInputFormat.setMaxInputSplitSize(job, maxInputSplitSize);
//----
for (int i = 0; i < otherArgs.length - 3; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,
new Path(otherArgs[otherArgs.length - 3]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
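Note on Example 1: the driver expects the arguments <in> [<in>...] <out> <minSplitSize> <maxSplitSize>, with both sizes in bytes. Split construction then proceeds roughly node by node: node-local blocks are combined up to the maximum split size, leftovers of at least SPLIT_MINSIZE_PERNODE form their own split, and anything smaller is retried at rack level (SPLIT_MINSIZE_PERRACK) before a final cross-rack pass.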
Example 2: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public void setConf() throws Exception {
String inputPath = "../data/exampledata/MFLocalExampleData";
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
String LOCAL_FS = LocalFileSystem.DEFAULT_FS;
String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp");
// Set training data and save model path
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath);
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
// Set actionType train
conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());
// Set MF algorithm parameters
conf.set(MLConf.ML_MF_RANK(), "200");
conf.set(MLConf.ML_EPOCH_NUM(), "8");
conf.set(MLConf.ML_MF_ROW_BATCH_NUM(), "2");
conf.set(MLConf.ML_MF_ITEM_NUM(), "1683");
conf.set(MLConf.ML_MF_LAMBDA(), "0.01");
conf.set(MLConf.ML_MF_ETA(), "0.0054");
}
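setConf() here only populates the Configuration; training is launched separately. A sketch of how it might be driven, following the runner pattern that Example 10 (testMF) uses with MatrixFactorizationRunner; the same pattern applies to the other setConf() examples below:
// Sketch only: assumes the surrounding class holds `conf` and that
// MatrixFactorizationRunner (used in Example 10) is on the classpath.
public void run() throws Exception {
  setConf(); // populate conf as shown above
  MatrixFactorizationRunner runner = new MatrixFactorizationRunner();
  runner.train(conf); // trains and saves the model to ANGEL_SAVE_MODEL_PATH
}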
Example 3: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public void setConf() {
String inputPath = "../data/exampledata/GBDTLocalExampleData/agaricus.txt.train";
// Feature number of train data
int featureNum = 127;
// Number of nonzero features
int featureNzz = 25;
// Tree number
int treeNum = 2;
// Tree depth
int treeDepth = 2;
// Split number
int splitNum = 10;
// Feature sample ratio
double sampleRatio = 1.0;
// Data format
String dataFmt = "libsvm";
// Learning rate
double learnRate = 0.01;
// Set basic configuration keys
String LOCAL_FS = LocalFileSystem.DEFAULT_FS;
String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp");
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Use local deploy mode and data format
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
conf.set(MLConf.ML_DATA_FORMAT(), String.valueOf(dataFmt));
// set input, output path
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath);
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
// set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
// Set GBDT algorithm parameters
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_FEATURE_NNZ(), String.valueOf(featureNzz));
conf.set(MLConf.ML_GBDT_TREE_NUM(), String.valueOf(treeNum));
conf.set(MLConf.ML_GBDT_TREE_DEPTH(), String.valueOf(treeDepth));
conf.set(MLConf.ML_GBDT_SPLIT_NUM(), String.valueOf(splitNum));
conf.set(MLConf.ML_GBDT_SAMPLE_RATIO(), String.valueOf(sampleRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
}
Example 4: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public void setConf() {
// Feature number of train data
int featureNum = 124;
// Total iteration number
int epochNum = 20;
// Validation sample ratio
double vRatio = 0.1;
// Data format, libsvm or dummy
String dataFmt = "libsvm";
// Sample ratio per mini-batch
double spRatio = 1.0;
// Batch number
int batchNum = 10;
// Learning rate
double learnRate = 1.0;
// Decay of learning rate
double decay = 0.1;
// Regularization coefficient
double reg = 0.2;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set sgd LR algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
}
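Unlike Example 6, this configuration sets no training-data path, no model save path, and no action type; the caller would add those before submitting. An illustrative addition (the input path is a placeholder; only the configuration keys are taken from Example 6):
// Placeholder values; the keys match those used in Example 6.
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, "path/to/train/data");
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LocalFileSystem.DEFAULT_FS
    + System.getProperty("java.io.tmpdir", "/tmp") + "/model");
conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());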
Example 5: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public void setConf() {
// Feature number of train data
int featureNum = 101;
// Total iteration number
int epochNum = 20;
// Validation sample ratio
double vRatio = 0.5;
// Data format, libsvm or dummy
String dataFmt = "libsvm";
// Sample ratio per mini-batch
double spRatio = 1;
// Learning rate
double learnRate = 0.1;
// Decay of learning rate
double decay = 0.01;
// Regularization coefficient
double reg = 0;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
// set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
// set sgd LR algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
}
Example 6: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public static Configuration setConf() {
Configuration conf = new Configuration();
// Feature number of train data
int featureNum = 124;
// Total iteration number
int epochNum = 20;
// Learning rate
double learnRate = 1.0;
// Regularization coefficient
double reg = 0.2;
String inputPath = "./angel-ps/mllib/src/test/data/lr/a9a.train";
String LOCAL_FS = LocalFileSystem.DEFAULT_FS;
String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp");
String savePath = LOCAL_FS + TMP_PATH + "/model";
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set sgd LR algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
// Set input data path
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath);
// Set save model path
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, savePath);
// Set actionType train
conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());
conf.set(MLConf.ML_DATA_FORMAT(), "libsvm");
return conf;
}
Example 7: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
/**
* set parameter values of conf
*/
@Before
public void setConf() throws Exception {
try {
// Feature number of train data
int featureNum = 124;
// Total iteration number
int epochNum = 5;
// Validation sample ratio
double vRatio = 0.1;
// Data format, libsvm or dummy
String dataFmt = "libsvm";
// Sample ratio per mini-batch
double spRatio = 1.0;
// Batch number
int batchNum = 10;
// Model type
String modelType = String.valueOf(RowType.T_DOUBLE_DENSE);
// Learning rate
double learnRate = 1.0;
// Decay of learning rate
double decay = 1;
// Regularization coefficient
double reg = 0.002;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
conf.setBoolean(MLConf.ML_INDEX_GET_ENABLE(), true);
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set sgd LR algorithm parameters #feature #epoch
conf.set(MLConf.LR_MODEL_TYPE(), modelType);
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
Example 8: setup
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
@Before public void setup() throws Exception {
try {
conf.set(MLConf.ML_GBDT_TASK_TYPE(), "classification");
// Feature number of train data
int featureNum = 127;
// Number of nonzero features
int featureNzz = 25;
// Tree number
int treeNum = 5;
// Tree depth
int treeDepth = 3;
// Split number
int splitNum = 10;
// Feature sample ratio
double sampleRatio = 1.0;
// Data format
String dataFmt = "libsvm";
// Learning rate
double learnRate = 0.01;
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Use local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// set input, output path
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/LOG/log");
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
// Set GBDT algorithm parameters
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_FEATURE_NNZ(), String.valueOf(featureNzz));
conf.set(MLConf.ML_GBDT_TREE_NUM(), String.valueOf(treeNum));
conf.set(MLConf.ML_GBDT_TREE_DEPTH(), String.valueOf(treeDepth));
conf.set(MLConf.ML_GBDT_SPLIT_NUM(), String.valueOf(splitNum));
conf.set(MLConf.ML_GBDT_SAMPLE_RATIO(), String.valueOf(sampleRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_GBDT_CATE_FEAT(),
"0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2,21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2,41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2,61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2,81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2,101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2,118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2");
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
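The ML_GBDT_CATE_FEAT value above is a comma-separated list of index:arity pairs, here marking all 127 features (indices 0 through 126) as binary categorical (arity 2). Writing the string by hand is error-prone; a small helper that generates the same value (a sketch, not part of the Angel API):
// Hypothetical helper: builds "0:2,1:2,...,(n-1):2" as used above.
static String binaryCateFeat(int featureNum) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < featureNum; i++) {
    if (i > 0) sb.append(',');
    sb.append(i).append(":2");
  }
  return sb.toString();
}
// Usage: conf.set(MLConf.ML_GBDT_CATE_FEAT(), binaryCateFeat(127));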
Example 9: setup
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
@Before
public void setup() throws Exception {
try{
// set basic configuration keys
Configuration conf = new Configuration();
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// use local deploy mode and the dummy data splitter
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
// get an Angel client
angelClient = AngelClientFactory.get(conf);
// add matrix
MatrixContext mMatrix = new MatrixContext();
mMatrix.setName("w1");
mMatrix.setRowNum(1);
mMatrix.setColNum(100000);
mMatrix.setMaxRowNumInBlock(1);
mMatrix.setMaxColNumInBlock(50000);
mMatrix.setRowType(RowType.T_INT_DENSE);
mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
angelClient.addMatrix(mMatrix);
angelClient.startPSServer();
angelClient.runTask(MetricTestTask.class);
Thread.sleep(5000);
task0Id = new TaskId(0);
task1Id = new TaskId(1);
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
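The block dimensions in the MatrixContext determine how w1 is partitioned across parameter servers. Assuming the default tiling of one partition per maxRowNumInBlock x maxColNumInBlock block (an inference, not stated in the example), the arithmetic for w1 above works out as:
// Partition count = ceil(rowNum / maxRowNumInBlock) * ceil(colNum / maxColNumInBlock)
int rowParts = (1 + 1 - 1) / 1;              // ceil(1 / 1) = 1
int colParts = (100000 + 50000 - 1) / 50000; // ceil(100000 / 50000) = 2
int partitions = rowParts * colParts;        // 2 partitions on the single PS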
Example 10: testMF
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
@Test
public void testMF() throws Exception {
try {
String inputPath = "./src/test/data/recommendation/MovieLensDataSet";
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
String LOCAL_FS = LocalFileSystem.DEFAULT_FS;
String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp");
// Set training data, save model, and log paths
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath);
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/Log/log");
conf.set(MLConf.ML_MF_USER_OUTPUT_PATH(), LOCAL_FS + TMP_PATH + "/usermodel");
// Set actionType train
conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());
// Set MF algorithm parameters
conf.set(MLConf.ML_MF_RANK(), "200");
conf.set(MLConf.ML_EPOCH_NUM(), "5");
conf.set(MLConf.ML_MF_ROW_BATCH_NUM(), "2");
conf.set(MLConf.ML_MF_ITEM_NUM(), "1683");
conf.set(MLConf.ML_MF_LAMBDA(), "0.01");
conf.set(MLConf.ML_MF_ETA(), "0.0005");
MatrixFactorizationRunner runner = new MatrixFactorizationRunner();
runner.train(conf);
} catch (Exception x) {
LOG.error("run testMF failed ", x);
throw x;
}
}
Example 11: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
/**
* set parameter values of conf
*/
@Before
public void setConf() {
// Feature number of train data
int featureNum = 236;
// Total iteration number
int epochNum = 5;
// Rank
int rank = 5;
// Regularization parameters
double reg0 = 0.0;
double reg1 = 0.0;
double reg2 = 0.001;
// Learning rate
double lr = 0.001;
// Standard deviation used to initialize the latent matrix V
double stev = 0.1;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set FM algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_FM_RANK(), String.valueOf(rank));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(lr));
conf.set(MLConf.ML_FM_REG0(), String.valueOf(reg0));
conf.set(MLConf.ML_FM_REG1(), String.valueOf(reg1));
conf.set(MLConf.ML_FM_REG2(), String.valueOf(reg2));
conf.set(MLConf.ML_FM_V_STDDEV(), String.valueOf(stev));
}
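For context: in a factorization machine the prediction is y(x) = w0 + Σi wi·xi + Σi<j <vi, vj>·xi·xj, and ML_FM_RANK() is the dimension of the latent vectors vi. Judging by the key names (this is inferred, not stated in the example), reg0, reg1, and reg2 penalize the bias, linear, and latent terms respectively, and ML_FM_V_STDDEV() is the standard deviation used to initialize V.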
Example 12: setup
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
@Before
public void setup() throws Exception {
try {
String dataFmt = "libsvm";
// Cluster center number
int centerNum = 3;
// Feature number of train data
int featureNum = 4;
// Total iteration number
int epochNum = 5;
// Sample ratio per mini-batch
double spratio = 1.0;
// C parameter (see kMEANS_C below)
double c = 0.15;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set Kmeans algorithm parameters #cluster #feature #epoch
conf.set(MLConf.KMEANS_CENTER_NUM(), String.valueOf(centerNum));
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.KMEANS_SAMPLE_RATIO_PERBATCH(), String.valueOf(spratio));
conf.set(MLConf.kMEANS_C(), String.valueOf(c));
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
Example 13: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
/**
* set parameter values of conf
*/
@Before public void setConf() throws Exception {
try {
// Feature number of train data
int featureNum = 124;
// Total iteration number
int epochNum = 5;
// Validation sample ratio
double vRatio = 0.1;
// Data format, libsvm or dummy
String dataFmt = "libsvm";
// Sample ratio per mini-batch
double spRatio = 1.0;
// Batch number
int batchNum = 10;
// Learning rate
double learnRate = 1.0;
// Decay of learning rate
double decay = 0.1;
// Regularization coefficient
double reg = 0.02;
int rank = 5;
double vInit = 0.1;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set MLR algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
conf.set(MLConf.ML_MLR_RANK(), String.valueOf(rank));
conf.set(MLConf.ML_MLR_V_INIT(), String.valueOf(vInit));
conf.set(MLConf.ML_SGD_BATCH_NUM(), String.valueOf(batchNum));
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
Example 14: setConf
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
/**
* set parameter values of conf
*/
@Before public void setConf() throws Exception {
try {
// Feature number of train data
int featureNum = 101;
// Total iteration number
int epochNum = 5;
// Validation sample ratio
double vRatio = 0.5;
// Data format, libsvm or dummy
String dataFmt = "libsvm";
// Sample ratio per mini-batch
double spRatio = 1;
// Learning rate
double learnRate = 0.01;
// Decay of learning rate
double decay = 0.1;
// Regularization coefficient
double reg = 0;
// Set local deploy mode
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
// Set basic configuration keys
conf.setBoolean("mapred.mapper.new-api", true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
// Set data format
conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
//set angel resource parameters #worker, #task, #PS
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
//set sgd LR algorithm parameters #feature #epoch
conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
} catch (Exception x) {
LOG.error("setup failed ", x);
throw x;
}
}
Example 15: setup
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; // import the required package/class
public static void setup() throws Exception {
// set basic configuration keys
Configuration conf = new Configuration();
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
// use local deploy mode and the dummy data splitter
conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);
// get an Angel client
angelClient = AngelClientFactory.get(conf);
// add matrix
MatrixContext mMatrix = new MatrixContext();
mMatrix.setName("w1");
mMatrix.setRowNum(10);
mMatrix.setColNum(1000);
mMatrix.setMaxRowNumInBlock(10);
mMatrix.setMaxColNumInBlock(500);
mMatrix.setRowType(RowType.T_INT_DENSE);
mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
angelClient.addMatrix(mMatrix);
MatrixContext mMatrix2 = new MatrixContext();
mMatrix2.setName("w2");
mMatrix2.setRowNum(10);
mMatrix2.setColNum(100);
mMatrix2.setMaxRowNumInBlock(5);
mMatrix2.setMaxColNumInBlock(50);
mMatrix2.setRowType(RowType.T_DOUBLE_DENSE);
mMatrix2.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
mMatrix2.set(MatrixConf.MATRIX_HOGWILD, "false");
mMatrix2.set(MatrixConf.MATRIX_AVERAGE, "false");
mMatrix2.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
angelClient.addMatrix(mMatrix2);
angelClient.startPSServer();
angelClient.run();
Thread.sleep(2 * 1000);
group0Id = new WorkerGroupId(0);
worker0Id = new WorkerId(group0Id, 0);
worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
task0Id = new TaskId(0);
task1Id = new TaskId(1);
psId = new ParameterServerId(0);
psAttempt0Id = new PSAttemptId(psId, 0);
}
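A matching teardown would normally stop the client once the test finishes; a sketch, assuming AngelClient exposes a stop() method (not shown in these examples):
// Sketch of a teardown paired with setup(); assumes AngelClient#stop() exists.
public static void teardown() throws Exception {
  angelClient.stop();
}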