当前位置: 首页>>代码示例>>Java>>正文


Java CombineTextInputFormat类代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat的典型用法代码示例。如果您正苦于以下问题:Java CombineTextInputFormat类的具体用法?Java CombineTextInputFormat怎么用?Java CombineTextInputFormat使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


CombineTextInputFormat类属于org.apache.hadoop.mapreduce.lib.input包,在下文中一共展示了CombineTextInputFormat类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Configures and submits the "mulword count" job using
 * {@link CombineTextInputFormat} as the input format.
 *
 * <p>Arguments remaining after generic Hadoop option parsing must be
 * {@code <in> [<in>...] <out> <minSplitSize> <maxSplitSize>}: one or more
 * input paths, the output path, then the minimum split size (applied per node
 * and per rack) and the maximum input split size.
 *
 * <p>Exits with status 2 on a usage error, 0 when the job succeeds and 1 when
 * it fails.
 *
 * @param args command-line arguments, including generic Hadoop options
 * @throws Exception if option parsing or job submission fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  // Validate the argument count BEFORE indexing from the end of the array:
  // at least one input, the output path and the two split sizes are required.
  if (otherArgs.length < 4) {
    System.err.println(
        "Usage: wordcount <in> [<in>...] <out> <minSplitSize> <maxSplitSize>");
    System.exit(2);
  }

  // The last two arguments carry the split sizes.
  long maxInputSplitSize = Long.parseLong(otherArgs[otherArgs.length - 1]);
  long minInputSplitSize = Long.parseLong(otherArgs[otherArgs.length - 2]);

  // Minimum combined split size, applied at both node and rack level.
  conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, minInputSplitSize);
  conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, minInputSplitSize);

  // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
  Job job = Job.getInstance(conf, "mulword count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  // Combine input files into splits no larger than the requested maximum.
  job.setInputFormatClass(CombineTextInputFormat.class);
  CombineTextInputFormat.setMaxInputSplitSize(job, maxInputSplitSize);

  // Everything before the trailing <out> <min> <max> triple is an input path.
  int outputIndex = otherArgs.length - 3;
  for (int i = 0; i < outputIndex; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
开发者ID:yncxcw,项目名称:big-c,代码行数:40,代码来源:MutiWordcount.java

示例2: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Fills {@code conf} with the settings used by the matrix factorization local
 * example: deploy mode, input format, resource counts, data/model/log paths,
 * the train action type and the MF algorithm parameters.
 *
 * @throws Exception kept from the original signature
 */
public void setConf() throws Exception {
  final String trainDataPath = "../data/exampledata/MFLocalExampleData";
  final String fsPrefix = LocalFileSystem.DEFAULT_FS;
  final String tmpDir = System.getProperty("java.io.tmpdir", "/tmp");
  // Model and log output share the same <fs><tmpdir> prefix.
  final String outputRoot = fsPrefix + tmpDir;

  // Local deploy mode plus the basic job switches
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Resources: one worker group, one task per worker, one PS
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

  // Training data, model output and log locations
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, trainDataPath);
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, outputRoot + "/model");
  conf.set(AngelConf.ANGEL_LOG_PATH, outputRoot + "/log");

  // Action type: train
  conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());

  // MF algorithm parameters
  conf.set(MLConf.ML_MF_RANK(), "200");
  conf.set(MLConf.ML_EPOCH_NUM(), "8");
  conf.set(MLConf.ML_MF_ROW_BATCH_NUM(), "2");
  conf.set(MLConf.ML_MF_ITEM_NUM(), "1683");
  conf.set(MLConf.ML_MF_LAMBDA(), "0.01");
  conf.set(MLConf.ML_MF_ETA(), "0.0054");
}
 
开发者ID:Tencent,项目名称:angel,代码行数:33,代码来源:MFLocalExample.java

示例3: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Fills {@code conf} with the settings used by the GBDT local example:
 * deploy mode, data format, input/output paths, resource counts and the GBDT
 * algorithm parameters.
 */
public void setConf() {
  // Locations
  final String trainDataPath =
      "../data/exampledata/GBDTLocalExampleData/agaricus.txt.train";
  final String fsPrefix = LocalFileSystem.DEFAULT_FS;
  final String tmpDir = System.getProperty("java.io.tmpdir", "/tmp");
  final String outputRoot = fsPrefix + tmpDir;

  // Data description
  final String dataFormat = "libsvm";
  final int featureCount = 127;       // feature number of the train data
  final int nonzeroFeatures = 25;     // number of nonzero features

  // Tree and learning parameters
  final int trees = 2;
  final int depth = 2;
  final int splits = 10;
  final double featureSampleRatio = 1.0;
  final double learningRate = 0.01;

  // Basic configuration keys
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);

  // Local deploy mode and data format
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.set(MLConf.ML_DATA_FORMAT(), dataFormat);

  // Input format, training data, model output and log paths
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, trainDataPath);
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, outputRoot + "/out");
  conf.set(AngelConf.ANGEL_LOG_PATH, outputRoot + "/log");

  // Resources: #worker group, #task, #PS
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

  // GBDT algorithm parameters, stored as strings
  conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
  conf.set(MLConf.ML_FEATURE_NNZ(), Integer.toString(nonzeroFeatures));
  conf.set(MLConf.ML_GBDT_TREE_NUM(), Integer.toString(trees));
  conf.set(MLConf.ML_GBDT_TREE_DEPTH(), Integer.toString(depth));
  conf.set(MLConf.ML_GBDT_SPLIT_NUM(), Integer.toString(splits));
  conf.set(MLConf.ML_GBDT_SAMPLE_RATIO(), Double.toString(featureSampleRatio));
  conf.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
}
 
开发者ID:Tencent,项目名称:angel,代码行数:54,代码来源:GBDTLocalExample.java

示例4: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Fills {@code conf} with the settings used by the SGD LR local example:
 * deploy mode, input format, data format, resource counts and the LR
 * algorithm parameters.
 */
public void setConf() {

    // Feature number of train data
    int featureNum = 124;
    // Total iteration number
    int epochNum = 20;
    // Validation sample ratio
    double vRatio = 0.1;
    // Data format, libsvm or dummy
    String dataFmt = "libsvm";
    // Batch sample ratio (written to ML_BATCH_SAMPLE_Ratio)
    double spRatio = 1.0;
    // NOTE(review): the original also declared an unused local "batchNum = 10";
    // it was never written to conf, so it has been dropped here.

    // Learning rate
    double learnRate = 1.0;
    // Decay of learning rate
    double decay = 0.1;
    // Regularization coefficient
    double reg = 0.2;

    // Set local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Set basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);

    // Set data format
    conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);

    // Set angel resource parameters #worker, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // Set SGD LR algorithm parameters
    conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
    conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
    conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
    conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
    conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
    conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
    conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
}
 
开发者ID:Tencent,项目名称:angel,代码行数:48,代码来源:SGDLRLocalExample.java

示例5: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Fills {@code conf} with the settings used by the linear regression local
 * example: deploy mode, input format, data format, resource counts and the
 * algorithm parameters.
 */
public void setConf() {
  // Data description
  final String dataFormat = "libsvm";   // libsvm or dummy
  final int featureCount = 101;         // feature number of the train data

  // Training schedule
  final int epochs = 20;                // total iteration number
  final double validateRatio = 0.5;     // validation sample ratio
  final double batchSampleRatio = 1;    // written to ML_BATCH_SAMPLE_Ratio

  // Optimizer parameters
  final double learningRate = 0.1;
  final double learningDecay = 0.01;
  final double l2Reg = 0;

  // Local deploy mode
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

  // Basic configuration keys
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Data format
  conf.set(MLConf.ML_DATA_FORMAT(), dataFormat);

  // Resources: #worker group, #task, #PS
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

  // Algorithm parameters, stored as strings
  conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
  conf.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
  conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), Double.toString(batchSampleRatio));
  conf.set(MLConf.ML_VALIDATE_RATIO(), Double.toString(validateRatio));
  conf.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
  conf.set(MLConf.ML_LEARN_DECAY(), Double.toString(learningDecay));
  conf.set(MLConf.ML_REG_L2(), Double.toString(l2Reg));
}
 
开发者ID:Tencent,项目名称:angel,代码行数:46,代码来源:LinearRegLocalExample.java

示例6: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Builds a fresh {@link Configuration} for the QSLR local example.
 *
 * @return a new configuration holding the deploy mode, input format, resource
 *     counts, algorithm parameters, data/model paths, action type and data format
 */
public static Configuration setConf() {
  final Configuration result = new Configuration();

  // Locations
  final String trainDataPath = "./angel-ps/mllib/src/test/data/lr/a9a.train";
  final String fsPrefix = LocalFileSystem.DEFAULT_FS;
  final String tmpDir = System.getProperty("java.io.tmpdir", "/tmp");
  final String modelPath = fsPrefix + tmpDir + "/model";

  // Algorithm parameters
  final int featureCount = 124;       // feature number of the train data
  final int epochs = 20;              // total iteration number
  final double learningRate = 1.0;
  final double l2Reg = 0.2;           // regularization coefficient

  // Local deploy mode
  result.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

  // Basic configuration keys
  result.setBoolean("mapred.mapper.new-api", true);
  result.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  result.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Resources: #worker group, #task, #PS
  result.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  result.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  result.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

  // Algorithm parameters, stored as strings
  result.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
  result.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
  result.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
  result.set(MLConf.ML_REG_L2(), Double.toString(l2Reg));

  // Input data, model output, action type (train) and data format
  result.set(AngelConf.ANGEL_TRAIN_DATA_PATH, trainDataPath);
  result.set(AngelConf.ANGEL_SAVE_MODEL_PATH, modelPath);
  result.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());
  result.set(MLConf.ML_DATA_FORMAT(), "libsvm");

  return result;
}
 
开发者ID:Tencent,项目名称:angel,代码行数:46,代码来源:QSLRLocalExample.java

示例7: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Sets the parameter values of {@code conf} before each test: deploy mode,
 * input format, data format, resource counts and the SGD LR algorithm
 * parameters.
 *
 * @throws Exception any failure raised while configuring; it is logged and rethrown
 */
@Before
public void setConf() throws Exception {
  try {
    // Feature number of train data
    int featureNum = 124;
    // Total iteration number
    int epochNum = 5;
    // Validation sample ratio
    double vRatio = 0.1;
    // Data format, libsvm or dummy
    String dataFmt = "libsvm";
    // Batch sample ratio (written to ML_BATCH_SAMPLE_Ratio)
    double spRatio = 1.0;
    // Model type
    String modelType = String.valueOf(RowType.T_DOUBLE_DENSE);
    // NOTE(review): the original also declared an unused local "batchNum = 10";
    // it was never written to conf, so it has been dropped here.

    // Learning rate
    double learnRate = 1.0;
    // Decay of learning rate
    double decay = 1;
    // Regularization coefficient
    double reg = 0.002;

    // Set local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Set basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    // Written once: the original followed this with a second write of the same
    // key as the string "true", which stores the same value.
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.setBoolean(MLConf.ML_INDEX_GET_ENABLE(), true);
    // Set data format
    conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);

    // Set angel resource parameters #worker, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // Set SGD LR algorithm parameters
    conf.set(MLConf.LR_MODEL_TYPE(), modelType);
    conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
    conf.set(MLConf.ML_EPOCH_NUM(), String.valueOf(epochNum));
    conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), String.valueOf(spRatio));
    conf.set(MLConf.ML_VALIDATE_RATIO(), String.valueOf(vRatio));
    conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
    conf.set(MLConf.ML_LEARN_DECAY(), String.valueOf(decay));
    conf.set(MLConf.ML_REG_L2(), String.valueOf(reg));
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:60,代码来源:SgdLRTest.java

示例8: setup

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Prepares {@code conf} before each GBDT test: task type, deploy mode, input
 * format, log path, resource counts and the GBDT algorithm parameters,
 * including the categorical-feature spec derived from the feature count.
 *
 * @throws Exception any failure raised while configuring; it is logged and rethrown
 */
@Before public void setup() throws Exception {
  try {

    conf.set(MLConf.ML_GBDT_TASK_TYPE(), "classification");

    // Feature number of train data
    int featureNum = 127;
    // Number of nonzero features
    int featureNnz = 25;
    // Tree number
    int treeNum = 5;
    // Tree depth
    int treeDepth = 3;
    // Split number
    int splitNum = 10;
    // Feature sample ratio
    double sampleRatio = 1.0;

    // Data format
    String dataFmt = "libsvm";

    // Learning rate
    double learnRate = 0.01;

    // Set basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);

    // Use local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Set input format and log path
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
    conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/LOG/log");

    // Set angel resource parameters #worker, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // Set GBDT algorithm parameters
    conf.set(MLConf.ML_DATA_FORMAT(), dataFmt);
    conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(featureNum));
    conf.set(MLConf.ML_FEATURE_NNZ(), String.valueOf(featureNnz));
    conf.set(MLConf.ML_GBDT_TREE_NUM(), String.valueOf(treeNum));
    conf.set(MLConf.ML_GBDT_TREE_DEPTH(), String.valueOf(treeDepth));
    conf.set(MLConf.ML_GBDT_SPLIT_NUM(), String.valueOf(splitNum));
    conf.set(MLConf.ML_GBDT_SAMPLE_RATIO(), String.valueOf(sampleRatio));
    conf.set(MLConf.ML_LEARN_RATE(), String.valueOf(learnRate));
    // Derived from featureNum so the spec cannot drift from the feature count;
    // for 127 features this yields exactly "0:2,1:2,...,126:2".
    conf.set(MLConf.ML_GBDT_CATE_FEAT(), cateFeatSpec(featureNum));
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}

/**
 * Builds the comma-separated spec {@code "0:2,1:2,...,(featureNum-1):2"}.
 * NOTE(review): each entry presumably means "feature index : category count";
 * confirm against the ML_GBDT_CATE_FEAT documentation.
 */
private static String cateFeatSpec(int featureNum) {
  StringBuilder spec = new StringBuilder();
  for (int i = 0; i < featureNum; i++) {
    if (i > 0) {
      spec.append(',');
    }
    spec.append(i).append(":2");
  }
  return spec.toString();
}
 
开发者ID:Tencent,项目名称:angel,代码行数:57,代码来源:GBDTTest.java

示例9: setup

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Builds a local configuration, obtains an angel client from it, registers the
 * "w1" matrix, starts the PS server, runs {@code MetricTestTask}, sleeps for
 * five seconds and then creates the two task ids used by the tests.
 *
 * @throws Exception any failure during setup; it is logged and rethrown
 */
@Before
public void setup() throws Exception {
  try {
    final Configuration cfg = new Configuration();
    final String pathRoot = LOCAL_FS + TMP_PATH;

    // Basic configuration keys
    cfg.setBoolean("mapred.mapper.new-api", true);
    cfg.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);

    // Local deploy mode with the dummy data spliter
    cfg.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    cfg.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    cfg.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Model output, training input and log paths
    cfg.set(AngelConf.ANGEL_SAVE_MODEL_PATH, pathRoot + "/out");
    cfg.set(AngelConf.ANGEL_TRAIN_DATA_PATH, pathRoot + "/in");
    cfg.set(AngelConf.ANGEL_LOG_PATH, pathRoot + "/log");

    // Resources: one worker group, one PS, two tasks per worker
    cfg.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    cfg.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    cfg.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

    // Client is created only after the configuration is complete
    angelClient = AngelClientFactory.get(cfg);

    // Matrix "w1": 1 x 100000 dense int rows, blocks of 1 x 50000
    final MatrixContext w1 = new MatrixContext();
    w1.setName("w1");
    w1.setRowNum(1);
    w1.setColNum(100000);
    w1.setMaxRowNumInBlock(1);
    w1.setMaxColNumInBlock(50000);
    w1.setRowType(RowType.T_INT_DENSE);
    w1.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    w1.set(MatrixConf.MATRIX_HOGWILD, "true");
    w1.set(MatrixConf.MATRIX_AVERAGE, "false");
    w1.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
    angelClient.addMatrix(w1);

    // Start the PS server, launch the task, then wait before creating task ids
    angelClient.startPSServer();
    angelClient.runTask(MetricTestTask.class);
    Thread.sleep(5000);
    task0Id = new TaskId(0);
    task1Id = new TaskId(1);
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:48,代码来源:AlgoMetricsTest.java

示例10: testMF

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Configures {@code conf} for a local matrix factorization training run and
 * executes it through {@code MatrixFactorizationRunner.train}.
 *
 * @throws Exception any failure during configuration or training; it is logged and rethrown
 */
@Test
public void testMF() throws Exception {
  try {
    final String trainDataPath = "./src/test/data/recommendation/MovieLensDataSet";
    final String fsPrefix = LocalFileSystem.DEFAULT_FS;
    final String tmpDir = System.getProperty("java.io.tmpdir", "/tmp");
    final String outputRoot = fsPrefix + tmpDir;

    // Local deploy mode and basic configuration keys
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Resources: #worker group, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // Training data, model output, log and user-model output paths
    conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, trainDataPath);
    conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, outputRoot + "/model");
    conf.set(AngelConf.ANGEL_LOG_PATH, outputRoot + "/Log/log");
    conf.set(MLConf.ML_MF_USER_OUTPUT_PATH(), outputRoot + "/usermodel");

    // Action type: train
    conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN());

    // MF algorithm parameters
    conf.set(MLConf.ML_MF_RANK(), "200");
    conf.set(MLConf.ML_EPOCH_NUM(), "5");
    conf.set(MLConf.ML_MF_ROW_BATCH_NUM(), "2");
    conf.set(MLConf.ML_MF_ITEM_NUM(), "1683");
    conf.set(MLConf.ML_MF_LAMBDA(), "0.01");
    conf.set(MLConf.ML_MF_ETA(), "0.0005");

    new MatrixFactorizationRunner().train(conf);
  } catch (Exception x) {
    LOG.error("run testMF failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:44,代码来源:matrixfactorizationTest.java

示例11: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Sets the parameter values of {@code conf} before each FM test: deploy mode,
 * input format, resource counts and the FM algorithm parameters.
 */
@Before
public void setConf() {
  // Problem size and schedule
  final int featureCount = 236;   // feature number of the train data
  final int epochs = 5;           // total iteration number
  final int fmRank = 5;

  // Regularization parameters
  final double regBias = 0.0;     // written to ML_FM_REG0
  final double regLinear = 0.0;   // written to ML_FM_REG1
  final double regFactor = 0.001; // written to ML_FM_REG2

  // Learning rate and the value written to ML_FM_V_STDDEV
  final double learningRate = 0.001;
  final double vStddev = 0.1;

  // Local deploy mode
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

  // Basic configuration keys
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Resources: #worker group, #task, #PS
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

  // FM algorithm parameters, stored as strings
  conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
  conf.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
  conf.set(MLConf.ML_FM_RANK(), Integer.toString(fmRank));
  conf.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
  conf.set(MLConf.ML_FM_REG0(), Double.toString(regBias));
  conf.set(MLConf.ML_FM_REG1(), Double.toString(regLinear));
  conf.set(MLConf.ML_FM_REG2(), Double.toString(regFactor));
  conf.set(MLConf.ML_FM_V_STDDEV(), Double.toString(vStddev));
}
 
开发者ID:Tencent,项目名称:angel,代码行数:44,代码来源:FMTest.java

示例12: setup

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Prepares {@code conf} before each KMeans test: deploy mode, input format,
 * resource counts, the KMeans algorithm parameters and the data format.
 *
 * @throws Exception any failure raised while configuring; it is logged and rethrown
 */
@Before
public void setup() throws Exception {
  try {
    final String dataFormat = "libsvm";

    final int clusterCenters = 3;            // cluster center number
    final int featureCount = 4;              // feature number of the train data
    final int epochs = 5;                    // total iteration number
    final double sampleRatioPerBatch = 1.0;  // sample ratio per mini-batch
    final double cValue = 0.15;              // written to kMEANS_C

    // Local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Resources: #worker group, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // KMeans algorithm parameters, stored as strings
    conf.set(MLConf.KMEANS_CENTER_NUM(), Integer.toString(clusterCenters));
    conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
    conf.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
    conf.set(MLConf.KMEANS_SAMPLE_RATIO_PERBATCH(), Double.toString(sampleRatioPerBatch));
    conf.set(MLConf.kMEANS_C(), Double.toString(cValue));

    // Data format
    conf.set(MLConf.ML_DATA_FORMAT(), dataFormat);
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:44,代码来源:KmeansTest.java

示例13: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Sets the parameter values of {@code conf} before each MLR test: deploy mode,
 * input format, data format, resource counts and the MLR algorithm parameters.
 *
 * @throws Exception any failure raised while configuring; it is logged and rethrown
 */
@Before public void setConf() throws Exception {
  try {
    // Data description
    final String dataFormat = "libsvm";    // libsvm or dummy
    final int featureCount = 124;          // feature number of the train data

    // Training schedule
    final int epochs = 5;                  // total iteration number
    final double validateRatio = 0.1;      // validation sample ratio
    final double batchSampleRatio = 1.0;   // written to ML_BATCH_SAMPLE_Ratio
    final int sgdBatches = 10;             // batch number

    // Optimizer and model parameters
    final double learningRate = 1.0;
    final double learningDecay = 0.1;
    final double l2Reg = 0.02;
    final int mlrRank = 5;
    final double vInitValue = 0.1;

    // Local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Data format
    conf.set(MLConf.ML_DATA_FORMAT(), dataFormat);

    // Resources: #worker group, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // MLR algorithm parameters, stored as strings
    conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
    conf.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
    conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), Double.toString(batchSampleRatio));
    conf.set(MLConf.ML_VALIDATE_RATIO(), Double.toString(validateRatio));
    conf.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
    conf.set(MLConf.ML_LEARN_DECAY(), Double.toString(learningDecay));
    conf.set(MLConf.ML_REG_L2(), Double.toString(l2Reg));
    conf.set(MLConf.ML_MLR_RANK(), Integer.toString(mlrRank));
    conf.set(MLConf.ML_MLR_V_INIT(), Double.toString(vInitValue));
    conf.set(MLConf.ML_SGD_BATCH_NUM(), Integer.toString(sgdBatches));
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:60,代码来源:MLRTest.java

示例14: setConf

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Sets the parameter values of {@code conf} before each linear regression
 * test: deploy mode, input format, data format, resource counts and the
 * algorithm parameters.
 *
 * @throws Exception any failure raised while configuring; it is logged and rethrown
 */
@Before public void setConf() throws Exception {
  try {
    // Data description
    final String dataFormat = "libsvm";   // libsvm or dummy
    final int featureCount = 101;         // feature number of the train data

    // Training schedule
    final int epochs = 5;                 // total iteration number
    final double validateRatio = 0.5;     // validation sample ratio
    final double batchSampleRatio = 1;    // written to ML_BATCH_SAMPLE_Ratio

    // Optimizer parameters
    final double learningRate = 0.01;
    final double learningDecay = 0.1;
    final double l2Reg = 0;

    // Local deploy mode
    conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");

    // Basic configuration keys
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

    // Data format
    conf.set(MLConf.ML_DATA_FORMAT(), dataFormat);

    // Resources: #worker group, #task, #PS
    conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);
    conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);

    // Algorithm parameters, stored as strings
    conf.set(MLConf.ML_FEATURE_NUM(), Integer.toString(featureCount));
    conf.set(MLConf.ML_EPOCH_NUM(), Integer.toString(epochs));
    conf.set(MLConf.ML_BATCH_SAMPLE_Ratio(), Double.toString(batchSampleRatio));
    conf.set(MLConf.ML_VALIDATE_RATIO(), Double.toString(validateRatio));
    conf.set(MLConf.ML_LEARN_RATE(), Double.toString(learningRate));
    conf.set(MLConf.ML_LEARN_DECAY(), Double.toString(learningDecay));
    conf.set(MLConf.ML_REG_L2(), Double.toString(l2Reg));
  } catch (Exception x) {
    LOG.error("setup failed ", x);
    throw x;
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:53,代码来源:LinearRegTest.java

示例15: setup

import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; //导入依赖的package包/类
/**
 * Builds a local configuration, obtains an angel client from it, registers the
 * "w1" and "w2" matrices, starts the PS server, runs the client, sleeps for
 * two seconds and then creates the worker, task and PS ids held in the
 * static fields.
 *
 * @throws Exception if any step of the setup fails
 */
public static void setup() throws Exception {
  final Configuration cfg = new Configuration();
  final String pathRoot = LOCAL_FS + TMP_PATH;

  // Basic configuration keys
  cfg.setBoolean("mapred.mapper.new-api", true);
  cfg.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  cfg.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

  // Local deploy mode with the dummy data spliter
  cfg.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  cfg.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  cfg.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());

  // Model output, training input and log paths
  cfg.set(AngelConf.ANGEL_SAVE_MODEL_PATH, pathRoot + "/out");
  cfg.set(AngelConf.ANGEL_TRAIN_DATA_PATH, pathRoot + "/in");
  cfg.set(AngelConf.ANGEL_LOG_PATH, pathRoot + "/log");

  // Resources: one worker group, one PS, two tasks per worker
  cfg.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  cfg.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  cfg.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

  // Client is created only after the configuration is complete
  angelClient = AngelClientFactory.get(cfg);

  // Matrix "w1": 10 x 1000 dense int rows, blocks of 10 x 500
  final MatrixContext w1 = new MatrixContext();
  w1.setName("w1");
  w1.setRowNum(10);
  w1.setColNum(1000);
  w1.setMaxRowNumInBlock(10);
  w1.setMaxColNumInBlock(500);
  w1.setRowType(RowType.T_INT_DENSE);
  w1.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
  w1.set(MatrixConf.MATRIX_HOGWILD, "true");
  w1.set(MatrixConf.MATRIX_AVERAGE, "false");
  w1.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
  angelClient.addMatrix(w1);

  // Matrix "w2": 10 x 100 dense double rows, blocks of 5 x 50
  final MatrixContext w2 = new MatrixContext();
  w2.setName("w2");
  w2.setRowNum(10);
  w2.setColNum(100);
  w2.setMaxRowNumInBlock(5);
  w2.setMaxColNumInBlock(50);
  w2.setRowType(RowType.T_DOUBLE_DENSE);
  w2.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
  w2.set(MatrixConf.MATRIX_HOGWILD, "false");
  w2.set(MatrixConf.MATRIX_AVERAGE, "false");
  w2.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_DOUBLE");
  angelClient.addMatrix(w2);

  // Start the PS server and the application, then wait before creating ids
  angelClient.startPSServer();
  angelClient.run();
  Thread.sleep(2000);

  // Worker-side ids (each one is built from the previous)
  group0Id = new WorkerGroupId(0);
  worker0Id = new WorkerId(group0Id, 0);
  worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);

  // Task ids
  task0Id = new TaskId(0);
  task1Id = new TaskId(1);

  // PS-side ids
  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);
}
 
开发者ID:Tencent,项目名称:angel,代码行数:61,代码来源:LocalClusterHelper.java


注:本文中的org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。