当前位置: 首页>>代码示例>>C++>>正文


C++ Metric::reset方法代码示例

本文整理汇总了C++中Metric::reset方法的典型用法代码示例。如果您正苦于以下问题:C++ Metric::reset方法的具体用法?C++ Metric::reset怎么用?C++ Metric::reset使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Metric的用法示例。


在下文中一共展示了Metric::reset方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: getGoldActions

void Segmentor::getGoldActions(const vector<Instance>& vecInsts, vector<vector<CAction> >& vecActions) {
	vecActions.clear();

	static Metric segEval, posEval;
	static CStateItem state[m_classifier.MAX_SENTENCE_SIZE];
	static CResult output;
	static CAction answer;
	segEval.reset();
	posEval.reset();
	static int numInstance, actionNum;
	vecActions.resize(vecInsts.size());
	for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
		const Instance &instance = vecInsts[numInstance];

		actionNum = 0;
		state[actionNum].initSentence(&instance.chars, &instance.candidateLabels);
		state[actionNum].clear();

		while (!state[actionNum].IsTerminated()) {
			state[actionNum].getGoldAction(instance, m_classifier.fe._postagAlphabet, answer);
			vecActions[numInstance].push_back(answer);
			state[actionNum].move(state + actionNum + 1, answer, m_classifier.fe._postagAlphabet);
			actionNum++;
		}

		if (actionNum - 1 != instance.charsize()) {
			std::cout << "action number is not correct, please check" << std::endl;
		}
		state[actionNum].getSegPosResults(output);

		instance.evaluate(output, segEval, posEval);

		if (!segEval.bIdentical() || !posEval.bIdentical()) {
			std::cout << "error state conversion!" << std::endl;
			std::cout << "output instance:" << std::endl;
			for (int tmpK = 0; tmpK < instance.words.size(); tmpK++) {
				std::cout << instance.words[tmpK] << "_" << instance.postags[tmpK] << " ";
			}
			std::cout << std::endl;

			std::cout << "predicated instance:" << std::endl;
			for (int tmpK = 0; tmpK < output.size(); tmpK++) {
				std::cout << output.words[tmpK] << "_" << output.postags[tmpK] << " ";
			}
			std::cout << std::endl;

			exit(0);
		}

		if ((numInstance + 1) % m_options.verboseIter == 0) {
			cout << numInstance + 1 << " ";
			if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
				cout << std::endl;
			cout.flush();
		}
		if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
			break;
	}
}
开发者ID:zhangmeishan,项目名称:NNTransitionPOSTagging,代码行数:59,代码来源:LinearSegmentor.cpp

示例2: getGoldActions

void Segmentor::getGoldActions(const vector<Instance>& vecInsts, vector<vector<CAction> >& vecActions){
  vecActions.clear();

  static Metric eval;
#if USE_CUDA==1
  static CStateItem<gpu> state[m_classifier.MAX_SENTENCE_SIZE];
#else
  static CStateItem<cpu> state[m_classifier.MAX_SENTENCE_SIZE];
#endif
  static vector<string> output;
  static CAction answer;
  eval.reset();
  static int numInstance, actionNum;
  vecActions.resize(vecInsts.size());
  for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
    const Instance &instance = vecInsts[numInstance];

    actionNum = 0;
    state[actionNum].initSentence(&instance.chars);
    state[actionNum].clear();

    while (!state[actionNum].IsTerminated()) {
      state[actionNum].getGoldAction(instance.words, answer);
      vecActions[numInstance].push_back(answer);
      state[actionNum].move(state+actionNum+1, answer);
      actionNum++;
    }

    if(actionNum-1 != instance.charsize()) {
      std::cout << "action number is not correct, please check" << std::endl;
    }
    state[actionNum].getSegResults(output);

    instance.evaluate(output, eval);

    if (!eval.bIdentical()) {
      std::cout << "error state conversion!" << std::endl;
      exit(0);
    }

    if ((numInstance + 1) % m_options.verboseIter == 0) {
      cout << numInstance + 1 << " ";
      if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
        cout << std::endl;
      cout.flush();
    }
    if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
      break;
  }
}
开发者ID:LinguList,项目名称:NNTransitionSegmentor,代码行数:50,代码来源:LSTMNCSegmentor.cpp

示例3: train

void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, 
    const string& modelFile, const string& optionFile, const string& wordEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);
  m_options.showOptions();
  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;

  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }

  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;

  createAlphabet(trainInsts);

  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_wordAlphabet.size() << endl;
  }

  NRMat<dtype> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_wordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }

  NRVec<NRMat<dtype> > tagEmbs(m_tagAlphabets.size());
  for (int idx = 0; idx < tagEmbs.size(); idx++) {
    tagEmbs[idx].resize(m_tagAlphabets[idx].size(), m_options.tagEmbSize);
    tagEmbs[idx].randu(1002 + idx);
  }

  m_classifier.init(m_labelAlphabet.size(), m_featAlphabet.size());

  m_classifier.setDropValue(m_options.dropProb);

  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);

  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }

  dtype bestDIS = 0;

  int inputSize = trainExamples.size();

  int batchBlock = inputSize / m_options.batchSize;
  if (inputSize % m_options.batchSize != 0)
    batchBlock++;

  srand(0);
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();
  for (int iter = 0; iter < m_options.maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;

    random_shuffle(indexes.begin(), indexes.end());
    eval.reset();
    for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
      subExamples.clear();
      int start_pos = updateIter * m_options.batchSize;
      int end_pos = (updateIter + 1) * m_options.batchSize;
      if (end_pos > inputSize)
        end_pos = inputSize;

      for (int idy = start_pos; idy < end_pos; idy++) {
        subExamples.push_back(trainExamples[indexes[idy]]);
      }

      int curUpdateIter = iter * batchBlock + updateIter;
//.........这里部分代码省略.........
开发者ID:LeonCrashCode,项目名称:NNSegmentation,代码行数:101,代码来源:SparseLabeler.cpp

示例4: createAlphabet

// all linear features are extracted from positive examples
int Segmentor::createAlphabet(const vector<Instance>& vecInsts) {
	cout << "Creating Alphabet..." << endl;

	int numInstance = vecInsts.size();

	hash_map<string, int> action_stat;
	hash_map<string, int> feat_stat;
	hash_map<string, int> postag_stat;

	assert(numInstance > 0);

	static Metric segEval, posEval;
	static CStateItem state[m_classifier.MAX_SENTENCE_SIZE];
	static Feature feat;
	static CResult output;
	static CAction answer;
	static int actionNum;
	m_classifier.initAlphabet();
	segEval.reset();
	posEval.reset();
	int maxFreqChar = -1;
	int maxFreqWord = -1;

	for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
		const Instance &instance = vecInsts[numInstance];
		for (int idx = 0; idx < instance.postagsize(); idx++) {
			postag_stat[instance.postags[idx]];
			m_classifier.fe._tagConstraints.addWordPOSPair(instance.words[idx], instance.postags[idx]);
		}
	}

	m_classifier.addToPostagAlphabet(postag_stat);

	for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
		const Instance &instance = vecInsts[numInstance];
		actionNum = 0;
		state[actionNum].initSentence(&instance.chars, &instance.candidateLabels);
		state[actionNum].clear();

		while (!state[actionNum].IsTerminated()) {
			state[actionNum].getGoldAction(instance, m_classifier.fe._postagAlphabet, answer);
			action_stat[answer.str()]++;

			m_classifier.extractFeature(state + actionNum, answer, feat);
			for (int idx = 0; idx < feat._strSparseFeat.size(); idx++) {
				feat_stat[feat._strSparseFeat[idx]]++;
			}
			state[actionNum].move(state + actionNum + 1, answer, m_classifier.fe._postagAlphabet);
			actionNum++;
		}

		if (actionNum - 1 != instance.charsize()) {
			std::cout << "action number is not correct, please check" << std::endl;
		}
		state[actionNum].getSegPosResults(output);

		instance.evaluate(output, segEval, posEval);

		if (!segEval.bIdentical() || !posEval.bIdentical()) {
			std::cout << "error state conversion!" << std::endl;
			std::cout << "output instance:" << std::endl;
			for (int tmpK = 0; tmpK < instance.words.size(); tmpK++) {
				std::cout << instance.words[tmpK] << "_" << instance.postags[tmpK] << " ";
			}
			std::cout << std::endl;

			std::cout << "predicated instance:" << std::endl;
			for (int tmpK = 0; tmpK < output.size(); tmpK++) {
				std::cout << output.words[tmpK] << "_" << output.postags[tmpK] << " ";
			}
			std::cout << std::endl;

			exit(0);
		}

		if ((numInstance + 1) % m_options.verboseIter == 0) {
			cout << numInstance + 1 << " ";
			if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
				cout << std::endl;
			cout.flush();
		}
		if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
			break;
	}

	m_classifier.addToActionAlphabet(action_stat);
	m_classifier.addToFeatureAlphabet(feat_stat, m_options.featCutOff);

	cout << numInstance << " " << endl;
	cout << "Action num: " << m_classifier.fe._actionAlphabet.size() << endl;
	cout << "Pos num: " << m_classifier.fe._postagAlphabet.size() << endl;
	cout << "Total feat num: " << feat_stat.size() << endl;

	cout << "Remain feat num: " << m_classifier.fe._featAlphabet.size() << endl;

	//m_classifier.setFeatureCollectionState(false);

	return 0;
}
开发者ID:zhangmeishan,项目名称:NNTransitionPOSTagging,代码行数:100,代码来源:LinearSegmentor.cpp

示例5: train

void Segmentor::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& lexiconFile) {
	if (optionFile != "")
		m_options.load(optionFile);

	m_options.showOptions();
	vector<Instance> trainInsts, devInsts, testInsts;
	m_pipe.readInstances(trainFile, trainInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
	if (devFile != "")
		m_pipe.readInstances(devFile, devInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
	if (testFile != "")
		m_pipe.readInstances(testFile, testInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);

	vector<vector<Instance> > otherInsts(m_options.testFiles.size());
	for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
		m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
	}

	createAlphabet(trainInsts);

	m_classifier.init(m_options.delta);
	m_classifier.setDropValue(m_options.dropProb);

	vector<vector<CAction> > trainInstGoldactions;
	getGoldActions(trainInsts, trainInstGoldactions);
	double bestPostagFmeasure = 0;

	int inputSize = trainInsts.size();

	std::vector<int> indexes;
	for (int i = 0; i < inputSize; ++i)
		indexes.push_back(i);

	static Metric eval;
	static Metric segMetric_dev, segMetric_test;
	static Metric postagMetric_dev, postagMetric_test;

	int maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
	int oneIterMaxRound = (inputSize + m_options.batchSize - 1) / m_options.batchSize;
	std::cout << "maxIter = " << maxIter << std::endl;
	int devNum = devInsts.size(), testNum = testInsts.size();

	static vector<CResult> decodeInstResults;
	static CResult curDecodeInst;
	static bool bCurIterBetter;
	static vector<Instance > subInstances;
	static vector<vector<CAction> > subInstGoldActions;

	for (int iter = 0; iter < maxIter; ++iter) {
		std::cout << "##### Iteration " << iter << std::endl;
		srand(iter);
		random_shuffle(indexes.begin(), indexes.end());
		std::cout << "random: " << indexes[0] << ", " << indexes[indexes.size() - 1] << std::endl;
		bool bEvaluate = false;
		if (m_options.batchSize == 1) {
			eval.reset();
			bEvaluate = true;
			for (int idy = 0; idy < inputSize; idy++) {
				subInstances.clear();
				subInstGoldActions.clear();
				subInstances.push_back(trainInsts[indexes[idy]]);
				subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]);

				double cost = m_classifier.train(subInstances, subInstGoldActions);

				eval.overall_label_count += m_classifier._eval.overall_label_count;
				eval.correct_label_count += m_classifier._eval.correct_label_count;

				if ((idy + 1) % (m_options.verboseIter * 10) == 0) {
					std::cout << "current: " << idy + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl;
				}
				m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
			}
			std::cout << "current: " << iter + 1 << ", Correct(%) = " << eval.getAccuracy() << std::endl;
		}
		else {
			if (iter == 0)
				eval.reset();
			subInstances.clear();
			subInstGoldActions.clear();
			for (int idy = 0; idy < m_options.batchSize; idy++) {
				subInstances.push_back(trainInsts[indexes[idy]]);
				subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]);
			}
			double cost = m_classifier.train(subInstances, subInstGoldActions);

			eval.overall_label_count += m_classifier._eval.overall_label_count;
			eval.correct_label_count += m_classifier._eval.correct_label_count;

			if ((iter + 1) % (m_options.verboseIter) == 0) {
				std::cout << "current: " << iter + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl;
				eval.reset();
				bEvaluate = true;
			}

			m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
		}

		if (bEvaluate && devNum > 0) {
			bCurIterBetter = false;
			if (!m_options.outBest.empty())
//.........这里部分代码省略.........
开发者ID:zhangmeishan,项目名称:NNTransitionPOSTagging,代码行数:101,代码来源:LinearSegmentor.cpp

示例6: createAlphabet

// all linear features are extracted from positive examples
int Segmentor::createAlphabet(const vector<Instance>& vecInsts) {
  cout << "Creating Alphabet..." << endl;

  int numInstance = vecInsts.size();

  hash_map<string, int> word_stat;
  hash_map<string, int> char_stat;
  hash_map<string, int> bichar_stat;
  hash_map<string, int> action_stat;
  hash_map<string, int> feat_stat;

  assert(numInstance > 0);

  static Metric eval;
  static CStateItem state[m_classifier.MAX_SENTENCE_SIZE];
  static Feature feat;
  static vector<string> output;
  static CAction answer;
  static int actionNum;
  m_classifier.initAlphabet();
  eval.reset();
  for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
    const Instance &instance = vecInsts[numInstance];
    for (int idx = 0; idx < instance.wordsize(); idx++) {
      word_stat[normalize_to_lowerwithdigit(instance.words[idx])]++;
    }
    for (int idx = 0; idx < instance.charsize(); idx++) {
      char_stat[instance.chars[idx]]++;
    }
    for (int idx = 0; idx < instance.charsize() - 1; idx++) {
      bichar_stat[instance.chars[idx] + instance.chars[idx + 1]]++;
    }
    bichar_stat[instance.chars[instance.charsize() - 1] + m_classifier.fe.nullkey]++;
    bichar_stat[m_classifier.fe.nullkey + instance.chars[0]]++;
    actionNum = 0;
    state[actionNum].initSentence(&instance.chars);
    state[actionNum].clear();

    while (!state[actionNum].IsTerminated()) {
      state[actionNum].getGoldAction(instance.words, answer);
      action_stat[answer.str()]++;

      m_classifier.extractFeature(state+actionNum, answer, feat);
      for (int idx = 0; idx < feat._strSparseFeat.size(); idx++) {
        feat_stat[feat._strSparseFeat[idx]]++;
      }
      state[actionNum].move(state+actionNum+1, answer);
      actionNum++;
    }

    if(actionNum-1 != instance.charsize()) {
      std::cout << "action number is not correct, please check" << std::endl;
    }
    state[actionNum].getSegResults(output);

    instance.evaluate(output, eval);

    if (!eval.bIdentical()) {
      std::cout << "error state conversion!" << std::endl;
      exit(0);
    }

    if ((numInstance + 1) % m_options.verboseIter == 0) {
      cout << numInstance + 1 << " ";
      if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
        cout << std::endl;
      cout.flush();
    }
    if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
      break;
  }

  m_classifier.addToActionAlphabet(action_stat);
  m_classifier.addToWordAlphabet(word_stat, m_options.wordEmbFineTune ? m_options.wordCutOff : 0);
  m_classifier.addToCharAlphabet(char_stat, m_options.charEmbFineTune ? m_options.charCutOff : 0);
  m_classifier.addToBiCharAlphabet(bichar_stat, m_options.tagEmbFineTune ? m_options.tagCutOff : 0);
  m_classifier.addToFeatureAlphabet(feat_stat, m_options.featCutOff);

  cout << numInstance << " " << endl;
  cout << "Action num: " << m_classifier.fe._actionAlphabet.size() << endl;
  cout << "Total word num: " << word_stat.size() << endl;
  cout << "Total char num: " << char_stat.size() << endl;
  cout << "Total bichar num: " << bichar_stat.size() << endl;
  cout << "Total feat num: " << feat_stat.size() << endl;

  cout << "Remain word num: " << m_classifier.fe._wordAlphabet.size() << endl;
  cout << "Remain char num: " << m_classifier.fe._charAlphabet.size() << endl;
  cout << "Remain bichar num: " << m_classifier.fe._bicharAlphabet.size() << endl;
  cout << "Remain feat num: " << m_classifier.fe._featAlphabet.size() << endl;

  //m_classifier.setFeatureCollectionState(false);

  return 0;
}
开发者ID:jinyeqiong,项目名称:NNTransitionSegmentor,代码行数:95,代码来源:APSegmentor.cpp

示例7: train


//.........这里部分代码省略.........
  m_classifier.setOOVRatio(m_options.oovRatio);
  m_classifier.setWordFreq(m_word_stat);

  vector<vector<CAction> > trainInstGoldactions;
  getGoldActions(trainInsts, trainInstGoldactions);
  double bestFmeasure = 0;

  int inputSize = trainInsts.size();

  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;

  int maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
  int oneIterMaxRound = (inputSize + m_options.batchSize -1) / m_options.batchSize;
  std::cout << "maxIter = " << maxIter << std::endl;
  int devNum = devInsts.size(), testNum = testInsts.size();

  static vector<vector<string> > decodeInstResults;
  static vector<string> curDecodeInst;
  static bool bCurIterBetter;
  static vector<vector<string> > subInstances;
  static vector<vector<CAction> > subInstGoldActions;

  for (int iter = 0; iter < maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;
    srand(iter);
    random_shuffle(indexes.begin(), indexes.end());
    std::cout << "random: " << indexes[0] << ", " << indexes[indexes.size() - 1] << std::endl;    
    bool bEvaluate = false;
    if(m_options.batchSize == 1){
    	eval.reset();
	    bEvaluate = true;
	    for (int idy = 0; idy < inputSize; idy++) {
	      subInstances.clear();
	      subInstGoldActions.clear();
	      subInstances.push_back(trainInsts[indexes[idy]].chars);
	      subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]);
	      	
	      double cost = m_classifier.train(subInstances, subInstGoldActions);
	
	      eval.overall_label_count += m_classifier._eval.overall_label_count;
	      eval.correct_label_count += m_classifier._eval.correct_label_count;
	
	      if ((idy + 1) % (m_options.verboseIter*10) == 0) {
	        std::cout << "current: " << idy + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl;
	      }
	      m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps, m_options.clip);
	    }
	    std::cout << "current: " << iter + 1  << ", Correct(%) = " << eval.getAccuracy() << std::endl;
    }
    else{
    	if(iter == 0)eval.reset();
  		subInstances.clear();
      subInstGoldActions.clear();
    	for (int idy = 0; idy < m_options.batchSize; idy++) {
	      subInstances.push_back(trainInsts[indexes[idy]].chars);
	      subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]);    		
    	}
      double cost = m_classifier.train(subInstances, subInstGoldActions);

      eval.overall_label_count += m_classifier._eval.overall_label_count;
      eval.correct_label_count += m_classifier._eval.correct_label_count;
      
开发者ID:LinguList,项目名称:NNTransitionSegmentor,代码行数:66,代码来源:LSTMNCSegmentor.cpp

示例8: train

void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile,
    const string& wordEmbFile, const string& charEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);

  m_options.showOptions();

  m_linearfeat = 0;

  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;

  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }

  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;

  createAlphabet(trainInsts);

  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_textWordAlphabet.size() << endl;
  }

  if (!m_options.charEmbFineTune) {
    addTestCharAlpha(devInsts);
    addTestCharAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestCharAlpha(otherInsts[idx]);
    }
    cout << "Remain char num: " << m_charAlphabet.size() << endl;
  }

  NRMat<double> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_textWordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }

  NRMat<double> charEmb;
  if (charEmbFile != "") {
    readWordEmbeddings(charEmbFile, charEmb);
  } else {
    charEmb.resize(m_charAlphabet.size(), m_options.charEmbSize);
    charEmb.randu(1001);
  }

  m_classifier.init(wordEmb, m_options.wordcontext, charEmb, m_options.charcontext, m_headWordAlphabet.size(), m_options.wordHiddenSize, m_options.charHiddenSize, m_options.hiddenSize);
  m_classifier.resetRemove(m_options.removePool, m_options.removeCharPool);
  m_classifier.setDropValue(m_options.dropProb);
  m_classifier.setWordEmbFinetune(m_options.wordEmbFineTune, m_options.charEmbFineTune);

  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);

  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }

  double bestDIS = 0;

  int inputSize = trainExamples.size();

  srand(0);
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();

  int maxIter = m_options.maxIter;
  if (m_options.batchSize > 1)
    maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);

//.........这里部分代码省略.........
开发者ID:SherlockYangBing,项目名称:TitleCaption,代码行数:101,代码来源:HCharDetector.cpp


注:本文中的Metric::reset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。