当前位置: 首页>>代码示例>>C++>>正文


C++ NRMat类代码示例

本文整理汇总了C++中NRMat的典型用法代码示例。如果您正苦于以下问题:C++ NRMat类的具体用法?C++ NRMat怎么用?C++ NRMat使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了NRMat类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: readWordEmb

void WordEmbReader::readWordEmb(NRMat<double> &word_emb,map<string,int> features_alphabet,char *word_emb_path)
{
	ifstream w_e(word_emb_path);	
	if(w_e.is_open())
	{
		string line;
		if(getline(w_e,line))
		{
			vector<string> data;
			split(data,line,is_any_of(" "));
			delNull(data);
			map<string,int>::iterator it = features_alphabet.find(data[0]);
			word_emb.resize(features_alphabet.size(),data.size()-1);
			word_emb = 0.0;
			if(it != features_alphabet.end())
			{
				for(int i = 1; i < data.size(); i++)
				{
					stringstream string_to_double;
					double cur_value;
					string_to_double << data[i];
					string_to_double >> cur_value;
					word_emb[it -> second][i] = cur_value;
				}
			}
		}
		while(getline(w_e,line))
		{
			vector<string> data;	
			split(data,line,is_any_of(" "));
			delNull(data);
			map<string,int>::iterator it = features_alphabet.find(data[0]);
			if(it != features_alphabet.end())
			{
				for(int i = 1; i < data.size(); i++)
				{
					stringstream string_to_double;
					double cur_value;
					string_to_double << data[i];
					string_to_double >> cur_value;
					word_emb[it -> second][i] = cur_value;
				}
			}
		}
	} else
开发者ID:3cQScbrOnly1,项目名称:maxEntropy,代码行数:45,代码来源:WordEmbReader.cpp

示例2: readWordEmbeddings

void Labeler::readWordEmbeddings(const string& inFile, NRMat<dtype>& wordEmb) {
  static ifstream inf;
  if (inf.is_open()) {
    inf.close();
    inf.clear();
  }
  inf.open(inFile.c_str());

  static string strLine, curWord;
  static int wordId;

  //find the first line, decide the wordDim;
  while (1) {
    if (!my_getline(inf, strLine)) {
      break;
    }
    if (!strLine.empty())
      break;
  }

  int unknownId = m_wordAlphabet.from_string(unknownkey);

  static vector<string> vecInfo;
  split_bychar(strLine, vecInfo, ' ');
  int wordDim = vecInfo.size() - 1;

  std::cout << "word embedding dim is " << wordDim << std::endl;
  m_options.wordEmbSize = wordDim;

  wordEmb.resize(m_wordAlphabet.size(), wordDim);
  wordEmb = 0.0;
  curWord = normalize_to_lowerwithdigit(vecInfo[0]);
  wordId = m_wordAlphabet.from_string(curWord);
  hash_set<int> indexers;
  dtype sum[wordDim];
  int count = 0;
  bool bHasUnknown = false;
  if (wordId >= 0) {
    count++;
    if (unknownId == wordId)
      bHasUnknown = true;
    indexers.insert(wordId);
    for (int idx = 0; idx < wordDim; idx++) {
      dtype curValue = atof(vecInfo[idx + 1].c_str());
      sum[idx] = curValue;
      wordEmb[wordId][idx] = curValue;
    }

  } else {
    for (int idx = 0; idx < wordDim; idx++) {
      sum[idx] = 0.0;
    }
  }

  while (1) {
    if (!my_getline(inf, strLine)) {
      break;
    }
    if (strLine.empty())
      continue;
    split_bychar(strLine, vecInfo, ' ');
    if (vecInfo.size() != wordDim + 1) {
      std::cout << "error embedding file" << std::endl;
    }
    curWord = normalize_to_lowerwithdigit(vecInfo[0]);
    wordId = m_wordAlphabet.from_string(curWord);
    if (wordId >= 0) {
      count++;
      if (unknownId == wordId)
        bHasUnknown = true;
      indexers.insert(wordId);

      for (int idx = 0; idx < wordDim; idx++) {
        dtype curValue = atof(vecInfo[idx + 1].c_str());
        sum[idx] += curValue;
        wordEmb[wordId][idx] += curValue;
      }
    }

  }

  if (!bHasUnknown) {
    for (int idx = 0; idx < wordDim; idx++) {
      wordEmb[unknownId][idx] = sum[idx] / count;
    }
    count++;
    std::cout << unknownkey << " not found, using averaged value to initialize." << std::endl;
  }

  int oovWords = 0;
  int totalWords = 0;
  for (int id = 0; id < m_wordAlphabet.size(); id++) {
    if (indexers.find(id) == indexers.end()) {
      oovWords++;
      for (int idx = 0; idx < wordDim; idx++) {
        wordEmb[id][idx] = wordEmb[unknownId][idx];
      }
    }
    totalWords++;
  }
//.........这里部分代码省略.........
开发者ID:LeonCrashCode,项目名称:NNSegmentation,代码行数:101,代码来源:SparseLabeler.cpp

示例3: otherInsts

void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, 
    const string& modelFile, const string& optionFile, const string& wordEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);
  m_options.showOptions();
  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;

  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }

  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;

  createAlphabet(trainInsts);

  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_wordAlphabet.size() << endl;
  }

  NRMat<dtype> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_wordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }

  NRVec<NRMat<dtype> > tagEmbs(m_tagAlphabets.size());
  for (int idx = 0; idx < tagEmbs.size(); idx++) {
    tagEmbs[idx].resize(m_tagAlphabets[idx].size(), m_options.tagEmbSize);
    tagEmbs[idx].randu(1002 + idx);
  }

  m_classifier.init(m_labelAlphabet.size(), m_featAlphabet.size());

  m_classifier.setDropValue(m_options.dropProb);

  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);

  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }

  dtype bestDIS = 0;

  int inputSize = trainExamples.size();

  int batchBlock = inputSize / m_options.batchSize;
  if (inputSize % m_options.batchSize != 0)
    batchBlock++;

  srand(0);
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();
  for (int iter = 0; iter < m_options.maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;

    random_shuffle(indexes.begin(), indexes.end());
    eval.reset();
    for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
      subExamples.clear();
      int start_pos = updateIter * m_options.batchSize;
      int end_pos = (updateIter + 1) * m_options.batchSize;
      if (end_pos > inputSize)
        end_pos = inputSize;

      for (int idy = start_pos; idy < end_pos; idy++) {
        subExamples.push_back(trainExamples[indexes[idy]]);
      }

      int curUpdateIter = iter * batchBlock + updateIter;
//.........这里部分代码省略.........
开发者ID:LeonCrashCode,项目名称:NNSegmentation,代码行数:101,代码来源:SparseLabeler.cpp

示例4: otherInsts

void Segmentor::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile,
    const string& wordEmbFile, const string& charEmbFile, const string& bicharEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);

  m_options.showOptions();
  vector<Instance> trainInsts, devInsts, testInsts;
  m_pipe.readInstances(trainFile, trainInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);

  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
  }

  createAlphabet(trainInsts);

  addTestWordAlpha(devInsts);
  addTestWordAlpha(testInsts);
    
  NRMat<dtype> wordEmb, allwordEmb;
  if (wordEmbFile != "") {  	
  	allWordAlphaEmb(wordEmbFile, allwordEmb);
  } else {
  	std::cout << "must not be here, allword must be pretrained." << std::endl;
  }
  wordEmb.resize(m_classifier.fe._wordAlphabet.size(), m_options.wordEmbSize);
  wordEmb.randu(1000);

  cout << "word emb dim is " << wordEmb.ncols() << std::endl;

  NRMat<dtype> charEmb;
  if (charEmbFile != "") {
  	readEmbeddings(m_classifier.fe._charAlphabet, charEmbFile, charEmb);
  } else {
  	charEmb.resize(m_classifier.fe._charAlphabet.size(), m_options.charEmbSize);
  	charEmb.randu(2000);
  }

  cout << "char emb dim is " << charEmb.ncols() << std::endl;

  NRMat<dtype> bicharEmb;
  if (bicharEmbFile != "") {
  	readEmbeddings(m_classifier.fe._bicharAlphabet, bicharEmbFile, bicharEmb);
  } else {
  	bicharEmb.resize(m_classifier.fe._bicharAlphabet.size(), m_options.bicharEmbSize);
  	bicharEmb.randu(2000);
  }

  cout << "bichar emb dim is " << bicharEmb.ncols() << std::endl;

  NRMat<dtype> actionEmb;
  actionEmb.resize(m_classifier.fe._actionAlphabet.size(), m_options.actionEmbSize);
  actionEmb.randu(3000);

  cout << "action emb dim is " << actionEmb.ncols() << std::endl;

  NRMat<dtype> lengthEmb;
  lengthEmb.resize(6, m_options.lengthEmbSize);
  lengthEmb.randu(3000);

  cout << "length emb dim is " << actionEmb.ncols() << std::endl;


  m_classifier.init(wordEmb, allwordEmb, lengthEmb, m_options.wordNgram, m_options.wordHiddenSize, m_options.wordRNNHiddenSize,
			charEmb, bicharEmb, m_options.charcontext, m_options.charHiddenSize, m_options.charRNNHiddenSize,
			actionEmb, m_options.actionNgram, m_options.actionHiddenSize, m_options.actionRNNHiddenSize,
			m_options.sepHiddenSize, m_options.appHiddenSize, m_options.delta);

  m_classifier.setDropValue(m_options.dropProb);

  m_classifier.setOOVFreq(m_options.wordCutOff);
  m_classifier.setOOVRatio(m_options.oovRatio);
  m_classifier.setWordFreq(m_word_stat);

  vector<vector<CAction> > trainInstGoldactions;
  getGoldActions(trainInsts, trainInstGoldactions);
  double bestFmeasure = 0;

  int inputSize = trainInsts.size();

  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;

  int maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
  int oneIterMaxRound = (inputSize + m_options.batchSize -1) / m_options.batchSize;
  std::cout << "maxIter = " << maxIter << std::endl;
  int devNum = devInsts.size(), testNum = testInsts.size();

  static vector<vector<string> > decodeInstResults;
  static vector<string> curDecodeInst;
  static bool bCurIterBetter;
  static vector<vector<string> > subInstances;
  static vector<vector<CAction> > subInstGoldActions;
//.........这里部分代码省略.........
开发者ID:LinguList,项目名称:NNTransitionSegmentor,代码行数:101,代码来源:LSTMNCSegmentor.cpp

示例5: allWordAlphaEmb

int Segmentor::allWordAlphaEmb(const string& inFile, NRMat<dtype>& emb) {
  cout << "All word  alphabet and emb creating..." << endl;

  hash_map<string, int> word_stat;
  
  static ifstream inf;
  if (inf.is_open()) {
    inf.close();
    inf.clear();
  }
  inf.open(inFile.c_str());

  static string strLine, curWord;
  static int wordId;
  static vector<string> vecInfo;
  vector<string> allLines;

  int wordDim = 0;
  while (1) {
    if (!my_getline(inf, strLine)) {
      break;
    }
    if (!strLine.empty()){
      split_bychar(strLine, vecInfo, ' '); 
      if(wordDim == 0){
      	wordDim = vecInfo.size() - 1;
      	std::cout << "allword embedding dim is " << wordDim << std::endl;
      }
      curWord = normalize_to_lowerwithdigit(vecInfo[0]);
      word_stat[curWord]++;
      allLines.push_back(strLine);
    }
  }

  m_classifier.addToAllWordAlphabet(word_stat);
  cout << "Remain all word num: " << m_classifier.fe._allwordAlphabet.size() << endl;
  
  emb.resize(m_classifier.fe._allwordAlphabet.size(), wordDim);
  emb = 0.0;
  
  int unknownId = m_classifier.fe._allwordAlphabet.from_string(m_classifier.fe.unknownkey);
  dtype sum[wordDim];
  int count = 0;
  bool bHasUnknown = false;
  for (int idx = 0; idx < wordDim; idx++) {
    sum[idx] = 0.0;
  }
  
  for(int idx = 0; idx < allLines.size(); idx++){
    split_bychar(allLines[idx], vecInfo, ' ');
    if (vecInfo.size() != wordDim + 1) {
      std::cout << "error embedding file" << std::endl;
    }
    curWord = normalize_to_lowerwithdigit(vecInfo[0]);
    wordId = m_classifier.fe._allwordAlphabet.from_string(curWord);
    if (wordId >= 0) {
      count++;
      if (unknownId == wordId)
        bHasUnknown = true;

      for (int idx = 0; idx < wordDim; idx++) {
        dtype curValue = atof(vecInfo[idx + 1].c_str());
        sum[idx] += curValue;
        emb[wordId][idx] += curValue;
      }
    }
    else{
    	std::cout << "read all word embedding strange...." << std::endl;
    }	
  	
  }  
  
  if (!bHasUnknown) {
    for (int idx = 0; idx < wordDim; idx++) {
      emb[unknownId][idx] = sum[idx] / count;
    }
    count++;
    std::cout << unknownkey << " not found, using averaged value to initialize." << std::endl;
  }

  return 0;
}
开发者ID:LinguList,项目名称:NNTransitionSegmentor,代码行数:82,代码来源:LSTMNCSegmentor.cpp

示例6: otherInsts

void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile,
    const string& wordEmbFile, const string& charEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);

  m_options.showOptions();

  m_linearfeat = 0;

  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;

  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }

  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;

  createAlphabet(trainInsts);

  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_textWordAlphabet.size() << endl;
  }

  if (!m_options.charEmbFineTune) {
    addTestCharAlpha(devInsts);
    addTestCharAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestCharAlpha(otherInsts[idx]);
    }
    cout << "Remain char num: " << m_charAlphabet.size() << endl;
  }

  NRMat<double> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_textWordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }

  NRMat<double> charEmb;
  if (charEmbFile != "") {
    readWordEmbeddings(charEmbFile, charEmb);
  } else {
    charEmb.resize(m_charAlphabet.size(), m_options.charEmbSize);
    charEmb.randu(1001);
  }

  m_classifier.init(wordEmb, m_options.wordcontext, charEmb, m_options.charcontext, m_headWordAlphabet.size(), m_options.wordHiddenSize, m_options.charHiddenSize, m_options.hiddenSize);
  m_classifier.resetRemove(m_options.removePool, m_options.removeCharPool);
  m_classifier.setDropValue(m_options.dropProb);
  m_classifier.setWordEmbFinetune(m_options.wordEmbFineTune, m_options.charEmbFineTune);

  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);

  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }

  double bestDIS = 0;

  int inputSize = trainExamples.size();

  srand(0);
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);

  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();

  int maxIter = m_options.maxIter;
  if (m_options.batchSize > 1)
    maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);

//.........这里部分代码省略.........
开发者ID:SherlockYangBing,项目名称:TitleCaption,代码行数:101,代码来源:HCharDetector.cpp


注:本文中的NRMat类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。