This article collects typical usage examples of the C++ NRMat class. If you are unsure what NRMat is for, how to use it, or what working code looks like, the curated class examples here may help.
Six NRMat code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better C++ code samples.
Example 1: readWordEmb
void WordEmbReader::readWordEmb(NRMat<double>& word_emb, map<string, int> features_alphabet, char* word_emb_path)
{
    ifstream w_e(word_emb_path);
    if (w_e.is_open()) {
        string line;
        if (getline(w_e, line)) {
            vector<string> data;
            split(data, line, is_any_of(" "));
            delNull(data);
            map<string, int>::iterator it = features_alphabet.find(data[0]);
            // the first field of each line is the word itself, so the matrix has data.size() - 1 columns
            word_emb.resize(features_alphabet.size(), data.size() - 1);
            word_emb = 0.0;
            if (it != features_alphabet.end()) {
                for (int i = 1; i < (int)data.size(); i++) {
                    stringstream string_to_double;
                    double cur_value;
                    string_to_double << data[i];
                    string_to_double >> cur_value;
                    word_emb[it->second][i - 1] = cur_value;  // i - 1: field 1 of the line maps to column 0 of the matrix
                }
            }
        }
        while (getline(w_e, line)) {
            vector<string> data;
            split(data, line, is_any_of(" "));
            delNull(data);
            map<string, int>::iterator it = features_alphabet.find(data[0]);
            if (it != features_alphabet.end()) {
                for (int i = 1; i < (int)data.size(); i++) {
                    stringstream string_to_double;
                    double cur_value;
                    string_to_double << data[i];
                    string_to_double >> cur_value;
                    word_emb[it->second][i - 1] = cur_value;  // same column shift as above
                }
            }
        }
    } else {
        // the listing is truncated at this else; a minimal completion is assumed here
        cerr << "unable to open word embedding file: " << word_emb_path << endl;
    }
}
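A minimal driver for this reader might look like the sketch below. It is illustrative only: the NRMat.h and WordEmbReader.h header names and the vectors.txt path (one word followed by its values per line) are assumptions, not part of the original listing.

#include <map>
#include <string>
#include "NRMat.h"            // assumed header providing NRMat<T>
#include "WordEmbReader.h"    // assumed header declaring WordEmbReader
using namespace std;

int main() {
    // Hypothetical alphabet mapping each word to its row in the matrix.
    map<string, int> alphabet;
    alphabet["the"] = 0;
    alphabet["cat"] = 1;

    NRMat<double> wordEmb;
    WordEmbReader reader;
    // readWordEmb takes a non-const char*, hence the cast.
    reader.readWordEmb(wordEmb, alphabet, (char*)"vectors.txt");
    return 0;
}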
Example 2: readWordEmbeddings
void Labeler::readWordEmbeddings(const string& inFile, NRMat<dtype>& wordEmb) {
    static ifstream inf;
    if (inf.is_open()) {
        inf.close();
        inf.clear();
    }
    inf.open(inFile.c_str());

    static string strLine, curWord;
    static int wordId;

    // find the first non-empty line; it decides wordDim
    while (1) {
        if (!my_getline(inf, strLine)) {
            break;
        }
        if (!strLine.empty())
            break;
    }

    int unknownId = m_wordAlphabet.from_string(unknownkey);

    static vector<string> vecInfo;
    split_bychar(strLine, vecInfo, ' ');
    int wordDim = vecInfo.size() - 1;
    std::cout << "word embedding dim is " << wordDim << std::endl;
    m_options.wordEmbSize = wordDim;

    wordEmb.resize(m_wordAlphabet.size(), wordDim);
    wordEmb = 0.0;
    curWord = normalize_to_lowerwithdigit(vecInfo[0]);
    wordId = m_wordAlphabet.from_string(curWord);

    hash_set<int> indexers;
    vector<dtype> sum(wordDim, 0.0);  // replaces a variable-length array, which is not standard C++
    int count = 0;
    bool bHasUnknown = false;
    if (wordId >= 0) {
        count++;
        if (unknownId == wordId)
            bHasUnknown = true;
        indexers.insert(wordId);
        for (int idx = 0; idx < wordDim; idx++) {
            dtype curValue = atof(vecInfo[idx + 1].c_str());
            sum[idx] = curValue;
            wordEmb[wordId][idx] = curValue;
        }
    }

    while (1) {
        if (!my_getline(inf, strLine)) {
            break;
        }
        if (strLine.empty())
            continue;
        split_bychar(strLine, vecInfo, ' ');
        if ((int)vecInfo.size() != wordDim + 1) {
            std::cout << "error embedding file" << std::endl;
            continue;  // skip the malformed line rather than reading past its end
        }
        curWord = normalize_to_lowerwithdigit(vecInfo[0]);
        wordId = m_wordAlphabet.from_string(curWord);
        if (wordId >= 0) {
            count++;
            if (unknownId == wordId)
                bHasUnknown = true;
            indexers.insert(wordId);
            for (int idx = 0; idx < wordDim; idx++) {
                dtype curValue = atof(vecInfo[idx + 1].c_str());
                sum[idx] += curValue;
                wordEmb[wordId][idx] += curValue;
            }
        }
    }

    // if the unknown token had no pretrained vector, initialize it with the average of all seen vectors
    if (!bHasUnknown) {
        for (int idx = 0; idx < wordDim; idx++) {
            wordEmb[unknownId][idx] = sum[idx] / count;
        }
        count++;
        std::cout << unknownkey << " not found, using averaged value to initialize." << std::endl;
    }

    // every word without a pretrained vector falls back to the unknown vector
    int oovWords = 0;
    int totalWords = 0;
    for (int id = 0; id < m_wordAlphabet.size(); id++) {
        if (indexers.find(id) == indexers.end()) {
            oovWords++;
            for (int idx = 0; idx < wordDim; idx++) {
                wordEmb[id][idx] = wordEmb[unknownId][idx];
            }
        }
        totalWords++;
    }
//......... part of the code is omitted here .........
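The averaged-unknown trick above is worth isolating. The self-contained sketch below reproduces just that step with plain std::vector and made-up numbers; it does not use NRMat or the reader's alphabet.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // Three pretrained word vectors of dimension 4 (made-up values).
    std::vector<std::vector<double>> seen = {
        {0.1, 0.2, -0.3, 0.4},
        {0.0, 0.5, 0.1, -0.2},
        {0.3, -0.1, 0.2, 0.0},
    };
    const std::size_t dim = seen[0].size();

    // The <unk> row is the element-wise average of every seen vector,
    // exactly what the sum[] / count loop above computes.
    std::vector<double> unk(dim, 0.0);
    for (const auto& v : seen)
        for (std::size_t i = 0; i < dim; ++i)
            unk[i] += v[i];
    for (std::size_t i = 0; i < dim; ++i)
        unk[i] /= seen.size();

    for (double x : unk)
        std::cout << x << ' ';  // prints 0.133333 0.2 0 0.0666667
    std::cout << '\n';
    return 0;
}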
Example 3: otherInsts
void Labeler::train(const string& trainFile, const string& devFile, const string& testFile,
        const string& modelFile, const string& optionFile, const string& wordEmbFile) {
    if (optionFile != "")
        m_options.load(optionFile);
    m_options.showOptions();

    vector<Instance> trainInsts, devInsts, testInsts;
    static vector<Instance> decodeInstResults;
    static Instance curDecodeInst;
    bool bCurIterBetter = false;

    m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
    if (devFile != "")
        m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
    if (testFile != "")
        m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

    // Ensure that each file in m_options.testFiles exists!
    vector<vector<Instance> > otherInsts(m_options.testFiles.size());
    for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
        m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
    }

    //std::cout << "Training example number: " << trainInsts.size() << std::endl;
    //std::cout << "Dev example number: " << devInsts.size() << std::endl;
    //std::cout << "Test example number: " << testInsts.size() << std::endl;

    createAlphabet(trainInsts);
    if (!m_options.wordEmbFineTune) {
        addTestWordAlpha(devInsts);
        addTestWordAlpha(testInsts);
        for (int idx = 0; idx < otherInsts.size(); idx++) {
            addTestWordAlpha(otherInsts[idx]);
        }
        cout << "Remain words num: " << m_wordAlphabet.size() << endl;
    }

    NRMat<dtype> wordEmb;
    if (wordEmbFile != "") {
        readWordEmbeddings(wordEmbFile, wordEmb);
    } else {
        wordEmb.resize(m_wordAlphabet.size(), m_options.wordEmbSize);
        wordEmb.randu(1000);
    }
    NRVec<NRMat<dtype> > tagEmbs(m_tagAlphabets.size());
    for (int idx = 0; idx < tagEmbs.size(); idx++) {
        tagEmbs[idx].resize(m_tagAlphabets[idx].size(), m_options.tagEmbSize);
        tagEmbs[idx].randu(1002 + idx);
    }

    m_classifier.init(m_labelAlphabet.size(), m_featAlphabet.size());
    m_classifier.setDropValue(m_options.dropProb);

    vector<Example> trainExamples, devExamples, testExamples;
    initialExamples(trainInsts, trainExamples);
    initialExamples(devInsts, devExamples);
    initialExamples(testInsts, testExamples);
    vector<int> otherInstNums(otherInsts.size());
    vector<vector<Example> > otherExamples(otherInsts.size());
    for (int idx = 0; idx < otherInsts.size(); idx++) {
        initialExamples(otherInsts[idx], otherExamples[idx]);
        otherInstNums[idx] = otherExamples[idx].size();
    }

    dtype bestDIS = 0;

    int inputSize = trainExamples.size();
    // number of batches per epoch, rounded up
    int batchBlock = inputSize / m_options.batchSize;
    if (inputSize % m_options.batchSize != 0)
        batchBlock++;

    srand(0);
    std::vector<int> indexes;
    for (int i = 0; i < inputSize; ++i)
        indexes.push_back(i);

    static Metric eval, metric_dev, metric_test;
    static vector<Example> subExamples;
    int devNum = devExamples.size(), testNum = testExamples.size();
    for (int iter = 0; iter < m_options.maxIter; ++iter) {
        std::cout << "##### Iteration " << iter << std::endl;
        random_shuffle(indexes.begin(), indexes.end());
        eval.reset();
        for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
            subExamples.clear();
            int start_pos = updateIter * m_options.batchSize;
            int end_pos = (updateIter + 1) * m_options.batchSize;
            if (end_pos > inputSize)
                end_pos = inputSize;
            for (int idy = start_pos; idy < end_pos; idy++) {
                subExamples.push_back(trainExamples[indexes[idy]]);
            }
            int curUpdateIter = iter * batchBlock + updateIter;
//......... part of the code is omitted here .........
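The shuffled mini-batch loop is the training core here. Below is a self-contained sketch of the same indexing scheme; std::shuffle with a fixed-seed std::mt19937 stands in for srand(0) plus random_shuffle (which C++17 removed), and the sizes are made up.

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main() {
    const int inputSize = 10, batchSize = 4;
    // Ceil division, matching the batchBlock computation above.
    int batchBlock = (inputSize + batchSize - 1) / batchSize;

    std::vector<int> indexes(inputSize);
    for (int i = 0; i < inputSize; ++i)
        indexes[i] = i;

    std::mt19937 rng(0);  // fixed seed, reproducible like srand(0)
    std::shuffle(indexes.begin(), indexes.end(), rng);

    for (int b = 0; b < batchBlock; ++b) {
        int start = b * batchSize;
        int end = std::min(start + batchSize, inputSize);
        std::cout << "batch " << b << ':';
        for (int i = start; i < end; ++i)
            std::cout << ' ' << indexes[i];
        std::cout << '\n';
    }
    return 0;
}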
Example 4: otherInsts
void Segmentor::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile,
        const string& wordEmbFile, const string& charEmbFile, const string& bicharEmbFile) {
    if (optionFile != "")
        m_options.load(optionFile);
    m_options.showOptions();

    vector<Instance> trainInsts, devInsts, testInsts;
    m_pipe.readInstances(trainFile, trainInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
    if (devFile != "")
        m_pipe.readInstances(devFile, devInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
    if (testFile != "")
        m_pipe.readInstances(testFile, testInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
    vector<vector<Instance> > otherInsts(m_options.testFiles.size());
    for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
        m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance);
    }

    createAlphabet(trainInsts);
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);

    NRMat<dtype> wordEmb, allwordEmb;
    if (wordEmbFile != "") {
        allWordAlphaEmb(wordEmbFile, allwordEmb);
    } else {
        std::cout << "must not be here, allword must be pretrained." << std::endl;
    }
    wordEmb.resize(m_classifier.fe._wordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
    cout << "word emb dim is " << wordEmb.ncols() << std::endl;

    NRMat<dtype> charEmb;
    if (charEmbFile != "") {
        readEmbeddings(m_classifier.fe._charAlphabet, charEmbFile, charEmb);
    } else {
        charEmb.resize(m_classifier.fe._charAlphabet.size(), m_options.charEmbSize);
        charEmb.randu(2000);
    }
    cout << "char emb dim is " << charEmb.ncols() << std::endl;

    NRMat<dtype> bicharEmb;
    if (bicharEmbFile != "") {
        readEmbeddings(m_classifier.fe._bicharAlphabet, bicharEmbFile, bicharEmb);
    } else {
        bicharEmb.resize(m_classifier.fe._bicharAlphabet.size(), m_options.bicharEmbSize);
        bicharEmb.randu(2000);
    }
    cout << "bichar emb dim is " << bicharEmb.ncols() << std::endl;

    NRMat<dtype> actionEmb;
    actionEmb.resize(m_classifier.fe._actionAlphabet.size(), m_options.actionEmbSize);
    actionEmb.randu(3000);
    cout << "action emb dim is " << actionEmb.ncols() << std::endl;

    NRMat<dtype> lengthEmb;
    lengthEmb.resize(6, m_options.lengthEmbSize);
    lengthEmb.randu(3000);
    cout << "length emb dim is " << lengthEmb.ncols() << std::endl;  // was actionEmb.ncols(), a copy-paste slip

    m_classifier.init(wordEmb, allwordEmb, lengthEmb, m_options.wordNgram, m_options.wordHiddenSize, m_options.wordRNNHiddenSize,
            charEmb, bicharEmb, m_options.charcontext, m_options.charHiddenSize, m_options.charRNNHiddenSize,
            actionEmb, m_options.actionNgram, m_options.actionHiddenSize, m_options.actionRNNHiddenSize,
            m_options.sepHiddenSize, m_options.appHiddenSize, m_options.delta);
    m_classifier.setDropValue(m_options.dropProb);
    m_classifier.setOOVFreq(m_options.wordCutOff);
    m_classifier.setOOVRatio(m_options.oovRatio);
    m_classifier.setWordFreq(m_word_stat);

    vector<vector<CAction> > trainInstGoldactions;
    getGoldActions(trainInsts, trainInstGoldactions);
    double bestFmeasure = 0;

    int inputSize = trainInsts.size();
    std::vector<int> indexes;
    for (int i = 0; i < inputSize; ++i)
        indexes.push_back(i);

    static Metric eval, metric_dev, metric_test;
    int maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
    int oneIterMaxRound = (inputSize + m_options.batchSize - 1) / m_options.batchSize;
    std::cout << "maxIter = " << maxIter << std::endl;
    int devNum = devInsts.size(), testNum = testInsts.size();

    static vector<vector<string> > decodeInstResults;
    static vector<string> curDecodeInst;
    static bool bCurIterBetter;
    static vector<vector<string> > subInstances;
    static vector<vector<CAction> > subInstGoldActions;
//......... part of the code is omitted here .........
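Each embedding table above is seeded with a distinct constant (1000, 2000, 3000) so runs are reproducible. The exact distribution behind NRMat::randu is not shown in the listing; the stand-in below assumes uniform values in [-0.01, 0.01], which is only a guess at the real behavior.

#include <iostream>
#include <random>
#include <vector>

// Stand-in for NRMat<dtype>::randu(seed). The [-0.01, 0.01] range is an
// assumption; the original NRMat implementation may use another range.
std::vector<std::vector<double>> randu(int rows, int cols, unsigned seed) {
    std::mt19937 rng(seed);
    std::uniform_real_distribution<double> dist(-0.01, 0.01);
    std::vector<std::vector<double>> m(rows, std::vector<double>(cols));
    for (auto& row : m)
        for (auto& x : row)
            x = dist(rng);
    return m;
}

int main() {
    auto lengthEmb = randu(6, 3, 3000);    // 6 length buckets, dim 3, seed 3000
    std::cout << lengthEmb[0][0] << '\n';  // same value on every run
    return 0;
}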
Example 5: allWordAlphaEmb
int Segmentor::allWordAlphaEmb(const string& inFile, NRMat<dtype>& emb) {
    cout << "All word alphabet and emb creating..." << endl;
    hash_map<string, int> word_stat;
    static ifstream inf;
    if (inf.is_open()) {
        inf.close();
        inf.clear();
    }
    inf.open(inFile.c_str());

    static string strLine, curWord;
    static int wordId;
    static vector<string> vecInfo;
    vector<string> allLines;
    int wordDim = 0;

    // first pass: buffer the lines and count every word
    while (1) {
        if (!my_getline(inf, strLine)) {
            break;
        }
        if (!strLine.empty()) {
            split_bychar(strLine, vecInfo, ' ');
            if (wordDim == 0) {
                wordDim = vecInfo.size() - 1;
                std::cout << "allword embedding dim is " << wordDim << std::endl;
            }
            curWord = normalize_to_lowerwithdigit(vecInfo[0]);
            word_stat[curWord]++;
            allLines.push_back(strLine);
        }
    }

    m_classifier.addToAllWordAlphabet(word_stat);
    cout << "Remain all word num: " << m_classifier.fe._allwordAlphabet.size() << endl;

    emb.resize(m_classifier.fe._allwordAlphabet.size(), wordDim);
    emb = 0.0;
    int unknownId = m_classifier.fe._allwordAlphabet.from_string(m_classifier.fe.unknownkey);
    vector<dtype> sum(wordDim, 0.0);  // replaces a variable-length array, which is not standard C++
    int count = 0;
    bool bHasUnknown = false;

    // second pass: fill the matrix from the buffered lines
    for (int idx = 0; idx < (int)allLines.size(); idx++) {
        split_bychar(allLines[idx], vecInfo, ' ');
        if ((int)vecInfo.size() != wordDim + 1) {
            std::cout << "error embedding file" << std::endl;
            continue;  // skip the malformed line rather than reading past its end
        }
        curWord = normalize_to_lowerwithdigit(vecInfo[0]);
        wordId = m_classifier.fe._allwordAlphabet.from_string(curWord);
        if (wordId >= 0) {
            count++;
            if (unknownId == wordId)
                bHasUnknown = true;
            for (int idy = 0; idy < wordDim; idy++) {
                dtype curValue = atof(vecInfo[idy + 1].c_str());
                sum[idy] += curValue;
                emb[wordId][idy] += curValue;
            }
        } else {
            std::cout << "read all word embedding strange...." << std::endl;
        }
    }

    if (!bHasUnknown) {
        for (int idx = 0; idx < wordDim; idx++) {
            emb[unknownId][idx] = sum[idx] / count;
        }
        count++;
        std::cout << m_classifier.fe.unknownkey << " not found, using averaged value to initialize." << std::endl;
    }
    return 0;
}
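The function's two-pass shape, buffering every line, building the alphabet, then filling the matrix, generalizes beyond this codebase. Below is a self-contained sketch of the pattern using a hypothetical embeddings.txt (one word followed by its values per line) and plain standard containers instead of NRMat and the hash_map alphabet.

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::ifstream inf("embeddings.txt");  // hypothetical input file
    std::unordered_map<std::string, int> alphabet;
    std::vector<std::string> allLines;
    std::string line;

    // Pass 1: buffer lines and assign each new word a row id.
    while (std::getline(inf, line)) {
        if (line.empty())
            continue;
        std::istringstream iss(line);
        std::string word;
        iss >> word;
        if (alphabet.find(word) == alphabet.end()) {
            int id = static_cast<int>(alphabet.size());
            alphabet[word] = id;
        }
        allLines.push_back(line);
    }

    // Pass 2: the row count is now known, so fill a dense matrix.
    std::size_t dim = 0;
    std::vector<std::vector<double>> emb(alphabet.size());
    for (const std::string& l : allLines) {
        std::istringstream iss(l);
        std::string word;
        iss >> word;
        std::vector<double> values;
        double v;
        while (iss >> v)
            values.push_back(v);
        if (dim == 0)
            dim = values.size();
        if (values.size() != dim)
            continue;  // skip malformed lines
        emb[alphabet[word]] = values;
    }
    std::cout << alphabet.size() << " words, dim " << dim << '\n';
    return 0;
}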
Example 6: otherInsts
void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile,
        const string& wordEmbFile, const string& charEmbFile) {
    if (optionFile != "")
        m_options.load(optionFile);
    m_options.showOptions();
    m_linearfeat = 0;

    vector<Instance> trainInsts, devInsts, testInsts;
    static vector<Instance> decodeInstResults;
    static Instance curDecodeInst;
    bool bCurIterBetter = false;

    m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
    if (devFile != "")
        m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
    if (testFile != "")
        m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);

    // Ensure that each file in m_options.testFiles exists!
    vector<vector<Instance> > otherInsts(m_options.testFiles.size());
    for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
        m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
    }

    //std::cout << "Training example number: " << trainInsts.size() << std::endl;
    //std::cout << "Dev example number: " << devInsts.size() << std::endl;
    //std::cout << "Test example number: " << testInsts.size() << std::endl;

    createAlphabet(trainInsts);
    if (!m_options.wordEmbFineTune) {
        addTestWordAlpha(devInsts);
        addTestWordAlpha(testInsts);
        for (int idx = 0; idx < otherInsts.size(); idx++) {
            addTestWordAlpha(otherInsts[idx]);
        }
        cout << "Remain words num: " << m_textWordAlphabet.size() << endl;
    }
    if (!m_options.charEmbFineTune) {
        addTestCharAlpha(devInsts);
        addTestCharAlpha(testInsts);
        for (int idx = 0; idx < otherInsts.size(); idx++) {
            addTestCharAlpha(otherInsts[idx]);
        }
        cout << "Remain char num: " << m_charAlphabet.size() << endl;
    }

    NRMat<double> wordEmb;
    if (wordEmbFile != "") {
        readWordEmbeddings(wordEmbFile, wordEmb);
    } else {
        wordEmb.resize(m_textWordAlphabet.size(), m_options.wordEmbSize);
        wordEmb.randu(1000);
    }
    NRMat<double> charEmb;
    if (charEmbFile != "") {
        readWordEmbeddings(charEmbFile, charEmb);
    } else {
        charEmb.resize(m_charAlphabet.size(), m_options.charEmbSize);
        charEmb.randu(1001);
    }

    m_classifier.init(wordEmb, m_options.wordcontext, charEmb, m_options.charcontext, m_headWordAlphabet.size(),
            m_options.wordHiddenSize, m_options.charHiddenSize, m_options.hiddenSize);
    m_classifier.resetRemove(m_options.removePool, m_options.removeCharPool);
    m_classifier.setDropValue(m_options.dropProb);
    m_classifier.setWordEmbFinetune(m_options.wordEmbFineTune, m_options.charEmbFineTune);

    vector<Example> trainExamples, devExamples, testExamples;
    initialExamples(trainInsts, trainExamples);
    initialExamples(devInsts, devExamples);
    initialExamples(testInsts, testExamples);
    vector<int> otherInstNums(otherInsts.size());
    vector<vector<Example> > otherExamples(otherInsts.size());
    for (int idx = 0; idx < otherInsts.size(); idx++) {
        initialExamples(otherInsts[idx], otherExamples[idx]);
        otherInstNums[idx] = otherExamples[idx].size();
    }

    double bestDIS = 0;

    int inputSize = trainExamples.size();
    srand(0);
    std::vector<int> indexes;
    for (int i = 0; i < inputSize; ++i)
        indexes.push_back(i);

    static Metric eval, metric_dev, metric_test;
    static vector<Example> subExamples;
    int devNum = devExamples.size(), testNum = testExamples.size();

    // when training in mini-batches, rescale the iteration count by the number of batches per epoch
    int maxIter = m_options.maxIter;
    if (m_options.batchSize > 1)
        maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
//......... part of the code is omitted here .........
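One detail worth spelling out: when batchSize > 1, maxIter is rescaled from "epochs" to "parameter updates". A tiny worked example of the same arithmetic, with assumed option values:

#include <iostream>

int main() {
    int maxIter = 20;     // epochs requested in the options (assumed value)
    int inputSize = 100;  // number of training examples (assumed value)
    int batchSize = 16;

    if (batchSize > 1)
        maxIter = maxIter * (inputSize / batchSize + 1);

    std::cout << maxIter << '\n';  // 20 * (6 + 1) = 140 updates
    return 0;
}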