本文整理汇总了 C++ 中 Alphabet::from_string 方法的典型用法代码示例。如果您正苦于以下问题:C++ Alphabet::from_string 方法的具体用法?C++ Alphabet::from_string 怎么用?C++ Alphabet::from_string 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Alphabet 的用法示例。
在下文中一共展示了 Alphabet::from_string 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: readEmbeddings
void Segmentor::readEmbeddings(Alphabet &alpha, const string& inFile, NRMat<dtype>& emb) {
static ifstream inf;
if (inf.is_open()) {
inf.close();
inf.clear();
}
inf.open(inFile.c_str());
static string strLine, curWord;
static int wordId;
//find the first line, decide the wordDim;
while (1) {
if (!my_getline(inf, strLine)) {
break;
}
if (!strLine.empty())
break;
}
int unknownId = alpha.from_string(m_classifier.fe.unknownkey);
static vector<string> vecInfo;
split_bychar(strLine, vecInfo, ' ');
int wordDim = vecInfo.size() - 1;
std::cout << "embedding dim is " << wordDim << std::endl;
emb.resize(alpha.size(), wordDim);
emb = 0.0;
curWord = normalize_to_lowerwithdigit(vecInfo[0]);
wordId = alpha.from_string(curWord);
hash_set<int> indexers;
dtype sum[wordDim];
int count = 0;
bool bHasUnknown = false;
if (wordId >= 0) {
count++;
if (unknownId == wordId)
bHasUnknown = true;
indexers.insert(wordId);
for (int idx = 0; idx < wordDim; idx++) {
dtype curValue = atof(vecInfo[idx + 1].c_str());
sum[idx] = curValue;
emb[wordId][idx] = curValue;
}
} else {
for (int idx = 0; idx < wordDim; idx++) {
sum[idx] = 0.0;
}
}
while (1) {
if (!my_getline(inf, strLine)) {
break;
}
if (strLine.empty())
continue;
split_bychar(strLine, vecInfo, ' ');
if (vecInfo.size() != wordDim + 1) {
std::cout << "error embedding file" << std::endl;
}
curWord = normalize_to_lowerwithdigit(vecInfo[0]);
wordId = alpha.from_string(curWord);
if (wordId >= 0) {
count++;
if (unknownId == wordId)
bHasUnknown = true;
indexers.insert(wordId);
for (int idx = 0; idx < wordDim; idx++) {
dtype curValue = atof(vecInfo[idx + 1].c_str());
sum[idx] += curValue;
emb[wordId][idx] += curValue;
}
}
}
if (!bHasUnknown) {
for (int idx = 0; idx < wordDim; idx++) {
emb[unknownId][idx] = sum[idx] / count;
}
count++;
std::cout << unknownkey << " not found, using averaged value to initialize." << std::endl;
}
int oovWords = 0;
int totalWords = 0;
for (int id = 0; id < alpha.size(); id++) {
if (indexers.find(id) == indexers.end()) {
oovWords++;
for (int idx = 0; idx < wordDim; idx++) {
emb[id][idx] = emb[unknownId][idx];
}
}
totalWords++;
}
//.........这里部分代码省略.........