本文整理汇总了C++中PBowDocBs::PutTrainDIdV方法的典型用法代码示例。如果您正苦于以下问题：C++ PBowDocBs::PutTrainDIdV方法的具体用法是什么？C++ PBowDocBs::PutTrainDIdV怎么用？有哪些C++ PBowDocBs::PutTrainDIdV的使用例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类PBowDocBs的用法示例。
下文共展示了PBowDocBs::PutTrainDIdV方法的1个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更好的C++代码示例。
示例1: LoadCsv
//.........这里部分代码省略.........
PSIn SIn = TFIn::New(FNm);
char SsCh = ' '; TStrV FldValV;
// read the headers and initialise the feature generators
TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
const TStr& FldVal = FldValV[FldValN];
if (FldValN == ClassId) {
if (FldVal == "NOM") {
FtrGenBs->PutClsFtrGen(TFtrGenNominal::New());
} else if (FldVal == "MULTI-NOM") {
FtrGenBs->PutClsFtrGen(TFtrGenMultiNom::New());
} else {
TExcept::Throw("Wrong class type '" + FldVal + "', should be NOM or MULTI-NOM!");
}
} else if (!IgnoreIdV.IsIn(FldValN)) {
if (FldVal == TFtrGenNumeric::GetType()) {
FtrGenBs->AddFtrGen(TFtrGenNumeric::New());
} else if (FldVal == TFtrGenNominal::GetType()) {
FtrGenBs->AddFtrGen(TFtrGenNominal::New());
} else if (FldVal == TFtrGenToken::GetType()) {
FtrGenBs->AddFtrGen(TFtrGenToken::New(
TSwSet::New(swstNone), TStemmer::New(stmtNone)));
} else if (FldVal == TFtrGenSparseNumeric::GetType()) {
FtrGenBs->AddFtrGen(TFtrGenSparseNumeric::New());
} else if (FldVal == TFtrGenMultiNom::GetType()) {
FtrGenBs->AddFtrGen(TFtrGenMultiNom::New());
} else {
TExcept::Throw("Wrong type '" + FldVal + "'!");
}
}
}
// Remember the header width so that every data row can be checked against it.
const int Flds = FldValV.Len();
// First pass: feed records to the feature generators, stopping at end of
// input or as soon as TrainLen records have been consumed.
int Recs = 0;
while (!SIn->Eof() && Recs != TrainLen) {
  Recs++;
  printf("%7d\r", Recs);  // progress counter, rewritten in place via '\r'
  TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
  // Reported line number is Recs + 1 because the header row was read first.
  EAssertR(FldValV.Len() == Flds,
    TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
      Recs + 1, FldValV.Len(), Flds));
  try {
    // class value goes to the class generator, the remaining non-ignored
    // values are collected and passed to the feature generators together
    TStrV RowFtrValV;
    for (int ColN = 0; ColN < FldValV.Len(); ColN++) {
      if (ColN == ClassId) {
        FtrGenBs->UpdateCls(FldValV[ColN]);
      } else if (!IgnoreIdV.IsIn(ColN)) {
        RowFtrValV.Add(FldValV[ColN]);
      }
    }
    FtrGenBs->Update(RowFtrValV);
  } catch (PExcept Ex) {
    // rethrow with the offending line number prepended to the message
    TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!",
      Recs+1, Ex->GetMsgStr().CStr()));
  }
}
// Second pass: build the document base from the feature generators, then
// re-read the file from the start and add every record to it.
PBowDocBs BowDocBs = FtrGenBs->MakeBowDocBs();
// reopen the input and consume the header row, which was parsed earlier
SIn = TFIn::New(FNm); SsCh = ' ';
TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
// Unlike the first pass there is no TrainLen limit here: all records are read.
Recs = 0;
while (!SIn->Eof()) {
  Recs++; printf("%7d\r", Recs);  // progress counter, rewritten in place
  TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
  // Make sure the line still has the same number of fields as the header.
  // The line number is an int and must be formatted with %d; the previous
  // "%s" made the formatter treat it as a string pointer (undefined behaviour).
  EAssertR(FldValV.Len() == Flds,
    TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
      Recs + 1, FldValV.Len(), Flds));
  // split the row into the class value and the non-ignored feature values
  TStrV FtrValV; TStr ClsFtrVal;
  try {
    for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
      const TStr& FldVal = FldValV[FldValN];
      if (FldValN == ClassId) {
        ClsFtrVal = FldVal;
      } else if (!IgnoreIdV.IsIn(FldValN)) {
        FtrValV.Add(FldVal);
      }
    }
  } catch (PExcept Ex) {
    // rethrow with the offending line number prepended to the message
    TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!",
      Recs+1, Ex->GetMsgStr().CStr()));
  }
  // add the record as a document named after its 1-based record number
  FtrGenBs->AddBowDoc(BowDocBs, TStr::Fmt("Line-%d", Recs), FtrValV, ClsFtrVal);
}
// prepare training and testing doc ids
TIntV AllDIdV; BowDocBs->GetAllDIdV(AllDIdV); IAssert(AllDIdV.IsSorted());
TIntV TrainDIdV = AllDIdV; TrainDIdV.Trunc(TrainLen);
BowDocBs->PutTrainDIdV(TrainDIdV);
TIntV TestDIdV = AllDIdV; TestDIdV.Minus(TrainDIdV);
BowDocBs->PutTestDIdV(TestDIdV);
return BowDocBs;
}