This article collects typical usage examples of the C++ method PBowDocBs::GetAllDIdV. If you are wondering how PBowDocBs::GetAllDIdV is used in practice, the curated code examples here may help; you can also explore further usage examples of the containing class, PBowDocBs.
Three code examples of the PBowDocBs::GetAllDIdV method are shown below, sorted by popularity by default.
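All three examples share the same calling pattern: an empty TIntV is passed in and GetAllDIdV fills it with the ids of every document in the bag-of-words base. A minimal sketch of that pattern, assuming a PBowDocBs instance named BowDocBs has already been loaded:

TIntV DIdV;
BowDocBs->GetAllDIdV(DIdV);
// iterate over all document ids and print the document names
for (int DIdN = 0; DIdN < DIdV.Len(); DIdN++) {
    const int DId = DIdV[DIdN];
    printf("%d: %s\n", DId, BowDocBs->GetDocNm(DId).CStr());
}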
Example 1: SaveSparseMatlabTxt
void TBowFl::SaveSparseMatlabTxt(const PBowDocBs& BowDocBs,
        const PBowDocWgtBs& BowDocWgtBs, const TStr& FNm,
        const TStr& CatFNm, const TIntV& _DIdV) {
    TIntV DIdV;
    if (_DIdV.Empty()) {
        BowDocBs->GetAllDIdV(DIdV);
    } else {
        DIdV = _DIdV;
    }
    // generate map of row-ids to words
    TFOut WdMapSOut(TStr::PutFExt(FNm, ".row-to-word-map.dat"));
    for (int WId = 0; WId < BowDocWgtBs->GetWords(); WId++) {
        TStr WdStr = BowDocBs->GetWordStr(WId);
        WdMapSOut.PutStrLn(TStr::Fmt("%d %s", WId+1, WdStr.CStr()));
    }
    WdMapSOut.Flush();
    // generate map of col-ids to document names
    TFOut DocMapSOut(TStr::PutFExt(FNm, ".col-to-docName-map.dat"));
    for (int DocN = 0; DocN < DIdV.Len(); DocN++) {
        const int DId = DIdV[DocN];
        TStr DocNm = BowDocBs->GetDocNm(DId);
        DocMapSOut.PutStrLn(TStr::Fmt("%d %d %s", DocN, DId, DocNm.CStr()));
    }
    DocMapSOut.Flush();
    // save documents' sparse vectors
    TFOut SOut(FNm);
    for (int DocN = 0; DocN < DIdV.Len(); DocN++) {
        const int DId = DIdV[DocN];
        PBowSpV DocSpV = BowDocWgtBs->GetSpV(DId);
        const int DocWIds = DocSpV->GetWIds();
        for (int DocWIdN = 0; DocWIdN < DocWIds; DocWIdN++) {
            const int WId = DocSpV->GetWId(DocWIdN);
            const double WordWgt = DocSpV->GetWgt(DocWIdN);
            SOut.PutStrLn(TStr::Fmt("%d %d %.16f", WId+1, DocN+1, WordWgt));
        }
    }
    SOut.Flush();
    // save documents' category sparse vectors
    if (!CatFNm.Empty()) {
        TFOut CatSOut(CatFNm);
        for (int DocN = 0; DocN < DIdV.Len(); DocN++) {
            const int DId = DIdV[DocN];
            const int DocCIds = BowDocBs->GetDocCIds(DId);
            for (int DocCIdN = 0; DocCIdN < DocCIds; DocCIdN++) {
                const int CId = BowDocBs->GetDocCId(DId, DocCIdN);
                const double CatWgt = 1.0;
                CatSOut.PutStrLn(TStr::Fmt("%d %d %.16f", CId+1, DocN+1, CatWgt));
            }
        }
        CatSOut.Flush();
    }
}
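A hedged usage sketch for the function above (the file names and the BowDocBs/BowDocWgtBs variables are assumptions, standing for a bag-of-words base and its weighted counterpart that have already been built). Passing an empty TIntV as the last argument makes the function export all documents via GetAllDIdV:

// assumed: BowDocBs and BowDocWgtBs already constructed
TIntV EmptyDIdV;  // empty, so all document ids are exported
TBowFl::SaveSparseMatlabTxt(BowDocBs, BowDocWgtBs,
    "bow-matrix.dat", "bow-cats.dat", EmptyDIdV);

The main output file holds one "row column value" triplet per line, so in MATLAB it can be turned back into a sparse word-by-document matrix with spconvert(load('bow-matrix.dat')).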
Example 2: LoadCsv
PBowDocBs TFtrGenBs::LoadCsv(TStr& FNm, const int& ClassId,
        const TIntV& IgnoreIdV, const int& TrainLen) {
    // feature generators
    PFtrGenBs FtrGenBs = TFtrGenBs::New();
    // CSV parsing stuff
    PSIn SIn = TFIn::New(FNm);
    char SsCh = ' '; TStrV FldValV;
    // read the headers and initialise the feature generators
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
    for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
        const TStr& FldVal = FldValV[FldValN];
        if (FldValN == ClassId) {
            if (FldVal == "NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenNominal::New());
            } else if (FldVal == "MULTI-NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong class type '" + FldVal + "', should be NOM or MULTI-NOM!");
            }
        } else if (!IgnoreIdV.IsIn(FldValN)) {
            if (FldVal == TFtrGenNumeric::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenNumeric::New());
            } else if (FldVal == TFtrGenNominal::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenNominal::New());
            } else if (FldVal == TFtrGenToken::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenToken::New(
                    TSwSet::New(swstNone), TStemmer::New(stmtNone)));
            } else if (FldVal == TFtrGenSparseNumeric::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenSparseNumeric::New());
            } else if (FldVal == TFtrGenMultiNom::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong type '" + FldVal + "'!");
            }
        }
    }
    const int Flds = FldValV.Len();
    // read the lines and feed them to the feature generators
    int Recs = 0;
    while (!SIn->Eof()) {
        if (Recs == TrainLen) { break; }
        Recs++; printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure the line still has the same number of fields as the header
        EAssertR(FldValV.Len() == Flds,
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over the fields
        try {
            TStrV FtrValV;
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) {
                    FtrGenBs->UpdateCls(FldVal);
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
            FtrGenBs->Update(FtrValV);
        } catch (PExcept Ex) {
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!",
                Recs+1, Ex->GetMsgStr().CStr()));
        }
    }
    // read the file again and feed it to the training set
    PBowDocBs BowDocBs = FtrGenBs->MakeBowDocBs();
    // read and ignore the headers since we parsed them already
    SIn = TFIn::New(FNm); SsCh = ' ';
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
    // read the lines and feed them to the training set
    Recs = 0;
    while (!SIn->Eof()) {
        Recs++; printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure the line still has the same number of fields as the header
        EAssertR(FldValV.Len() == Flds,
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over the fields and construct the sparse vector
        TStrV FtrValV; TStr ClsFtrVal;
        try {
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) {
                    ClsFtrVal = FldVal;
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
        } catch (PExcept Ex) {
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!",
                Recs+1, Ex->GetMsgStr().CStr()));
        }
        // add the feature vector to the training set
        FtrGenBs->AddBowDoc(BowDocBs, TStr::Fmt("Line-%d", Recs), FtrValV, ClsFtrVal);
    }
    // prepare training and testing doc ids
    TIntV AllDIdV; BowDocBs->GetAllDIdV(AllDIdV); IAssert(AllDIdV.IsSorted());
    TIntV TrainDIdV = AllDIdV; TrainDIdV.Trunc(TrainLen);
//......... part of the code omitted here .........
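A hedged usage sketch for LoadCsv (the CSV file name, class column index, ignored column, and training length below are all hypothetical):

TStr CsvFNm = "train.csv";          // hypothetical input file
TIntV IgnoreIdV; IgnoreIdV.Add(2);  // e.g. skip the third column
PBowDocBs BowDocBs = TFtrGenBs::LoadCsv(CsvFNm, /*ClassId=*/0, IgnoreIdV, /*TrainLen=*/1000);
// the resulting base exposes its documents through GetAllDIdV
TIntV DIdV; BowDocBs->GetAllDIdV(DIdV);
printf("loaded %d documents\n", DIdV.Len());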
Example 3: New
PBowMd TBowWinnowMd::New(
        const PBowDocBs& BowDocBs, const TStr& CatNm, const double& Beta){
    // create model
    TBowWinnowMd* WinnowMd=new TBowWinnowMd(BowDocBs); PBowMd BowMd(WinnowMd);
    WinnowMd->CatNm=CatNm;
    WinnowMd->Beta=Beta;
    WinnowMd->VoteTsh=0.5;
    // prepare Winnow parameters
    const double MnExpertWgtSum=1e-15;
    // get cat-id
    int CId=BowDocBs->GetCId(CatNm);
    if (CId==-1){
        TExcept::Throw(TStr::GetStr(CatNm, "Invalid Category Name ('%s')!"));}
    // get training documents
    TIntV TrainDIdV; BowDocBs->GetAllDIdV(TrainDIdV);
    int TrainDocs=TrainDIdV.Len();
    // prepare mini-experts
    int Words=BowDocBs->GetWords();
    WinnowMd->PosExpertWgtV.Gen(Words); WinnowMd->PosExpertWgtV.PutAll(1);
    WinnowMd->NegExpertWgtV.Gen(Words); WinnowMd->NegExpertWgtV.PutAll(1);
    // winnow loop
    double PrevAcc=0; double PrevPrec=0; double PrevRec=0; double PrevF1=0;
    const double MxDiff=-0.005; const int MxWorseIters=3; int WorseIters=0;
    const int MxIters=50; int IterN=0;
    while ((IterN<MxIters)&&(WorseIters<MxWorseIters)){
        IterN++;
        int FalsePos=0; int FalseNeg=0; int TruePos=0; int TrueNeg=0;
        for (int DIdN=0; DIdN<TrainDocs; DIdN++){
            int DId=TrainDIdV[DIdN];
            bool ClassVal=BowDocBs->IsCatInDoc(DId, CId);
            double PosWgt=0; double NegWgt=0;
            double OldSum=0; double NewSum=0;
            int WIds=BowDocBs->GetDocWIds(DId);
            // change only experts of words that occur in the document
            for (int WIdN=0; WIdN<WIds; WIdN++){
                int WId=BowDocBs->GetDocWId(DId, WIdN);
                OldSum+=WinnowMd->PosExpertWgtV[WId]+WinnowMd->NegExpertWgtV[WId];
                // penalize the expert giving the wrong class prediction
                if (ClassVal){
                    WinnowMd->NegExpertWgtV[WId]*=Beta;
                } else {
                    WinnowMd->PosExpertWgtV[WId]*=Beta;
                }
                NewSum+=WinnowMd->PosExpertWgtV[WId]+WinnowMd->NegExpertWgtV[WId];
                PosWgt+=WinnowMd->PosExpertWgtV[WId];
                NegWgt+=WinnowMd->NegExpertWgtV[WId];
            }
            // normalize all experts
            if (NewSum>MnExpertWgtSum){
                for (int WIdN=0; WIdN<WIds; WIdN++){
                    int WId=BowDocBs->GetDocWId(DId, WIdN);
                    WinnowMd->PosExpertWgtV[WId]*=OldSum/NewSum;
                    WinnowMd->NegExpertWgtV[WId]*=OldSum/NewSum;
                }
            }
            bool PredClassVal;
            if (PosWgt+NegWgt==0){PredClassVal=TBool::GetRnd();}
            else {PredClassVal=(PosWgt/(PosWgt+NegWgt))>WinnowMd->VoteTsh;}
            if (PredClassVal==ClassVal){
                if (PredClassVal){TruePos++;} else {TrueNeg++;}
            } else {
                if (PredClassVal){FalsePos++;} else {FalseNeg++;}
            }
        }
        // calculate temporary results
        if (TrainDocs==0){break;}
        double Acc=0; double Prec=0; double Rec=0; double F1=0;
        if (TrainDocs>0){
            Acc=100*(TruePos+TrueNeg)/double(TrainDocs);
            if (TruePos+FalsePos>0){
                Prec=(TruePos/double(TruePos+FalsePos));
                Rec=(TruePos/double(TruePos+FalseNeg));
                if (Prec+Rec>0){
                    F1=(2*Prec*Rec/(Prec+Rec));
                }
            }
        }
        // check if the current iteration gave worse results than the previous one
        if (((Acc-PrevAcc)<MxDiff)||((F1-PrevF1)<MxDiff)||(((Prec-PrevPrec)<MxDiff)&&
            ((Rec-PrevRec)<MxDiff))){WorseIters++;}
        else {WorseIters=0;}
        PrevAcc=Acc; PrevPrec=Prec; PrevRec=Rec; PrevF1=F1;
        printf("%d. Precision:%0.3f Recall:%0.3f F1:%0.3f Accuracy:%0.3f%%\n",
            IterN, Prec, Rec, F1, Acc);
    }
    // return model
    return BowMd;
}
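A hedged usage sketch for the Winnow trainer above (the category name is hypothetical; Beta is the demotion factor applied to experts that vote for the wrong class, so values just below 1 penalize gently):

// assumed: BowDocBs is an already-loaded bag-of-words base with category labels
PBowMd WinnowMd = TBowWinnowMd::New(BowDocBs, "sports", 0.95);

Since the function fetches its training set via GetAllDIdV, the model is trained on every document in the base rather than on an explicit train/test split.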