本文整理汇总了C++中PBowDocBs::GetDocs方法的典型用法代码示例。如果您正苦于以下问题：C++ PBowDocBs::GetDocs方法的具体用法？C++ PBowDocBs::GetDocs怎么用？C++ PBowDocBs::GetDocs使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类PBowDocBs的用法示例。
在下文中一共展示了PBowDocBs::GetDocs方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: SaveLnDocTxt
void TBowFl::SaveLnDocTxt(const PBowDocBs& BowDocBs, const TStr& FNm, const bool& UseDocStrP){
TFOut SOut(FNm);
int Docs=BowDocBs->GetDocs();
for (int DId=0; DId<Docs; DId++){
printf("%d/%d\r", DId+1, Docs);
// output document-name
TStr DocNm=TStr::GetFNmStr(BowDocBs->GetDocNm(DId));
SOut.PutStr(DocNm);
// output categories
for (int CIdN=0; CIdN<BowDocBs->GetDocCIds(DId); CIdN++){
int CId=BowDocBs->GetDocCId(DId, CIdN);
TStr CatNm=TStr::GetFNmStr(BowDocBs->GetCatNm(CId));
SOut.PutCh(' '); SOut.PutCh('!'); SOut.PutStr(CatNm);
}
// output words
if (UseDocStrP){
TStr DocStr=BowDocBs->GetDocStr(DId);
// DocStr.DelChAll('\n'); DocStr.DelChAll('\r');
SOut.PutCh(' '); SOut.PutStr(DocStr);
} else {
int DocWIds=BowDocBs->GetDocWIds(DId);
int WId; double WordFq;
for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){
BowDocBs->GetDocWIdFq(DId, DocWIdN, WId, WordFq);
TStr WordStr=BowDocBs->GetWordStr(WId);
for (int WordFqN=0; WordFqN<WordFq; WordFqN++){
SOut.PutCh(' '); SOut.PutStr(WordStr);
}
}
}
SOut.PutLn();
}
printf("\n");
}
示例2: GetOntoGround
PLwOntoGround TLwOntoGround::GetOntoGround(
const PLwOnto& LwOnto, const PBowDocBs& BowDocBs,
const TStr& LangNm, const bool& DocCatIsTermIdP,
const double& CutWordWgtSumPrc){
printf("Generating Ontology-Classifier...\n");
// shortcuts
PLwTermBs TermBs=LwOnto->GetTermBs();
int Terms=TermBs->GetTerms();
PLwLinkBs LinkBs=LwOnto->GetLinkBs();
PLwLinkTypeBs LinkTypeBs=LwOnto->GetLinkTypeBs();
int LangId=LwOnto->GetLangBs()->GetLangId(LangNm);
int Docs=BowDocBs->GetDocs();
// create tfidf
printf(" Creating BowDocWgtBs ...");
PBowDocWgtBs BowDocWgtBs=TBowDocWgtBs::New(BowDocBs, bwwtNrmTFIDF);
PBowSim BowSim=TBowSim::New(bstCos);
printf(" Done.\n");
// collect documents per ontology-term
printf(" Collecting documents per ontology-term ...\n");
TIntIntVH TermIdToDIdVH; int PosCats=0; int NegCats=0;
for (int DId=0; DId<Docs; DId++){
printf(" Docs:%d/%d Pos:%d Neg:%d\r", 1+DId, Docs, PosCats, NegCats);
for (int DocCIdN=0; DocCIdN<BowDocBs->GetDocCIds(DId); DocCIdN++){
// get document-category
int CId=BowDocBs->GetDocCId(DId, DocCIdN);
TStr CatNm=BowDocBs->GetCatNm(CId);
// get term-id
if (DocCatIsTermIdP){
int TermId=CatNm.GetInt();
if (TermBs->IsTermId(TermId)){
TermIdToDIdVH.AddDat(TermId).Add(DId); PosCats++;
} else {NegCats++;}
} else {
if (TermBs->IsTermId(CatNm, LangId)){
int TermId=TermBs->GetTermId(CatNm, LangId);
TermIdToDIdVH.AddDat(TermId).Add(DId); PosCats++;
} else {NegCats++;}
}
}
}
printf(" Docs:%d/%d Pos:%d Neg:%d\n", Docs, Docs, PosCats, NegCats);
printf(" Done.\n");
// create sub-terms & up-terms vectors
printf(" Creating sub-terms & up-terms vectors ...");
TIntIntVH Const_TermIdToSubTermIdVH;
TIntIntVH TermIdToSubTermIdVH;
TIntIntVH TermIdToUpTermIdVH;
for (int TermN=0; TermN<Terms; TermN++){
int TermId=TermBs->GetTermId(TermN);
for (int LinkN=0; LinkN<LinkBs->GetFromLinks(TermId); LinkN++){
int LinkTypeId; int DstTermId;
LinkBs->GetFromLink(TermId, LinkN, LinkTypeId, DstTermId);
TStr LinkTypeNm=LinkTypeBs->GetLinkType(LinkTypeId)->GetLinkTypeNm();
if (LinkTypeNm=="NT"){
Const_TermIdToSubTermIdVH.AddDat(TermId).Add(DstTermId);
TermIdToSubTermIdVH.AddDat(TermId).Add(DstTermId);
TermIdToUpTermIdVH.AddDat(DstTermId).Add(TermId);
}
}
}
printf(" Done.\n");
// create centroids
printf(" Creating centroids ...\n");
THash<TInt, PBowSpV> TermIdToConceptSpVH;
TIntIntVH TermIdToSubTermDIdVH;
TIntH ProcTermIdH;
int PrevActiveTerms=-1;
forever{
// count active nodes for processing
int ActiveTerms=0;
for (int TermN=0; TermN<Terms; TermN++){
int TermId=TermBs->GetTermId(TermN);
if ((TermIdToSubTermIdVH.IsKey(TermId))&&
(TermIdToSubTermIdVH.GetDat(TermId).Len()>0)){
ActiveTerms++;
}
}
// stop if no change from previous round
printf(" Active-Terms:%d\n", ActiveTerms);
if (ActiveTerms==PrevActiveTerms){break;}
PrevActiveTerms=ActiveTerms;
// reduce active-nodes with zero-ancestors
for (int TermN=0; TermN<Terms; TermN++){
int TermId=TermBs->GetTermId(TermN);
if (ProcTermIdH.IsKey(TermId)){continue;}
if ((!TermIdToSubTermIdVH.IsKey(TermId))||
(TermIdToSubTermIdVH.GetDat(TermId).Len()==0)){
printf(" %d/%d\r", 1+TermN, Terms);
ProcTermIdH.AddKey(TermId);
// collect document-ids
TIntV TermDIdV;
if (TermIdToDIdVH.IsKey(TermId)){
TermDIdV.AddV(TermIdToDIdVH.GetDat(TermId));}
if (TermIdToSubTermDIdVH.IsKey(TermId)){
TermDIdV.AddV(TermIdToSubTermDIdVH.GetDat(TermId));}
// create concept-vector if any documents
if (TermDIdV.Len()>0){
PBowSpV ConceptSpV=
TBowClust::GetConceptSpV(BowDocWgtBs, BowSim, TermDIdV, CutWordWgtSumPrc);
TermIdToConceptSpVH.AddDat(TermId, ConceptSpV);
//.........这里部分代码省略.........