本文整理汇总了 C++ 中 THashSet::Empty 方法的典型用法代码示例。如果您正苦于以下问题：C++ THashSet::Empty 方法的具体用法是什么？C++ THashSet::Empty 怎么用？有哪些 C++ THashSet::Empty 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 THashSet 的用法示例。
在下文中一共展示了 THashSet::Empty 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: BigMain
void BigMain(int argc, char* argv[]) {
TExeTm ExeTm;
Env = TEnv(argc, argv, TNotify::StdNotify);
Env.PrepArgs("QuotesApp");
const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
if (Env.IsEndOfRun()) {
printf("To do:\n");
printf(" MkDataset : Make memes dataset (extract quotes and save txt)\n");
printf(" ExtractSubset : Extract a subset of memes containing particular words\n");
printf(" MemesToQtBs : Load memes dataset and create quote base\n");
printf(" MkClustNet : Build cluster network from the quote base\n");
return;
}
#pragma region mkdataset
// extract quotes and links and make them into a single file
if (ToDo == "mkdataset") {
const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
//// parse directly from Spinn3r
TStr Spinn3rFNm;
THashSet<TMd5Sig> SeenUrlSet;
if (UrlOnlyOnce && ! UrlFNm.Empty()) { // keep track of already seen urls (so that there are no duplicate urls)
TFIn FIn(UrlFNm); SeenUrlSet.Load(FIn);
}
FILE *F = fopen(OutFNm.CStr(), "wt");
TFIn FIn(InFNm);
int Items=0;
for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
fflush(stdout);
for (int item = 0; QE.Next(); item++) {
const TMd5Sig PostMd5(QE.PostUrlStr);
if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
if (UrlOnlyOnce) {
if (SeenUrlSet.IsKey(PostMd5)) { continue; }
SeenUrlSet.AddKey(PostMd5);
}
fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
//if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
for (int q = 0; q < QE.QuoteV.Len(); q++) {
if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
}
for (int l = 0; l < QE.LinkV.Len(); l++) {
fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
fprintf(F, "\n");
if (item>0 && item % Kilo(100) == 0) {
QE.DumpStat(); QE.ExeTm.Tick(); }
Items++;
}
printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
fflush(stdout);
}
printf("all done. Saving %d post urls\n", SeenUrlSet.Len()); fflush(stdout);
if (! SeenUrlSet.Empty()) {
TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
SeenUrlSet.Save(FOut);
}
fclose(F);
}
#pragma endregion mkdataset
#pragma region extractsubset
// save posts with memes containing particular words
else if (ToDo == "extractsubset") {
const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");
TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
printf("Loading %s\n", WordsFNm.CStr());
{ TFIn FIn(WordsFNm);
for (TStr Ln; FIn.GetNextLn(Ln); ) {
printf(" %s\n", Ln.GetLc().CStr());
CatchMemeV.Add(Ln.GetLc()); }
}
printf("%d strings loaded\n", CatchMemeV.Len());
TFOut FOut(OutFNm);
TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
bool DoSave = false;
for (int m = 0; m < Memes.MemeV.Len(); m++) {
for (int i = 0; i < CatchMemeV.Len(); i++) {
if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
DoSave=true; break; }
}
if (DoSave) { break; }
}
if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
if (posts % Mega(1) == 0) {
printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
FOut.Flush();
}
}
//.........这里部分代码省略.........