当前位置: 首页>>代码示例>>C++>>正文


C++ THashSet::Len方法代码示例

本文整理汇总了C++中THashSet::Len方法的典型用法代码示例。如果您正苦于以下问题:C++ THashSet::Len方法的具体用法?C++ THashSet::Len怎么用?C++ THashSet::Len使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在THashSet的用法示例。


在下文中一共展示了THashSet::Len方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: Expand

void TCliqueOverlap::Expand(const THashSet<TInt>& SUBG, THashSet<TInt>& CAND) {
	if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; }
	if (CAND.Len()==0) return;
	//Get u that maximaze CAND intersection with neighbours of vertex u
	int u = MaxNbrsInCANDNodeId(SUBG, CAND);
	//Get neighbours of node u
	THashSet<TInt> nbrsU;
	GetNbrs(u, nbrsU);
	//Get relative complement of nbrsU in CAND
	THashSet<TInt> EXT;
	GetRelativeComplement(CAND, nbrsU, EXT);
	while(EXT.Len() != 0) {
		int q = GetNodeIdWithMaxDeg(EXT);
		//
		m_Q.Add(q);
		//
		THashSet<TInt> nbrsQ;
		GetNbrs(q, nbrsQ);
		//
		THashSet<TInt> SUBGq;
		GetIntersection(SUBG, nbrsQ, SUBGq);
		//
		THashSet<TInt> CANDq;
		GetIntersection(CAND, nbrsQ, CANDq);
		//
		Expand(SUBGq, CANDq);
		//
 		CAND.DelKey(q);
		m_Q.DelLast();
		//
		EXT.Clr();
		GetRelativeComplement(CAND, nbrsU, EXT);
	}
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:34,代码来源:cliques.cpp

示例2: GetIntersection

void TCliqueOverlap::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
	if (A.Len() < B.Len()) {
		for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
			if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
	} else {
		for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
			if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
	}
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:9,代码来源:cliques.cpp

示例3: Intersection

int TCliqueOverlap::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
	int n = 0;
	if (A.Len() < B.Len()) {
		for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
			if (B.IsKey(it.GetKey())) n++;
	} else {
		for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
			if (A.IsKey(it.GetKey())) n++;
	}
	return n;
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:11,代码来源:cliques.cpp

示例4: ComputeSignatures

void LSH::ComputeSignatures(THashSet<TMd5Sig>& Shingles,
    THash<TMd5Sig, TIntV>& Signatures, int NumSignatures) {
  if (NumSignatures < 1)
    return;
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  TInt NumShingles = Shingles.Len();

  for (int i = 0; i < NumSignatures; ++i) {
    // Create new signature
    TVec < TMd5Sig > Shuffle;
    Shingles.GetKeyV(Shuffle);
    Shuffle.Shuffle(RandomGenerator);

    for (int j = 0; j < NumShingles; j++) {
      TIntV Signature;
      Signatures.IsKeyGetDat(Shuffle[j], Signature);
      Signature.Add(j);
      Signatures.AddDat(Shuffle[j], Signature);
    }
  }
  Err("Computed %d signatures!\n", NumSignatures);
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:22,代码来源:lsh.cpp

示例5: WordHashing

void LSH::WordHashing(TQuoteBase* QuoteBase, THashSet<TMd5Sig>& Shingles) {
  Err("Hashing shingles using words...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      Err("%d out of %d completed\n", qt, QuoteIds.Len());
    }
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    TStrV Content;
    Q.GetParsedContent(Content);

    int ContentLen = Content.Len();
    for (int i = 0; i < ContentLen; i++) {
      const TMd5Sig ShingleMd5(Content[i]);
      Shingles.AddKey(ShingleMd5);
    }
  }
  Err("Done with word hashing! Number of shingles: %d\n", Shingles.Len());
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:22,代码来源:lsh.cpp

示例6: MinHash

void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles,
    TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) {
  Err("Creating buckets...\n");
  THash < TMd5Sig, TIntV > Signatures;
  ComputeSignatures(Shingles, Signatures, NumBands * BandSize);

  // bucket creation
  for (int i = 0; i < NumBands; ++i) {
    SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>());
  }


  // bucket filling
  int NumShingles = Shingles.Len();
  THash<TInt, TQuote> Quotes;
  QB->GetIdToTQuotes(Quotes);

  THash<TInt, TQuote>::TIter CurI = Quotes.BegI();
  THash<TInt, TQuote>::TIter EndI = Quotes.EndI();
  TQuote Q; // SKYFALL

  for (; CurI < EndI; CurI++) {
    Q = CurI.GetDat();

    TStrV Content;
    Q.GetParsedContent(Content);
    TInt Id = Q.GetId();

    // signature for quote
    int ContentLen = Content.Len();
    TVec < TIntV > Signature;
    for (int i = 0; i < ContentLen; i++) {
      const TMd5Sig ShingleMd5(Content[i]);
      Signature.Add(Signatures.GetDat(ShingleMd5));
    }

    // place in bucket
    if (ContentLen < WordWindow) {
      for (int i = 0; i < NumBands; ++i) {
        TStr Sig;
        for (int j = 0; j < BandSize; ++j) {
          int CurSig = i * BandSize + j;

          TInt min = NumShingles;
          for (int k = 0; k < ContentLen; k++) {
            if (Signature[k][CurSig] < min) {
              min = Signature[k][CurSig];
            }
          }
          Sig += min.GetStr() + "-";
        }
        //Err(Sig.CStr());

        const TMd5Sig SigMd5(Sig);
        TIntSet Bucket;
        SignatureBandBuckets[i].IsKeyGetDat(SigMd5, Bucket);
        Bucket.AddKey(Id);
        SignatureBandBuckets[i].AddDat(SigMd5, Bucket);
      }
    } else {

    }

  }
  Err("Minhash step complete!\n");
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:66,代码来源:lsh.cpp

示例7: BigMain

void BigMain(int argc, char* argv[]) {
  TExeTm ExeTm;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs("QuotesApp");
  const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
  if (Env.IsEndOfRun()) {
    printf("To do:\n");
    printf("    MkDataset         : Make memes dataset (extract quotes and save txt)\n");
    printf("    ExtractSubset     : Extract a subset of memes containing particular words\n");
    printf("    MemesToQtBs       : Load memes dataset and create quote base\n");
    printf("    MkClustNet        : Build cluster network from the quote base\n");
    return;
  }	
#pragma region mkdataset
  // extract quotes and links and make them into a single file
  if (ToDo == "mkdataset") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
    const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
    const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
    const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
    //// parse directly from Spinn3r
    TStr Spinn3rFNm;
    THashSet<TMd5Sig> SeenUrlSet;
    if (UrlOnlyOnce && ! UrlFNm.Empty()) {  // keep track of already seen urls (so that there are no duplicate urls)
      TFIn FIn(UrlFNm);  SeenUrlSet.Load(FIn);
    }
    FILE *F = fopen(OutFNm.CStr(), "wt");
    TFIn FIn(InFNm);
    int Items=0;
    for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
      TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
      printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
      fflush(stdout);
      for (int item = 0; QE.Next(); item++) {
        const TMd5Sig PostMd5(QE.PostUrlStr);
        if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
        if (UrlOnlyOnce) {
          if (SeenUrlSet.IsKey(PostMd5)) { continue; }
          SeenUrlSet.AddKey(PostMd5);
        }
        fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
        //if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
        fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
        for (int q = 0; q < QE.QuoteV.Len(); q++) {
          if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
            fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
        }
        for (int l = 0; l < QE.LinkV.Len(); l++) {
          fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
        fprintf(F, "\n");
        if (item>0 && item % Kilo(100) == 0) {
          QE.DumpStat();  QE.ExeTm.Tick(); }
        Items++;
      }
      printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
      fflush(stdout);
    }
    printf("all done. Saving %d post urls\n", SeenUrlSet.Len());  fflush(stdout);
    if (! SeenUrlSet.Empty()) {
      TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
      SeenUrlSet.Save(FOut);
    }
    fclose(F);
  }
#pragma endregion mkdataset

#pragma region extractsubset
  // save posts with memes containing particular words
  else if (ToDo == "extractsubset") {
    const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
    const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
    const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");

    TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
    printf("Loading %s\n", WordsFNm.CStr());
    { TFIn FIn(WordsFNm);
    for (TStr Ln; FIn.GetNextLn(Ln); ) {
      printf("  %s\n", Ln.GetLc().CStr());
      CatchMemeV.Add(Ln.GetLc()); }
    }
    printf("%d strings loaded\n", CatchMemeV.Len());
    TFOut FOut(OutFNm);
    TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
    for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
      bool DoSave = false;
      for (int m = 0; m < Memes.MemeV.Len(); m++) {
        for (int i = 0; i < CatchMemeV.Len(); i++) {
          if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
            DoSave=true; break; }
        }
        if (DoSave) { break; }
      }
      if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
      if (posts % Mega(1) == 0) {
        printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
        FOut.Flush();
      }
    }
//.........这里部分代码省略.........
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:101,代码来源:memeclust.cpp

示例8: main

/**
 * Used for benchmarking sorting by source algorithm.
 * Takes as input starting point of
 * a top cascade and outputs time taken for casacade detection. 
 * Input : Source, Dest, Start, Duration 
 * Output : Prints the time for cascade detection
 */
int main(int argc,char* argv[]) {
  TTableContext Context;
  Schema TimeS;
  TimeS.Add(TPair<TStr,TAttrType>("Source",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Dest",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Start",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Duration",atInt));
  PTable P1 = TTable::LoadSS(TimeS,"./../../../../datasets/temporal/yemen_call_201001.txt",&Context,' ');
  TIntV MapV;
  TStrV SortBy;
  SortBy.Add("Source");
  P1->Order(SortBy);
  TIntV Source; // Sorted vec of start time
  P1->ReadIntCol("Source",Source);
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    MapV.Add(RI.GetRowIdx());
  }
  // Attribute to Int mapping
  TInt SIdx = P1->GetColIdx("Source");
  TInt DIdx = P1->GetColIdx("Dest");
  TInt StIdx = P1->GetColIdx("Start");
  TInt DuIdx = P1->GetColIdx("Duration");
  int W = atoi(argv[1]);
  int len = 0;
  // Find the starting point
  int TSource = atoi(argv[2]);
  int TDest = atoi(argv[3]);
  int TStart = atoi(argv[4]);
  int TDur = atoi(argv[5]);
  TInt RIdx;
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    RIdx = RI.GetRowIdx();
    int RSource = P1->GetIntValAtRowIdx(SIdx,RIdx).Val;
    int RDest = P1->GetIntValAtRowIdx(DIdx,RIdx).Val;
    int RStart = P1->GetIntValAtRowIdx(StIdx,RIdx).Val;
    int RDur = P1->GetIntValAtRowIdx(DuIdx,RIdx).Val;
    if (TSource == RSource && TDest == RDest && TStart == RStart && TDur == RDur) break;
  }
  // Start building the cascade from the start point
  clock_t st,et;
  st = clock();
  for (int i = 0; i < 1; i++) {
    THashSet<TInt> VisitedH;
    TSnapQueue<TInt> EventQ;
    EventQ.Push(RIdx);
    VisitedH.AddKey(RIdx);
    while (!EventQ.Empty()) {
      TInt CIdx = EventQ.Top();
      EventQ.Pop();
      int CDest = P1->GetIntValAtRowIdx(DIdx,CIdx).Val;
      int CStart = P1->GetIntValAtRowIdx(StIdx,CIdx).Val;
      int CDur = P1->GetIntValAtRowIdx(DuIdx,CIdx).Val;
      // In line binary search
      int val = CDest;
      int lo = 0;
      int hi = Source.Len() - 1;
      int index = -1;
      while (hi >= lo) {
        int mid = lo + (hi - lo)/2;
        if (Source.GetVal(mid) > val) { hi = mid - 1;}
        else if (Source.GetVal(mid) < val) { lo = mid + 1;}
        else { index = mid; hi = mid - 1;}
      } 
      // End of binary search
      int BIdx = index;
      for(int i = BIdx; i < Source.Len(); i++) {
        int PId = MapV.GetVal(i).Val;
        if (! VisitedH.IsKey(PId)) {
          int TSource = P1->GetIntValAtRowIdx(SIdx,PId).Val;
          int TStart = P1->GetIntValAtRowIdx(StIdx,PId).Val;
          if (TSource != CDest) {
            break;
          }
          if (TStart >= (CDur + CStart) && TStart - (CDur + CStart) <= W) {
            VisitedH.AddKey(PId);
            EventQ.Push(PId);
          }
        }
      }
    }
    len = VisitedH.Len();
  }
  et = clock();
  float diff = ((float) et - (float) st)/CLOCKS_PER_SEC;
  printf("Size %d,Time %f\n",len,diff);
  return 0;
}
开发者ID:JohnMatta,项目名称:snap,代码行数:94,代码来源:sortSource.cpp


注:本文中的THashSet::Len方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。