C++ TStr::SplitOnWs方法代码示例

本文整理汇总了C++中TStr::SplitOnWs方法的典型用法代码示例。如果您正苦于以下问题：C++ TStr::SplitOnWs方法的具体用法？C++ TStr::SplitOnWs怎么用？C++ TStr::SplitOnWs使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TStr的用法示例。

在下文中一共展示了TStr::SplitOnWs方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: DocStrToWIdV

//////////////////////////////////////////////////////////////////////////
// String-To-Words
void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) {
    TStr DocStr = _DocStr.GetUc();  // to upper case
    TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len();
    WordIdV.Reserve(WordN, 0);

    PStemmer Stemmer = TStemmer::New(stmtPorter);
    TIntH WordsInDoc;
    for (int WordC = 0; WordC < WordN; WordC++) {
        TStr WordStr;
        if (Stemm) {
            WordStr = Stemmer->GetStem(WordV[WordC]);
        } else {
            WordStr = WordV[WordC];
        }
        int WId = GetWId(WordStr);
        if (WId == -1) {
            WId = WordToIdH.AddKey(WordStr);
            WordToIdH[WId] = 0;
        }
        WordIdV.Add(WId);
        
        // is it first time we see this word in this doc?
        if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId);
    }

    //do some statistics for DF
    DocsParsed++;
    for (int i = 0, l = WordsInDoc.Len(); i < l; i++)
        WordToIdH[WordsInDoc.GetKey(i)]++;

    Assert(WordV.Len() == WordIdV.Len());
}

开发者ID:Austindeadhead，项目名称:qminer，代码行数:34，代码来源:strkernel.cpp

示例2: GetHashedShinglesOfCluster

void LSH::GetHashedShinglesOfCluster(TQuoteBase *QuoteBase, TCluster& C,
    TInt ShingleLen, THashSet<TMd5Sig>& HashedShingles) {
  TIntV QuoteIds;
  C.GetQuoteIds(QuoteIds);
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);
    TStr QContentStr;
    Q.GetContentString(QContentStr);
    TStr QContentStrNoPunc;
    TStringUtil::RemovePunctuation(QContentStr, QContentStrNoPunc);
    TStrV QContentV;
    QContentStrNoPunc.SplitOnWs(QContentV);
    for (int i = 0; i < QContentV.Len() - ShingleLen + 1; i++) {
      TStr Shingle;
      for (int j = 0; j < ShingleLen; j++) {
        if (j > 0) {
          Shingle.InsStr(Shingle.Len(), " ");
        }
        Shingle.InsStr(Shingle.Len(), QContentV[i + j]);
      }
      TMd5Sig ShingleMd5(Shingle);
      HashedShingles.AddKey(ShingleMd5);
    }
  }
}

开发者ID:snap-stanford，项目名称:curis-2012，代码行数:26，代码来源:lsh.cpp

示例3: GetStdName

// <last_name>_<first name innitial>
TStr TStrUtil::GetStdName(TStr AuthorName) {
    TStr StdName;
    AuthorName.ToLc();
    AuthorName.ChangeChAll('\n', ' ');
    AuthorName.ChangeChAll('.', ' ');
    // if there is a number in the name, remove it and everything after it
    int i, pos = 0;
    while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
        pos++;
    }
    if (pos < AuthorName.Len()) {
        AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc();
    }
    if (AuthorName.Empty()) {
        return TStr::GetNullStr();
    }

    // replace everything after '('
    int b = AuthorName.SearchCh('(');
    if (b != -1) {
        AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc();
    }
    // skip if contains ')'
    if (AuthorName .SearchCh(')')!=-1) {
        return TStr::GetNullStr();
    }
    // skip if it is not a name
    if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1
            || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) {
        return TStr::GetNullStr();
    }
    // remove all non-letters (latex tags, ...)
    TChA NewName;
    for (i = 0; i < AuthorName.Len(); i++) {
        const char Ch = AuthorName[i];
        if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') {
            NewName += Ch;
        }
    }
    StdName = NewName;
    StdName.ToTrunc();
    TStrV AuthNmV;
    StdName.SplitOnWs(AuthNmV);
    // too short -- not a name
    if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
    if (AuthNmV.Len() < 2) return TStr::GetNullStr();

    const TStr LastNm = AuthNmV.Last();
    if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

    IAssert(isalpha(AuthNmV[0][0]));
    return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]);
}

开发者ID:pikma，项目名称:Snap，代码行数:54，代码来源:util.cpp

注：本文中的TStr::SplitOnWs方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。