当前位置: 首页>>代码示例>>C++>>正文


C++ TStr::SplitOnWs方法代码示例

本文整理汇总了C++中TStr::SplitOnWs方法的典型用法代码示例。如果您正苦于以下问题:C++ TStr::SplitOnWs方法的具体用法?C++ TStr::SplitOnWs怎么用?C++ TStr::SplitOnWs使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TStr的用法示例。


在下文中一共展示了TStr::SplitOnWs方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: DocStrToWIdV

//////////////////////////////////////////////////////////////////////////
// String-To-Words
void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) {
    TStr DocStr = _DocStr.GetUc();  // to upper case
    TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len();
    WordIdV.Reserve(WordN, 0);

    PStemmer Stemmer = TStemmer::New(stmtPorter);
    TIntH WordsInDoc;
    for (int WordC = 0; WordC < WordN; WordC++) {
        TStr WordStr;
        if (Stemm) {
            WordStr = Stemmer->GetStem(WordV[WordC]);
        } else {
            WordStr = WordV[WordC];
        }
        int WId = GetWId(WordStr);
        if (WId == -1) {
            WId = WordToIdH.AddKey(WordStr);
            WordToIdH[WId] = 0;
        }
        WordIdV.Add(WId);
        
        // is it first time we see this word in this doc?
        if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId);
    }

    //do some statistics for DF
    DocsParsed++;
    for (int i = 0, l = WordsInDoc.Len(); i < l; i++)
        WordToIdH[WordsInDoc.GetKey(i)]++;

    Assert(WordV.Len() == WordIdV.Len());
}
开发者ID:Austindeadhead,项目名称:qminer,代码行数:34,代码来源:strkernel.cpp

示例2: GetHashedShinglesOfCluster

// Adds to HashedShingles the MD5 signature of every word shingle found in the
// quotes of cluster C.
//
// QuoteBase      - source from which each quote of the cluster is fetched.
// C              - cluster whose quote ids are enumerated.
// ShingleLen     - number of consecutive words that form one shingle.
// HashedShingles - output set; signatures are added, nothing is removed here.
//
// For each quote the content string has its punctuation removed and is split
// on whitespace; every window of ShingleLen consecutive words is joined with
// single spaces and hashed.
void LSH::GetHashedShinglesOfCluster(TQuoteBase *QuoteBase, TCluster& C,
    TInt ShingleLen, THashSet<TMd5Sig>& HashedShingles) {
  TIntV ClusterQuoteIds;
  C.GetQuoteIds(ClusterQuoteIds);
  const int NumQuotes = ClusterQuoteIds.Len();
  for (int QuoteN = 0; QuoteN < NumQuotes; QuoteN++) {
    TQuote Quote;
    QuoteBase->GetQuote(ClusterQuoteIds[QuoteN], Quote);

    TStr RawContent;
    Quote.GetContentString(RawContent);
    TStr CleanContent;
    TStringUtil::RemovePunctuation(RawContent, CleanContent);

    TStrV WordV;
    CleanContent.SplitOnWs(WordV);

    // number of window start positions; not positive when the quote has
    // fewer than ShingleLen words, in which case the loop body never runs
    const int NumShingles = WordV.Len() - ShingleLen + 1;
    for (int Start = 0; Start < NumShingles; Start++) {
      TStr Shingle;
      for (int Offset = 0; Offset < ShingleLen; Offset++) {
        if (Offset != 0) {
          Shingle.InsStr(Shingle.Len(), " ");  // separator between words
        }
        Shingle.InsStr(Shingle.Len(), WordV[Start + Offset]);
      }
      TMd5Sig Signature(Shingle);
      HashedShingles.AddKey(Signature);
    }
  }
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:26,代码来源:lsh.cpp

示例3: GetStdName

// <last_name>_<first name innitial>
// Builds a standardized author key from a free-form author string.
//
// AuthorName - raw author text (taken by value; it is modified locally).
// Returns the lower-cased last token, an underscore and the first character
// of the first token, or TStr::GetNullStr() when the input is rejected:
//   - nothing is left before the first digit or '#',
//   - a ')' remains after cutting at the first '(',
//   - the text contains "figures", "macros", "univ" or "institute",
//   - fewer than two tokens remain (a trailing "jr" is dropped first),
//   - the last token is a single character or does not start with a letter,
//   - the first token does not start with a letter.
TStr TStrUtil::GetStdName(TStr AuthorName) {
    AuthorName.ToLc();
    AuthorName.ChangeChAll('\n', ' ');
    AuthorName.ChangeChAll('.', ' ');
    // if there is a number in the name, remove it and everything after it
    int pos = 0;
    while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
        pos++;
    }
    if (pos < AuthorName.Len()) {
        AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc();
    }
    if (AuthorName.Empty()) {
        return TStr::GetNullStr();
    }

    // replace everything after '('
    int b = AuthorName.SearchCh('(');
    if (b != -1) {
        AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc();
    }
    // skip if contains ')'
    if (AuthorName .SearchCh(')')!=-1) {
        return TStr::GetNullStr();
    }
    // skip if it is not a name
    if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1
            || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) {
        return TStr::GetNullStr();
    }
    // remove all non-letters (latex tags, ...); whitespace and '-' are kept
    TChA NewName;
    for (int i = 0; i < AuthorName.Len(); i++) {
        const char Ch = AuthorName[i];
        if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') {
            NewName += Ch;
        }
    }
    TStr StdName = NewName;
    StdName.ToTrunc();
    TStrV AuthNmV;
    StdName.SplitOnWs(AuthNmV);
    // a trailing "jr" is a suffix, not the last name
    if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
    // too short -- not a name
    if (AuthNmV.Len() < 2) return TStr::GetNullStr();

    const TStr LastNm = AuthNmV.Last();
    if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

    // The filter above keeps '-', so the first token may start with '-'
    // (e.g. "-john smith"). Reject such input the same way a bad last name is
    // rejected instead of tripping an assertion on it.
    const TStr FirstNm = AuthNmV[0];
    if (! TCh::IsAlpha(FirstNm[0])) return TStr::GetNullStr();

    return TStr::Fmt("%s_%c", LastNm.CStr(), FirstNm[0]);
}
开发者ID:pikma,项目名称:Snap,代码行数:54,代码来源:util.cpp


注:本文中的TStr::SplitOnWs方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。