本文整理汇总了C++中TStr::SplitOnWs方法的典型用法代码示例。如果您正苦于以下问题：C++ TStr::SplitOnWs方法的具体用法？C++ TStr::SplitOnWs怎么用？C++ TStr::SplitOnWs使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TStr的用法示例。
在下文中一共展示了TStr::SplitOnWs方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: DocStrToWIdV
//////////////////////////////////////////////////////////////////////////
// String-To-Words
void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) {
TStr DocStr = _DocStr.GetUc(); // to upper case
TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len();
WordIdV.Reserve(WordN, 0);
PStemmer Stemmer = TStemmer::New(stmtPorter);
TIntH WordsInDoc;
for (int WordC = 0; WordC < WordN; WordC++) {
TStr WordStr;
if (Stemm) {
WordStr = Stemmer->GetStem(WordV[WordC]);
} else {
WordStr = WordV[WordC];
}
int WId = GetWId(WordStr);
if (WId == -1) {
WId = WordToIdH.AddKey(WordStr);
WordToIdH[WId] = 0;
}
WordIdV.Add(WId);
// is it first time we see this word in this doc?
if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId);
}
//do some statistics for DF
DocsParsed++;
for (int i = 0, l = WordsInDoc.Len(); i < l; i++)
WordToIdH[WordsInDoc.GetKey(i)]++;
Assert(WordV.Len() == WordIdV.Len());
}
示例2: GetHashedShinglesOfCluster
// Adds to HashedShingles the MD5 signature of every word shingle found in
// the quotes of cluster C.
//
//   QuoteBase      - source the quotes are fetched from, by id
//   C              - cluster whose quote ids are enumerated
//   ShingleLen     - number of consecutive words per shingle
//   HashedShingles - output set; signatures are added, nothing is removed
//
// For each quote, the content string is stripped of punctuation (via
// TStringUtil::RemovePunctuation) and split on whitespace; every run of
// ShingleLen consecutive words, joined by single spaces, forms one shingle.
void LSH::GetHashedShinglesOfCluster(TQuoteBase *QuoteBase, TCluster& C,
    TInt ShingleLen, THashSet<TMd5Sig>& HashedShingles) {
  TIntV ClusterQuoteIdV;
  C.GetQuoteIds(ClusterQuoteIdV);

  for (int QuoteN = 0; QuoteN < ClusterQuoteIdV.Len(); QuoteN++) {
    TQuote Quote;
    QuoteBase->GetQuote(ClusterQuoteIdV[QuoteN], Quote);

    // Raw content -> punctuation-free content -> words.
    TStr ContentStr;
    Quote.GetContentString(ContentStr);
    TStr CleanStr;
    TStringUtil::RemovePunctuation(ContentStr, CleanStr);
    TStrV WordV;
    CleanStr.SplitOnWs(WordV);

    // Number of shingle start positions; not positive when the quote has
    // fewer than ShingleLen words, in which case the loop does not run.
    const int Shingles = WordV.Len() - ShingleLen + 1;
    for (int StartN = 0; StartN < Shingles; StartN++) {
      TStr Shingle;
      for (int OffsetN = 0; OffsetN < ShingleLen; OffsetN++) {
        // Separate words with a single space (none before the first word).
        if (OffsetN != 0) {
          Shingle.InsStr(Shingle.Len(), " ");
        }
        Shingle.InsStr(Shingle.Len(), WordV[StartN + OffsetN]);
      }
      TMd5Sig ShingleSig(Shingle);
      HashedShingles.AddKey(ShingleSig);
    }
  }
}
示例3: GetStdName
// Builds a standardized author key of the form "lastname_f": the last
// whitespace-separated token of the cleaned name, an underscore, and the
// first character of the first token.
//
// Returns TStr::GetNullStr() whenever the input is rejected as not being a
// person name. Processing steps, in order:
//   1. ToLc(); newlines and periods are replaced by spaces
//   2. the name is cut at the first '#' or digit
//   3. the name is cut at the first '('; a remaining ')' rejects the input
//   4. inputs containing "figures", "macros", "univ" or "institute" are
//      rejected
//   5. only letters, whitespace and '-' are kept
//   6. a trailing "jr" token is dropped
//   7. the input is rejected if fewer than two tokens remain, if the last
//      token is a single character or does not start with a letter, or if
//      the first token does not start with a letter
//
// Step 7's check on the first token used to be an IAssert. Because '-' is
// deliberately kept in step 5, an input such as "-john smith" reached it
// with a non-letter initial and tripped the assertion; such input is now
// rejected like every other non-name.
TStr TStrUtil::GetStdName(TStr AuthorName) {
  AuthorName.ToLc();
  AuthorName.ChangeChAll('\n', ' ');
  AuthorName.ChangeChAll('.', ' ');

  // if there is a '#' or a number in the name, remove it and everything after it
  int CutPos = 0;
  while (CutPos < AuthorName.Len() && AuthorName[CutPos] != '#' && !TCh::IsNum(AuthorName[CutPos])) {
    CutPos++;
  }
  if (CutPos < AuthorName.Len()) {
    AuthorName = AuthorName.GetSubStr(0, CutPos-1).ToTrunc();
  }
  if (AuthorName.Empty()) {
    return TStr::GetNullStr();
  }

  // drop '(' and everything after it
  const int ParenPos = AuthorName.SearchCh('(');
  if (ParenPos != -1) {
    AuthorName = AuthorName.GetSubStr(0, ParenPos-1).ToTrunc();
  }
  // skip if contains ')'
  if (AuthorName.SearchCh(')') != -1) {
    return TStr::GetNullStr();
  }

  // skip if it is not a name
  if (AuthorName.SearchStr("figures") != -1 || AuthorName.SearchStr("macros") != -1
      || AuthorName.SearchStr("univ") != -1 || AuthorName.SearchStr("institute") != -1) {
    return TStr::GetNullStr();
  }

  // remove all non-letters (latex tags, ...); whitespace and '-' are kept
  TChA NewName;
  for (int ChN = 0; ChN < AuthorName.Len(); ChN++) {
    const char Ch = AuthorName[ChN];
    if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch == '-') {
      NewName += Ch;
    }
  }
  TStr StdName;
  StdName = NewName;
  StdName.ToTrunc();
  TStrV AuthNmV;
  StdName.SplitOnWs(AuthNmV);

  // a trailing "jr" is not part of the last name
  if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") { AuthNmV.DelLast(); }
  // too short -- not a name
  if (AuthNmV.Len() < 2) { return TStr::GetNullStr(); }

  const TStr& LastNm = AuthNmV.Last();
  if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) { return TStr::GetNullStr(); }

  // The first token may start with '-' (kept by the filter above); treat
  // that as "not a name" instead of asserting on input data.
  const char FirstInitial = AuthNmV[0][0];
  if (! TCh::IsAlpha(FirstInitial)) { return TStr::GetNullStr(); }

  return TStr::Fmt("%s_%c", LastNm.CStr(), FirstInitial);
}