本文整理汇总了C++中CSphVector::GetLength方法的典型用法代码示例。如果您正苦于以下问题:C++ CSphVector::GetLength方法的具体用法?C++ CSphVector::GetLength怎么用?C++ CSphVector::GetLength使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CSphVector
的用法示例。
在下文中一共展示了CSphVector::GetLength方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: AddJunk
void ExcerptGen_c::AddJunk ( int iStart, int iLength )
{
int iChunkStart = iStart;
for ( int i = iStart; i < iStart+iLength; i++ )
if ( sphIsSpace ( m_sBuffer.cstr () [i] ) != sphIsSpace ( m_sBuffer.cstr () [iChunkStart] ) )
{
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = TOK_SPACE;
tLast.m_iStart = iChunkStart;
tLast.m_iLengthBytes = i - iChunkStart;
tLast.m_iWordID = 0;
tLast.m_uWords = 0;
iChunkStart = i;
}
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = TOK_SPACE;
tLast.m_iStart = iChunkStart;
tLast.m_iLengthBytes = iStart + iLength - iChunkStart;
tLast.m_iWordID = 0;
tLast.m_uWords = 0;
}
示例2: Load
// Load affix rules from the given file, auto-detecting its format
// (MySpell or ISpell). All previously loaded affix state is reset first.
// Returns false on a NULL filename, open failure, unrecognized format,
// or a format-specific loader failure.
bool CISpellAffix::Load ( const char * szFilename )
{
	if ( !szFilename )
		return false;

	// reset any previously loaded state
	m_dRules.Reset ();
	memset ( m_dCharset, 0, sizeof ( m_dCharset ) );
	m_bFirstCaseConv = true;
	m_bUseLowerCaser = false;
	m_bUseDictConversion = false;
	m_LowerCaser.Reset ();

	FILE * pFile = fopen ( szFilename, "rt" );
	if ( !pFile )
		return false;

	bool bResult = false;
	AffixFormat_e eFormat = DetectFormat ( pFile );
	if ( eFormat==AFFIX_FORMAT_UNKNOWN )
		printf ( "Failed to detect affix file format\n" );
	else
	{
		// FIXED: arguments were swapped as fseek ( pFile, SEEK_SET, 0 );
		// that only worked by accident because SEEK_SET==0 on common libcs.
		// Rewind to the start so the loader re-reads the whole file.
		fseek ( pFile, 0, SEEK_SET );
		printf ( "Using %s affix file format\n", AffixFormatName[eFormat] );
		switch ( eFormat )
		{
		case AFFIX_FORMAT_MYSPELL:
			bResult = LoadMySpell ( pFile );
			break;
		case AFFIX_FORMAT_ISPELL:
			bResult = LoadISpell ( pFile );
			break;
		case AFFIX_FORMAT_UNKNOWN:
			break;
		}
	}
	fclose ( pFile );

	// cross-product expansion is only needed when at least one cross-product
	// prefix rule AND at least one cross-product suffix rule are present
	bool bHaveCrossPrefix = false;
	for ( int i = 0; i < m_dRules.GetLength () && !bHaveCrossPrefix; i++ )
		if ( m_dRules[i].IsPrefix() && m_dRules[i].IsCrossProduct() )
			bHaveCrossPrefix = true;

	bool bHaveCrossSuffix = false;
	for ( int i = 0; i < m_dRules.GetLength () && !bHaveCrossSuffix; i++ )
		if ( !m_dRules[i].IsPrefix() && m_dRules[i].IsCrossProduct() )
			bHaveCrossSuffix = true;

	m_bCheckCrosses = bHaveCrossPrefix && bHaveCrossSuffix;
	return bResult;
}
示例3: StripStdin
// Read all of stdin, strip HTML per the given indexed-attributes and
// removed-elements specs, and dump the stripped text to stdout.
void StripStdin ( const char * sIndexAttrs, const char * sRemoveElements )
{
	CSphString sError;
	CSphHTMLStripper tStripper ( true );
	bool bConfigured = tStripper.SetIndexedAttrs ( sIndexAttrs, sError );
	bConfigured = bConfigured && tStripper.SetRemovedElements ( sRemoveElements, sError );
	if ( !bConfigured )
		sphDie ( "failed to configure stripper: %s", sError.cstr() );

	// slurp stdin into one growing buffer
	CSphVector<BYTE> dBuffer;
	for ( ;; )
	{
		if ( feof(stdin) )
			break;
		char sChunk[1024];
		int iRead = fread ( sChunk, 1, sizeof(sChunk), stdin );
		if ( !iRead )
			break;
		int iOld = dBuffer.GetLength();
		dBuffer.Resize ( iOld+iRead );
		memcpy ( &dBuffer[iOld], sChunk, iRead );
	}

	// NUL-terminate so the buffer can be handled as a C string
	dBuffer.Add ( 0 );
	tStripper.Strip ( &dBuffer[0] );
	fprintf ( stdout, "dumping stripped results...\n%s\n", &dBuffer[0] );
}
示例4:
const CISpellDict::CISpellDictWord * CISpellDict::IterateNext ()
{
	// Return the current dictionary entry and advance the iterator,
	// or NULL once all entries have been visited.
	bool bDone = ( m_iIterator>=m_dEntries.GetLength() );
	if ( bDone )
		return NULL;
	const CISpellDictWord * pWord = &m_dEntries[m_iIterator];
	m_iIterator++;
	return pWord;
}
示例5: sphPluginParseSpec
// Parse a colon-separated plugin spec ("lib.so:filter[:options]") into
// its parts. On success dParams holds either 0 parts (empty spec) or
// exactly 3 (options padded with "" when absent); on failure sError is set.
bool sphPluginParseSpec ( const CSphString & sParams, CSphVector<CSphString> & dParams, CSphString & sError )
{
	dParams.Resize ( 0 );
	sphSplit ( dParams, sParams.cstr(), ":" );

	const int iParts = dParams.GetLength();
	if ( iParts==0 || iParts==3 )
		return true;	// empty spec, or fully specified lib:filter:options
	if ( iParts==2 )
	{
		dParams.Add ( "" );	// no options given; pad with an empty string
		return true;
	}
	if ( iParts==1 )
	{
		sError = "filter name required in spec string; example: \"plugins.so:myfilter\"";
		return false;
	}
	sError = "too many parts in spec string; must be in \"plugins.so:myfilter:options\" format";
	return false;
}
示例6: AddJunk
// Emit TOK_SPACE tokens covering the byte range [iStart, iStart+iLength),
// splitting the range wherever the text flips between space and non-space
// bytes. iBoundary is a byte offset of a phrase boundary, or -1 for none;
// once the flushed bytes pass it, a TOK_BREAK marker is inserted via
// AddBoundary() (at most once per call).
void ExcerptGen_c::AddJunk ( int iStart, int iLength, int iBoundary )
{
int iChunkStart = iStart;
int iSaved = 0; // bytes flushed into tokens so far, for boundary detection
for ( int i = iStart; i < iStart+iLength; i++ ){
// pick the UTF-8 or plain source buffer depending on tokenizer mode
const char* buf_ptr = NULL;
if(m_bUtf8){
buf_ptr = m_sBufferUTF8.cstr ();
}else{
buf_ptr = m_sBuffer.cstr ();
}
// a flip between space and non-space ends the current chunk
if ( sphIsSpace ( buf_ptr[i] ) != sphIsSpace ( buf_ptr[iChunkStart] ) )
{
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = TOK_SPACE;
tLast.m_iStart = iChunkStart;
tLast.m_iLengthBytes = i - iChunkStart;
tLast.m_iWordID = 0;
tLast.m_uWords = 0;
iChunkStart = i;
iSaved += tLast.m_iLengthBytes;
// emit the break marker once the flushed bytes cross the boundary
// (iBoundary appears to be relative to iStart here -- TODO confirm)
if ( iBoundary != -1 && iSaved > iBoundary - iStart )
{
AddBoundary();
iBoundary = -1; // boundary consumed; never emit twice
}
}
}
// flush the trailing chunk (always emitted, even for a zero-length range)
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = TOK_SPACE;
tLast.m_iStart = iChunkStart;
tLast.m_iLengthBytes = iStart + iLength - iChunkStart;
tLast.m_iWordID = 0;
tLast.m_uWords = 0;
// if the boundary was never crossed inside the loop, emit the marker now
if ( iBoundary != -1 ) AddBoundary();
}
示例7: AddBoundary
void ExcerptGen_c::AddBoundary()
{
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = TOK_BREAK;
tLast.m_iStart = 0;
tLast.m_iLengthBytes = 0;
tLast.m_iWordID = 0;
tLast.m_uWords = 0;
}
示例8: TokenizeQuery
void ExcerptGen_c::TokenizeQuery ( const ExcerptQuery_t & tQuery, CSphDict * pDict, ISphTokenizer * pTokenizer )
{
const bool bUtf8 = pTokenizer->IsUtf8();
// tokenize query words
int iWordsLength = strlen ( tQuery.m_sWords.cstr() );
m_dKeywords.Reserve ( MAX_HIGHLIGHT_WORDS );
BYTE * sWord;
int iKwIndex = 0;
pTokenizer->SetBuffer ( (BYTE *)tQuery.m_sWords.cstr(), iWordsLength );
while ( ( sWord = pTokenizer->GetToken() )!=NULL )
{
SphWordID_t iWord = pDict->GetWordID ( sWord );
if ( iWord )
{
Token_t & tLast = m_dWords.Add();
tLast.m_eType = TOK_WORD;
tLast.m_iWordID = iWord;
tLast.m_iLengthBytes = strlen ( (const char *)sWord );
tLast.m_iLengthCP = bUtf8 ? sphUTF8Len ( (const char *)sWord ) : tLast.m_iLengthBytes;
// store keyword
Keyword_t & kwLast = m_dKeywords.Add();
kwLast.m_iLength = tLast.m_iLengthCP;
// find stars
bool bStarBack = ( *pTokenizer->GetTokenEnd()=='*' );
bool bStarFront = ( pTokenizer->GetTokenStart()!=pTokenizer->GetBufferPtr() ) &&
( pTokenizer->GetTokenStart()[-1]=='*' );
kwLast.m_uStar = ( bStarFront ? STAR_FRONT : 0 ) | ( bStarBack ? STAR_BACK : 0 );
// store token
const int iEndIndex = iKwIndex + tLast.m_iLengthBytes + 1;
m_dKeywordsBuffer.Resize ( iEndIndex );
kwLast.m_iWord = iKwIndex;
strcpy ( &m_dKeywordsBuffer [ iKwIndex ], (const char *)sWord ); // NOLINT
iKwIndex = iEndIndex;
if ( m_dWords.GetLength()==MAX_HIGHLIGHT_WORDS )
break;
}
}
}
示例9: strlen
char * ExcerptGen_c::BuildExcerpt ( const ExcerptQuery_t & q, CSphDict * pDict, ISphTokenizer * pTokenizer )
{
m_dTokens.Reserve ( 1024 );
m_sBuffer = q.m_sSource;
const bool bUtf8 = pTokenizer->IsUtf8();
m_bUtf8 = bUtf8;
// tokenize query words
int iWordsLength = strlen ( q.m_sWords.cstr() );
CSphVector<char> dKwBuffer ( iWordsLength );
CSphVector<Keyword_t> dKeywords;
dKeywords.Reserve ( MAX_HIGHLIGHT_WORDS );
BYTE * sWord;
int iKwIndex = 0;
pTokenizer->SetBuffer ( (BYTE*)q.m_sWords.cstr(), iWordsLength );
while ( ( sWord = pTokenizer->GetToken() ) != NULL )
{
SphWordID_t iWord = pDict->GetWordID ( sWord );
if ( iWord )
{
m_dWords.Resize ( m_dWords.GetLength () + 1 );
Token_t & tLast = m_dWords.Last ();
tLast.m_eType = TOK_WORD;
tLast.m_iWordID = iWord;
tLast.m_iLengthBytes = strlen ( (const char *)sWord );
tLast.m_iLengthCP = bUtf8 ? sphUTF8Len ( (const char *)sWord ) : tLast.m_iLengthBytes;
// store keyword
dKeywords.Resize( dKeywords.GetLength() + 1 );
Keyword_t & kwLast = dKeywords.Last ();
// find stars
bool bStarBack = *pTokenizer->GetTokenEnd() == '*';
bool bStarFront = ( pTokenizer->GetTokenStart() != pTokenizer->GetBufferPtr() ) &&
pTokenizer->GetTokenStart()[-1] == '*';
kwLast.m_uStar = ( bStarFront ? STAR_FRONT : 0 ) | ( bStarBack ? STAR_BACK : 0 );
// store token
const int iEndIndex = iKwIndex + tLast.m_iLengthBytes + 1;
dKwBuffer.Resize ( iEndIndex );
kwLast.m_iWord = iKwIndex;
strcpy ( &dKwBuffer [ iKwIndex ], (const char *)sWord );
iKwIndex = iEndIndex;
if ( m_dWords.GetLength() == MAX_HIGHLIGHT_WORDS )
break;
}
}
// tokenize document
pTokenizer->SetBuffer ( (BYTE*)q.m_sSource.cstr (), strlen ( q.m_sSource.cstr () ) );
const char * pStartPtr = pTokenizer->GetBufferPtr ();
const char * pLastTokenEnd = pStartPtr;
//assign utf-8
m_sBufferUTF8 = pStartPtr;
while ( ( sWord = pTokenizer->GetToken() ) != NULL )
{
const char * pTokenStart = pTokenizer->GetTokenStart ();
if ( pTokenStart != pStartPtr )
AddJunk ( pLastTokenEnd - pStartPtr,
pTokenStart - pLastTokenEnd,
pTokenizer->GetBoundary() ? pTokenizer->GetBoundaryOffset() : -1 );
SphWordID_t iWord = pDict->GetWordID ( sWord );
pLastTokenEnd = pTokenizer->GetTokenEnd ();
m_dTokens.Resize ( m_dTokens.GetLength () + 1 );
Token_t & tLast = m_dTokens.Last ();
tLast.m_eType = iWord ? TOK_WORD : TOK_SPACE;
tLast.m_iStart = pTokenStart - pStartPtr;
tLast.m_iLengthBytes = pLastTokenEnd - pTokenStart;
tLast.m_iWordID = iWord;
tLast.m_uWords = 0;
// fill word mask
if ( iWord )
{
bool bMatch = false;
int iOffset;
ARRAY_FOREACH ( nWord, m_dWords )
{
const char * keyword = &dKwBuffer [ dKeywords[nWord].m_iWord ];
const Token_t & token = m_dWords[nWord];
switch ( dKeywords[nWord].m_uStar )
{
case STAR_NONE:
bMatch = iWord == token.m_iWordID;
break;
case STAR_FRONT:
//.........这里部分代码省略.........
示例10: UrlBreak
void UrlBreak ( Split_t & tBest, const char * sWord )
{
const int iLen = strlen(sWord);
tBest.m_Pos.Resize(0);
// current partial splits
// begin with an empty one
CSphVector<Split_t> dSplits;
dSplits.Add();
// our best guess so far
// begin with a trivial baseline one (ie. no splits at all)
Prob_t p = g_LM.GetProb ( sWord, iLen );
tBest.m_Pos.Add ( iLen );
tBest.m_fProb = p.m_fProb;
tBest.m_bAllDict = tBest.m_bAnyDict = p.m_bDict;
if ( iLen>=DICT_COMPOUND_MIN && tBest.m_bAllDict )
{
static const float THRESH = logf ( DICT_COMPOUND_THRESH );
if ( tBest.m_fProb<=THRESH )
tBest.m_fProb *= DICT_COMPOUND_COEFF;
}
// work the current splits
CSphVector<Split_t> dSplits2;
while ( dSplits.GetLength() )
{
int iWorkedSplits = 0;
float fPrevBest = tBest.m_fProb;
ARRAY_FOREACH ( iSplit, dSplits )
{
Split_t & s = dSplits[iSplit];
// filter out splits that were added before (!) a new best guess on the previous iteration
if ( dSplits[iSplit] < tBest )
continue;
iWorkedSplits++;
int iLast = 0;
if ( s.m_Pos.GetLength() )
iLast = s.m_Pos.Last();
for ( int i=1+iLast; i<iLen; i++ )
{
// consider a split at position i
// it generates a word candidate [iLast,i) and a tail [i,iLen)
// let's score those
Prob_t tCand = g_LM.GetProb ( sWord+iLast, i-iLast );
Prob_t tTail = g_LM.GetProb ( sWord+i, iLen-i );
// if the current best is all-keywords, the new candidates must be, too
if ( tBest.m_bAllDict && !tCand.m_bDict )
continue;
// compute partial and full split candidates generated by the current guess
Split_t tPartial = s;
tPartial.AddSplitPos ( tCand, i );
Split_t tFull = tPartial;
tFull.AddSplitPos ( tTail, iLen );
// check if the full one is our new best full one
bool bNewBest = false;
if ( tBest < tFull )
{
// FIXME? we do this even when the new split is *not* all-keywords,
// but the old best split was; is this ever a problem?
tBest = tFull;
// tBest.Dump ( sWord, "new-best" );
bNewBest = true;
}
// check if the resulting partial split is worth scanning further
if ( tBest < tPartial )
{
dSplits2.Add ( tPartial );
// dSplits2.Last().Dump ( sWord, "scan-partial" );
}
}
}
// damage control!
// if we just processed over 100K candidate splits and got no improvement
// lets assume that our chances of getting one are kinda low and bail
if ( iWorkedSplits>=100000 && tBest.m_fProb>=fPrevBest )
break;
// keep going
dSplits.SwapData ( dSplits2 );
dSplits2.Resize ( 0 );
}
示例11: GetNumRules
int CISpellAffix::GetNumRules () const
{
return m_dRules.GetLength ();
}
示例12: TokenizeDocument
void ExcerptGen_c::TokenizeDocument ( char * pData, CSphDict * pDict, ISphTokenizer * pTokenizer, bool bFillMasks, bool bRetainHtml )
{
m_iDocumentWords = 0;
m_dTokens.Reserve ( 1024 );
m_sBuffer = pData;
pTokenizer->SetBuffer ( (BYTE*)pData, strlen(pData) );
const char * pStartPtr = pTokenizer->GetBufferPtr ();
const char * pLastTokenEnd = pStartPtr;
if ( bRetainHtml )
pTokenizer->AddSpecials ( "<" );
BYTE * sWord;
DWORD uPosition = 0; // hit position in document
while ( ( sWord = pTokenizer->GetToken() )!=NULL )
{
if ( pTokenizer->TokenIsBlended() )
continue;
const char * pTokenStart = pTokenizer->GetTokenStart ();
if ( pTokenStart!=pStartPtr && pTokenStart>pLastTokenEnd )
AddJunk ( pLastTokenEnd - pStartPtr,
pTokenStart - pLastTokenEnd,
pTokenizer->GetBoundary() ? pTokenizer->GetBoundaryOffset() : -1 );
if ( bRetainHtml && *pTokenStart=='<' )
{
int iTagEnd = FindTagEnd ( pTokenStart );
if ( iTagEnd!=-1 )
{
assert ( pTokenStart+iTagEnd<pTokenizer->GetBufferEnd() );
AddJunk ( pTokenStart-pStartPtr, iTagEnd+1, pTokenizer->GetBoundary() ? pTokenizer->GetBoundaryOffset() : -1 );
pTokenizer->SetBufferPtr ( pTokenStart+iTagEnd+1 );
pLastTokenEnd = pTokenStart+iTagEnd+1; // fix it up to prevent adding last chunk on exit
continue;
}
}
SphWordID_t iWord = iWord = pDict->GetWordID ( sWord );
pLastTokenEnd = pTokenizer->GetTokenEnd ();
if ( pTokenizer->GetBoundary() )
uPosition += 100; // FIXME: this should be taken from index settings
Token_t & tLast = m_dTokens.Add();
tLast.m_eType = iWord ? TOK_WORD : TOK_SPACE;
tLast.m_uPosition = iWord ? ++uPosition : 0;
tLast.m_iStart = pTokenStart - pStartPtr;
tLast.m_iLengthBytes = pLastTokenEnd - pTokenStart;
tLast.m_iWordID = iWord;
tLast.m_uWords = 0;
if ( iWord )
m_iDocumentWords++;
m_iLastWord = iWord ? m_dTokens.GetLength() - 1 : m_iLastWord;
// fill word mask
if ( bFillMasks && iWord )
{
bool bMatch = false;
int iOffset;
ARRAY_FOREACH ( nWord, m_dWords )
{
const char * sKeyword = &m_dKeywordsBuffer [ m_dKeywords[nWord].m_iWord ];
const Token_t & tToken = m_dWords[nWord];
switch ( m_dKeywords[nWord].m_uStar )
{
case STAR_NONE:
bMatch = ( iWord==tToken.m_iWordID );
break;
case STAR_FRONT:
iOffset = tLast.m_iLengthBytes - tToken.m_iLengthBytes;
bMatch = ( iOffset>=0 ) &&
( memcmp ( sKeyword, sWord + iOffset, tToken.m_iLengthBytes )==0 );
break;
case STAR_BACK:
bMatch = ( tLast.m_iLengthBytes>=tToken.m_iLengthBytes ) &&
( memcmp ( sKeyword, sWord, tToken.m_iLengthBytes )==0 );
break;
case STAR_BOTH:
bMatch = strstr ( (const char *)sWord, sKeyword )!=NULL;
break;
}
if ( bMatch )
{
tLast.m_uWords |= 1UL<<nWord;
m_uFoundWords |= 1UL<<nWord;
}
}
}
//.........这里部分代码省略.........
示例13: sphPluginReload
bool sphPluginReload ( const char * sName, CSphString & sError )
{
#if !HAVE_DLOPEN
sError = "no dlopen(), no plugins";
return false;
#else
// find all plugins from the given library
CSphScopedLock<CSphMutex> tLock ( g_tPluginMutex );
CSphVector<PluginKey_t> dKeys;
CSphVector<PluginDesc_c*> dPlugins;
g_hPlugins.IterateStart();
while ( g_hPlugins.IterateNext() )
{
PluginDesc_c * v = g_hPlugins.IterateGet();
if ( v->GetLibName()==sName )
{
dKeys.Add ( g_hPlugins.IterateGetKey() );
dPlugins.Add ( g_hPlugins.IterateGet() );
}
}
// no plugins loaded? oops
if ( dPlugins.GetLength()==0 )
{
sError.SetSprintf ( "no active plugins loaded from %s", sName );
return false;
}
// load new library and check every plugin
#if !USE_WINDOWS
PluginLib_c * pNewLib = LoadPluginLibrary ( sName, sError, true );
#else
PluginLib_c * pNewLib = LoadPluginLibrary ( sName, sError );
#endif
if ( !pNewLib )
return false;
// load all plugins
CSphVector<PluginDesc_c*> dNewPlugins;
ARRAY_FOREACH ( i, dPlugins )
{
PluginDesc_c * pDesc = NULL;
const SymbolDesc_t * pSym = NULL;
switch ( dKeys[i].m_eType )
{
case PLUGIN_RANKER: pDesc = new PluginRanker_c ( pNewLib ); pSym = g_dSymbolsRanker; break;
case PLUGIN_INDEX_TOKEN_FILTER: pDesc = new PluginTokenFilter_c ( pNewLib ); pSym = g_dSymbolsTokenFilter; break;
case PLUGIN_QUERY_TOKEN_FILTER: pDesc = new PluginQueryTokenFilter_c ( pNewLib ); pSym = g_dSymbolsQueryTokenFilter; break;
case PLUGIN_FUNCTION: pDesc = new PluginUDF_c ( pNewLib, dPlugins[i]->GetUdfRetType() ); pSym = g_dSymbolsUDF; break;
default:
sphDie ( "INTERNAL ERROR: unknown plugin type %d in sphPluginReload()", (int)dKeys[i].m_eType );
return false;
}
if ( !PluginLoadSymbols ( pDesc, pSym, pNewLib->GetHandle(), dKeys[i].m_sName.cstr(), sError ) )
{
pDesc->Release();
break;
}
dNewPlugins.Add ( pDesc );
}