本文整理汇总了C++中Index::documentLength方法的典型用法代码示例。如果您正苦于以下问题：C++ Index::documentLength方法的具体用法？C++ Index::documentLength怎么用？C++ Index::documentLength使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Index的用法示例。
在下文中一共展示了Index::documentLength方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: _addInvertedListData
void IndexWriter::_addInvertedListData( indri::utility::greedy_vector<WriterIndexContext*>& lists, indri::index::TermData* termData, indri::utility::Buffer& listBuffer, UINT64& endOffset ) {
indri::utility::greedy_vector<WriterIndexContext*>::iterator iter;
const int minimumSkip = 1<<12; // 4k
int documentsWritten = 0;
const float topdocsFraction = 0.01f;
bool hasTopdocs = termData->corpus.documentCount > TOPDOCS_DOCUMENT_COUNT;
bool isFrequent = termData->corpus.totalCount > FREQUENT_TERM_COUNT;
int topdocsCount = hasTopdocs ? int(termData->corpus.documentCount * 0.01) : 0;
int topdocsSpace = hasTopdocs ? ((topdocsCount*3*sizeof(UINT32)) + sizeof(int)) : 0;
// write a control byte
char control = (hasTopdocs ? 0x01 : 0) | (isFrequent ? 0x02 : 0);
_invertedOutput->write( &control, 1 );
UINT64 initialPosition = _invertedOutput->tell();
// leave some room for the topdocs list
if( hasTopdocs ) {
_invertedOutput->seek( topdocsSpace + initialPosition );
}
// maintain a list of top documents
std::priority_queue<DocListIterator::TopDocument,
std::vector<DocListIterator::TopDocument>,
DocListIterator::TopDocument::greater> topdocs;
double threshold = 0;
int lastDocument = 0;
int positions = 0;
int docs = 0;
// for each matching list:
for( iter = lists.begin(); iter != lists.end(); ++iter ) {
indri::index::DocListFileIterator::DocListData* listData = (*iter)->iterator->currentEntry();
DocListIterator* iterator = listData->iterator;
Index* index = (*iter)->index;
indri::utility::RVLCompressStream stream( listBuffer );
int listDocs = 0;
int listPositions = 0;
while( !iterator->finished() ) {
// get the latest entry from the list
DocListIterator::DocumentData* documentData = iterator->currentEntry();
// add to document counter
docs++; listDocs++;
// update the topdocs list
if( hasTopdocs ) {
int length = index->documentLength( documentData->document );
int count = documentData->positions.size();
// compute DocListIterator::TopDocument::greater (current, top())
// if false, no reason to insert this entry.
// note that the test is inverted.
// int(length * threshold) <= count is equivalent to
// count/length > topdocs.top().count/topdocs.top().length
// but we use < to force breaking a tie in favor of keeping
// the first seen document.
if( int(length * threshold) < count || topdocs.size() < topdocsCount ) {
// form a topdocs entry for this document
DocListIterator::TopDocument topDocument( documentData->document,
count,
length );
topdocs.push( topDocument );
while( topdocs.size() > topdocsCount )
topdocs.pop();
threshold = topdocs.top().count / double(topdocs.top().length);
}
}
if( listBuffer.position() > minimumSkip ) {
// time to write in a skip
_writeBatch( _invertedOutput, documentData->document, listBuffer.position(), listBuffer );
// delta encode documents by batch
lastDocument = 0;
}
assert( documentData->document > lastDocument );
// write this entry out to the list
stream << documentData->document - lastDocument;
stream << (int) documentData->positions.size();
lastDocument = documentData->document;
int lastPosition = 0;
for( int i=0; i<documentData->positions.size(); i++ ) {
stream << (documentData->positions[i] - lastPosition);
lastPosition = documentData->positions[i];
positions++; listPositions++;
}
iterator->nextEntry();
}
//.........这里部分代码省略.........