本文整理汇总了C++中Xapian::Document::remove_term方法的典型用法代码示例。如果您正苦于以下问题：C++ Document::remove_term方法的具体用法？C++ Document::remove_term怎么用？C++ Document::remove_term使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xapian::Document的用法示例。
在下文中一共展示了Document::remove_term方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: renameLabel
/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
bool renamedLabel = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
string term("XLABEL:");
// Get documents that have this label
term += name;
for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
postingIter != pIndex->postlist_end(term); ++postingIter)
{
Xapian::docid docId = *postingIter;
// Get the document
Xapian::Document doc = pIndex->get_document(docId);
// Remove the term
doc.remove_term(term);
// ...add the new one
doc.add_term(limitTermLength(string("XLABEL:") + newName));
// ...and update the document
pIndex->replace_document(docId, doc);
}
renamedLabel = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't delete label, unknown exception occured" << endl;
}
pDatabase->unlock();
return renamedLabel;
}
示例2: removeCommonTerms
//.........这里部分代码省略.........
// FIXME: remove terms extracted from the title if they don't have more than one posting
string title(docInfo.getTitle());
if (title.empty() == false)
{
Document titleDoc;
titleDoc.setData(title.c_str(), title.length());
Tokenizer titleTokens(&titleDoc);
removeFirstPostingsFromDocument(titleTokens, doc, "S", language, STORE_UNSTEM);
titleTokens.rewind();
removeFirstPostingsFromDocument(titleTokens, doc, "", language, m_stemMode);
}
// Location
string location(docInfo.getLocation());
commonTerms.insert(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
// Base file
string::size_type qmPos = location.find("?");
if ((urlObj.isLocal() == true) &&
(qmPos != string::npos))
{
commonTerms.insert(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
}
// Host name
string hostName(StringManip::toLowerCase(urlObj.getHost()));
if (hostName.empty() == false)
{
commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName, true));
string::size_type dotPos = hostName.find('.');
while (dotPos != string::npos)
{
commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));
// Next
dotPos = hostName.find('.', dotPos + 1);
}
}
// ...location
string tree(urlObj.getLocation());
if (tree.empty() == false)
{
commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
if (tree[0] == '/')
{
commonTerms.insert("XDIR:/");
}
string::size_type slashPos = tree.find('/', 1);
while (slashPos != string::npos)
{
commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));
// Next
slashPos = tree.find('/', slashPos + 1);
}
}
// ...and file name
string fileName(urlObj.getFile());
if (fileName.empty() == false)
{
string extension;
commonTerms.insert(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));
// Does it have an extension ?
string::size_type extPos = fileName.rfind('.');
if ((extPos != string::npos) &&
(extPos + 1 < fileName.length()))
{
extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
}
commonTerms.insert(string("XEXT:") + XapianDatabase::limitTermLength(extension));
}
// Date terms
time_t timeT = TimeConverter::fromTimestamp(docInfo.getTimestamp());
struct tm *tm = localtime(&timeT);
string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
if (yyyymmdd.length() == 8)
{
commonTerms.insert(string("D") + yyyymmdd);
commonTerms.insert(string("M") + yyyymmdd.substr(0, 6));
commonTerms.insert(string("Y") + yyyymmdd.substr(0, 4));
}
// Language code
commonTerms.insert(string("L") + Languages::toCode(language));
// MIME type
commonTerms.insert(string("T") + docInfo.getType());
for (set<string>::const_iterator termIter = commonTerms.begin(); termIter != commonTerms.end(); ++termIter)
{
try
{
doc.remove_term(*termIter);
}
catch (const Xapian::Error &error)
{
#ifdef DEBUG
cout << "XapianIndex::removeCommonTerms: " << error.get_msg() << endl;
#endif
}
}
}
示例3: removeFirstPostingsFromDocument
/// Removes from a document the terms that a token stream contributed.
///
/// Each token is lower-cased and, depending on the stemming mode, its
/// prefixed raw form has its first posting removed via removeFirstPosting()
/// and/or its stemmed form is removed from the document.
///
/// @param tokens   token source; consumed until nextToken() returns false.
/// @param doc      the document to remove terms from.
/// @param prefix   prefix prepended to each raw term; also used as the prefix
///                 for stems when not empty.
/// @param language when empty, no stemmer is created and only raw terms are
///                 handled. Note that the stemmer itself is built from
///                 m_stemLanguage, not from this value.
/// @param mode     STORE_UNSTEM, STORE_STEM or STORE_BOTH.
void XapianIndex::removeFirstPostingsFromDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, const string &language, StemmingMode mode) const
{
    Xapian::TermIterator termListIter = doc.termlist_begin();
    Xapian::Stem *pStemmer = NULL;
    string stemPrefix("Z");
    string term;

    // Do we know what language to use for stemming ?
    if (language.empty() == false)
    {
        try
        {
            pStemmer = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
        }
        catch (const Xapian::Error &error)
        {
            cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
        }
    }

    // Stems are Z-prefixed, unless a prefix is already defined
    if (prefix.empty() == false)
    {
        stemPrefix = prefix;
    }

    try
    {
        // Get the terms and remove the first posting for each
        while (tokens.nextToken(term) == true)
        {
            bool removeStem = false;

            if (term.empty() == true)
            {
                continue;
            }
            // Lower case the term
            term = StringManip::toLowerCase(term);

            // Stem the term ?
            if ((mode == STORE_UNSTEM) ||
                (pStemmer == NULL))
            {
                removeFirstPosting(doc, termListIter, prefix + XapianDatabase::limitTermLength(term));
            }
            else if (mode == STORE_STEM)
            {
                removeStem = true;
            }
            else if (mode == STORE_BOTH)
            {
                // Remove both
                removeFirstPosting(doc, termListIter, prefix + XapianDatabase::limitTermLength(term));
                removeStem = true;
            }

            // Since stems don't have positional information, we can't simply remove them
            // since any may appear more than once in the original document
            // We can only remove those that have some prefix set
            // Don't stem if the term starts with a digit
            // The cast to unsigned char matters: passing a negative char
            // (e.g. a UTF-8 lead byte) to isdigit() is undefined behaviour
            if ((removeStem == true) &&
                (prefix.empty() == false) &&
                (isdigit(static_cast<unsigned char>(term[0])) == 0))
            {
#if XAPIAN_MAJOR_VERSION==0
                string stemmedTerm(pStemmer->stem_word(term));
#else
                string stemmedTerm((*pStemmer)(term));
#endif

                try
                {
                    doc.remove_term(stemPrefix + XapianDatabase::limitTermLength(stemmedTerm));
                }
                catch (const Xapian::Error &error)
                {
                    // The stem may already be gone, for instance when two
                    // tokens share it; that must not abort the whole removal
#ifdef DEBUG
                    cout << "XapianIndex::removeFirstPostingsFromDocument: " << error.get_msg() << endl;
#endif
                }
            }
        }
    }
    catch (...)
    {
        // Don't leak the stemmer if anything above throws
        delete pStemmer;
        throw;
    }

    delete pStemmer;
}
示例4: removeCommonTerms
void XapianIndex::removeCommonTerms(Xapian::Document &doc)
{
DocumentInfo docInfo;
string record(doc.get_data());
// First, remove the magic term
doc.remove_term(MAGIC_TERM);
if (record.empty() == true)
{
// Nothing else we can do
return;
}
string language(StringManip::extractField(record, "language=", ""));
string timestamp(StringManip::extractField(record, "timestamp=", "\n"));
docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
StringManip::extractField(record, "url=", "\n"),
StringManip::extractField(record, "type=", "\n"),
Languages::toLocale(language));
// We used to use timestamp prior to 0.60
if (timestamp.empty() == true)
{
string modTime(StringManip::extractField(record, "modtime=", "\n"));
if (modTime.empty() == false)
{
time_t timeT = (time_t )atol(modTime.c_str());
timestamp = TimeConverter::toTimestamp(timeT);
}
}
docInfo.setTimestamp(timestamp);
Url urlObj(docInfo.getLocation());
// FIXME: remove terms extracted from the title if they don't have more than one posting
string title(docInfo.getTitle());
if (title.empty() == false)
{
Document titleDoc;
titleDoc.setData(title.c_str(), title.length());
Tokenizer titleTokens(&titleDoc);
removeFirstPostingsFromDocument(titleTokens, doc, "S", language, STORE_UNSTEM);
titleTokens.rewind();
removeFirstPostingsFromDocument(titleTokens, doc, "", language, m_stemMode);
}
// Title
doc.remove_term(limitTermLength(string("U") + docInfo.getLocation(), true));
// Host name
string hostName(StringManip::toLowerCase(urlObj.getHost()));
if (hostName.empty() == false)
{
doc.remove_term(limitTermLength(string("H") + hostName, true));
string::size_type dotPos = hostName.find('.');
while (dotPos != string::npos)
{
doc.remove_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));
// Next
dotPos = hostName.find('.', dotPos + 1);
}
}
// ...location
string tree(urlObj.getLocation());
if (tree.empty() == false)
{
doc.remove_term(limitTermLength(string("XDIR:") + tree, true));
string::size_type slashPos = tree.find('/', 1);
while (slashPos != string::npos)
{
doc.remove_term(limitTermLength(string("XDIR:") + tree.substr(0, slashPos), true));
// Next
slashPos = tree.find('/', slashPos + 1);
}
}
// ...and file name
string fileName(urlObj.getFile());
if (fileName.empty() == false)
{
doc.remove_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
}
// Language code
doc.remove_term(string("L") + Languages::toCode(language));
// MIME type
doc.remove_term(string("T") + docInfo.getType());
}
示例5: setDocumentLabels
/// Sets a document's labels.
bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
bool resetLabels)
{
bool updatedLabels = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
Xapian::Document doc = pIndex->get_document(docId);
// Reset existing labels ?
if (resetLabels == true)
{
Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
if (termIter != pIndex->termlist_end(docId))
{
for (termIter.skip_to("XLABEL:");
termIter != pIndex->termlist_end(docId); ++termIter)
{
// Is this a label ?
if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
{
doc.remove_term(*termIter);
}
}
}
}
// Set new labels
for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
++labelIter)
{
if (labelIter->empty() == false)
{
doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
}
}
pIndex->replace_document(docId, doc);
updatedLabels = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't update document's labels, unknown exception occured" << endl;
}
pDatabase->unlock();
return updatedLabels;
}