本文整理汇总了C++中Xapian::Document::add_term方法的典型用法代码示例。如果您正苦于以下问题:C++ Document::add_term方法的具体用法?C++ Document::add_term怎么用?C++ Document::add_term使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xapian::Document的用法示例。
在下文中一共展示了Document::add_term方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: renameLabel
/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
bool renamedLabel = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
string term("XLABEL:");
// Get documents that have this label
term += name;
for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
postingIter != pIndex->postlist_end(term); ++postingIter)
{
Xapian::docid docId = *postingIter;
// Get the document
Xapian::Document doc = pIndex->get_document(docId);
// Remove the term
doc.remove_term(term);
// ...add the new one
doc.add_term(limitTermLength(string("XLABEL:") + newName));
// ...and update the document
pIndex->replace_document(docId, doc);
}
renamedLabel = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't delete label, unknown exception occured" << endl;
}
pDatabase->unlock();
return renamedLabel;
}
示例2: prepareDocument
bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
Xapian::termcount &termPos) const
{
string title(info.getTitle());
string location(info.getLocation());
Url urlObj(location);
// Add a magic term :-)
doc.add_term(MAGIC_TERM);
// Index the title with and without prefix S
if (title.empty() == false)
{
Document titleDoc;
titleDoc.setData(title.c_str(), title.length());
Tokenizer titleTokens(&titleDoc);
addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
titleTokens.rewind();
addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
}
// Index the full URL with prefix U
doc.add_term(limitTermLength(string("U") + location, true));
// ...the host name and included domains with prefix H
string hostName(StringManip::toLowerCase(urlObj.getHost()));
if (hostName.empty() == false)
{
doc.add_term(limitTermLength(string("H") + hostName, true));
string::size_type dotPos = hostName.find('.');
while (dotPos != string::npos)
{
doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));
// Next
dotPos = hostName.find('.', dotPos + 1);
}
}
// ...and the file name with prefix P
string fileName(urlObj.getFile());
if (fileName.empty() == false)
{
doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
}
// Finally, add the language code with prefix L
doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
setDocumentData(doc, info, m_stemLanguage);
return true;
}
示例3: addCommonTerms
/// Adds the terms that are common to all documents to a Xapian document.
///
/// Terms added: the magic term, the title tokens (prefix S and unprefixed),
/// the full URL (U), the base file of local URLs that have a query part
/// (XFILE:), the host name and its parent domains (H), the location and its
/// parent directories (XDIR:), the file name (P), the lower-cased extension
/// (XEXT:), the date (D, M and Y), the language code (L) and the MIME type (T).
///
/// @param info properties of the document being indexed.
/// @param doc Xapian document that receives the terms.
/// @param termPos running term position, passed on to addPostingsToDocument().
void XapianIndex::addCommonTerms(const DocumentInfo &info, Xapian::Document &doc,
    Xapian::termcount &termPos) const
{
    string title(info.getTitle());
    string location(info.getLocation());
    Url urlObj(location);

    // Add a magic term :-)
    doc.add_term(MAGIC_TERM);

    // Index the title with and without prefix S
    if (title.empty() == false)
    {
        Document titleDoc;

        titleDoc.setData(title.c_str(), title.length());
        Tokenizer titleTokens(&titleDoc);
        addPostingsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
        // Go back to the first token to index the title a second time
        titleTokens.rewind();
        addPostingsToDocument(titleTokens, doc, "", termPos, m_stemMode);
    }

    // Index the full URL with prefix U
    doc.add_term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));

    // ...the base file with XFILE:
    string::size_type qmPos = location.find('?');
    if ((urlObj.isLocal() == true) &&
        (qmPos != string::npos))
    {
        doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
    }

    // ...the host name and included domains with prefix H
    string hostName(StringManip::toLowerCase(urlObj.getHost()));
    if (hostName.empty() == false)
    {
        doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName, true));

        string::size_type dotPos = hostName.find('.');
        while (dotPos != string::npos)
        {
            doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));

            // Next
            dotPos = hostName.find('.', dotPos + 1);
        }
    }

    // ...the location (as is) and all directories with prefix XDIR:
    string tree(urlObj.getLocation());
    if (tree.empty() == false)
    {
        doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
        if (tree[0] == '/')
        {
            doc.add_term("XDIR:/");
        }

        // Start at 1 so that a leading slash doesn't yield an empty directory
        string::size_type slashPos = tree.find('/', 1);
        while (slashPos != string::npos)
        {
            doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));

            // Next
            slashPos = tree.find('/', slashPos + 1);
        }
    }

    // ...and the file name with prefix P
    string fileName(urlObj.getFile());
    if (fileName.empty() == false)
    {
        string extension;

        doc.add_term(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));

        // Does it have an extension ?
        string::size_type extPos = fileName.rfind('.');
        if ((extPos != string::npos) &&
            (extPos + 1 < fileName.length()))
        {
            extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
        }
        // The XEXT: term is added even when the extension is empty
        doc.add_term(string("XEXT:") + XapianDatabase::limitTermLength(extension));
    }

    // Add the date terms D, M and Y
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
    struct tm *tm = localtime(&timeT);
    // localtime() returns NULL if the time can't be converted, skip the date terms then
    if (tm != NULL)
    {
        string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
        if (yyyymmdd.length() == 8)
        {
            doc.add_term(string("D") + yyyymmdd);
            doc.add_term(string("M") + yyyymmdd.substr(0, 6));
            doc.add_term(string("Y") + yyyymmdd.substr(0, 4));
        }
    }

    // Finally, add the language code with prefix L
    doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
    // ...and the MIME type with prefix T
    doc.add_term(string("T") + info.getType());
}
示例4: addPostingsToDocument
/// Adds the tokenizer's terms to a Xapian document.
///
/// Each non-empty token is lower-cased, then, depending on the stemming mode,
/// added as a posting at the current position (unstemmed form) and/or as a
/// position-less term (stemmed form). Terms are stored unstemmed if no stemmer
/// could be created. Terms starting with a digit are never stemmed.
///
/// @param tokens source of the terms.
/// @param doc Xapian document that receives the postings and terms.
/// @param prefix prefix of the unstemmed terms; if not empty, stems use it too
/// instead of Z.
/// @param termPos position of the next posting, incremented for every
/// non-empty token.
/// @param mode STORE_UNSTEM, STORE_STEM or STORE_BOTH.
void XapianIndex::addPostingsToDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
    Xapian::Stem *pStemmer = NULL;
    string stemPrefix("Z");
    string term;

    // Stems are Z-prefixed, unless a prefix is already defined
    if (prefix.empty() == false)
    {
        stemPrefix = prefix;
    }

    // Do we know what language to use for stemming ?
    if (m_stemLanguage.empty() == false)
    {
        try
        {
            pStemmer = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
        }
        catch (const Xapian::Error &error)
        {
            cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
        }
    }

    try
    {
        // Get the terms
        while (tokens.nextToken(term) == true)
        {
            bool addStem = false;

            if (term.empty() == true)
            {
                continue;
            }
            // Lower case the term
            term = StringManip::toLowerCase(term);

            // Stem the term ?
            if ((mode == STORE_UNSTEM) ||
                (pStemmer == NULL))
            {
                doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
            }
            else if (mode == STORE_STEM)
            {
                addStem = true;
            }
            else if (mode == STORE_BOTH)
            {
                // Add both
                doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
                addStem = true;
            }

            // Don't stem if the term starts with a digit
            // isdigit() is only defined for unsigned char values and EOF
            if ((addStem == true) &&
                (isdigit(static_cast<unsigned char>(term[0])) == 0))
            {
#if XAPIAN_MAJOR_VERSION==0
                string stemmedTerm(pStemmer->stem_word(term));
#else
                string stemmedTerm((*pStemmer)(term));
#endif

                doc.add_term(stemPrefix + XapianDatabase::limitTermLength(stemmedTerm));
            }

            ++termPos;
        }
    }
    catch (...)
    {
        // Don't leak the stemmer, callers deal with the exception
        delete pStemmer;
        throw;
    }
#ifdef DEBUG
    cout << "XapianIndex::addPostingsToDocument: added " << termPos << " terms" << endl;
#endif

    // Deleting a NULL pointer is a no-op
    delete pStemmer;
}
示例5: indexDocument
/// Indexes the given data.
///
/// Builds a Xapian document from the tokenizer's terms, the given labels and
/// the common terms, then adds it to the index.
///
/// @param tokens tokenizer over the document to index.
/// @param labels labels to attach, each stored as an XLABEL: term.
/// @param docId set to the new document's ID when it is added to the index.
/// @return true if the document was added; false otherwise.
bool XapianIndex::indexDocument(Tokenizer &tokens, const std::set<std::string> &labels,
    unsigned int &docId)
{
    XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
    if (pDatabase == NULL)
    {
        cerr << "Bad index " << m_databaseName << endl;
        return false;
    }

    bool indexed = false;
    unsigned int dataLength = 0;

    try
    {
        // Nothing to do without a document
        const Document *pDocument = tokens.getDocument();
        if (pDocument == NULL)
        {
#ifdef DEBUG
            cout << "XapianIndex::indexDocument: no document" << endl;
#endif
            return false;
        }

        // Keep a copy of the properties, with a canonical location
        DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(),
            pDocument->getType(), pDocument->getLanguage());
        docInfo.setTimestamp(pDocument->getTimestamp());
        docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation()));

        // The stemming language comes from scanning the data, if there is any
        const char *pData = pDocument->getData(dataLength);
        if (pData != NULL)
        {
            m_stemLanguage = scanDocument(pData, dataLength, docInfo);
        }

        Xapian::termcount termPos = 0;
        Xapian::Document doc;

#ifdef DEBUG
        cout << "XapianIndex::indexDocument: adding terms" << endl;
#endif
        // Body terms first
        addPostingsToDocument(tokens, doc, "", termPos, m_stemMode);

        // ...then one XLABEL: term per label
        set<string>::const_iterator labelIter = labels.begin();
        while (labelIter != labels.end())
        {
            doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
            ++labelIter;
        }

        // ...and finally the common terms
        if (addCommonTerms(docInfo, doc, termPos) == true)
        {
            setDocumentData(docInfo, doc, m_stemLanguage);

            Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
            if (pIndex != NULL)
            {
                // The index assigns the document ID
                docId = pIndex->add_document(doc);
                indexed = true;
            }
        }
    }
    catch (const Xapian::Error &error)
    {
        cerr << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl;
    }
    catch (...)
    {
        cerr << "Couldn't index document, unknown exception occured" << endl;
    }

    pDatabase->unlock();

    return indexed;
}
示例6: setDocumentLabels
/// Sets a document's labels.
bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
bool resetLabels)
{
bool updatedLabels = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
Xapian::Document doc = pIndex->get_document(docId);
// Reset existing labels ?
if (resetLabels == true)
{
Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
if (termIter != pIndex->termlist_end(docId))
{
for (termIter.skip_to("XLABEL:");
termIter != pIndex->termlist_end(docId); ++termIter)
{
// Is this a label ?
if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
{
doc.remove_term(*termIter);
}
}
}
}
// Set new labels
for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
++labelIter)
{
if (labelIter->empty() == false)
{
doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
}
}
pIndex->replace_document(docId, doc);
updatedLabels = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't update document's labels, unknown exception occured" << endl;
}
pDatabase->unlock();
return updatedLabels;
}
示例7: updateDocument
/// Updates the given document; true if success.
bool XapianIndex::updateDocument(unsigned int docId, Tokenizer &tokens)
{
unsigned int dataLength = 0;
bool updated = false;
const Document *pDocument = tokens.getDocument();
if (pDocument == NULL)
{
return false;
}
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
const char *pData = pDocument->getData(dataLength);
if (pData == NULL)
{
return false;
}
// Cache the document's properties
DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(),
pDocument->getType(), pDocument->getLanguage());
docInfo.setTimestamp(pDocument->getTimestamp());
docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation()));
// Don't scan the document if a language is specified
m_stemLanguage = Languages::toEnglish(pDocument->getLanguage());
if (m_stemLanguage.empty() == true)
{
m_stemLanguage = scanDocument(pData, dataLength, docInfo);
}
try
{
set<string> labels;
Xapian::Document doc;
Xapian::termcount termPos = 0;
// Add the tokenizer's terms to the document
addPostingsToDocument(tokens, doc, "", termPos, m_stemMode);
// Get the document's labels
if (getDocumentLabels(docId, labels) == true)
{
// Add labels
for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
++labelIter)
{
doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
}
}
if (addCommonTerms(docInfo, doc, termPos) == true)
{
setDocumentData(docInfo, doc, m_stemLanguage);
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
// Update the document in the database
pIndex->replace_document(docId, doc);
updated = true;
}
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't update document: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't update document, unknown exception occured" << endl;
}
pDatabase->unlock();
return updated;
}
示例8: db
bool
DatabaseWrite::rebuild (GList *cpt_list)
{
string old_path = m_dbPath + "_old";
string rebuild_path = m_dbPath + "_rb";
string db_locale;
// Create the rebuild directory
if (g_mkdir_with_parents (rebuild_path.c_str (), 0755) != 0) {
g_warning ("Unable to create database rebuild directory.");
return false;
}
// check if old unrequired version of db still exists on filesystem
if (g_file_test (old_path.c_str (), G_FILE_TEST_EXISTS)) {
g_warning ("Existing xapian old db was not cleaned previously: '%s'.", old_path.c_str ());
as_utils_delete_dir_recursive (old_path.c_str ());
}
// check if old unrequired version of db still exists on filesystem
if (g_file_test (rebuild_path.c_str (), G_FILE_TEST_EXISTS)) {
g_debug ("Removing old rebuild-dir from previous database rebuild.");
as_utils_delete_dir_recursive (rebuild_path.c_str ());
}
Xapian::WritableDatabase db (rebuild_path, Xapian::DB_CREATE_OR_OVERWRITE);
Xapian::TermGenerator term_generator;
term_generator.set_database(db);
try {
/* this tests if we have spelling suggestions (there must be
* a better way?!?) - this is needed as inmemory does not have
* spelling corrections, but it allows setting the flag and will
* raise a exception much later
*/
db.add_spelling("test");
db.remove_spelling("test");
/* this enables the flag for it (we only reach this line if
* the db supports spelling suggestions)
*/
term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
} catch (const Xapian::UnimplementedError &error) {
// Ignore
}
for (GList *list = cpt_list; list != NULL; list = list->next) {
AsComponent *cpt = (AsComponent*) list->data;
Xapian::Document doc;
term_generator.set_document (doc);
doc.set_data (as_component_get_name (cpt));
// Sanity check
if (!as_component_has_install_candidate (cpt)) {
g_warning ("Skipped component '%s' from inclusion into database: Does not have an installation candidate.",
as_component_get_id (cpt));
continue;
}
// Package name
gchar **pkgs = as_component_get_pkgnames (cpt);
if (pkgs != NULL) {
gchar *pkgs_cstr = g_strjoinv (";", pkgs);
string pkgs_str = pkgs_cstr;
doc.add_value (XapianValues::PKGNAMES, pkgs_str);
g_free (pkgs_cstr);
for (uint i = 0; pkgs[i] != NULL; i++) {
string pkgname = pkgs[i];
doc.add_term("AP" + pkgname);
if (pkgname.find ("-") != string::npos) {
// we need this to work around xapian oddness
string tmp = pkgname;
replace (tmp.begin (), tmp.end (), '-', '_');
doc.add_term (tmp);
}
// add packagename as meta-data too
term_generator.index_text_without_positions (pkgname, WEIGHT_PKGNAME);
}
}
// Source package name
const gchar *spkgname_cstr = as_component_get_source_pkgname (cpt);
if (spkgname_cstr != NULL) {
string spkgname = spkgname_cstr;
doc.add_value (XapianValues::SOURCE_PKGNAME, spkgname);
if (!spkgname.empty()) {
doc.add_term("AP" + spkgname);
if (spkgname.find ("-") != string::npos) {
// we need this to work around xapian oddness
string tmp = spkgname;
replace (tmp.begin (), tmp.end (), '-', '_');
doc.add_term (tmp);
}
// add packagename as meta-data too
term_generator.index_text_without_positions (spkgname, WEIGHT_PKGNAME);
}
}
//.........这里部分代码省略.........