本文整理汇总了C++中Xapian::Document类的典型用法代码示例。如果您正苦于以下问题：C++ Document类的具体用法？C++ Document怎么用？C++ Document使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Document类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char **argv)
{
// Simplest possible options parsing: we just require three or more
// parameters.
if(argc < 4) {
cout << "usage: " << argv[0] <<
" <path to database> <document data> <document terms>" << endl;
exit(1);
}
// Catch any Xapian::Error exceptions thrown
try {
// Make the database
Xapian::WritableDatabase database(argv[1], Xapian::DB_CREATE_OR_OPEN);
// Make the document
Xapian::Document newdocument;
// Put the data in the document
newdocument.set_data(string(argv[2]));
// Put the terms into the document
for (int i = 3; i < argc; ++i) {
newdocument.add_posting(argv[i], i - 2);
}
// Add the document to the database
database.add_document(newdocument);
} catch(const Xapian::Error &error) {
cout << "Exception: " << error.get_msg() << endl;
}
}
示例2: addTermsToDocument
/// Adds every token produced by the tokenizer to the document as postings.
///
/// For each non-empty token:
///  - if it starts with an upper-case letter, the raw token is also added
///    with an "R" prefix at the current position;
///  - the lower-cased token is then added with the given prefix, unstemmed,
///    stemmed, or both (at the same position), depending on the mode. When
///    no stemming language is configured the token is always added unstemmed.
///
/// @param tokens  source of the terms to index.
/// @param doc     document that receives the postings.
/// @param prefix  prefix prepended to each (stemmed or unstemmed) term.
/// @param termPos position of the next term; advanced by one for each token
///                that is added.
/// @param mode    which of the unstemmed/stemmed forms to store.
void XapianIndex::addTermsToDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
    Xapian::Stem *pStemmer = NULL;
    string term;

    // Do we know what language to use for stemming ?
    if (m_stemLanguage.empty() == false)
    {
        pStemmer = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
    }

    try
    {
        // Get the terms
        while (tokens.nextToken(term) == true)
        {
            if (term.empty() == true)
            {
                continue;
            }

            // Does it start with a capital letter ?
            // The cast to unsigned char matters: passing a negative char
            // (e.g. a UTF-8 lead byte) to isupper() is undefined behaviour.
            if (isupper(static_cast<unsigned char>(term[0])) != 0)
            {
                // R-prefix the raw term
                doc.add_posting(string("R") + term, termPos);
            }

            // Lower case the term
            term = StringManip::toLowerCase(term);

            // Stem the term ?
            if ((mode == STORE_UNSTEM) ||
                (pStemmer == NULL))
            {
                doc.add_posting(limitTermLength(prefix + term), termPos++);
            }
            else if (mode == STORE_STEM)
            {
                string stemmedTerm = pStemmer->stem_word(term);

                doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
            }
            else if (mode == STORE_BOTH)
            {
                string stemmedTerm = pStemmer->stem_word(term);

                // Add both
                doc.add_posting(limitTermLength(prefix + term), termPos);
                // ...at the same position
                doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
            }
        }
    }
    catch (...)
    {
        // Don't leak the stemmer if anything in the loop throws
        delete pStemmer;
        throw;
    }
#ifdef DEBUG
    cout << "XapianIndex::addTermsToDocument: added " << termPos << " terms" << endl;
#endif

    // Deleting a NULL pointer is a no-op
    delete pStemmer;
}
示例3: getDocumentInfo
/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
bool foundDocument = false;
if (docId == 0)
{
return false;
}
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::Database *pIndex = pDatabase->readLock();
if (pIndex != NULL)
{
Xapian::Document doc = pIndex->get_document(docId);
// Get the current document data
string record = doc.get_data();
if (record.empty() == false)
{
string language = Languages::toLocale(StringManip::extractField(record, "language=", ""));
docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
StringManip::extractField(record, "url=", "\n"),
StringManip::extractField(record, "type=", "\n"),
language);
docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n"));
#ifdef DEBUG
cout << "XapianIndex::getDocumentInfo: language is "
<< docInfo.getLanguage() << endl;
#endif
foundDocument = true;
}
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't get document properties: " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't get document properties, unknown exception occured" << endl;
}
pDatabase->unlock();
return foundDocument;
}
示例4: setDocumentData
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
const string &language) const
{
time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
// Add this value to allow sorting by date
doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));
DocumentInfo docCopy(info);
docCopy.setLanguage(language);
doc.set_data(XapianDatabase::propsToRecord(&docCopy));
}
示例5: renameLabel
/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
bool renamedLabel = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
string term("XLABEL:");
// Get documents that have this label
term += name;
for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
postingIter != pIndex->postlist_end(term); ++postingIter)
{
Xapian::docid docId = *postingIter;
// Get the document
Xapian::Document doc = pIndex->get_document(docId);
// Remove the term
doc.remove_term(term);
// ...add the new one
doc.add_term(limitTermLength(string("XLABEL:") + newName));
// ...and update the document
pIndex->replace_document(docId, doc);
}
renamedLabel = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't delete label, unknown exception occured" << endl;
}
pDatabase->unlock();
return renamedLabel;
}
示例6: setDocumentData
/// Sets the document's data record, omindex-style, and its date value.
///
/// The record is a newline-separated list of "name=value" fields: url,
/// sample (left empty), caption, type, modtime and language. Value slot 0
/// receives the timestamp in binary form to allow sorting by date.
///
/// @param info     properties of the document being indexed.
/// @param doc      Xapian document to update.
/// @param language language of the document; stored lower-cased.
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
    const string &language) const
{
    string title(info.getTitle());
    string timestamp(info.getTimestamp());
    char timeStr[64];
    time_t timeT = TimeConverter::fromTimestamp(timestamp);

    // Set the document data omindex-style
    string record = "url=";
    record += info.getLocation();
    // The sample will be generated at query time
    record += "\nsample=";
    record += "\ncaption=";
    if (badField(title) == true)
    {
        // Modify the title if necessary: blank out every '=' so that the
        // caption can't be confused with a field name
        string::size_type pos = title.find('=');
        while (pos != string::npos)
        {
            title[pos] = ' ';
            pos = title.find('=', pos + 1);
        }
#ifdef DEBUG
        cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
    }
    record += title;
    record += "\ntype=";
    record += info.getType();
    // Append a timestamp, in a format compatible with Omega
    record += "\nmodtime=";
    // time_t isn't guaranteed to be a long, so convert it explicitly to
    // match the %ld conversion
    snprintf(timeStr, sizeof(timeStr), "%ld", static_cast<long>(timeT));
    record += timeStr;
    // ...and the language
    record += "\nlanguage=";
    record += StringManip::toLowerCase(language);
#ifdef DEBUG
    cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
    doc.set_data(record);

    // Add this value to allow sorting by date
    doc.add_value(0, StringManip::integerToBinaryString(static_cast<uint32_t>(timeT)));
}
示例7: saveMessage
void HistoryLogger::saveMessage(const Message* message)
{
if (message->flags() & MESSAGE_FLAG_ALARM)
return;
Xapian::Document doc;
quint32 flags = message->flags();
std::string plainText(message->plainText().toUtf8());
std::string confUser(message->getConfUser().constData());
std::string data;
if (flags & MESSAGE_FLAG_RTF)
data = message->rtfText().constData();
else
data = plainText;
std::cout << "HistoryLogger::saveMessage data = " << data << std::endl;
doc.set_data(data);
Xapian::TermGenerator termGen;
termGen.set_stemmer(Xapian::Stem("ru"));
termGen.set_document(doc);
termGen.index_text(plainText);
doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString());
doc.add_value(1, message->dateTime().toString("hhmmss").toStdString());
doc.add_value(2, QString::number(flags, 16).toStdString());
doc.add_value(3, message->type() == Message::Outgoing? "o" : "i");
doc.add_value(4, confUser);
database->add_document(doc);
database->flush();
}
示例8: setDocumentData
/// Sets the document's data record, omindex-style, and its date value.
///
/// The record is a newline-separated list of "name=value" fields: url,
/// sample (left empty), caption, type, timestamp and language. Value slot 0
/// receives the timestamp converted to a decimal number, to allow sorting
/// by date.
///
/// @param doc      Xapian document to update.
/// @param info     properties of the document being indexed.
/// @param language language of the document, stored as given.
void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
    const string &language) const
{
    string title(info.getTitle());
    string timestamp(info.getTimestamp());
    char timeStr[64];

    // Set the document data omindex-style
    string record = "url=";
    record += info.getLocation();
    // The sample will be generated at query time
    record += "\nsample=";
    record += "\ncaption=";
    if (badField(title) == true)
    {
        // Modify the title if necessary: blank out every '=' so that the
        // caption can't be confused with a field name
        string::size_type pos = title.find('=');
        while (pos != string::npos)
        {
            title[pos] = ' ';
            pos = title.find('=', pos + 1);
        }
#ifdef DEBUG
        cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
    }
    record += title;
    record += "\ntype=";
    record += info.getType();
    // Append a timestamp
    record += "\ntimestamp=";
    record += timestamp;
    // ...and the language
    record += "\nlanguage=";
    record += language;
#ifdef DEBUG
    cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
    doc.set_data(record);

    // Add this value to allow sorting by date.
    // The conversion result (presumably a time_t - confirm against
    // TimeConverter) must not be passed to %d as is: that is undefined when
    // the type is wider than int. Convert explicitly and print it as a long.
    snprintf(timeStr, sizeof(timeStr), "%ld",
        static_cast<long>(TimeConverter::fromTimestamp(timestamp)));
    doc.add_value(0, timeStr);
}
示例9: main
int main(int argc, char **argv)
{
// Simplest possible options parsing: we just require two or more
// parameters.
if (argc < 3) {
cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
exit(1);
}
// Catch any Xapian::Error exceptions thrown
try {
// Make the database
Xapian::Database db(argv[1]);
// Start an enquire session
Xapian::Enquire enquire(db);
// Set percent and/or weight cutoffs
enquire.set_cutoff(90,0.2);
// Set weighting schema
BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
enquire.set_weighting_scheme(bm1);
// Build the query object
Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
cout << "Performing query" << query.get_description() << "'" << endl;
// Set Stopper
string stop[8]={"的","了","呵","吧","就","你","我","他"};
SimpleStopper *ss=new SimpleStopper;
for(int i=0;i<8;i++){
ss->add(stop[i]);
}
QueryParser qparser;
qparser.set_stopper(ss);
qparser.set_database(db);
// Give the query object to the enquire session
enquire.set_query(query);
// Get the top 10 results of the query
Xapian::MSet matches = enquire.get_mset(0, 10); //最多返回10个文档
// Display the results
cout << matches.size() << " results found" << endl;
for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
Xapian::Document doc = i.get_document();
cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
}
db.close();
} catch(const Xapian::Error &error) {
cout << "Exception: " << error.get_msg() << endl;
}
}
示例10: removeFirstPostingsFromDocument
/// Removes the first posting of every term the tokenizer produces.
///
/// For each non-empty token:
///  - if it starts with an upper-case letter, the first posting of the raw
///    token with an "R" prefix is removed;
///  - the first posting of the lower-cased token with the given prefix is
///    then removed, in its unstemmed form, stemmed form, or both, depending
///    on the mode. When no language is given only the unstemmed form is
///    handled. In STORE_BOTH mode the stemmed form is skipped when it is
///    identical to the unstemmed one.
///
/// @param tokens   source of the terms.
/// @param doc      document the postings are removed from.
/// @param prefix   prefix prepended to each (stemmed or unstemmed) term.
/// @param language stemming language; empty disables stemming.
/// @param mode     which of the unstemmed/stemmed forms to remove.
void XapianIndex::removeFirstPostingsFromDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, const string &language, StemmingMode mode) const
{
    Xapian::TermIterator termListIter = doc.termlist_begin();
    Xapian::Stem *pStemmer = NULL;
    string term;

    // Do we know what language to use for stemming ?
    if (language.empty() == false)
    {
        pStemmer = new Xapian::Stem(StringManip::toLowerCase(language));
    }

    try
    {
        // Get the terms and remove the first posting for each
        while (tokens.nextToken(term) == true)
        {
            if (term.empty() == true)
            {
                continue;
            }

            // Does it start with a capital letter ?
            // The cast to unsigned char matters: passing a negative char
            // (e.g. a UTF-8 lead byte) to isupper() is undefined behaviour.
            if (isupper(static_cast<unsigned char>(term[0])) != 0)
            {
                // R-prefix the raw term
                removeFirstPosting(doc, termListIter, string("R") + term);
            }

            // Lower case the term
            term = StringManip::toLowerCase(term);

            // Stem the term ?
            if ((mode == STORE_UNSTEM) ||
                (pStemmer == NULL))
            {
                removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
            }
            else if (mode == STORE_STEM)
            {
                removeFirstPosting(doc, termListIter, limitTermLength(prefix + pStemmer->stem_word(term)));
            }
            else if (mode == STORE_BOTH)
            {
                string stemmedTerm = pStemmer->stem_word(term);

                removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
                if (stemmedTerm != term)
                {
                    removeFirstPosting(doc, termListIter, limitTermLength(prefix + stemmedTerm));
                }
            }
        }
    }
    catch (...)
    {
        // Don't leak the stemmer if anything in the loop throws
        delete pStemmer;
        throw;
    }

    // Deleting a NULL pointer is a no-op
    delete pStemmer;
}
示例11: requestImage
/// Returns the thumbnail for an image, from the cache when possible.
///
/// An id starting with 'Q' is resolved to a file path through the Xapian
/// database; any other id is used as the file path itself. On a cache miss
/// the embedded EXIF thumbnail is used, falling back to the image file
/// scaled down to fit 160x120, and the result is inserted into the cache.
///
/// @param id            image identifier, see above.
/// @param size          if not null, receives the size of the returned image.
/// @param requestedSize ignored by this provider.
/// @return the thumbnail, or a null image if the id is empty or unknown.
QImage ThumbnailProvider::requestImage(const QString &id, QSize *size, const QSize &requestedSize)
{
    QImage image;

    // id.at(0) below requires a non-empty id
    if (id.isEmpty()) {
        return image;
    }

    if (!m_thumb32->findImage(id, &image)) {
        QString filePath;
        if (id.at(0) == QLatin1Char('Q')) {
            Xapian::Document doc = m_xapianDB->findDocument(id);
            if (doc.get_docid() == 0) {
                // Unknown document: nothing to load
                return image;
            }
            filePath = QString::fromStdString(doc.get_value(Database::FilePath));
        } else {
            filePath = id;
        }

        // Load thumbnail
        KExiv2Iface::KExiv2 preview(filePath);
        image = preview.getExifThumbnail(true);
        if (image.isNull()) {
            // No embedded thumbnail: scale down the image itself
            // TODO smooth or fast?
            image = QImage(filePath).scaled(160, 120, Qt::KeepAspectRatio);
            kWarning() << "Could not find preview image for" << filePath << image.isNull();
        }

        // Store the thumbnail into the cache file
        if (m_thumb32->insertImage(id, image)) {
            kWarning() << "Added preview for" << image.byteCount() << filePath << id;
        } else {
            kWarning() << "FAILED to add preview for" << filePath << id;
        }
    }

    // Image providers are expected to report the image size to the caller
    if (size != 0) {
        *size = image.size();
    }

    return image;
}
示例12: text
/// Returns the display text for a query result.
///
/// The text is the data of the Xapian document for the query, decoded as
/// UTF-8; "No Subject" is returned when that data is empty or can't be read.
QString EmailSearchStore::text(int queryId)
{
    Xapian::Document matchDoc = docForQuery(queryId);

    QMutexLocker lock(&m_mutex);

    std::string payload;
    try {
        payload = matchDoc.get_data();
    } catch (const Xapian::Error &) {
        // A read failure is treated like a document without data
    }

    const QString subject = QString::fromUtf8(payload.c_str(), payload.length());
    if (!subject.isEmpty()) {
        return subject;
    }

    return QStringLiteral("No Subject");
}
示例13: prepareDocument
bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
Xapian::termcount &termPos) const
{
string title(info.getTitle());
string location(info.getLocation());
Url urlObj(location);
// Add a magic term :-)
doc.add_term(MAGIC_TERM);
// Index the title with and without prefix S
if (title.empty() == false)
{
Document titleDoc;
titleDoc.setData(title.c_str(), title.length());
Tokenizer titleTokens(&titleDoc);
addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
titleTokens.rewind();
addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
}
// Index the full URL with prefix U
doc.add_term(limitTermLength(string("U") + location, true));
// ...the host name and included domains with prefix H
string hostName(StringManip::toLowerCase(urlObj.getHost()));
if (hostName.empty() == false)
{
doc.add_term(limitTermLength(string("H") + hostName, true));
string::size_type dotPos = hostName.find('.');
while (dotPos != string::npos)
{
doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));
// Next
dotPos = hostName.find('.', dotPos + 1);
}
}
// ...and the file name with prefix P
string fileName(urlObj.getFile());
if (fileName.empty() == false)
{
doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
}
// Finally, add the language code with prefix L
doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
setDocumentData(doc, info, m_stemLanguage);
return true;
}
示例14: removeFirstPosting
/// Removes the first posting of a term from a document.
///
/// Nothing happens if the document doesn't have the term, or if the term
/// has no positional information.
///
/// @param doc          document to modify.
/// @param termListIter iterator over doc's terms; repositioned by this call.
/// @param term         term whose first posting should be removed.
static void removeFirstPosting(Xapian::Document &doc,
    Xapian::TermIterator &termListIter, const string &term)
{
    const Xapian::TermIterator termListEnd = doc.termlist_end();

    // skip_to() only moves forward, and terms don't necessarily arrive in
    // sorted order, so start over from the beginning of the term list
    termListIter = doc.termlist_begin();
    if (termListIter == termListEnd)
    {
        // The document has no terms at all
        return;
    }

    termListIter.skip_to(term);
    // skip_to() lands on the next term when the requested one is missing,
    // or on the end of the list: don't touch another term's positions
    if ((termListIter == termListEnd) ||
        (*termListIter != term))
    {
        return;
    }

    Xapian::PositionIterator firstPosIter = termListIter.positionlist_begin();
    if (firstPosIter != termListIter.positionlist_end())
    {
        try
        {
            doc.remove_posting(term, *firstPosIter);
        }
        catch (const Xapian::Error &error)
        {
            // This posting may have been removed already
#ifdef DEBUG
            cout << "XapianIndex::removeFirstPosting: " << error.get_msg() << endl;
#endif
        }
    }
}
示例15: db
/// Indexes a CSV file into a Xapian database.
///
/// The first line of the CSV file must be a header whose columns 0, 2 and 8
/// are "id_NUMBER", "TITLE" and "DESCRIPTION". Every following line becomes
/// one document, keyed on its identifier so that running the indexer again
/// replaces documents instead of duplicating them.
///
/// The process exits with status 1 if the file can't be opened or if the
/// header doesn't match.
///
/// @param datapath path to the CSV file to index.
/// @param dbpath   path to the database, created if it doesn't exist.
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    if (!csv) {
        // Say what is wrong rather than failing on the header lookup below.
        cerr << "Couldn't open " << datapath << endl;
        exit(1);
    }

    vector<string> fields;
    csv_parse_line(csv, fields);

    // Check the CSV header line matches our hard-code offsets.
    if (fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
        fields.at(FIELD_TITLE) != "TITLE" ||
        fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
        // The CSV format doesn't match what we expect.
        cerr << "CSV format has changed!" << endl;
        exit(1);
    }

    while (csv_parse_line(csv, fields)) {
        // 'fields' is a vector mapping from field number to value.
        // We look up fields with the 'at' method so we get an exception
        // if that field isn't set.
        //
        // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
        const string & description = fields.at(FIELD_DESCRIPTION);
        const string & title = fields.at(FIELD_TITLE);
        const string & identifier = fields.at(FIELD_ID_NUMBER);

        // We make a document and tell the term generator to use this.
        Xapian::Document doc;
        termgenerator.set_document(doc);

        // Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, "S");
        termgenerator.index_text(description, 1, "XD");

        // Index fields without prefixes for general search.
        termgenerator.index_text(title);
        // Leave a gap in term positions between title and description.
        termgenerator.increase_termpos();
        termgenerator.index_text(description);

        // Store all the fields for display purposes.
        doc.set_data(identifier + "\n" + title + "\n" + description);

        // We use the identifier to ensure each object ends up in the
        // database only once no matter how many times we run the
        // indexer.
        string idterm = "Q" + identifier;
        doc.add_boolean_term(idterm);
        db.replace_document(idterm, doc);
    }

    // Commit explicitly so that a failure to write the changes is reported
    // as an exception rather than being lost when db is destroyed.
    db.commit();
}