本文整理汇总了C++中Xapian::Document::set_data方法的典型用法代码示例。如果您正苦于以下问题:C++ Document::set_data方法的具体用法?C++ Document::set_data怎么用?C++ Document::set_data使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xapian::Document的用法示例。
在下文中一共展示了Document::set_data方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: setDocumentData
/**
 * Fills in the data record and the date sort value of a Xapian document.
 *
 * The record is a newline-separated list of "key=value" fields written in
 * this order: url, sample, caption, type, timestamp, language.
 *
 * @param doc      document that receives the record and value slot 0.
 * @param info     source of the location, title, type and timestamp.
 * @param language language string stored in the record's language field.
 */
void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
    const string &language) const
{
    string title(info.getTitle());
    string timestamp(info.getTimestamp());
    char timeStr[64];

    // Set the document data omindex-style
    string record = "url=";
    record += info.getLocation();
    // The sample will be generated at query time, so the field is left empty
    record += "\nsample=";
    record += "\ncaption=";
    if (badField(title) == true)
    {
        // Blank out every '=' in the title when badField() rejects it
        for (string::size_type pos = title.find('=');
            pos != string::npos;
            pos = title.find('=', pos + 1))
        {
            title[pos] = ' ';
        }
#ifdef DEBUG
        cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
    }
    record += title;
    record += "\ntype=";
    record += info.getType();
    // Append a timestamp
    record += "\ntimestamp=";
    record += timestamp;
    // ...and the language
    record += "\nlanguage=";
    record += language;
#ifdef DEBUG
    cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
    doc.set_data(record);

    // Add this value to allow sorting by date.
    // The explicit cast keeps the argument in step with the conversion
    // specifier whatever integer type fromTimestamp() returns
    // (NOTE(review): presumably time_t - confirm against TimeConverter).
    snprintf(timeStr, sizeof(timeStr), "%ld",
        static_cast<long>(TimeConverter::fromTimestamp(timestamp)));
    doc.add_value(0, timeStr);
}
示例2: Indexer::Indexer
/**
 * Indexes every data row of a CSV file into a Xapian database.
 *
 * The first CSV line is treated as a header and must carry the expected
 * column names at the hard-coded offsets; otherwise an error is printed
 * and the process exits with status 1. Each following row becomes one
 * document, keyed by the boolean term "Q" + identifier so that re-running
 * the indexer replaces documents instead of duplicating them.
 *
 * @param datapath path of the CSV file to read.
 * @param dbpath   path of the Xapian database to create or open.
 */
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    if (!csv) {
        // Report an unreadable input file explicitly instead of failing
        // later with an out-of-range exception from the header check.
        cerr << "Unable to open CSV file: " << datapath << endl;
        exit(1);
    }

    vector<string> fields;
    csv_parse_line(csv, fields);

    // Check the CSV header line matches our hard-coded offsets.
    // FIELD_DESCRIPTION is the largest offset, so the size test guards all
    // three at() calls: a short or empty header is reported as a format
    // mismatch rather than escaping as an uncaught exception.
    if (fields.size() <= FIELD_DESCRIPTION ||
        fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
        fields.at(FIELD_TITLE) != "TITLE" ||
        fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
        // The CSV format doesn't match what we expect.
        cerr << "CSV format has changed!" << endl;
        exit(1);
    }

    while (csv_parse_line(csv, fields)) {
        // 'fields' is a vector mapping from field number to value.
        // We look up fields with the 'at' method so we get an exception
        // if that field isn't set.
        //
        // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
        const string & description = fields.at(FIELD_DESCRIPTION);
        const string & title = fields.at(FIELD_TITLE);
        const string & identifier = fields.at(FIELD_ID_NUMBER);

        // We make a document and tell the term generator to use this.
        Xapian::Document doc;
        termgenerator.set_document(doc);

        // Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, "S");
        termgenerator.index_text(description, 1, "XD");

        // Index fields without prefixes for general search.
        termgenerator.index_text(title);
        termgenerator.increase_termpos();
        termgenerator.index_text(description);

        // Store all the fields for display purposes.
        doc.set_data(identifier + "\n" + title + "\n" + description);

        // We use the identifier to ensure each object ends up in the
        // database only once no matter how many times we run the
        // indexer.
        string idterm = "Q" + identifier;
        doc.add_boolean_term(idterm);
        db.replace_document(idterm, doc);
    }
}
示例3: main
/**
 * Command-line entry point with two actions:
 *
 *   <prog> index  <db_path>                 index the text read from stdin
 *   <prog> search <db_path> <term> [...]    run a query and print matches
 *
 * @return 0 on success, 1 on a usage error or a Xapian exception.
 */
int main(int argc, char **argv)
{
    // Both the action (argv[1]) and the database path (argv[2]) are
    // required; with fewer arguments argv[2] would be a null pointer.
    if (argc < 3) {
        usage(argv);
        return 1;
    }

    try {
        const char *action = argv[1];
        const char *db_path = argv[2];

        if (!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);
            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            // Read all of stdin. Lines are joined with a newline so that the
            // last word of one line and the first word of the next do not
            // merge into a single token. Testing the getline() result also
            // ends the loop on any stream failure, not only on EOF.
            std::string doc_txt;
            std::string line;
            while (std::getline(std::cin, line)) {
                if (!doc_txt.empty()) {
                    doc_txt += '\n';
                }
                doc_txt += line;
            }

            if (!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);
                indexer.set_document(doc);
                indexer.index_text(doc_txt);
                db.add_document(doc);
                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }
            db.commit();
        } else if (!strcmp(action, "search")) {
            if (argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            // Join all remaining arguments into one space-separated query.
            std::string query_str = argv[3];
            for (int i = 4; i < argc; ++i) {
                query_str += ' ';
                query_str += argv[i];
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                std::endl;

            enquire.set_query(query);
            // Fetch at most the first ten matches.
            Xapian::MSet matches = enquire.get_mset(0, 10);
            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;
            for (Xapian::MSetIterator i = matches.begin();
                 i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                    "% docid=" << *i << " [" <<
                    i.get_document().get_data()<< "]" << std::endl <<
                    std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }
    } catch (const Xapian::Error &error) {
        // Report on stderr like the other failures and signal the error to
        // the caller through the exit status.
        std::cerr << "Exception: " << error.get_msg() << std::endl;
        return 1;
    }

    return 0;
}
示例4: DatabaseWrite::rebuild
bool
DatabaseWrite::rebuild (GList *cpt_list)
{
string old_path = m_dbPath + "_old";
string rebuild_path = m_dbPath + "_rb";
string db_locale;
// Create the rebuild directory
if (g_mkdir_with_parents (rebuild_path.c_str (), 0755) != 0) {
g_warning ("Unable to create database rebuild directory.");
return false;
}
// check if old unrequired version of db still exists on filesystem
if (g_file_test (old_path.c_str (), G_FILE_TEST_EXISTS)) {
g_warning ("Existing xapian old db was not cleaned previously: '%s'.", old_path.c_str ());
as_utils_delete_dir_recursive (old_path.c_str ());
}
// check if old unrequired version of db still exists on filesystem
if (g_file_test (rebuild_path.c_str (), G_FILE_TEST_EXISTS)) {
g_debug ("Removing old rebuild-dir from previous database rebuild.");
as_utils_delete_dir_recursive (rebuild_path.c_str ());
}
Xapian::WritableDatabase db (rebuild_path, Xapian::DB_CREATE_OR_OVERWRITE);
Xapian::TermGenerator term_generator;
term_generator.set_database(db);
try {
/* this tests if we have spelling suggestions (there must be
* a better way?!?) - this is needed as inmemory does not have
* spelling corrections, but it allows setting the flag and will
* raise a exception much later
*/
db.add_spelling("test");
db.remove_spelling("test");
/* this enables the flag for it (we only reach this line if
* the db supports spelling suggestions)
*/
term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
} catch (const Xapian::UnimplementedError &error) {
// Ignore
}
for (GList *list = cpt_list; list != NULL; list = list->next) {
AsComponent *cpt = (AsComponent*) list->data;
Xapian::Document doc;
term_generator.set_document (doc);
doc.set_data (as_component_get_name (cpt));
// Sanity check
if (!as_component_has_install_candidate (cpt)) {
g_warning ("Skipped component '%s' from inclusion into database: Does not have an installation candidate.",
as_component_get_id (cpt));
continue;
}
// Package name
gchar **pkgs = as_component_get_pkgnames (cpt);
if (pkgs != NULL) {
gchar *pkgs_cstr = g_strjoinv (";", pkgs);
string pkgs_str = pkgs_cstr;
doc.add_value (XapianValues::PKGNAMES, pkgs_str);
g_free (pkgs_cstr);
for (uint i = 0; pkgs[i] != NULL; i++) {
string pkgname = pkgs[i];
doc.add_term("AP" + pkgname);
if (pkgname.find ("-") != string::npos) {
// we need this to work around xapian oddness
string tmp = pkgname;
replace (tmp.begin (), tmp.end (), '-', '_');
doc.add_term (tmp);
}
// add packagename as meta-data too
term_generator.index_text_without_positions (pkgname, WEIGHT_PKGNAME);
}
}
// Source package name
const gchar *spkgname_cstr = as_component_get_source_pkgname (cpt);
if (spkgname_cstr != NULL) {
string spkgname = spkgname_cstr;
doc.add_value (XapianValues::SOURCE_PKGNAME, spkgname);
if (!spkgname.empty()) {
doc.add_term("AP" + spkgname);
if (spkgname.find ("-") != string::npos) {
// we need this to work around xapian oddness
string tmp = spkgname;
replace (tmp.begin (), tmp.end (), '-', '_');
doc.add_term (tmp);
}
// add packagename as meta-data too
term_generator.index_text_without_positions (spkgname, WEIGHT_PKGNAME);
}
}
//.........这里部分代码省略.........