当前位置: 首页>>代码示例>>C++>>正文


C++ xapian::Document类代码示例

本文整理汇总了C++中xapian::Document的典型用法代码示例。如果您正苦于以下问题:C++ Document类的具体用法?C++ Document怎么用?C++ Document使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Document类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require three or more
    // parameters.
    if(argc < 4) {
        cout << "usage: " << argv[0] <<
	    " <path to database> <document data> <document terms>" << endl;
        exit(1);
    }

    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
	Xapian::WritableDatabase database(argv[1], Xapian::DB_CREATE_OR_OPEN);

        // Make the document
	Xapian::Document newdocument;

        // Put the data in the document
        newdocument.set_data(string(argv[2]));

        // Put the terms into the document
        for (int i = 3; i < argc; ++i) {
            newdocument.add_posting(argv[i], i - 2);
        }

        // Add the document to the database
        database.add_document(newdocument);
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
    }
}
开发者ID:IthacaDream,项目名称:Test,代码行数:32,代码来源:quickstartindex.cpp

示例2: addTermsToDocument

void XapianIndex::addTermsToDocument(Tokenizer &tokens, Xapian::Document &doc,
	const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
	Xapian::Stem *pStemmer = NULL;
	string term;

	// Do we know what language to use for stemming ?
	if (m_stemLanguage.empty() == false)
	{
		pStemmer = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
	}

	// Get the terms
	while (tokens.nextToken(term) == true)
	{
		if (term.empty() == true)
		{
			continue;
		}
		// Does it start with a capital letter ?
		if (isupper((int)term[0]) != 0)
		{
			// R-prefix the raw term
			doc.add_posting(string("R") + term, termPos);
		}
		// Lower case the term
		term = StringManip::toLowerCase(term);

		// Stem the term ?
		if ((mode == STORE_UNSTEM) ||
			(pStemmer == NULL))
		{
			doc.add_posting(limitTermLength(prefix + term), termPos++);
		}
		else if (mode == STORE_STEM)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
		}
		else if (mode == STORE_BOTH)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			// Add both
			doc.add_posting(limitTermLength(prefix + term), termPos);
			// ...at the same position
			doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
		}
	}
#ifdef DEBUG
	cout << "XapianIndex::addTermsToDocument: added " << termPos << " terms" << endl;
#endif

	if (pStemmer != NULL)
	{
		delete pStemmer;
	}
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:59,代码来源:XapianIndex.cpp

示例3: getDocumentInfo

/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
	bool foundDocument = false;

	if (docId == 0)
	{
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::Database *pIndex = pDatabase->readLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Get the current document data
			string record = doc.get_data();
			if (record.empty() == false)
			{
				string language = Languages::toLocale(StringManip::extractField(record, "language=", ""));

				docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
					StringManip::extractField(record, "url=", "\n"),
					StringManip::extractField(record, "type=", "\n"),
					language);
				docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n"));
#ifdef DEBUG
				cout << "XapianIndex::getDocumentInfo: language is "
					<< docInfo.getLanguage() << endl;
#endif
				foundDocument = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't get document properties: " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't get document properties, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return foundDocument;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:55,代码来源:XapianIndex.cpp

示例4: setDocumentData

void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
                                  const string &language) const
{
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());

    // Add this value to allow sorting by date
    doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));

    DocumentInfo docCopy(info);
    docCopy.setLanguage(language);
    doc.set_data(XapianDatabase::propsToRecord(&docCopy));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:12,代码来源:XapianIndex.cpp

示例5: renameLabel

/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
	bool renamedLabel = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
		if (pIndex != NULL)
		{
			string term("XLABEL:");

			// Get documents that have this label
			term += name;
			for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
				postingIter != pIndex->postlist_end(term); ++postingIter)
			{
				Xapian::docid docId = *postingIter;

				// Get the document
				Xapian::Document doc = pIndex->get_document(docId);
				// Remove the term
				doc.remove_term(term);
				// ...add the new one
				doc.add_term(limitTermLength(string("XLABEL:") + newName));
				// ...and update the document
				pIndex->replace_document(docId, doc);
			}

			renamedLabel = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't delete label, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return renamedLabel;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:51,代码来源:XapianIndex.cpp

示例6: setDocumentData

void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
	const string &language) const
{
	string title(info.getTitle());
	string timestamp(info.getTimestamp());
	char timeStr[64];
	time_t timeT = TimeConverter::fromTimestamp(timestamp);

	// Set the document data omindex-style
	string record = "url=";
	record += info.getLocation();
	// The sample will be generated at query time
	record += "\nsample=";
	record += "\ncaption=";
	if (badField(title) == true)
	{
		// Modify the title if necessary
		string::size_type pos = title.find("=");
		while (pos != string::npos)
		{
			title[pos] = ' ';
			pos = title.find("=", pos + 1);
		}
#ifdef DEBUG
		cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
	}
	record += title;
	record += "\ntype=";
	record += info.getType();
	// Append a timestamp, in a format compatible with Omega
	record += "\nmodtime=";
	snprintf(timeStr, 64, "%ld", timeT);
	record += timeStr;
	// ...and the language
	record += "\nlanguage=";
	record += StringManip::toLowerCase(language);
#ifdef DEBUG
	cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
	doc.set_data(record);

	// Add this value to allow sorting by date
	doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:45,代码来源:XapianIndex.cpp

示例7: saveMessage

void HistoryLogger::saveMessage(const Message* message)
{
	if (message->flags() & MESSAGE_FLAG_ALARM)
		return;
	
	Xapian::Document doc;
	
	quint32 flags = message->flags();
	std::string plainText(message->plainText().toUtf8());
	std::string confUser(message->getConfUser().constData());

	std::string data;
	if (flags & MESSAGE_FLAG_RTF)
		data = message->rtfText().constData();
	else
		data = plainText;

	std::cout << "HistoryLogger::saveMessage data = " << data << std::endl;
	doc.set_data(data);

	Xapian::TermGenerator termGen;
	termGen.set_stemmer(Xapian::Stem("ru"));
	termGen.set_document(doc);
	termGen.index_text(plainText);

	doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString());
	doc.add_value(1, message->dateTime().toString("hhmmss").toStdString());
	doc.add_value(2, QString::number(flags, 16).toStdString());
	doc.add_value(3, message->type() == Message::Outgoing? "o" : "i");
	doc.add_value(4, confUser);

	database->add_document(doc);
	database->flush();
}
开发者ID:Andrsid,项目名称:myagent-im,代码行数:34,代码来源:historylogger.cpp

示例8: setDocumentData

void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
	const string &language) const
{
	string title(info.getTitle());
	string timestamp(info.getTimestamp());
	char timeStr[64];

	// Set the document data omindex-style
	string record = "url=";
	record += info.getLocation();
	// The sample will be generated at query time
	record += "\nsample=";
	record += "\ncaption=";
	if (badField(title) == true)
	{
		// Modify the title if necessary
		string::size_type pos = title.find("=");
		while (pos != string::npos)
		{
			title[pos] = ' ';
			pos = title.find("=", pos + 1);
		}
#ifdef DEBUG
		cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
	}
	record += title;
	record += "\ntype=";
	record += info.getType();
	// Append a timestamp
	record += "\ntimestamp=";
	record += timestamp;
	// ...and the language
	record += "\nlanguage=";
	record += language;
#ifdef DEBUG
	cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
	doc.set_data(record);

	// Add this value to allow sorting by date
	snprintf(timeStr, 64, "%d", TimeConverter::fromTimestamp(timestamp));
	doc.add_value(0, timeStr);
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:44,代码来源:XapianIndex.cpp

示例9: main

int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require two or more
    // parameters.
    if (argc < 3) {
        cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
        exit(1);
    }
 
    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
	Xapian::Database db(argv[1]);
 
        // Start an enquire session
	Xapian::Enquire enquire(db);
         
        // Set percent and/or weight cutoffs
        enquire.set_cutoff(90,0.2);
         
        // Set weighting schema
        BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
        enquire.set_weighting_scheme(bm1);
 
        // Build the query object
	Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
        cout << "Performing query" << query.get_description() << "'" << endl;
	
        // Set Stopper
        string stop[8]={"的","了","呵","吧","就","你","我","他"};
        SimpleStopper *ss=new SimpleStopper;
        for(int i=0;i<8;i++){
            ss->add(stop[i]);
        }
        QueryParser qparser;
        qparser.set_stopper(ss);
        qparser.set_database(db);
 
        // Give the query object to the enquire session
        enquire.set_query(query);
 
        // Get the top 10 results of the query
	Xapian::MSet matches = enquire.get_mset(0, 10);                     //最多返回10个文档
 
        // Display the results
        cout << matches.size() << " results found" << endl;
 
        for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
	    Xapian::Document doc = i.get_document();
            cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
        }
        db.close();
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
    }
}
开发者ID:IthacaDream,项目名称:Test,代码行数:56,代码来源:xapian_test.cpp

示例10: removeFirstPostingsFromDocument

void XapianIndex::removeFirstPostingsFromDocument(Tokenizer &tokens, Xapian::Document &doc,
	const string &prefix, const string &language, StemmingMode mode) const
{
	Xapian::TermIterator termListIter = doc.termlist_begin();
	Xapian::Stem *pStemmer = NULL;
	string term;

	// Do we know what language to use for stemming ?
	if (language.empty() == false)
	{
		pStemmer = new Xapian::Stem(StringManip::toLowerCase(language));
	}

	// Get the terms and remove the first posting for each
	while (tokens.nextToken(term) == true)
	{
		if (term.empty() == true)
		{
			continue;
		}
		// Does it start with a capital letter ?
		if (isupper((int)term[0]) != 0)
		{
			// R-prefix the raw term
			removeFirstPosting(doc, termListIter, string("R") + term);
		}
		// Lower case the term
		term = StringManip::toLowerCase(term);

		// Stem the term ?
		if ((mode == STORE_UNSTEM) ||
			(pStemmer == NULL))
		{
			removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
		}
		else if (mode == STORE_STEM)
		{
			removeFirstPosting(doc, termListIter, limitTermLength(prefix + pStemmer->stem_word(term)));
		}
		else if (mode == STORE_BOTH)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
			if (stemmedTerm != term)
			{
				removeFirstPosting(doc, termListIter, limitTermLength(prefix + stemmedTerm));
			}
		}
	}

	if (pStemmer != NULL)
	{
		delete pStemmer;
	}
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:56,代码来源:XapianIndex.cpp

示例11: requestImage

QImage ThumbnailProvider::requestImage(const QString &id, QSize *size, const QSize &requestedSize)
{
    QImage image;

    if (m_thumb32->findImage(id, &image)) {
        return image;
    } else {
        QString filePath;
        if (id.at(0) == QLatin1Char('Q')) {
            Xapian::Document doc = m_xapianDB->findDocument(id);
            if (doc.get_docid() == 0) {
                return image;
            } else {
                filePath = QString::fromStdString(doc.get_value(Database::FilePath));
            }
        } else {
            filePath = id;
        }

        // Load thumbnail
//        KExiv2Iface::KExiv2Previews preview(filePath);
        KExiv2Iface::KExiv2	preview(filePath);
        image = preview.getExifThumbnail(true);
        if (image.isNull()) {
//            image = preview.image();
//        } else {
            // Store thumbnail
            // TODO smooth or fast?
            image = QImage(filePath).scaled(160, 120, Qt::KeepAspectRatio);
//            preview.
            kWarning() << "Could not find preview image for" << filePath << image.isNull();
        }

        // Store the thumbnail into the cache file
        if (m_thumb32->insertImage(id, image)) {
            kWarning() << "Added preview for" << image.byteCount() << filePath << id;
        } else {
            kWarning() << "FAILED to add preview for" << filePath << id;
        }
    }

    return image;
}
开发者ID:KDE,项目名称:photobook,代码行数:43,代码来源:ThumbnailProvider.cpp

示例12: text

QString EmailSearchStore::text(int queryId)
{
    Xapian::Document doc = docForQuery(queryId);

    QMutexLocker lock(&m_mutex);
    std::string data;
    try {
        data = doc.get_data();
    } catch (const Xapian::Error &) {
        // Nothing to do, move along
    }

    QString subject = QString::fromUtf8(data.c_str(), data.length());
    if (subject.isEmpty()) {
        return QStringLiteral("No Subject");
    }

    return subject;
}
开发者ID:KDE,项目名称:akonadi-search,代码行数:19,代码来源:emailsearchstore.cpp

示例13: prepareDocument

bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
	Xapian::termcount &termPos) const
{
	string title(info.getTitle());
	string location(info.getLocation());
	Url urlObj(location);

	// Add a magic term :-)
	doc.add_term(MAGIC_TERM);

	// Index the title with and without prefix S
	if (title.empty() == false)
	{
		Document titleDoc;
		titleDoc.setData(title.c_str(), title.length());
		Tokenizer titleTokens(&titleDoc);
		addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
		titleTokens.rewind();
		addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
	}

	// Index the full URL with prefix U
	doc.add_term(limitTermLength(string("U") + location, true));
	// ...the host name and included domains with prefix H
	string hostName(StringManip::toLowerCase(urlObj.getHost()));
	if (hostName.empty() == false)
	{
		doc.add_term(limitTermLength(string("H") + hostName, true));
		string::size_type dotPos = hostName.find('.');
		while (dotPos != string::npos)
		{
			doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));

			// Next
			dotPos = hostName.find('.', dotPos + 1);
		}
	}
	// ...and the file name with prefix P
	string fileName(urlObj.getFile());
	if (fileName.empty() == false)
	{
		doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
	}
	// Finally, add the language code with prefix L
	doc.add_term(string("L") + Languages::toCode(m_stemLanguage));

	setDocumentData(doc, info, m_stemLanguage);

	return true;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:50,代码来源:XapianIndex.cpp

示例14: removeFirstPosting

static void removeFirstPosting(Xapian::Document &doc,
                               Xapian::TermIterator &termListIter, const string &term)
{
    termListIter.skip_to(term);

    Xapian::PositionIterator firstPosIter = termListIter.positionlist_begin();
    if (firstPosIter != termListIter.positionlist_end())
    {
        try
        {
            doc.remove_posting(term, *firstPosIter);
        }
        catch (const Xapian::Error &error)
        {
            // This posting may have been removed already
#ifdef DEBUG
            cout << "XapianIndex::removeFirstPosting: " << error.get_msg() << endl;
#endif
        }
    }
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:21,代码来源:XapianIndex.cpp

示例15: db

Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    vector<string> fields;
    csv_parse_line(csv, fields);

    // Check the CSV header line matches our hard-code offsets.
    if (fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
    fields.at(FIELD_TITLE) != "TITLE" ||
    fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
    // The CSV format doesn't match what we expect.
    cerr << "CSV format has changed!" << endl;
    exit(1);
    }

    while (csv_parse_line(csv, fields)) {
    // 'fields' is a vector mapping from field number to value.
    // We look up fields with the 'at' method so we get an exception
    // if that field isn't set.
    //
    // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
    const string & description = fields.at(FIELD_DESCRIPTION);
    const string & title = fields.at(FIELD_TITLE);
    const string & identifier = fields.at(FIELD_ID_NUMBER);

    // We make a document and tell the term generator to use this.
    Xapian::Document doc;
    termgenerator.set_document(doc);

    // Index each field with a suitable prefix.
    termgenerator.index_text(title, 1, "S");
    termgenerator.index_text(description, 1, "XD");

    // Index fields without prefixes for general search.
    termgenerator.index_text(title);
    termgenerator.increase_termpos();
    termgenerator.index_text(description);

    // Store all the fields for display purposes.
    doc.set_data(identifier + "\n" + title + "\n" + description);

    // We use the identifier to ensure each object ends up in the
    // database only once no matter how many times we run the
    // indexer.
    string idterm = "Q" + identifier;
    doc.add_boolean_term(idterm);
    db.replace_document(idterm, doc);
    }
}
开发者ID:jainnidhi703,项目名称:xapianclusteringexample,代码行数:61,代码来源:indexer.cpp


注:本文中的xapian::Document类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。