当前位置: 首页>>代码示例>>C++>>正文


C++ Document::add_term方法代码示例

本文整理汇总了C++中Xapian::Document::add_term方法的典型用法代码示例。如果您正苦于以下问题：C++ Document::add_term方法的具体用法？C++ Document::add_term怎么用？C++ Document::add_term使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xapian::Document的用法示例。


在下文中一共展示了Document::add_term方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: renameLabel

/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
	bool renamedLabel = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
		if (pIndex != NULL)
		{
			string term("XLABEL:");

			// Get documents that have this label
			term += name;
			for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
				postingIter != pIndex->postlist_end(term); ++postingIter)
			{
				Xapian::docid docId = *postingIter;

				// Get the document
				Xapian::Document doc = pIndex->get_document(docId);
				// Remove the term
				doc.remove_term(term);
				// ...add the new one
				doc.add_term(limitTermLength(string("XLABEL:") + newName));
				// ...and update the document
				pIndex->replace_document(docId, doc);
			}

			renamedLabel = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't delete label, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return renamedLabel;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:51,代码来源:XapianIndex.cpp

示例2: prepareDocument

bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
	Xapian::termcount &termPos) const
{
	string title(info.getTitle());
	string location(info.getLocation());
	Url urlObj(location);

	// Add a magic term :-)
	doc.add_term(MAGIC_TERM);

	// Index the title with and without prefix S
	if (title.empty() == false)
	{
		Document titleDoc;
		titleDoc.setData(title.c_str(), title.length());
		Tokenizer titleTokens(&titleDoc);
		addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
		titleTokens.rewind();
		addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
	}

	// Index the full URL with prefix U
	doc.add_term(limitTermLength(string("U") + location, true));
	// ...the host name and included domains with prefix H
	string hostName(StringManip::toLowerCase(urlObj.getHost()));
	if (hostName.empty() == false)
	{
		doc.add_term(limitTermLength(string("H") + hostName, true));
		string::size_type dotPos = hostName.find('.');
		while (dotPos != string::npos)
		{
			doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));

			// Next
			dotPos = hostName.find('.', dotPos + 1);
		}
	}
	// ...and the file name with prefix P
	string fileName(urlObj.getFile());
	if (fileName.empty() == false)
	{
		doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
	}
	// Finally, add the language code with prefix L
	doc.add_term(string("L") + Languages::toCode(m_stemLanguage));

	setDocumentData(doc, info, m_stemLanguage);

	return true;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:50,代码来源:XapianIndex.cpp

示例3: addCommonTerms

/// Adds the terms derived from a document's properties to a Xapian document.
///
/// Terms added: the magic term, title tokens (prefix S unstemmed, then no
/// prefix with the index's stemming mode), escaped location (U), base file of
/// a local location that has a query part (XFILE:), host name and parent
/// domains (H), location tree and each of its parent directories (XDIR:),
/// file name (P) and its lower-cased extension (XEXT:), date terms (D, M, Y),
/// language code (L) and MIME type (T).
///
/// @param info     properties of the document being indexed.
/// @param doc      Xapian document that receives the terms.
/// @param termPos  running term position, advanced by the title tokens.
void XapianIndex::addCommonTerms(const DocumentInfo &info, Xapian::Document &doc,
                                 Xapian::termcount &termPos) const
{
    string title(info.getTitle());
    string location(info.getLocation());
    Url urlObj(location);

    // Add a magic term :-)
    doc.add_term(MAGIC_TERM);

    // Index the title with and without prefix S
    if (title.empty() == false)
    {
        Document titleDoc;
        titleDoc.setData(title.c_str(), title.length());
        Tokenizer titleTokens(&titleDoc);
        addPostingsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
        // Feed the same tokens a second time
        titleTokens.rewind();
        addPostingsToDocument(titleTokens, doc, "", termPos, m_stemMode);
    }

    // Index the full URL with prefix U
    doc.add_term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
    // ...the base file with XFILE:
    string::size_type qmPos = location.find('?');
    if ((urlObj.isLocal() == true) &&
            (qmPos != string::npos))
    {
        doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
    }
    // ...the host name and included domains with prefix H
    string hostName(StringManip::toLowerCase(urlObj.getHost()));
    if (hostName.empty() == false)
    {
        doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName, true));
        string::size_type dotPos = hostName.find('.');
        while (dotPos != string::npos)
        {
            doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));

            // Next
            dotPos = hostName.find('.', dotPos + 1);
        }
    }
    // ...the location (as is) and all directories with prefix XDIR:
    string tree(urlObj.getLocation());
    if (tree.empty() == false)
    {
        doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
        if (tree[0] == '/')
        {
            doc.add_term("XDIR:/");
        }
        // Start at 1 so that a leading slash doesn't yield an empty directory
        string::size_type slashPos = tree.find('/', 1);
        while (slashPos != string::npos)
        {
            doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));

            // Next
            slashPos = tree.find('/', slashPos + 1);
        }
    }
    // ...and the file name with prefix P
    string fileName(urlObj.getFile());
    if (fileName.empty() == false)
    {
        string extension;

        doc.add_term(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));

        // Does it have an extension ?
        string::size_type extPos = fileName.rfind('.');
        if ((extPos != string::npos) &&
                (extPos + 1 < fileName.length()))
        {
            extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
        }
        // Added even when the extension is empty, i.e. as a bare "XEXT:" term
        doc.add_term(string("XEXT:") + XapianDatabase::limitTermLength(extension));
    }
    // Add the date terms D, M and Y
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
    // localtime() returns NULL when the time can't be represented: skip the
    // date terms in that case rather than dereferencing a null pointer
    struct tm *tm = localtime(&timeT);
    if (tm != NULL)
    {
        string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
        if (yyyymmdd.length() == 8)
        {
            doc.add_term(string("D") + yyyymmdd);
            doc.add_term(string("M") + yyyymmdd.substr(0, 6));
            doc.add_term(string("Y") + yyyymmdd.substr(0, 4));
        }
    }
    // Finally, add the language code with prefix L
    doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
    // ...and the MIME type with prefix T
    doc.add_term(string("T") + info.getType());
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:94,代码来源:XapianIndex.cpp

示例4: addPostingsToDocument

/// Feeds a tokenizer's terms into a Xapian document.
///
/// Each non-empty token is lower-cased, then added according to `mode`:
/// as a posting at the current position (unstemmed), as a stemmed term, or
/// both. Without a usable stemmer every token is added unstemmed. Stems are
/// added with add_term(), i.e. without positional information.
///
/// @param tokens   source of terms; consumed until nextToken() returns false.
/// @param doc      Xapian document that receives the terms.
/// @param prefix   prefix for unstemmed terms; also used for stems when not
///                 empty, otherwise stems are prefixed with Z.
/// @param termPos  running term position, incremented once per non-empty token.
/// @param mode     whether to store unstemmed terms, stems, or both.
void XapianIndex::addPostingsToDocument(Tokenizer &tokens, Xapian::Document &doc,
                                        const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
    // Held by value so that nothing leaks if adding a term throws
    Xapian::Stem stemmer;
    bool haveStemmer = false;
    string stemPrefix("Z");
    string term;

    // Do we know what language to use for stemming ?
    if (m_stemLanguage.empty() == false)
    {
        try
        {
            stemmer = Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
            haveStemmer = true;
        }
        catch (const Xapian::Error &error)
        {
            cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
        }
    }

    // Stems are Z-prefixed, unless a prefix is already defined
    if (prefix.empty() == false)
    {
        stemPrefix = prefix;
    }

    // Get the terms
    while (tokens.nextToken(term) == true)
    {
        bool addStem = false;

        if (term.empty() == true)
        {
            continue;
        }
        // Lower case the term
        term = StringManip::toLowerCase(term);

        // Stem the term ?
        if ((mode == STORE_UNSTEM) ||
                (haveStemmer == false))
        {
            doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
        }
        else if (mode == STORE_STEM)
        {
            // NOTE(review): in this mode a term that starts with a digit ends
            // up not being added at all (see the check below) - confirm that
            // this is intended
            addStem = true;
        }
        else if (mode == STORE_BOTH)
        {
            // Add both
            doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
            addStem = true;
        }

        // Don't stem if the term starts with a digit
        // The cast matters: isdigit() is undefined for negative values, which
        // is what a plain char holds for bytes above 0x7F
        if ((addStem == true) &&
                (isdigit(static_cast<unsigned char>(term[0])) == 0))
        {
#if XAPIAN_MAJOR_VERSION==0
            string stemmedTerm(stemmer.stem_word(term));
#else
            string stemmedTerm(stemmer(term));
#endif

            doc.add_term(stemPrefix + XapianDatabase::limitTermLength(stemmedTerm));
        }

        ++termPos;
    }
#ifdef DEBUG
    cout << "XapianIndex::addPostingsToDocument: added " << termPos << " terms" << endl;
#endif
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:79,代码来源:XapianIndex.cpp

示例5: indexDocument

/// Indexes the given data.
///
/// Builds a Xapian document from the tokenizer's terms, the given labels and
/// the terms derived from the document's properties, then adds it to the index.
///
/// @param tokens  tokenizer over the document to index; its document supplies
///                the title, location, type, language, timestamp and data.
/// @param labels  labels to attach to the document as XLABEL: terms.
/// @param docId   set to the new document's ID when indexing succeeds.
/// @return true if the document was added to the index, false otherwise
///         (no database, no document, index not writable, or an exception).
bool XapianIndex::indexDocument(Tokenizer &tokens, const std::set<std::string> &labels,
	unsigned int &docId)
{
	unsigned int dataLength = 0;
	bool indexed = false;
	// Whether writeLock() was called, so that unlock() is only used to balance it
	bool lockRequested = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		// Get the document
		const Document *pDocument = tokens.getDocument();
		if (pDocument == NULL)
		{
#ifdef DEBUG
			cout << "XapianIndex::indexDocument: no document" << endl;
#endif
			return false;
		}

		// Cache the document's properties
		DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(),
			pDocument->getType(), pDocument->getLanguage());
		docInfo.setTimestamp(pDocument->getTimestamp());
		docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation()));

		// The stemming language is only refreshed when there is data to scan
		const char *pData = pDocument->getData(dataLength);
		if (pData != NULL)
		{
			m_stemLanguage = scanDocument(pData, dataLength, docInfo);
		}

		Xapian::Document doc;
		Xapian::termcount termPos = 0;

#ifdef DEBUG
		cout << "XapianIndex::indexDocument: adding terms" << endl;
#endif
		// Add the tokenizer's terms to the Xapian document
		addPostingsToDocument(tokens, doc, "", termPos, m_stemMode);
		// Add labels
		for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
			++labelIter)
		{
			doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
		}
		if (addCommonTerms(docInfo, doc, termPos) == true)
		{
			setDocumentData(docInfo, doc, m_stemLanguage);

			// Flag first: from here on unlock() is owed, as it was before
			lockRequested = true;
			Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
			if (pIndex != NULL)
			{
				// Add this document to the Xapian index
				docId = pIndex->add_document(doc);
				indexed = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't index document, unknown exception occured" << endl;
	}
	// Don't unlock what was never locked, e.g. when building the document threw
	if (lockRequested == true)
	{
		pDatabase->unlock();
	}

	return indexed;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:77,代码来源:XapianIndex.cpp

示例6: setDocumentLabels

/// Sets a document's labels.
bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
	bool resetLabels)
{
	bool updatedLabels = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Reset existing labels ?
			if (resetLabels == true)
			{
				Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
				if (termIter != pIndex->termlist_end(docId))
				{
					for (termIter.skip_to("XLABEL:");
						termIter != pIndex->termlist_end(docId); ++termIter)
					{
						// Is this a label ?
						if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
						{
							doc.remove_term(*termIter);
						}
					}
				}
			}

			// Set new labels
			for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
				++labelIter)
			{
				if (labelIter->empty() == false)
				{
					doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
				}
			}

			pIndex->replace_document(docId, doc);
			updatedLabels = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't update document's labels, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return updatedLabels;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:64,代码来源:XapianIndex.cpp

示例7: updateDocument

/// Updates the given document; true if success.
///
/// Rebuilds the Xapian document from the tokenizer's terms, the labels the
/// document currently has and the terms derived from its properties, then
/// replaces the indexed document with it.
///
/// @param docId   ID of the document to replace.
/// @param tokens  tokenizer over the new content; its document supplies the
///                title, location, type, language, timestamp and data.
/// @return true if the document was replaced in the index, false otherwise
///         (no document, no database, no data, index not writable, or an
///         exception).
bool XapianIndex::updateDocument(unsigned int docId, Tokenizer &tokens)
{
	unsigned int dataLength = 0;
	bool updated = false;
	// Whether writeLock() was called, so that unlock() is only used to balance it
	bool lockRequested = false;

	const Document *pDocument = tokens.getDocument();
	if (pDocument == NULL)
	{
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	const char *pData = pDocument->getData(dataLength);
	if (pData == NULL)
	{
		return false;
	}

	// Cache the document's properties
	DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(),
		pDocument->getType(), pDocument->getLanguage());
	docInfo.setTimestamp(pDocument->getTimestamp());
	docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation()));

	// Don't scan the document if a language is specified
	m_stemLanguage = Languages::toEnglish(pDocument->getLanguage());
	if (m_stemLanguage.empty() == true)
	{
		m_stemLanguage = scanDocument(pData, dataLength, docInfo);
	}

	try
	{
		set<string> labels;
		Xapian::Document doc;
		Xapian::termcount termPos = 0;

		// Add the tokenizer's terms to the document
		addPostingsToDocument(tokens, doc, "", termPos, m_stemMode);
		// Get the document's labels
		if (getDocumentLabels(docId, labels) == true)
		{
			// Add labels
			for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
				++labelIter)
			{
				doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
			}
		}
		if (addCommonTerms(docInfo, doc, termPos) == true)
		{
			setDocumentData(docInfo, doc, m_stemLanguage);

			// Flag first: from here on unlock() is owed, as it was before
			lockRequested = true;
			Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
			if (pIndex != NULL)
			{
				// Update the document in the database
				pIndex->replace_document(docId, doc);
				updated = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't update document: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't update document, unknown exception occured" << endl;
	}
	// Don't unlock what was never locked, e.g. when building the document threw
	if (lockRequested == true)
	{
		pDatabase->unlock();
	}

	return updated;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:81,代码来源:XapianIndex.cpp

示例8: db

bool
DatabaseWrite::rebuild (GList *cpt_list)
{
	string old_path = m_dbPath + "_old";
	string rebuild_path = m_dbPath + "_rb";
	string db_locale;

	// Create the rebuild directory
	if (g_mkdir_with_parents (rebuild_path.c_str (), 0755) != 0) {
		g_warning ("Unable to create database rebuild directory.");
		return false;
	}

	// check if old unrequired version of db still exists on filesystem
	if (g_file_test (old_path.c_str (), G_FILE_TEST_EXISTS)) {
		g_warning ("Existing xapian old db was not cleaned previously: '%s'.", old_path.c_str ());
		as_utils_delete_dir_recursive (old_path.c_str ());
	}

	// check if old unrequired version of db still exists on filesystem
	if (g_file_test (rebuild_path.c_str (), G_FILE_TEST_EXISTS)) {
		g_debug ("Removing old rebuild-dir from previous database rebuild.");
		as_utils_delete_dir_recursive (rebuild_path.c_str ());
	}

	Xapian::WritableDatabase db (rebuild_path, Xapian::DB_CREATE_OR_OVERWRITE);

	Xapian::TermGenerator term_generator;
	term_generator.set_database(db);
	try {
		/* this tests if we have spelling suggestions (there must be
		 * a better way?!?) - this is needed as inmemory does not have
		 * spelling corrections, but it allows setting the flag and will
		 * raise a exception much later
		 */
		db.add_spelling("test");
		db.remove_spelling("test");

		/* this enables the flag for it (we only reach this line if
		 * the db supports spelling suggestions)
		 */
		term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
	} catch (const Xapian::UnimplementedError &error) {
		// Ignore
	}

	for (GList *list = cpt_list; list != NULL; list = list->next) {
		AsComponent *cpt = (AsComponent*) list->data;

		Xapian::Document doc;
		term_generator.set_document (doc);

		doc.set_data (as_component_get_name (cpt));

		// Sanity check
		if (!as_component_has_install_candidate (cpt)) {
			g_warning ("Skipped component '%s' from inclusion into database: Does not have an installation candidate.",
					   as_component_get_id (cpt));
			continue;
		}

		// Package name
		gchar **pkgs = as_component_get_pkgnames (cpt);
		if (pkgs != NULL) {
			gchar *pkgs_cstr = g_strjoinv (";", pkgs);
			string pkgs_str = pkgs_cstr;
			doc.add_value (XapianValues::PKGNAMES, pkgs_str);
			g_free (pkgs_cstr);

			for (uint i = 0; pkgs[i] != NULL; i++) {
				string pkgname = pkgs[i];
				doc.add_term("AP" + pkgname);
				if (pkgname.find ("-") != string::npos) {
					// we need this to work around xapian oddness
					string tmp = pkgname;
					replace (tmp.begin (), tmp.end (), '-', '_');
					doc.add_term (tmp);
				}
				// add packagename as meta-data too
				term_generator.index_text_without_positions (pkgname, WEIGHT_PKGNAME);
			}
		}

		// Source package name
		const gchar *spkgname_cstr = as_component_get_source_pkgname (cpt);
		if (spkgname_cstr != NULL) {
			string spkgname = spkgname_cstr;
			doc.add_value (XapianValues::SOURCE_PKGNAME, spkgname);
			if (!spkgname.empty()) {
				doc.add_term("AP" + spkgname);
				if (spkgname.find ("-") != string::npos) {
					// we need this to work around xapian oddness
					string tmp = spkgname;
					replace (tmp.begin (), tmp.end (), '-', '_');
					doc.add_term (tmp);
				}
				// add packagename as meta-data too
				term_generator.index_text_without_positions (spkgname, WEIGHT_PKGNAME);
			}
		}
//.........这里部分代码省略.........
开发者ID:ebassi,项目名称:appstream,代码行数:101,代码来源:database-write.cpp


注:本文中的Xapian::Document::add_term方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。