C++ Xml::set方法代码示例

本文整理汇总了C++中Xml::set方法的典型用法代码示例。如果您正苦于以下问题：C++ Xml::set方法的具体用法？C++ Xml::set怎么用？C++ Xml::set使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xml的用法示例。

在下文中一共展示了Xml::set方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: sizeof

TEST( XmlTest, MetaDescription) {
	const char* input_strs[] =  {
	    // valid
	    "totally valid description",
	    "“inside special quotes” and outside",

	    // invalid
	    "my \"invalid\" double quote description",
	    "\"someone has quotes\", and nobody else has it"
	    "'my 'invalid' single quote description'",
	    "it's a description",
	    "what is this quote \" doing here?"
	};

	const char* format_strs[] = {
	    "<meta name=\"description\" content=\"%s\">",
	    "<meta name=\"description\" content='%s'>",
	    "<meta name=\"description\" content=\"%s\" ng-attr-content=\"{{meta.description}}\">",
	    "<meta name=\"description\" content='%s' ng-attr-content=\"{{meta.description}}\" >",
	    "<meta name=\"description\" ng-attr-content=\"{{meta.description}}\" content=\"%s\">",
	    "<meta name=\"description\" ng-attr-content=\"{{meta.description}}\" content='%s'>",
	    "<meta name=\"description\" content=\"%s\" other-content=\"%s\">",
	    "<meta name=\"description\" content='%s' other-content='%s'>",
	    "<meta content=\"%s\" name=\"description\">",
	    "<meta content='%s' name=\"description\">",
	    "<meta name=\"description\" other-content=\"%s\" content=\"%s\">",
	    "<meta name=\"description\" other-content='%s' content='%s'>"
	};

	size_t len = sizeof( input_strs ) / sizeof( input_strs[0] );
	size_t format_len = sizeof( format_strs ) / sizeof( format_strs[0] );

	for ( size_t i = 0; i < len; i++ ) {
		for (size_t j = 0; j < format_len; j++) {
			const char *input_str = input_strs[i];

			char desc[MAX_BUF_SIZE];
			std::sprintf(desc, format_strs[j], input_str, input_str);

			char input[MAX_BUF_SIZE];
			std::sprintf(input, HTML_HEAD_FORMAT, desc);

			Xml xml;
			ASSERT_TRUE(xml.set(input, strlen(input), 0, CT_HTML));

			char buf[MAX_BUF_SIZE];
			int32_t bufLen = MAX_BUF_SIZE;
			int32_t contentLen = 0;

			ASSERT_TRUE(xml.getTagContent("name", "description", buf, bufLen, 0, bufLen, &contentLen, false, TAG_META));
			EXPECT_EQ(strlen(input_str), contentLen);
			EXPECT_STREQ(input_str, buf);
		}
	}
}

开发者ID:privacore，项目名称:open-source-search-engine，代码行数:55，代码来源:XmlTest.cpp

示例2: parse_doc_icu

void parse_doc_icu(char *s, int len, bool doHash, char *charset){
	Xml xml;
	xml.set( s, len, TITLEREC_CURRENT_VERSION, 0, CT_HTML );

	// Extract text from (x)html
	char *text_buf = (char*)malloc(64*1024);
	int32_t textLen = xml.getText( text_buf, 64 * 1024, 0, 99999999, doFilterSpaces );
	Words w;
	w.set(text_buf, textLen, doHash);
	free(text_buf);
}

开发者ID:exename，项目名称:open-source-search-engine，代码行数:11，代码来源:test_parser.cpp

示例3: parse_doc_8859_1

void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
{
	Xml xml;
	xml.set( s, len, TITLEREC_CURRENT_VERSION, 0, CT_HTML );

	// Extract text from (x)html
	char *text_buf = (char*)malloc(len+1);
	xml.getText( text_buf, len, 0, 99999999, doFilterSpaces );
	Words words;

	// just tokenize words
	words.set(text_buf, len, doHash);
	free(text_buf);
}

开发者ID:exename，项目名称:open-source-search-engine，代码行数:14，代码来源:test_parser.cpp

示例4: than

TEST( XmlTest, MetaDescriptionStripTags) {
	const char* input_strs[] =  {
	    "my title<br> my <b>very important</b> text",
	    "Lesser than (<) and greater than (>).",
	    "We shouldn't strip <3 out",
	    "123 < 1234; 1234 > 123",
	    "<p style='text-align: center;'>A color cartoon drawing of a clapping cod fish ( rebus in the danish language for klaptorsk )</p>"
	};

	const char* expected_outputs[] = {
	    "my title. my very important text",
	    "Lesser than (<) and greater than (>).",
	    "We shouldn't strip <3 out",
	    "123 < 1234; 1234 > 123",
	    "A color cartoon drawing of a clapping cod fish ( rebus in the danish language for klaptorsk ). "
	};

	const char* format_str = "<meta name=\"description\" content=\"%s\">";

	size_t len = sizeof( input_strs ) / sizeof( input_strs[0] );

	ASSERT_EQ(sizeof(input_strs)/sizeof(input_strs[0]), sizeof(expected_outputs)/sizeof(expected_outputs[0]));

	for ( size_t i = 0; i < len; i++ ) {
		const char *input_str = input_strs[i];
		const char *output_str = expected_outputs[i];

		char desc[MAX_BUF_SIZE];
		std::sprintf(desc, format_str, input_str, input_str);

		char input[MAX_BUF_SIZE];
		std::sprintf(input, HTML_HEAD_FORMAT, desc);

		Xml xml;
		ASSERT_TRUE(xml.set(input, strlen(input), 0, CT_HTML));

		char buf[MAX_BUF_SIZE];
		int32_t bufLen = MAX_BUF_SIZE;
		int32_t contentLen = 0;

		ASSERT_TRUE(xml.getTagContent("name", "description", buf, bufLen, 0, bufLen, &contentLen, false, TAG_META));
		EXPECT_EQ(strlen(output_str), contentLen);
		EXPECT_STREQ(output_str, buf);
	}
}

开发者ID:privacore，项目名称:open-source-search-engine，代码行数:45，代码来源:XmlTest.cpp

示例5: parse_doc_icu

void parse_doc_icu(char *s, int len, bool doHash, char *charset){
	Xml xml;
	xml.set(csUTF8,s,len,false, 0,false, TITLEREC_CURRENT_VERSION);
	//fprintf(stderr,"\nparse_doc_icu\n");	
	// Extract text from (x)html
	char *text_buf = (char*)malloc(64*1024);
	long textLen = xml.getText(text_buf, 
				   64*1024, 
				   0,
				   99999999,
				   false,
				   true,
				   false,
				   doFilterSpaces,
				   false);
	Words w;
	w.set(true,false, text_buf, textLen, TITLEREC_CURRENT_VERSION,doHash);
	free(text_buf);
}

开发者ID:BILObilo，项目名称:open-source-search-engine，代码行数:19，代码来源:test_parser.cpp

示例6: generateSummary

static void generateSummary( Summary &summary, char *htmlInput, const char *queryStr, const char *urlStr ) {
	Xml xml;
	ASSERT_TRUE(xml.set(htmlInput, strlen(htmlInput), 0, CT_HTML));

	Words words;
	ASSERT_TRUE(words.set(&xml, true));

	Bits bits;
	ASSERT_TRUE(bits.set(&words));

	Url url;
	url.set(urlStr);

	Sections sections;
	ASSERT_TRUE(sections.set(&words, &bits, &url, "", CT_HTML));

	Query query;
	ASSERT_TRUE(query.set2(queryStr, langEnglish, true));

	LinkInfo linkInfo;
	memset ( &linkInfo , 0 , sizeof(LinkInfo) );
	linkInfo.m_lisize = sizeof(LinkInfo);

	Title title;
	ASSERT_TRUE(title.setTitle(&xml, &words, 80, &query, &linkInfo, &url, NULL, 0, CT_HTML, langEnglish));

	Pos pos;
	ASSERT_TRUE(pos.set(&words));

	Bits bitsForSummary;
	ASSERT_TRUE(bitsForSummary.setForSummary(&words));

	Phrases phrases;
	ASSERT_TRUE(phrases.set(&words, &bits));

	Matches matches;
	matches.setQuery(&query);
	ASSERT_TRUE(matches.set(&words, &phrases, &sections, &bitsForSummary, &pos, &xml, &title, &url, &linkInfo));

	summary.setSummary(&xml, &words, &sections, &pos, &query, 180, 3, 3, 180, &url, &matches, title.getTitle(), title.getTitleLen());
}

开发者ID:privacore，项目名称:open-source-search-engine，代码行数:41，代码来源:SummaryTest.cpp

示例7: parse_doc_8859_1

void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
{
	Xml xml;
	xml.set(csASCII,s,len,false, 0, false, TITLEREC_CURRENT_VERSION);
	//fprintf(stderr,"\nparse_doc_8859_1\n");

	// Extract text from (x)html
	char *text_buf = (char*)malloc(len+1);
	xml.getText(text_buf, 
		    len, 
		    0,
		    99999999,
		    false,
		    true,
		    false,
		    doFilterSpaces,
		    false);
	Words words;

	// just tokenize words
	words.set(false, text_buf, TITEREC_CURRENT_VERSION, doHash);
	free(text_buf);
}

开发者ID:BILObilo，项目名称:open-source-search-engine，代码行数:23，代码来源:test_parser.cpp

示例8: processLoop

// returns false if blocked, true otherwise
bool processLoop ( void *state ) {
	// get it
	State2 *st = (State2 *)state;
	// get the tcp socket from the state
	TcpSocket *s = st->m_socket;
	// get it
	XmlDoc *xd = &st->m_xd;

	if ( ! xd->m_loaded ) {
		// setting just the docid. niceness is 0.
		//xd->set3 ( st->m_docId , st->m_coll , 0 );
		// callback
		xd->setCallback ( state , processLoop );
		// . and tell it to load from the old title rec
		// . this sets xd->m_oldTitleRec/m_oldTitleRecSize
		// . this sets xd->ptr_* and all other member vars from
		//   the old title rec if found in titledb.
		if ( ! xd->loadFromOldTitleRec ( ) ) return false;
	}

	if ( g_errno ) return sendErrorReply ( st , g_errno );
	// now force it to load old title rec
	//char **tr = xd->getTitleRec();
	SafeBuf *tr = xd->getTitleRecBuf();
	// blocked? return false if so. it will call processLoop() when it rets
	if ( tr == (void *)-1 ) return false;
	// we did not block. check for error? this will free "st" too.
	if ( ! tr ) return sendErrorReply ( st , g_errno );
	// if title rec was empty, that is a problem
	if ( xd->m_titleRecBuf.length() == 0 ) 
		return sendErrorReply ( st , ENOTFOUND);

	// set callback
	char *na = xd->getIsNoArchive();
	// wait if blocked
	if ( na == (void *)-1 ) return false;
	// error?
	if ( ! na ) return sendErrorReply ( st , g_errno );
	// forbidden? allow turkeys through though...
	if ( ! st->m_isAdmin && *na )
		return sendErrorReply ( st , ENOCACHE );

	SafeBuf *sb = &st->m_sb;


	// &page=4 will print rainbow sections
	if ( ! st->m_printed && st->m_r.getLong("page",0) ) {
		// do not repeat this call
		st->m_printed = true;
		// this will call us again since we called
		// xd->setCallback() above to us
		if ( ! xd->printDocForProCog ( sb , &st->m_r ) )
			return false;
	}

	char *contentType = "text/html";
	char format = st->m_format;
	if ( format == FORMAT_XML ) contentType = "text/xml";
	if ( format == FORMAT_JSON ) contentType = "application/json";

	// if we printed a special page (like rainbow sections) then return now
	if ( st->m_printed ) {
		bool status = g_httpServer.sendDynamicPage (s,
							    //buf,bufLen,
							    sb->getBufStart(),
							    sb->getLength(),
							    -1,false,
							    //"text/html",
							    contentType,
							    -1, NULL, "utf8" );
		// nuke state2
		mdelete ( st , sizeof(State2) , "PageGet1" );
		delete (st);
		return status;
	}

	/*
	  // this was calling XmlDoc and setting sections, etc. to
	  // get the SpiderReply junk... no no no
	// is it banned or filtered? this ignores the TagRec in the titleRec
	// and uses msg8a to get it fresh instead
	char *vi = xd->getIsFiltered();//Visible( );
	// wait if blocked
	if ( vi == (void *)-1 ) return false;
	// error?
	if ( ! vi ) return sendErrorReply ( st , g_errno );
	// banned?
	if ( ! st->m_isAdmin && ! *vi ) return sendErrorReply (st,EDOCBANNED);
	*/

	// get the utf8 content
	char **utf8 = xd->getUtf8Content();
	//long   len  = xd->size_utf8Content - 1;
	// wait if blocked???
	if ( utf8 == (void *)-1 ) return false;
	// strange
	if ( xd->size_utf8Content<=0) {
		log("pageget: utf8 content <= 0");
		return sendErrorReply(st,EBADENGINEER );
//.........这里部分代码省略.........

开发者ID:firatkarakusoglu，项目名称:open-source-search-engine，代码行数:101，代码来源:PageGet.cpp

示例9: parse

void DataFeed::parse ( char *dataFeedPage,
		       long  dataFeedPageLen ) {
	// use Xml Class to parse up the page
	Xml xml;
	xml.set ( csUTF8, dataFeedPage, dataFeedPageLen, false, 0, false,
		  TITLEREC_CURRENT_VERSION );
	// get the nodes
	long numNodes  = xml.getNumNodes();
	XmlNode *nodes = xml.getNodes();
	// to count the tiers, result levels, and level costs
	long currTier = 0;
	long currResultLevel = 0;
	long currLevelCost = 0;
	// pull out the keywords for the data feed
	for (long i = 0; i < numNodes; i++) {
		// skip if this isn't a meta tag, shouldn't happen
		if (nodes[i].m_nodeId != 68)
			continue;
		// get the meta tag name
		//long tagLen;
		//char *tag = xml.getString(i, "name", &tagLen);
		long  ucTagLen;
		char *ucTag = xml.getString(i, "name", &ucTagLen);
		char tag[256];
		long tagLen = utf16ToLatin1 ( tag, 256,
					      (UChar*)ucTag, ucTagLen>>1 );
		// skip if empty
		if (!tag || tagLen <= 0)
			continue;
		// get the content
		long ucConLen;
		char *ucCon = xml.getString(i, "content", &ucConLen);
		char con[1024];
		long conLen = utf16ToLatin1 ( con, 1024,
					      (UChar*)ucCon, ucConLen>>1 );
		if (!con || conLen <= 0)
			continue;
		// match the meta tag to its local var and copy content
		if (tagLen == 10 && strncasecmp(tag, "customerid", 10) == 0)
			m_customerId = atoll(con);
		else if (tagLen == 11 && strncasecmp(tag, "datafeedurl", 11) == 0)
			setUrl(con, conLen);
		else if (tagLen == 8 && strncasecmp(tag, "passcode", 8) == 0)
			m_passcodeLen = setstr(m_passcode, MAX_PASSCODELEN, con, conLen);
		else if (tagLen == 6 && strncasecmp(tag, "status", 6) == 0)
			m_isActive = (bool)atoi(con);
		else if (tagLen == 6 && strncasecmp(tag, "locked", 6) == 0)
			m_isLocked = (bool)atoi(con);
		else if (tagLen == 14 && 
			 strncasecmp(tag, "dfcreationtime", 14) == 0)
			m_creationTime = atol(con);
		else if (tagLen == 8 && strncasecmp(tag, "numtiers", 8) == 0)
			m_priceTable.m_numTiers = atol(con);
		else if (tagLen == 15 && strncasecmp(tag, "numresultlevels", 15) == 0)
			m_priceTable.m_numResultLevels = atol(con);
		else if (tagLen == 10 && strncasecmp(tag, "monthlyfee", 10) == 0)
			m_priceTable.m_monthlyFee = atol(con);
		else if (tagLen == 7 && strncasecmp(tag, "tiermax", 7) == 0) {
			m_priceTable.m_tierMax[currTier] = (unsigned long)atol(con);
			currTier++;
		}
		else if (tagLen == 11 && strncasecmp(tag, "resultlevel", 11) == 0) {
			m_priceTable.m_resultLevels[currResultLevel] = (unsigned long)atol(con);
			currResultLevel++;
		}
		else if (tagLen == 9 && strncasecmp(tag, "levelcost", 9) == 0) {
			m_priceTable.m_levelCosts[currLevelCost] = (unsigned long)atol(con);
			currLevelCost++;
		}
		else
			log(LOG_INFO, "datafeed: Invalid Meta Tag Parsed [%li]:"
			    " %s", tagLen, tag);
	}
}

开发者ID:BILObilo，项目名称:open-source-search-engine，代码行数:74，代码来源:DataFeed.cpp

示例10: strstr

void Blaster::gotDoc4 ( void *state, TcpSocket *s){
	StateBD *st=(StateBD *)state;
	st->m_numUrlDocsReceived++;
	if (!s) {
		//Shouldn't happen, but still putting a checkpoint
		log (LOG_WARN,"blaster: Got a null s in gotDoc4."
		     "Happened because ip could not be found for gigablast"
		     "server");
		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
			m_launched--;
			// Free stateBD
			freeStateBD(st);
		}
		return;
	}
	// bail if got cut off
	if ( s->m_readOffset == 0 ) {
		log("blasterDiff : lost the Request in gotDoc4");
		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
			m_launched--;
			freeStateBD(st);
		}
		return;
	}
	char *reply = s->m_readBuf ;
	long  size  = s->m_readOffset;
	HttpMime mime;
	mime.set ( reply , size , NULL );
	char *content    = reply + mime.getMimeLen();
	long  contentLen = size  - mime.getMimeLen();

	//short csEnum = get_iana_charset(mime.getCharset(), 
	//				mime.getCharsetLen());
	/*	if (csEnum == csUnknown)
		log(LOG_DEBUG, "blaster: Unknown charset : %s", mime.getCharset());*/
	
	Xml xml;
	if (!xml.set(
		     content, 
		     contentLen,
		     false,
		     0,
		     false,
		     TITLEREC_CURRENT_VERSION)){
		log(LOG_WARN,"blaster: Couldn't set XML Class in gotDoc4");
	}
	Links links;
	Url *url=mime.getLocationUrl();
	if (!links.set(0,//siterec xml
		       &xml,
		       url,
		       false,
		       NULL,
		       TITLEREC_CURRENT_VERSION,
		       0,
		       false,
		       NULL)){
		log(LOG_WARN, "blaster: Coudn't set Links class in gotDoc4");
	}
	for (long i=0;i<links.getNumLinks();i++){
		char *ss=links.getLink(i);
		char *p;
		// This page *should* always be a gigablast page. So not adding
		// checks for msn or yahoo or google page.
		p=strstr(ss,"google.");
		if(p) continue;
		p=strstr(ss,"cache:");  //googles cache page
		if(p) continue;
		p= strstr(ss,"gigablast.");
		if(p) continue;
		p= strstr(ss,"web.archive.org");//older copies on gigablast
		if(p) continue;
		p= strstr(ss,"search.yahoo.com");//from gigablast search
		if(p) continue;
		p= strstr(ss,"search.msn.com");//from gigablast search
		if(p) continue;
		p= strstr(ss,"s.teoma.com");//from gigablast search
		if(p) continue;
		p= strstr(ss,"search.dmoz.org");//from gigablast search
		if(p) continue;
		p= strstr(ss,"www.answers.com");//from gigablast search
		if(p) continue;
       		if (m_verbose)
			log(LOG_WARN,"blaster: Link Present on server2=%s",ss);
	}
	
	// So if one of the links that is returned is the exact url,
	// then we know that the url is present.So get the url from the
	// mime, search for it in the links that are returned.
	char tmp[1024];
	char *sendBuf=s->m_sendBuf;
	char *p1,*p2;

	// First get the Host, which is the domain. Since socket s is going to
	// be useless after this function, changing m_sendBuf instead of using 
	// more space
	p1=strstr(sendBuf,"%3A");
	if(p1){
		p1+=3;
		p2=strstr(p1," HTTP");
//.........这里部分代码省略.........

开发者ID:alvinlai，项目名称:open-source-search-engine，代码行数:101，代码来源:Blaster.cpp

示例11: stripHtml

// returns length of stripped content, but will set g_errno and return -1
// on error
int32_t stripHtml( char *content, int32_t contentLen, int32_t version, int32_t strip ) {
	if ( !strip ) {
		log( LOG_WARN, "query: html stripping not required!" );
		return contentLen;
	}
	if ( ! content )
		return 0;
	if ( contentLen == 0 )
		return 0;

	// filter content if we should
	// keep this on the big stack so "content" still references something
	Xml tmpXml;
	// . get the content as xhtml (should be NULL terminated)
	// . parse as utf8 since all we are doing is messing with 
	//   the tags...content manipulation comes later
	if ( !tmpXml.set( content, contentLen, version, CT_HTML ) ) {
		return -1;
	}

	//if( strip == 4 )
	//	return tmpXml.getText( content, contentLen );

	// go tag by tag
	int32_t     n       = tmpXml.getNumNodes();
	XmlNode *nodes   = tmpXml.getNodes();
	// Xml class may have converted to utf16
	content    = tmpXml.getContent();
	contentLen = tmpXml.getContentLen();
	char    *x       = content;
	char    *xend    = content + contentLen;
	int32_t     stackid = -1;
	int32_t     stackc  =  0;
	char     skipIt  =  0;
	// . hack COL tag to NOT require a back tag
	// . do not leave it that way as it could mess up our parsing
	//g_nodes[25].m_hasBackTag = 0;
	for ( int32_t i = 0 ; i < n ; i++ ) {
		// get id of this node
		int32_t id = nodes[i].m_nodeId;
		
		// if strip is 4, just remove the script tag
		if( strip == 4 ){
			if ( id ){
				if ( id == TAG_SCRIPT ){
					skipIt ^= 1;
					continue;
				}
			}
			else if ( skipIt ) continue;
			goto keepit;
		}
		
		// if strip is 3, ALL tags will be removed!
		if( strip == 3 ) {
			if( id ) {
				// . we dont want anything in between:
				//   - script tags (83)
				//   - style tags  (111)
				if ((id == TAG_SCRIPT) || (id == TAG_STYLE)) skipIt ^= 1;
				// save img to have alt text kept.
				if ( id == TAG_IMG  ) goto keepit;
				continue;
			}
			else {
				if( skipIt ) continue;
				goto keepit;
			}
		}
		// get it
		int32_t fk;
		if   ( strip == 1 ) fk = g_nodes[id].m_filterKeep1;
		else                fk = g_nodes[id].m_filterKeep2;
		// if tag is <link ...> only keep it if it has
		// rel="stylesheet" or rel=stylesheet
		if ( strip == 2 && id == TAG_LINK ) { // <link> tag id
			int32_t   fflen;
			char *ff = nodes[i].getFieldValue ( "rel" , &fflen );
			if ( ff && fflen == 10 &&
			     strncmp(ff,"stylesheet",10) == 0 )
				goto keepit;
		}
		// just remove just the tag if this is 2
		if ( fk == 2 ) continue;
		// keep it if not in a stack
		if ( ! stackc && fk ) goto keepit;
		// if no front/back for tag, just skip it
		if ( ! nodes[i].m_hasBackTag ) continue;
		// start stack if none
		if ( stackc == 0 ) {
			// but not if this is a back tag
			if ( nodes[i].m_node[1] == '/' ) continue;
			// now start the stack
			stackid = id;
			stackc  =  1;
			continue;
		}
		// skip if this tag does not match what is on stack
//.........这里部分代码省略.........

开发者ID:privacore，项目名称:open-source-search-engine，代码行数:101，代码来源:fctypes.cpp

注：本文中的Xml::set方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。