本文整理汇总了C++中Xml::set方法的典型用法代码示例。如果您正苦于以下问题:C++ Xml::set方法的具体用法?C++ Xml::set怎么用?C++ Xml::set使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Xml的用法示例。
在下文中一共展示了Xml::set方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: TEST(XmlTest, MetaDescription)
// Checks that Xml::getTagContent() extracts the meta description for every
// pairing of an input string with a <meta> tag layout: single- or
// double-quoted content, attributes in either order, and neighbouring
// attributes that carry the same text.
TEST( XmlTest, MetaDescription) {
	const char* input_strs[] = {
		// valid
		"totally valid description",
		"“inside special quotes” and outside",
		// invalid
		"my \"invalid\" double quote description",
		// The comma after the next literal was missing, which silently
		// concatenated it with the following one into a single test input.
		"\"someone has quotes\", and nobody else has it",
		"'my 'invalid' single quote description'",
		"it's a description",
		"what is this quote \" doing here?"
	};
	const char* format_strs[] = {
		"<meta name=\"description\" content=\"%s\">",
		"<meta name=\"description\" content='%s'>",
		"<meta name=\"description\" content=\"%s\" ng-attr-content=\"{{meta.description}}\">",
		"<meta name=\"description\" content='%s' ng-attr-content=\"{{meta.description}}\" >",
		"<meta name=\"description\" ng-attr-content=\"{{meta.description}}\" content=\"%s\">",
		"<meta name=\"description\" ng-attr-content=\"{{meta.description}}\" content='%s'>",
		"<meta name=\"description\" content=\"%s\" other-content=\"%s\">",
		"<meta name=\"description\" content='%s' other-content='%s'>",
		"<meta content=\"%s\" name=\"description\">",
		"<meta content='%s' name=\"description\">",
		"<meta name=\"description\" other-content=\"%s\" content=\"%s\">",
		"<meta name=\"description\" other-content='%s' content='%s'>"
	};
	size_t len = sizeof( input_strs ) / sizeof( input_strs[0] );
	size_t format_len = sizeof( format_strs ) / sizeof( format_strs[0] );
	for ( size_t i = 0; i < len; i++ ) {
		for (size_t j = 0; j < format_len; j++) {
			const char *input_str = input_strs[i];

			// Formats contain one or two %s, so the input is supplied twice.
			// snprintf bounds the write; a truncated document would make the
			// assertions below meaningless, so fail early instead.
			char desc[MAX_BUF_SIZE];
			int descLen = std::snprintf(desc, sizeof(desc), format_strs[j], input_str, input_str);
			ASSERT_GE(descLen, 0);
			ASSERT_LT(static_cast<size_t>(descLen), sizeof(desc));

			char input[MAX_BUF_SIZE];
			int inputLen = std::snprintf(input, sizeof(input), HTML_HEAD_FORMAT, desc);
			ASSERT_GE(inputLen, 0);
			ASSERT_LT(static_cast<size_t>(inputLen), sizeof(input));

			Xml xml;
			ASSERT_TRUE(xml.set(input, strlen(input), 0, CT_HTML));

			char buf[MAX_BUF_SIZE];
			int32_t bufLen = MAX_BUF_SIZE;
			int32_t contentLen = 0;
			ASSERT_TRUE(xml.getTagContent("name", "description", buf, bufLen, 0, bufLen, &contentLen, false, TAG_META));

			// The extracted content must be the input, unmodified.
			EXPECT_EQ(strlen(input_str), contentLen);
			EXPECT_STREQ(input_str, buf);
		}
	}
}
示例2: parse_doc_icu
// Parses the (x)html document in s[0..len) with Xml, extracts its text into a
// temporary 64KB buffer and tokenizes that text with Words.
//   s, len  - document bytes and their length
//   doHash  - forwarded to Words::set()
//   charset - not used by this function
// Nothing is returned; the Xml and Words objects are local to the call.
void parse_doc_icu(char *s, int len, bool doHash, char *charset){
	Xml xml;
	xml.set( s, len, TITLEREC_CURRENT_VERSION, 0, CT_HTML );

	// Extract text from (x)html
	const int textBufSize = 64 * 1024;
	char *text_buf = (char*)malloc(textBufSize);
	// bail out instead of handing a NULL buffer to getText()
	if ( ! text_buf ) return;
	int32_t textLen = xml.getText( text_buf, textBufSize, 0, 99999999, doFilterSpaces );

	// tokenize exactly the bytes getText() produced
	Words w;
	w.set(text_buf, textLen, doHash);
	free(text_buf);
}
示例3: parse_doc_8859_1
// Parses the (x)html document in s[0..len) with Xml, extracts its text into a
// temporary buffer of len+1 bytes and tokenizes that text with Words.
//   s, len  - document bytes and their length
//   doHash  - forwarded to Words::set()
//   charset - not used by this function
void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
{
	Xml xml;
	xml.set( s, len, TITLEREC_CURRENT_VERSION, 0, CT_HTML );

	// Extract text from (x)html
	char *text_buf = (char*)malloc(len+1);
	// bail out instead of handing a NULL buffer to getText()
	if ( ! text_buf ) return;
	// Keep the length getText() reports: passing the raw document length
	// "len" to Words::set() would make it read bytes of the buffer that
	// getText() never wrote.
	int32_t textLen = xml.getText( text_buf, len, 0, 99999999, doFilterSpaces );

	Words words;
	// just tokenize words
	words.set(text_buf, textLen, doHash);
	free(text_buf);
}
示例4: TEST(XmlTest, MetaDescriptionStripTags)
// Checks the text Xml::getTagContent() returns for meta descriptions that
// contain markup or bare '<' / '>' characters. input_strs[i] is placed in the
// content attribute and the result is compared against expected_outputs[i].
TEST( XmlTest, MetaDescriptionStripTags) {
	const char* input_strs[] = {
		"my title<br> my <b>very important</b> text",
		"Lesser than (<) and greater than (>).",
		"We shouldn't strip <3 out",
		"123 < 1234; 1234 > 123",
		"<p style='text-align: center;'>A color cartoon drawing of a clapping cod fish ( rebus in the danish language for klaptorsk )</p>"
	};
	const char* expected_outputs[] = {
		"my title. my very important text",
		"Lesser than (<) and greater than (>).",
		"We shouldn't strip <3 out",
		"123 < 1234; 1234 > 123",
		"A color cartoon drawing of a clapping cod fish ( rebus in the danish language for klaptorsk ). "
	};
	const char* format_str = "<meta name=\"description\" content=\"%s\">";

	// both tables must stay in lock-step
	size_t len = sizeof( input_strs ) / sizeof( input_strs[0] );
	size_t expected_len = sizeof( expected_outputs ) / sizeof( expected_outputs[0] );
	ASSERT_EQ(len, expected_len);

	for ( size_t i = 0; i < len; i++ ) {
		const char *input_str = input_strs[i];
		const char *output_str = expected_outputs[i];

		// format_str has a single %s; snprintf bounds the write, and a
		// truncated document fails the test right here.
		char desc[MAX_BUF_SIZE];
		int descLen = std::snprintf(desc, sizeof(desc), format_str, input_str);
		ASSERT_GE(descLen, 0);
		ASSERT_LT(static_cast<size_t>(descLen), sizeof(desc));

		char input[MAX_BUF_SIZE];
		int inputLen = std::snprintf(input, sizeof(input), HTML_HEAD_FORMAT, desc);
		ASSERT_GE(inputLen, 0);
		ASSERT_LT(static_cast<size_t>(inputLen), sizeof(input));

		Xml xml;
		ASSERT_TRUE(xml.set(input, strlen(input), 0, CT_HTML));

		char buf[MAX_BUF_SIZE];
		int32_t bufLen = MAX_BUF_SIZE;
		int32_t contentLen = 0;
		ASSERT_TRUE(xml.getTagContent("name", "description", buf, bufLen, 0, bufLen, &contentLen, false, TAG_META));

		EXPECT_EQ(strlen(output_str), contentLen);
		EXPECT_STREQ(output_str, buf);
	}
}
示例5: parse_doc_icu
// Parses the document in s[0..len) with Xml (csUTF8 is passed as the first
// argument of Xml::set), extracts its text into a temporary 64KB buffer and
// tokenizes that text with Words.
//   s, len  - document bytes and their length
//   doHash  - forwarded to Words::set()
//   charset - not used by this function
void parse_doc_icu(char *s, int len, bool doHash, char *charset){
	Xml xml;
	xml.set(csUTF8,s,len,false, 0,false, TITLEREC_CURRENT_VERSION);
	//fprintf(stderr,"\nparse_doc_icu\n");

	// Extract text from (x)html
	const int textBufSize = 64*1024;
	char *text_buf = (char*)malloc(textBufSize);
	// bail out instead of handing a NULL buffer to getText()
	if ( ! text_buf ) return;
	long textLen = xml.getText(text_buf,
				   textBufSize,
				   0,
				   99999999,
				   false,
				   true,
				   false,
				   doFilterSpaces,
				   false);

	// tokenize exactly the bytes getText() produced
	Words w;
	w.set(true,false, text_buf, textLen, TITLEREC_CURRENT_VERSION,doHash);
	free(text_buf);
}
示例6: generateSummary
// Test helper: builds, step by step, every object Summary::setSummary() takes
// (Xml, Words, Bits, Url, Sections, Query, LinkInfo, Title, Pos, Phrases,
// Matches) from the given HTML, query and url, then fills in "summary".
// Each fallible setup step is wrapped in ASSERT_TRUE, so a failure aborts the
// helper before setSummary() is reached.
//   summary   - output, populated by setSummary()
//   htmlInput - NUL-terminated HTML document (its length is taken via strlen)
//   queryStr  - query text handed to Query::set2()
//   urlStr    - url handed to Url::set()
static void generateSummary( Summary &summary, char *htmlInput, const char *queryStr, const char *urlStr ) {
	Xml xml;
	ASSERT_TRUE(xml.set(htmlInput, strlen(htmlInput), 0, CT_HTML));

	Words words;
	ASSERT_TRUE(words.set(&xml, true));

	Bits bits;
	ASSERT_TRUE(bits.set(&words));

	Url url;
	url.set(urlStr);

	Sections sections;
	ASSERT_TRUE(sections.set(&words, &bits, &url, "", CT_HTML));

	Query query;
	ASSERT_TRUE(query.set2(queryStr, langEnglish, true));

	// zeroed LinkInfo with only its size field filled in
	LinkInfo linkInfo;
	memset ( &linkInfo , 0 , sizeof(LinkInfo) );
	linkInfo.m_lisize = sizeof(LinkInfo);

	Title title;
	ASSERT_TRUE(title.setTitle(&xml, &words, 80, &query, &linkInfo, &url, NULL, 0, CT_HTML, langEnglish));

	Pos pos;
	ASSERT_TRUE(pos.set(&words));

	Bits bitsForSummary;
	ASSERT_TRUE(bitsForSummary.setForSummary(&words));

	Phrases phrases;
	ASSERT_TRUE(phrases.set(&words, &bits));

	Matches matches;
	matches.setQuery(&query);
	// "&sections" had been mangled into a section-sign character by an HTML
	// entity decoder in the two calls below; restored here.
	ASSERT_TRUE(matches.set(&words, &phrases, &sections, &bitsForSummary, &pos, &xml, &title, &url, &linkInfo));

	summary.setSummary(&xml, &words, &sections, &pos, &query, 180, 3, 3, 180, &url, &matches, title.getTitle(), title.getTitleLen());
}
示例7: parse_doc_8859_1
// Parses the document in s[0..len) with Xml (csASCII is passed as the first
// argument of Xml::set), extracts its text into a temporary buffer of len+1
// bytes and tokenizes that text with Words.
//   s, len  - document bytes and their length
//   doHash  - forwarded to Words::set()
//   charset - not used by this function
void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
{
	Xml xml;
	xml.set(csASCII,s,len,false, 0, false, TITLEREC_CURRENT_VERSION);
	//fprintf(stderr,"\nparse_doc_8859_1\n");

	// Extract text from (x)html
	char *text_buf = (char*)malloc(len+1);
	// bail out instead of handing a NULL buffer to getText()
	if ( ! text_buf ) return;
	xml.getText(text_buf,
		    len,
		    0,
		    99999999,
		    false,
		    true,
		    false,
		    doFilterSpaces,
		    false);

	Words words;
	// just tokenize words
	// (version constant was misspelled TITEREC_CURRENT_VERSION here)
	words.set(false, text_buf, TITLEREC_CURRENT_VERSION, doHash);
	free(text_buf);
}
示例8: processLoop
// returns false if blocked, true otherwise
bool processLoop ( void *state ) {
// get it
State2 *st = (State2 *)state;
// get the tcp socket from the state
TcpSocket *s = st->m_socket;
// get it
XmlDoc *xd = &st->m_xd;
if ( ! xd->m_loaded ) {
// setting just the docid. niceness is 0.
//xd->set3 ( st->m_docId , st->m_coll , 0 );
// callback
xd->setCallback ( state , processLoop );
// . and tell it to load from the old title rec
// . this sets xd->m_oldTitleRec/m_oldTitleRecSize
// . this sets xd->ptr_* and all other member vars from
// the old title rec if found in titledb.
if ( ! xd->loadFromOldTitleRec ( ) ) return false;
}
if ( g_errno ) return sendErrorReply ( st , g_errno );
// now force it to load old title rec
//char **tr = xd->getTitleRec();
SafeBuf *tr = xd->getTitleRecBuf();
// blocked? return false if so. it will call processLoop() when it rets
if ( tr == (void *)-1 ) return false;
// we did not block. check for error? this will free "st" too.
if ( ! tr ) return sendErrorReply ( st , g_errno );
// if title rec was empty, that is a problem
if ( xd->m_titleRecBuf.length() == 0 )
return sendErrorReply ( st , ENOTFOUND);
// set callback
char *na = xd->getIsNoArchive();
// wait if blocked
if ( na == (void *)-1 ) return false;
// error?
if ( ! na ) return sendErrorReply ( st , g_errno );
// forbidden? allow turkeys through though...
if ( ! st->m_isAdmin && *na )
return sendErrorReply ( st , ENOCACHE );
SafeBuf *sb = &st->m_sb;
// &page=4 will print rainbow sections
if ( ! st->m_printed && st->m_r.getLong("page",0) ) {
// do not repeat this call
st->m_printed = true;
// this will call us again since we called
// xd->setCallback() above to us
if ( ! xd->printDocForProCog ( sb , &st->m_r ) )
return false;
}
char *contentType = "text/html";
char format = st->m_format;
if ( format == FORMAT_XML ) contentType = "text/xml";
if ( format == FORMAT_JSON ) contentType = "application/json";
// if we printed a special page (like rainbow sections) then return now
if ( st->m_printed ) {
bool status = g_httpServer.sendDynamicPage (s,
//buf,bufLen,
sb->getBufStart(),
sb->getLength(),
-1,false,
//"text/html",
contentType,
-1, NULL, "utf8" );
// nuke state2
mdelete ( st , sizeof(State2) , "PageGet1" );
delete (st);
return status;
}
/*
// this was calling XmlDoc and setting sections, etc. to
// get the SpiderReply junk... no no no
// is it banned or filtered? this ignores the TagRec in the titleRec
// and uses msg8a to get it fresh instead
char *vi = xd->getIsFiltered();//Visible( );
// wait if blocked
if ( vi == (void *)-1 ) return false;
// error?
if ( ! vi ) return sendErrorReply ( st , g_errno );
// banned?
if ( ! st->m_isAdmin && ! *vi ) return sendErrorReply (st,EDOCBANNED);
*/
// get the utf8 content
char **utf8 = xd->getUtf8Content();
//long len = xd->size_utf8Content - 1;
// wait if blocked???
if ( utf8 == (void *)-1 ) return false;
// strange
if ( xd->size_utf8Content<=0) {
log("pageget: utf8 content <= 0");
return sendErrorReply(st,EBADENGINEER );
//.........这里部分代码省略.........
示例9: parse
// Parses a data feed page and copies the values of its <meta> tags into this
// DataFeed's members.
//   dataFeedPage    - page content, handed to Xml::set() with csUTF8
//   dataFeedPageLen - length of dataFeedPage
// For every meta node the "name" and "content" attributes are converted with
// utf16ToLatin1() and the name is matched case-insensitively against the
// known settings. "tiermax", "resultlevel" and "levelcost" may repeat; each
// occurrence fills the next slot of the matching price-table array.
// Unrecognized names are logged.
void DataFeed::parse ( char *dataFeedPage,
		       long  dataFeedPageLen ) {
	// use Xml Class to parse up the page
	Xml xml;
	xml.set ( csUTF8, dataFeedPage, dataFeedPageLen, false, 0, false,
		  TITLEREC_CURRENT_VERSION );
	// get the nodes
	long numNodes = xml.getNumNodes();
	XmlNode *nodes = xml.getNodes();
	// to count the tiers, result levels, and level costs
	long currTier = 0;
	long currResultLevel = 0;
	long currLevelCost = 0;
	// pull out the keywords for the data feed
	for (long i = 0; i < numNodes; i++) {
		// skip if this isn't a meta tag, shouldn't happen
		if (nodes[i].m_nodeId != 68)
			continue;

		// get the meta tag name
		long ucTagLen = 0;
		char *ucTag = xml.getString(i, "name", &ucTagLen);
		// The original tested the local array "tag" for NULL, which can
		// never be true; the pointer that can be missing is the one
		// returned by getString().
		if (!ucTag)
			continue;
		char tag[256];
		long tagLen = utf16ToLatin1 ( tag, 256,
					      (UChar*)ucTag, ucTagLen>>1 );
		// skip if empty
		if (tagLen <= 0)
			continue;

		// get the content
		long ucConLen = 0;
		char *ucCon = xml.getString(i, "content", &ucConLen);
		if (!ucCon)
			continue;
		char con[1024];
		long conLen = utf16ToLatin1 ( con, 1024,
					      (UChar*)ucCon, ucConLen>>1 );
		if (conLen <= 0)
			continue;

		// NOTE(review): tag/con are used as C strings below (atol, %s);
		// this presumes utf16ToLatin1() NUL-terminates its output -
		// confirm against its implementation.

		// match the meta tag to its local var and copy content
		if (tagLen == 10 && strncasecmp(tag, "customerid", 10) == 0)
			m_customerId = atoll(con);
		else if (tagLen == 11 && strncasecmp(tag, "datafeedurl", 11) == 0)
			setUrl(con, conLen);
		else if (tagLen == 8 && strncasecmp(tag, "passcode", 8) == 0)
			m_passcodeLen = setstr(m_passcode, MAX_PASSCODELEN, con, conLen);
		else if (tagLen == 6 && strncasecmp(tag, "status", 6) == 0)
			m_isActive = (bool)atoi(con);
		else if (tagLen == 6 && strncasecmp(tag, "locked", 6) == 0)
			m_isLocked = (bool)atoi(con);
		else if (tagLen == 14 &&
			 strncasecmp(tag, "dfcreationtime", 14) == 0)
			m_creationTime = atol(con);
		else if (tagLen == 8 && strncasecmp(tag, "numtiers", 8) == 0)
			m_priceTable.m_numTiers = atol(con);
		else if (tagLen == 15 && strncasecmp(tag, "numresultlevels", 15) == 0)
			m_priceTable.m_numResultLevels = atol(con);
		else if (tagLen == 10 && strncasecmp(tag, "monthlyfee", 10) == 0)
			m_priceTable.m_monthlyFee = atol(con);
		// NOTE(review): the three counters below are not bounded against
		// the capacity of the price-table arrays, which is not visible
		// here - confirm a page cannot carry more entries than fit.
		else if (tagLen == 7 && strncasecmp(tag, "tiermax", 7) == 0) {
			m_priceTable.m_tierMax[currTier] = (unsigned long)atol(con);
			currTier++;
		}
		else if (tagLen == 11 && strncasecmp(tag, "resultlevel", 11) == 0) {
			m_priceTable.m_resultLevels[currResultLevel] = (unsigned long)atol(con);
			currResultLevel++;
		}
		else if (tagLen == 9 && strncasecmp(tag, "levelcost", 9) == 0) {
			m_priceTable.m_levelCosts[currLevelCost] = (unsigned long)atol(con);
			currLevelCost++;
		}
		else
			log(LOG_INFO, "datafeed: Invalid Meta Tag Parsed [%li]:"
			    " %s", tagLen, tag);
	}
}
示例10: Blaster::gotDoc4
void Blaster::gotDoc4 ( void *state, TcpSocket *s){
StateBD *st=(StateBD *)state;
st->m_numUrlDocsReceived++;
if (!s) {
//Shouldn't happen, but still putting a checkpoint
log (LOG_WARN,"blaster: Got a null s in gotDoc4."
"Happened because ip could not be found for gigablast"
"server");
if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
m_launched--;
// Free stateBD
freeStateBD(st);
}
return;
}
// bail if got cut off
if ( s->m_readOffset == 0 ) {
log("blasterDiff : lost the Request in gotDoc4");
if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
m_launched--;
freeStateBD(st);
}
return;
}
char *reply = s->m_readBuf ;
long size = s->m_readOffset;
HttpMime mime;
mime.set ( reply , size , NULL );
char *content = reply + mime.getMimeLen();
long contentLen = size - mime.getMimeLen();
//short csEnum = get_iana_charset(mime.getCharset(),
// mime.getCharsetLen());
/* if (csEnum == csUnknown)
log(LOG_DEBUG, "blaster: Unknown charset : %s", mime.getCharset());*/
Xml xml;
if (!xml.set(
content,
contentLen,
false,
0,
false,
TITLEREC_CURRENT_VERSION)){
log(LOG_WARN,"blaster: Couldn't set XML Class in gotDoc4");
}
Links links;
Url *url=mime.getLocationUrl();
if (!links.set(0,//siterec xml
&xml,
url,
false,
NULL,
TITLEREC_CURRENT_VERSION,
0,
false,
NULL)){
log(LOG_WARN, "blaster: Coudn't set Links class in gotDoc4");
}
for (long i=0;i<links.getNumLinks();i++){
char *ss=links.getLink(i);
char *p;
// This page *should* always be a gigablast page. So not adding
// checks for msn or yahoo or google page.
p=strstr(ss,"google.");
if(p) continue;
p=strstr(ss,"cache:"); //googles cache page
if(p) continue;
p= strstr(ss,"gigablast.");
if(p) continue;
p= strstr(ss,"web.archive.org");//older copies on gigablast
if(p) continue;
p= strstr(ss,"search.yahoo.com");//from gigablast search
if(p) continue;
p= strstr(ss,"search.msn.com");//from gigablast search
if(p) continue;
p= strstr(ss,"s.teoma.com");//from gigablast search
if(p) continue;
p= strstr(ss,"search.dmoz.org");//from gigablast search
if(p) continue;
p= strstr(ss,"www.answers.com");//from gigablast search
if(p) continue;
if (m_verbose)
log(LOG_WARN,"blaster: Link Present on server2=%s",ss);
}
// So if one of the links that is returned is the exact url,
// then we know that the url is present.So get the url from the
// mime, search for it in the links that are returned.
char tmp[1024];
char *sendBuf=s->m_sendBuf;
char *p1,*p2;
// First get the Host, which is the domain. Since socket s is going to
// be useless after this function, changing m_sendBuf instead of using
// more space
p1=strstr(sendBuf,"%3A");
if(p1){
p1+=3;
p2=strstr(p1," HTTP");
//.........这里部分代码省略.........
示例11: stripHtml
// returns length of stripped content, but will set g_errno and return -1
// on error
int32_t stripHtml( char *content, int32_t contentLen, int32_t version, int32_t strip ) {
if ( !strip ) {
log( LOG_WARN, "query: html stripping not required!" );
return contentLen;
}
if ( ! content )
return 0;
if ( contentLen == 0 )
return 0;
// filter content if we should
// keep this on the big stack so "content" still references something
Xml tmpXml;
// . get the content as xhtml (should be NULL terminated)
// . parse as utf8 since all we are doing is messing with
// the tags...content manipulation comes later
if ( !tmpXml.set( content, contentLen, version, CT_HTML ) ) {
return -1;
}
//if( strip == 4 )
// return tmpXml.getText( content, contentLen );
// go tag by tag
int32_t n = tmpXml.getNumNodes();
XmlNode *nodes = tmpXml.getNodes();
// Xml class may have converted to utf16
content = tmpXml.getContent();
contentLen = tmpXml.getContentLen();
char *x = content;
char *xend = content + contentLen;
int32_t stackid = -1;
int32_t stackc = 0;
char skipIt = 0;
// . hack COL tag to NOT require a back tag
// . do not leave it that way as it could mess up our parsing
//g_nodes[25].m_hasBackTag = 0;
for ( int32_t i = 0 ; i < n ; i++ ) {
// get id of this node
int32_t id = nodes[i].m_nodeId;
// if strip is 4, just remove the script tag
if( strip == 4 ){
if ( id ){
if ( id == TAG_SCRIPT ){
skipIt ^= 1;
continue;
}
}
else if ( skipIt ) continue;
goto keepit;
}
// if strip is 3, ALL tags will be removed!
if( strip == 3 ) {
if( id ) {
// . we dont want anything in between:
// - script tags (83)
// - style tags (111)
if ((id == TAG_SCRIPT) || (id == TAG_STYLE)) skipIt ^= 1;
// save img to have alt text kept.
if ( id == TAG_IMG ) goto keepit;
continue;
}
else {
if( skipIt ) continue;
goto keepit;
}
}
// get it
int32_t fk;
if ( strip == 1 ) fk = g_nodes[id].m_filterKeep1;
else fk = g_nodes[id].m_filterKeep2;
// if tag is <link ...> only keep it if it has
// rel="stylesheet" or rel=stylesheet
if ( strip == 2 && id == TAG_LINK ) { // <link> tag id
int32_t fflen;
char *ff = nodes[i].getFieldValue ( "rel" , &fflen );
if ( ff && fflen == 10 &&
strncmp(ff,"stylesheet",10) == 0 )
goto keepit;
}
// just remove just the tag if this is 2
if ( fk == 2 ) continue;
// keep it if not in a stack
if ( ! stackc && fk ) goto keepit;
// if no front/back for tag, just skip it
if ( ! nodes[i].m_hasBackTag ) continue;
// start stack if none
if ( stackc == 0 ) {
// but not if this is a back tag
if ( nodes[i].m_node[1] == '/' ) continue;
// now start the stack
stackid = id;
stackc = 1;
continue;
}
// skip if this tag does not match what is on stack
//.........这里部分代码省略.........