本文整理汇总了C++中HtmlParser类的典型用法代码示例。如果您正苦于以下问题:C++ HtmlParser类的具体用法?C++ HtmlParser怎么用?C++ HtmlParser使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了HtmlParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: HtmlParser02
static void HtmlParser02()
{
HtmlParser p;
HtmlElement *root = p.Parse("<a><b/><c></c ><d at1=\"<quo&ted>\" at2='also quoted' att3=notquoted att4=end/></a>");
assert(4 == p.ElementsCount());
assert(4 == p.TotalAttrCount());
assert(str::Eq("a", root->name));
assert(NULL == root->next);
HtmlElement *el = root->down;
assert(str::Eq("b", el->name));
assert(root == el->up);
el = el->next;
assert(str::Eq("c", el->name));
assert(root == el->up);
el = el->next;
assert(str::Eq("d", el->name));
assert(NULL == el->next);
assert(root == el->up);
ScopedMem<TCHAR> val(el->GetAttribute("at1"));
assert(str::Eq(val, _T("<quo&ted>")));
val.Set(el->GetAttribute("at2"));
assert(str::Eq(val, _T("also quoted")));
val.Set(el->GetAttribute("att3"));
assert(str::Eq(val, _T("notquoted")));
val.Set(el->GetAttribute("att4"));
assert(str::Eq(val, _T("end")));
}
示例2: htmlData
// Loads the HTML stored under `path`, parses it and feeds the entries of its
// first <ul> list to `visitor`. When no <ul> is found, the result of
// WalkBrokenChmTocOrIndex() is returned instead.
// Returns false if `path` is NULL, no data could be read or parsing yields no
// root element; returns true once a <ul> list has been walked.
bool ChmDoc::ParseTocOrIndex(EbookTocVisitor *visitor, const char *path, bool isIndex)
{
    if (!path)
        return false;

    // TODO: is path already UTF-8 encoded - or do we need str::conv::ToUtf8(ToStr(path)) ?
    ScopedMem<unsigned char> rawData(GetData(path, NULL));
    const char *html = reinterpret_cast<const char *>(rawData.Get());
    if (!html)
        return false;

    // a UTF-8 BOM overrides the document's codepage and is skipped (3 bytes)
    const bool hasUtf8Bom = str::StartsWith(html, UTF8_BOM);
    const UINT cp = hasUtf8Bom ? CP_UTF8 : codepage;
    if (hasUtf8Bom)
        html += 3;

    // the parser always runs with the default codepage, so that pre-encoded
    // text and entities share one codepage and VisitChmTocItem yields
    // consistent results
    HtmlParser parser;
    if (!parser.Parse(html, CP_CHM_DEFAULT))
        return false;

    // <body> is optional: a failed lookup simply hands a null start element
    // to the <ul> search
    HtmlElement *body = parser.FindElementByName("body");
    HtmlElement *list = parser.FindElementByName("ul", body);
    if (list) {
        WalkChmTocOrIndex(visitor, list, cp, isIndex);
        return true;
    }
    return WalkBrokenChmTocOrIndex(visitor, parser, cp, isIndex);
}
示例3: HtmlParser07
// Checks attribute-value decoding under CP_UTF8: a named entity, a raw UTF-8
// byte sequence and a hexadecimal character reference must come out as
// U+00E4 U+00F6 U+00FC, and the references &#1; &#0; &#-1; must come out as
// "\x01??". The second lookup uses different letter case ("zerO" vs. "Zero")
// than the markup.
static void HtmlParser07()
{
    HtmlParser p;
    // The entities are written out in plain ASCII: an already-decoded literal
    // (raw umlauts, U+FFFD) would depend on the source file's encoding and
    // could not produce the values asserted below.
    HtmlElement *root = p.Parse("<test umls=&auml;\xC3\xB6&#xFC; Zero=&#1;&#0;&#-1;>", CP_UTF8);
    utassert(1 == p.ElementsCount());
    ScopedMem<WCHAR> val(root->GetAttribute("umls"));
    utassert(str::Eq(val, L"\xE4\xF6\xFC"));
    val.Set(root->GetAttribute("zerO"));
    utassert(str::Eq(val, L"\x01??"));
}
示例4: HtmlParser11
static void HtmlParser11()
{
HtmlParser p;
HtmlElement *root = p.Parse("<root/><!-- comment -->");
utassert(1 == p.ElementsCount());
utassert(0 == p.TotalAttrCount());
utassert(root && root->NameIs("root"));
root = p.Parse("<root><!---></root>");
utassert(!root);
}
示例5: HtmlParser00
static void HtmlParser00()
{
HtmlParser p;
HtmlElement *root = p.Parse("<a></A>");
assert(p.ElementsCount() == 1);
assert(root);
assert(str::Eq("a", root->name));
root = p.Parse("<b></B>");
assert(p.ElementsCount() == 1);
assert(root);
assert(str::Eq("b", root->name));
}
示例6: HtmlParser07
// Checks attribute-value decoding under CP_UTF8: a named entity, a raw UTF-8
// byte sequence and a hexadecimal character reference form the "umls" value,
// and the references &#1; &#0; &#-1; must come out as "\x01??".
static void HtmlParser07()
{
    HtmlParser p;
    // The entities are written out in plain ASCII: an already-decoded literal
    // (raw umlauts, U+FFFD) would depend on the source file's encoding and
    // could not produce the values asserted below.
    HtmlElement *root = p.Parse("<test umls=&auml;\xC3\xB6&#xFC; zero=&#1;&#0;&#-1;>", CP_UTF8);
    assert(1 == p.ElementsCount());
    ScopedMem<TCHAR> val(root->GetAttribute("umls"));
#ifdef UNICODE
    assert(str::Eq(val, L"\xE4\xF6\xFC"));
#else
    // in non-UNICODE builds only the final byte is checked
    assert(str::EndsWith(val, "\xFC"));
#endif
    val.Set(root->GetAttribute("zero"));
    assert(str::Eq(val, _T("\x01??")));
}
示例7: HtmlParser03
// A lone self-closed element whose unquoted attribute value contains a
// &quot; entity: the value must be returned as v"al and the element must have
// no parent, sibling, child or second attribute.
static void HtmlParser03()
{
    HtmlParser p;
    // The quote is written as &quot;: a bare '"' here would end the C++
    // string literal early and the test would not compile.
    HtmlElement *root = p.Parse("<el att =v&quot;al/>");
    assert(1 == p.ElementsCount());
    assert(1 == p.TotalAttrCount());
    assert(str::Eq("el", root->name));
    assert(NULL == root->next);
    assert(NULL == root->up);
    assert(NULL == root->down);
    ScopedMem<TCHAR> val(root->GetAttribute("att"));
    assert(str::Eq(val, _T("v\"al")));
    assert(!root->firstAttr->next);
}
示例8: HtmlParser04
static void HtmlParser04()
{
HtmlParser p;
HtmlElement *root = p.Parse("<el att= va'l></ el >");
utassert(1 == p.ElementsCount());
utassert(1 == p.TotalAttrCount());
utassert(root->NameIs("el"));
utassert(NULL == root->next);
utassert(NULL == root->up);
utassert(NULL == root->down);
ScopedMem<WCHAR> val(root->GetAttribute("att"));
utassert(str::Eq(val, L"va'l"));
utassert(!root->firstAttr->next);
}
示例9: req
// Fetches `url` with a fresh HttpClient, runs the response through
// HtmlParser and stores headers, plain-text body and links in the tab.
//
// url      - address to request; a leading "https" selects SSL and port 443,
//            anything else port 80
// location - receives m_Client->getLocation() once the response was read
//
// Returns the HTTP response code, or -1 if an exception was thrown before the
// response code was read. On any std::exception, m_Body is set to the
// exception text plus host and port. needReloadPage() is emitted in either
// case.
int Tab::realDoUrl(const std::string &url, std::string &location)
{
    int ret = -1;
    const bool ssl = UTIL::startsWith(url, "https");
    // NOTE(review): the port is derived from the scheme only; an explicit
    // port inside `url` does not appear to be honoured here - confirm.
    const int port = ssl ? 443 : 80;
    const unsigned read_timeout = 5; // sec
    std::string host; // declared outside the try block so the handler can report it

    try
    {
        Request req(url);
        host = req.host();

        m_Client.reset(new HttpClient(read_timeout));
        m_Client->setOptionSSL(ssl);
        m_Client->connect(host, UTIL::i2s(port));
        m_Client->handshake();

        const std::string request = req.toGetRequsetString();
        std::cerr << "\nREQUEST:\n" << request << "\n";
        m_Client->write(request);

        const int bytes = m_Client->read();
        std::cerr << "read bytes: " << bytes << "\n";

        location = m_Client->getLocation();
        ret = m_Client->responseCode();

        std::cerr << "\nRESPONSE:\n";
        std::cerr << m_Client->getHttpHeaders() << "\n";
        //std::cerr << "***\n";
        //std::cerr << m_Client->response() << "\n";

        HtmlParser parser;
        parser.parse(m_Client->response());
        std::cerr << "\nPARSER HTML: " << parser.getHtml().size() << " bytes\n";
        std::cerr << parser.getHtml() << "\n";
        std::cerr << "\nPARSER PLAIN: " << parser.getPlain().size() << " bytes\n";
        std::cerr << parser.getPlain() << "\n";

        // the tab state is only updated after parsing went through
        m_Headers = m_Client->getHttpHeaders();
        m_Body = parser.getPlain();
        //m_Body = parser.getHtml();
        m_Links = parser.getLinks();
    }
    catch (const std::exception &e) // caught by const reference: the handler only reads it
    {
        m_Body = e.what();
        m_Body += " [host: " + host + ", port: " + UTIL::i2s(port) + "]";
    }

    // generate paint event
    //this->update();
    emit needReloadPage();
    return ret;
}
示例10: HtmlParser01
static void HtmlParser01()
{
HtmlParser p;
HtmlElement *root = p.Parse("<A><bAh></a>");
assert(p.ElementsCount() == 2);
assert(str::Eq("a", root->name));
assert(NULL == root->up);
assert(NULL == root->next);
HtmlElement *el = root->down;
assert(NULL == el->firstAttr);
assert(str::Eq("bah", el->name));
assert(el->up == root);
assert(NULL == el->down);
assert(NULL == el->next);
}
示例11: HtmlParser00
static void HtmlParser00()
{
HtmlParser p;
HtmlElement *root = p.Parse("<a></A>");
utassert(p.ElementsCount() == 1);
utassert(root);
utassert(Tag_A == root->tag && !root->name);
utassert(root->NameIs("a"));
root = p.Parse("<b></B>");
utassert(p.ElementsCount() == 1);
utassert(root);
utassert(Tag_B == root->tag && !root->name);
utassert(root->NameIs("b"));
}
示例12: HtmlParser01
static void HtmlParser01()
{
HtmlParser p;
HtmlElement *root = p.Parse("<A><bAh></a>");
utassert(p.ElementsCount() == 2);
utassert(Tag_A == root->tag && !root->name);
utassert(NULL == root->up);
utassert(NULL == root->next);
HtmlElement *el = root->down;
utassert(NULL == el->firstAttr);
utassert(el->NameIs("bah") && el->NameIs("BAH"));
utassert(Tag_NotFound == el->tag && str::Eq("bAh", el->name));
utassert(el->up == root);
utassert(NULL == el->down);
utassert(NULL == el->next);
}
示例13: WalkBrokenChmTocOrIndex
// Fallback walker: disregards any <ul><li> nesting and visits, in document
// order, every <object type="text/sitemap">...</object> found from <body>
// onwards (or from the start when the <body> lookup fails). Each entry is
// visited with level 1.
// Returns true if at least one visit call returned true.
static bool WalkBrokenChmTocOrIndex(EbookTocVisitor* visitor, HtmlParser& p, UINT cp, bool isIndex) {
    bool visitedAny = false;
    HtmlElement* start = p.FindElementByName("body");
    for (HtmlElement* obj = p.FindElementByName("object", start); obj != nullptr;
         obj = p.FindElementByName("object", obj)) {
        AutoFreeW mimeType(obj->GetAttribute("type"));
        if (str::EqI(mimeType, L"text/sitemap")) {
            // |= rather than ||: the visit call must run for every entry
            if (isIndex) {
                visitedAny |= VisitChmIndexItem(visitor, obj, cp, 1);
            } else {
                visitedAny |= VisitChmTocItem(visitor, obj, cp, 1);
            }
        }
    }
    return visitedAny;
}
示例14: HtmlParser05
static void HtmlParser05()
{
HtmlParser p;
HtmlElement *root = p.Parse("<!doctype><html><HEAD><meta name=foo></head><body><object t=la><param name=foo val=bar></object><ul><li></ul></object></body></Html>");
assert(8 == p.ElementsCount());
assert(4 == p.TotalAttrCount());
assert(str::Eq("html", root->name));
assert(NULL == root->up);
assert(NULL == root->next);
HtmlElement *el = root->down;
assert(str::Eq("head", el->name));
HtmlElement *el2 = el->down;
assert(str::Eq("meta", el2->name));
assert(NULL == el2->next);
assert(NULL == el2->down);
el2 = el->next;
assert(str::Eq("body", el2->name));
assert(NULL == el2->next);
el2 = el2->down;
assert(str::Eq("object", el2->name));
el = p.FindElementByName("html");
assert(el);
el = p.FindElementByName("head", el);
assert(el);
assert(str::Eq("head", el->name));
el = p.FindElementByName("ul", el);
assert(el);
}
示例15: HtmlParser06
static void HtmlParser06()
{
HtmlParser p;
HtmlElement *root = p.Parse("<ul><p>ignore<li><br><meta><li><ol><li></ul><dropme>");
assert(9 == p.ElementsCount());
assert(0 == p.TotalAttrCount());
assert(str::Eq("ul", root->name));
assert(!root->next);
HtmlElement *el = root->GetChildByName("li");
assert(el);
assert(str::Eq(el->down->name, "br"));
assert(str::Eq(el->down->next->name, "meta"));
assert(!el->down->next->next);
el = root->GetChildByName("li", 1);
assert(el);
assert(!el->next);
el = el->GetChildByName("ol");
assert(!el->next);
assert(str::Eq(el->down->name, "li"));
assert(!el->down->down);
}