当前位置: 首页>>代码示例>>C++>>正文


C++ HtmlParser类代码示例

本文整理汇总了C++中HtmlParser的典型用法代码示例。如果您正苦于以下问题:C++ HtmlParser类的具体用法?C++ HtmlParser怎么用?C++ HtmlParser使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了HtmlParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: HtmlParser02

static void HtmlParser02()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<a><b/><c></c  ><d at1=\"&lt;quo&amp;ted&gt;\" at2='also quoted'   att3=notquoted att4=&#101;&#x6e;d/></a>");
    assert(4 == p.ElementsCount());
    assert(4 == p.TotalAttrCount());
    assert(str::Eq("a", root->name));
    assert(NULL == root->next);
    HtmlElement *el = root->down;
    assert(str::Eq("b", el->name));
    assert(root == el->up);
    el = el->next;
    assert(str::Eq("c", el->name));
    assert(root == el->up);
    el = el->next;
    assert(str::Eq("d", el->name));
    assert(NULL == el->next);
    assert(root == el->up);
    ScopedMem<TCHAR> val(el->GetAttribute("at1"));
    assert(str::Eq(val, _T("<quo&ted>")));
    val.Set(el->GetAttribute("at2"));
    assert(str::Eq(val, _T("also quoted")));
    val.Set(el->GetAttribute("att3"));
    assert(str::Eq(val, _T("notquoted")));
    val.Set(el->GetAttribute("att4"));
    assert(str::Eq(val, _T("end")));
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:27,代码来源:TrivialHtmlParser_ut.cpp

示例2: htmlData

// Loads the data stored under `path`, parses it as HTML and feeds the first
// <ul> found (searching from <body> when there is one) to WalkChmTocOrIndex.
// When no <ul> exists, the result of WalkBrokenChmTocOrIndex is returned
// instead. Returns false if `path` is NULL, no data could be read, or the
// parser yields no element.
bool ChmDoc::ParseTocOrIndex(EbookTocVisitor *visitor, const char *path, bool isIndex)
{
    if (!path)
        return false;
    // TODO: is path already UTF-8 encoded - or do we need str::conv::ToUtf8(ToStr(path)) ?
    ScopedMem<unsigned char> rawData(GetData(path, NULL));
    const char *markup = (char *)rawData.Get();
    if (!markup)
        return false;

    HtmlParser parser;
    // a leading BOM marks the content as UTF-8; skip over its 3 bytes
    UINT textCodepage = codepage;
    if (str::StartsWith(markup, UTF8_BOM)) {
        textCodepage = CP_UTF8;
        markup += 3;
    }
    // enforce the default codepage, so that pre-encoded text and
    // entities are in the same codepage and VisitChmTocItem yields
    // consistent results
    if (!parser.Parse(markup, CP_CHM_DEFAULT))
        return false;

    // since <body> is optional, also continue without one
    HtmlElement *body = parser.FindElementByName("body");
    HtmlElement *list = parser.FindElementByName("ul", body);
    if (list) {
        WalkChmTocOrIndex(visitor, list, textCodepage, isIndex);
        return true;
    }
    return WalkBrokenChmTocOrIndex(visitor, parser, textCodepage, isIndex);
}
开发者ID:azaleafisitania,项目名称:sumatrapdf,代码行数:31,代码来源:ChmDoc.cpp

示例3: HtmlParser07

static void HtmlParser07()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<test umls=&auml;\xC3\xB6&#xFC; Zero=&#1;&#0;&#-1;>", CP_UTF8);
    utassert(1 == p.ElementsCount());
    ScopedMem<WCHAR> val(root->GetAttribute("umls"));
    utassert(str::Eq(val, L"\xE4\xF6\xFC"));
    val.Set(root->GetAttribute("zerO"));
    utassert(str::Eq(val, L"\x01??"));
}
开发者ID:eminemence,项目名称:advancedoptionsui-sumatrapdf,代码行数:10,代码来源:TrivialHtmlParser_ut.cpp

示例4: HtmlParser11

static void HtmlParser11()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<root/><!-- comment -->");
    utassert(1 == p.ElementsCount());
    utassert(0 == p.TotalAttrCount());
    utassert(root && root->NameIs("root"));

    root = p.Parse("<root><!---></root>");
    utassert(!root);
}
开发者ID:eminemence,项目名称:advancedoptionsui-sumatrapdf,代码行数:11,代码来源:TrivialHtmlParser_ut.cpp

示例5: HtmlParser00

static void HtmlParser00()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<a></A>");
    assert(p.ElementsCount() == 1);
    assert(root);
    assert(str::Eq("a", root->name));

    root = p.Parse("<b></B>");
    assert(p.ElementsCount() == 1);
    assert(root);
    assert(str::Eq("b", root->name));
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:13,代码来源:TrivialHtmlParser_ut.cpp

示例6: HtmlParser07

static void HtmlParser07()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<test umls=&auml;\xC3\xB6&#xFC; zero=&#1;&#0;&#-1;>", CP_UTF8);
    assert(1 == p.ElementsCount());
    ScopedMem<TCHAR> val(root->GetAttribute("umls"));
#ifdef UNICODE
    assert(str::Eq(val, L"\xE4\xF6\xFC"));
#else
    assert(str::EndsWith(val, "\xFC"));
#endif
    val.Set(root->GetAttribute("zero"));
    assert(str::Eq(val, _T("\x01??")));
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:14,代码来源:TrivialHtmlParser_ut.cpp

示例7: HtmlParser03

static void HtmlParser03()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<el   att  =v&quot;al/>");
    assert(1 == p.ElementsCount());
    assert(1 == p.TotalAttrCount());
    assert(str::Eq("el", root->name));
    assert(NULL == root->next);
    assert(NULL == root->up);
    assert(NULL == root->down);
    ScopedMem<TCHAR> val(root->GetAttribute("att"));
    assert(str::Eq(val, _T("v\"al")));
    assert(!root->firstAttr->next);
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:14,代码来源:TrivialHtmlParser_ut.cpp

示例8: HtmlParser04

static void HtmlParser04()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<el att=  va&apos;l></ el >");
    utassert(1 == p.ElementsCount());
    utassert(1 == p.TotalAttrCount());
    utassert(root->NameIs("el"));
    utassert(NULL == root->next);
    utassert(NULL == root->up);
    utassert(NULL == root->down);
    ScopedMem<WCHAR> val(root->GetAttribute("att"));
    utassert(str::Eq(val, L"va'l"));
    utassert(!root->firstAttr->next);
}
开发者ID:eminemence,项目名称:advancedoptionsui-sumatrapdf,代码行数:14,代码来源:TrivialHtmlParser_ut.cpp

示例9: req

// Performs a GET request for `url` and stores the result in this tab.
//
// On success m_Headers, m_Body (plain-text rendering of the response) and
// m_Links are replaced, and `location` receives the client's getLocation()
// value. If anything throws a std::exception, m_Body is set to the exception
// message followed by the host and port that were used; the other members
// and `location` are left as they were.
//
// needReloadPage() is emitted in both cases.
//
// Returns the client's responseCode(), or -1 when an exception prevented it
// from being read.
int Tab::realDoUrl(const std::string &url, std::string &location)
{
    int ret = -1;
    const bool ssl = UTIL::startsWith(url, "https");
    // NOTE(review): the port is derived from the scheme only; an explicit
    // port inside `url` looks like it would be ignored - confirm against Request.
    const int port = ssl ? 443 : 80;
    const unsigned read_timeout = 5;  // sec
    std::string host;

    try
    {
        Request req(url);
        host = req.host();

        m_Client.reset(new HttpClient(read_timeout));
        m_Client->setOptionSSL(ssl);
        m_Client->connect(host, UTIL::i2s(port));
        m_Client->handshake();

        std::string request = req.toGetRequsetString();

        std::cerr << "\nREQUEST:\n" << request << "\n";

        m_Client->write(request);
        int bytes = m_Client->read();
        std::cerr << "read bytes: " << bytes << "\n";

        location = m_Client->getLocation();
        ret = m_Client->responseCode();

        std::cerr << "\nRESPONSE:\n";
        std::cerr << m_Client->getHttpHeaders() << "\n";
        //std::cerr << "***\n";
        //std::cerr << m_Client->response() << "\n";

        HtmlParser parser;
        parser.parse(m_Client->response());
        std::cerr << "\nPARSER HTML: " << parser.getHtml().size() << " bytes\n";
        std::cerr << parser.getHtml() << "\n";

        std::cerr << "\nPARSER PLAIN: " << parser.getPlain().size() << " bytes\n";
        std::cerr << parser.getPlain() << "\n";

        // The tab state is only updated once the whole exchange and the
        // parse have succeeded.
        m_Headers = m_Client->getHttpHeaders();
        m_Body = parser.getPlain();
        //m_Body = parser.getHtml();
        m_Links = parser.getLinks();
    }
    catch(const std::exception &e)  // read-only access: catch by const reference
    {
        // NOTE(review): m_Headers and m_Links keep whatever an earlier call
        // stored - confirm whether they should be reset on failure.
        m_Body = e.what();
        m_Body += " [host: " + host + ", port: " + UTIL::i2s(port) + "]";
    }

    // generate paint event
    //this->update();

    emit needReloadPage();
    return ret;
}
开发者ID:o2gy84,项目名称:o2browser,代码行数:59,代码来源:tab.cpp

示例10: HtmlParser01

static void HtmlParser01()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<A><bAh></a>");
    assert(p.ElementsCount() == 2);
    assert(str::Eq("a", root->name));
    assert(NULL == root->up);
    assert(NULL == root->next);
    HtmlElement *el = root->down;
    assert(NULL == el->firstAttr);
    assert(str::Eq("bah", el->name));
    assert(el->up == root);
    assert(NULL == el->down);
    assert(NULL == el->next);
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:15,代码来源:TrivialHtmlParser_ut.cpp

示例11: HtmlParser00

static void HtmlParser00()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<a></A>");
    utassert(p.ElementsCount() == 1);
    utassert(root);
    utassert(Tag_A == root->tag && !root->name);
    utassert(root->NameIs("a"));

    root = p.Parse("<b></B>");
    utassert(p.ElementsCount() == 1);
    utassert(root);
    utassert(Tag_B == root->tag && !root->name);
    utassert(root->NameIs("b"));
}
开发者ID:eminemence,项目名称:advancedoptionsui-sumatrapdf,代码行数:15,代码来源:TrivialHtmlParser_ut.cpp

示例12: HtmlParser01

static void HtmlParser01()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<A><bAh></a>");
    utassert(p.ElementsCount() == 2);
    utassert(Tag_A == root->tag && !root->name);
    utassert(NULL == root->up);
    utassert(NULL == root->next);
    HtmlElement *el = root->down;
    utassert(NULL == el->firstAttr);
    utassert(el->NameIs("bah") && el->NameIs("BAH"));
    utassert(Tag_NotFound == el->tag && str::Eq("bAh", el->name));
    utassert(el->up == root);
    utassert(NULL == el->down);
    utassert(NULL == el->next);
}
开发者ID:eminemence,项目名称:advancedoptionsui-sumatrapdf,代码行数:16,代码来源:TrivialHtmlParser_ut.cpp

示例13: WalkBrokenChmTocOrIndex

// Fallback walker: disregards any <ul><li> list structure and visits every
// <object type="text/sitemap">...</object> found after <body> as one linear
// list, passing 1 as the last argument of each visit call.
// Returns true if at least one visit call returned true.
static bool WalkBrokenChmTocOrIndex(EbookTocVisitor* visitor, HtmlParser& p, UINT cp, bool isIndex) {
    bool visitedAny = false;

    HtmlElement* cursor = p.FindElementByName("body");
    for (;;) {
        // advance to the next <object>, starting from the previous match
        cursor = p.FindElementByName("object", cursor);
        if (nullptr == cursor)
            break;

        AutoFreeW typeAttr(cursor->GetAttribute("type"));
        if (str::EqI(typeAttr, L"text/sitemap")) {
            if (isIndex)
                visitedAny |= VisitChmIndexItem(visitor, cursor, cp, 1);
            else
                visitedAny |= VisitChmTocItem(visitor, cursor, cp, 1);
        }
    }

    return visitedAny;
}
开发者ID:jingyu9575,项目名称:sumatrapdf,代码行数:17,代码来源:ChmDoc.cpp

示例14: HtmlParser05

static void HtmlParser05()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<!doctype><html><HEAD><meta name=foo></head><body><object t=la><param name=foo val=bar></object><ul><li></ul></object></body></Html>");
    assert(8 == p.ElementsCount());
    assert(4 == p.TotalAttrCount());
    assert(str::Eq("html", root->name));
    assert(NULL == root->up);
    assert(NULL == root->next);
    HtmlElement *el = root->down;
    assert(str::Eq("head", el->name));
    HtmlElement *el2 = el->down;
    assert(str::Eq("meta", el2->name));
    assert(NULL == el2->next);
    assert(NULL == el2->down);
    el2 = el->next;
    assert(str::Eq("body", el2->name));
    assert(NULL == el2->next);
    el2 = el2->down;
    assert(str::Eq("object", el2->name));
    el = p.FindElementByName("html");
    assert(el);
    el = p.FindElementByName("head", el);
    assert(el);
    assert(str::Eq("head", el->name));
    el = p.FindElementByName("ul", el);
    assert(el);
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:28,代码来源:TrivialHtmlParser_ut.cpp

示例15: HtmlParser06

static void HtmlParser06()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<ul><p>ignore<li><br><meta><li><ol><li></ul><dropme>");
    assert(9 == p.ElementsCount());
    assert(0 == p.TotalAttrCount());
    assert(str::Eq("ul", root->name));
    assert(!root->next);
    HtmlElement *el = root->GetChildByName("li");
    assert(el);
    assert(str::Eq(el->down->name, "br"));
    assert(str::Eq(el->down->next->name, "meta"));
    assert(!el->down->next->next);
    el = root->GetChildByName("li", 1);
    assert(el);
    assert(!el->next);
    el = el->GetChildByName("ol");
    assert(!el->next);
    assert(str::Eq(el->down->name, "li"));
    assert(!el->down->down);
}
开发者ID:monolithpl,项目名称:sumatrapdf,代码行数:21,代码来源:TrivialHtmlParser_ut.cpp


注:本文中的HtmlParser类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。