本文整理汇总了 C++ 中 HtmlToken 类的典型用法代码示例。如果您正苦于以下问题：C++ HtmlToken 类的具体用法？C++ HtmlToken 怎么用？C++ HtmlToken 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 HtmlToken 类的 13 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: ResolveHtmlEntities
// Pulls the next token from parser and, if it is a text token, returns its
// content with HTML entities resolved (via ResolveHtmlEntities).
// Returns NULL when the parser yields no further token or when the next
// token isn't text. The token is consumed from the parser either way.
// NOTE(review): the result is presumably heap-allocated and owned by the
// caller — confirm against ResolveHtmlEntities.
static char *GetTextContent(HtmlPullParser& parser)
{
    HtmlToken *next = parser.Next();
    if (next && next->IsText())
        return ResolveHtmlEntities(next->s, next->sLen);
    return NULL;
}
示例2: parser
// Builds the table-of-contents tree by scanning the HTML that starts at
// tocReparsePoint. Every <a> element that has a filepos or href attribute
// and non-empty text becomes one EbookTocItem; <blockquote>, <ul> and <ol>
// nesting determines the item's level.
// Returns the root item, or NULL if there is no tocReparsePoint or no item
// was created.
DocTocItem *MobiEngineImpl::GetTocTree()
{
if (!tocReparsePoint)
return NULL;
EbookTocItem *root = NULL;
// text and link target of the <a> element currently being processed;
// itemLink being set doubles as the "we are inside a link" flag
ScopedMem<WCHAR> itemText;
ScopedMem<WCHAR> itemLink;
int itemLevel = 0;
int idCounter = 0;
// there doesn't seem to be a standard for Mobi ToCs, so we try to
// determine the author's intentions by looking at commonly used tags
HtmlPullParser parser(tocReparsePoint, str::Len(tocReparsePoint));
HtmlToken *tok;
// stop at the end of the data or at the first parsing error
while ((tok = parser.Next()) && !tok->IsError()) {
if (itemLink && tok->IsText()) {
// text inside a link: accumulate it, joining separate text runs
// with a single space
ScopedMem<WCHAR> linkText(str::conv::FromHtmlUtf8(tok->s, tok->sLen));
if (itemText)
itemText.Set(str::Join(itemText, L" ", linkText));
else
itemText.Set(linkText.StealData());
}
else if (!tok->IsTag())
continue;
// a Tag_Mbp_Pagebreak tag ends the scan
else if (Tag_Mbp_Pagebreak == tok->tag)
break;
else if (!itemLink && tok->IsStartTag() && Tag_A == tok->tag) {
// link start: prefer the filepos attribute, fall back to href
AttrInfo *attr = tok->GetAttrByName("filepos");
if (!attr)
attr = tok->GetAttrByName("href");
if (attr)
itemLink.Set(str::conv::FromHtmlUtf8(attr->val, attr->valLen));
}
else if (itemLink && tok->IsEndTag() && Tag_A == tok->tag) {
// link end: turn the collected link + text into a ToC item
PageDestination *dest = NULL;
if (!itemText) {
// links without any text are dropped
itemLink.Set(NULL);
continue;
}
if (IsExternalUrl(itemLink))
dest = new SimpleDest2(0, RectD(), itemLink.StealData());
else
dest = GetNamedDest(itemLink);
// the item takes over the text buffer (StealData)
EbookTocItem *item = new EbookTocItem(itemText.StealData(), dest);
item->id = ++idCounter;
// only items at levels 0 to 2 start out expanded
item->open = itemLevel <= 2;
AppendTocItem(root, item, itemLevel);
itemLink.Set(NULL);
}
else if (Tag_Blockquote == tok->tag || Tag_Ul == tok->tag || Tag_Ol == tok->tag) {
// these tags express nesting; the level never drops below 0
if (tok->IsStartTag())
itemLevel++;
else if (tok->IsEndTag() && itemLevel > 0)
itemLevel--;
}
}
return root;
}
示例3: Test02
static void Test02() {
const char* s = "<p>Last paragraph";
HtmlPullParser parser(s, str::Len(s));
HtmlToken* t = parser.Next();
utassert(t && t->IsTag() && t->IsStartTag() && Tag_P == t->tag);
t = parser.Next();
utassert(t && t->IsText() && str::EqNIx(t->s, t->sLen, "Last paragraph"));
}
示例4: Test03
static void Test03() {
const char* s = "a < b > c <> d <";
HtmlPullParser parser(s, str::Len(s));
HtmlToken* t = parser.Next();
utassert(t && t->IsText() && str::EqNIx(t->s, t->sLen, "a "));
t = parser.Next();
utassert(t && t->IsText() && str::EqNIx(t->s, t->sLen, "< b > c "));
t = parser.Next();
utassert(t && t->IsText() && str::EqNIx(t->s, t->sLen, "<> d "));
t = parser.Next();
utassert(t && t->IsError() && HtmlToken::UnclosedTag == t->error);
t = parser.Next();
utassert(!t);
}
示例5: name
// Despite the name, this handles FB2 tags: each one is either processed
// directly or mapped onto the HTML tag with the closest meaning.
// Tags not listed below are ignored.
void Fb2Formatter::HandleHtmlTag(HtmlToken *t)
{
    // tags the parser has already identified by id
    switch (t->tag) {
    case Tag_Title:
    case Tag_Subtitle: {
        const bool isSubtitle = Tag_Subtitle == t->tag;
        // titles are rendered as HTML headings: "h<section>" for a title
        // and one level below that for a subtitle
        ScopedMem<char> headingName(str::Format("h%d", section + (isSubtitle ? 1 : 0)));
        HtmlToken headingTok;
        headingTok.SetTag(t->type, headingName, headingName + str::Len(headingName));
        HandleTagHx(&headingTok);
        HandleAnchorAttr(t);
        // every opening <title> additionally gets a numbered anchor
        if (!isSubtitle && t->IsStartTag()) {
            char *link = (char *)Allocator::Alloc(textAllocator, 24);
            sprintf_s(link, 24, FB2_TOC_ENTRY_MARK "%d", ++titleCount);
            currPage->instructions.Append(DrawInstr::Anchor(link, str::Len(link), RectF(0, currY, pageDx, 0)));
        }
        return;
    }

    case Tag_Section:
        // track the section nesting depth (never dropping below 1)
        if (t->IsStartTag())
            section++;
        else if (t->IsEndTag() && section > 1)
            section--;
        FlushCurrLine(true);
        HandleAnchorAttr(t);
        return;

    case Tag_P:
        // paragraphs nested inside a title are not handled as paragraphs
        if (!tagNesting.Contains(Tag_Title))
            HtmlFormatter::HandleHtmlTag(t);
        return;

    case Tag_Image:
        HandleTagImg(t);
        HandleAnchorAttr(t);
        return;

    case Tag_A:
        HandleTagA(t, "href", "http://www.w3.org/1999/xlink");
        HandleAnchorAttr(t, true);
        return;

    case Tag_Pagebreak:
        ForceNewPage();
        return;

    case Tag_Strong:
        HandleTagAsHtml(t, "b");
        return;

    default:
        break;
    }

    // remaining tags are recognized by name
    if (t->NameIs("emphasis")) {
        HandleTagAsHtml(t, "i");
    }
    else if (t->NameIs("epigraph")) {
        HandleTagAsHtml(t, "blockquote");
    }
    else if (t->NameIs("empty-line")) {
        if (!t->IsEndTag())
            EmitParagraph(0);
    }
    else if (t->NameIs("stylesheet")) {
        HandleTagAsHtml(t, "style");
    }
}
示例6: text
// Extracts the plain text from doc's text data (as returned by GetTextData)
// and returns it converted from UTF-8 via str::conv::FromUtf8.
// Text inside <head>, <script> and <style> is skipped. Each text run is
// whitespace-trimmed, has its HTML entities resolved and is followed by a
// single space; at the end tag of a non-inline element that trailing space
// is replaced by "\r\n". Extraction stops at the first parsing error.
static WCHAR *ExtractHtmlText(EpubDoc *doc)
{
    size_t dataLen;
    const char *htmlData = doc->GetTextData(&dataLen);
    str::Str<char> plain(dataLen / 2);
    HtmlPullParser parser(htmlData, dataLen);
    Vec<HtmlTag> openTags;

    for (HtmlToken *tok = parser.Next(); tok != NULL && !tok->IsError(); tok = parser.Next()) {
        const bool isVisibleText = tok->IsText() &&
                                   !openTags.Contains(Tag_Head) &&
                                   !openTags.Contains(Tag_Script) &&
                                   !openTags.Contains(Tag_Style);
        if (isVisibleText) {
            // trim whitespace (TODO: also normalize within text?)
            // note: this narrows the token's own s/sLen in place
            while (tok->sLen > 0 && str::IsWs(*tok->s)) {
                ++tok->s;
                --tok->sLen;
            }
            while (tok->sLen > 0 && str::IsWs(tok->s[tok->sLen - 1])) {
                --tok->sLen;
            }
            if (tok->sLen > 0) {
                plain.AppendAndFree(ResolveHtmlEntities(tok->s, tok->sLen));
                plain.Append(' ');
            }
        }
        else if (tok->IsStartTag()) {
            // TODO: force-close tags similar to HtmlFormatter.cpp's AutoCloseOnOpen?
            if (!IsTagSelfClosing(tok->tag))
                openTags.Append(tok->tag);
        }
        else if (tok->IsEndTag()) {
            // the end of a non-inline element turns the separator space
            // after the last text run into a line break
            if (!IsInlineTag(tok->tag) && plain.Size() > 0 && plain.Last() == ' ') {
                plain.Pop();
                plain.Append("\r\n");
            }
            // when closing a tag that is somewhere on the stack, drop every
            // (potentially self-closing) tag above it and then the tag
            // itself; an end tag without a matching open tag leaves the
            // stack untouched
            if (openTags.Contains(tok->tag)) {
                while (openTags.Last() != tok->tag)
                    openTags.Pop();
                openTags.Pop();
            }
        }
    }

    return str::conv::FromUtf8(plain.Get());
}
示例7: Test00
static void Test00(const char *s, HtmlToken::TokenType expectedType) {
HtmlPullParser parser(s, str::Len(s));
HtmlToken *t = parser.Next();
assert(t->type == expectedType);
assert(t->NameIs("p"));
assert(Tag_P == t->tag);
AttrInfo *a = t->GetAttrByName("a1");
assert(a->NameIs("a1"));
assert(a->ValIs(">"));
a = t->GetAttrByName("foo");
assert(a->NameIs("foo"));
assert(a->ValIs("bar"));
a = t->GetAttrByName("nope");
assert(!a);
t = parser.Next();
assert(!t);
}
示例8: Reset
// Parses s in place, i.e. we assume we can modify it: tag names, attribute
// names and attribute values are zero-terminated directly inside s.
// s must be 0-terminated. The caller owns the memory for s.
// codepage is stored alongside the buffer.
// Returns rootElement on success; on a tokenizer error, records the error
// position in errorContext and returns the result of ParseError().
HtmlElement *HtmlParser::ParseInPlace(char *s, UINT codepage)
{
// drop the state of a previous parse, if any
if (this->html)
Reset();
this->html = s;
this->codepage = codepage;
HtmlPullParser parser(s, strlen(s));
HtmlToken *tok;
while ((tok = parser.Next())) {
// tok->s points into s, which we're allowed to modify (hence the cast)
char *tag = (char *)tok->s;
if (tok->IsError()) {
errorContext = tag;
// translate the tokenizer's error into this parser's error codes
switch (tok->error) {
case HtmlToken::UnclosedTag: return ParseError(ErrParsingElementName);
case HtmlToken::InvalidTag: return ParseError(ErrParsingClosingElement);
default: return ParseError(ErrParsingElement);
}
}
if (!tok->IsTag()) {
// ignore text content
assert(tok->IsText());
continue;
}
// one past the last character of the tag name
char *tagEnd = tag + tok->nLen;
if (!tok->IsEndTag()) {
// note: call tok->NextAttr() before zero-terminating names and values
// (presumably because NextAttr() reads from the same buffer that the
// terminators are written into — confirm against HtmlToken)
AttrInfo *attr = tok->NextAttr();
*tagEnd = '\0';
StartTag(tag);
while (attr) {
char *name = (char *)attr->name;
char *nameEnd = name + attr->nameLen;
char *value = (char *)attr->val;
char *valueEnd = value + attr->valLen;
// fetch the following attribute first, then terminate this one
attr = tok->NextAttr();
*nameEnd = *valueEnd = '\0';
AppendAttr(name, value);
}
}
// everything that isn't a plain start tag closes an element, as do
// start tags of elements for which IsTagSelfClosing() holds
if (!tok->IsStartTag() || IsTagSelfClosing(tok->tag)) {
*tagEnd = '\0';
CloseTag(tag);
}
}
return rootElement;
}
示例9: while
// Returns the next laid-out page, or NULL once parsing has finished and all
// accumulated pages have been handed out.
// For simplicity of implementation, we parse one xml text node or one xml
// element at a time. This might create one or more pages, which we remember
// in pagesToSend and send to the caller, one per call.
// If skipEmptyPages is true, pages for which IsEmptyPage() holds are deleted
// instead of being returned (they are still counted in pageCount).
// NOTE(review): a returned page is presumably owned by the caller — confirm.
HtmlPage *HtmlFormatter::Next(bool skipEmptyPages)
{
    for (;;) {
        // send out all pages accumulated so far
        while (pagesToSend.Count() > 0) {
            HtmlPage *ret = pagesToSend.At(0);
            pagesToSend.RemoveAt(0);
            pageCount++;
            if (skipEmptyPages && IsEmptyPage(ret))
                delete ret;
            else
                return ret;
        }
        // we can call ourselves recursively to send outstanding
        // pages after parsing has finished so this is to detect
        // that case and really end parsing
        if (finishedParsing)
            return NULL;
        HtmlToken *t = htmlParser->Next();
        if (!t || t->IsError())
            break;
        currReparseIdx = t->GetReparsePoint() - htmlParser->Start();
        CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser));
        if (t->IsTag())
            HandleHtmlTag(t);
        else if (!IgnoreText())
            HandleText(t);
    }
    // the input is exhausted (or malformed): force layout of the last line
    // and queue the page that's still being built
    AutoCloseTags(tagNesting.Count());
    FlushCurrLine(true);
    UpdateLinkBboxes(currPage);
    pagesToSend.Append(currPage);
    currPage = NULL;
    // call ourselves recursively to return accumulated pages; the caller's
    // skipEmptyPages choice is passed along so that the last pages are
    // filtered by the same rule as all the earlier ones
    finishedParsing = true;
    return Next(skipEmptyPages);
}
示例10: parser
// extract ComicInfo.xml metadata
// cf. http://comicrack.cyolito.com/downloads/comicrack/ComicRack/Support-Files/ComicInfoSchema.zip/
void CbxEngineImpl::ParseComicInfoXml(const char *xmlData)
{
PoolAllocator allocator;
HtmlPullParser parser(xmlData, str::Len(xmlData));
HtmlToken *tok;
while ((tok = parser.Next()) && !tok->IsError()) {
if (!tok->IsStartTag())
continue;
if (tok->NameIs("Title")) {
ScopedMem<char> value(GetTextContent(parser));
if (value)
Visit("/ComicBookInfo/1.0/title", value, json::Type_String);
}
else if (tok->NameIs("Year")) {
ScopedMem<char> value(GetTextContent(parser));
if (value)
Visit("/ComicBookInfo/1.0/publicationYear", value, json::Type_Number);
}
else if (tok->NameIs("Month")) {
ScopedMem<char> value(GetTextContent(parser));
if (value)
Visit("/ComicBookInfo/1.0/publicationMonth", value, json::Type_Number);
}
else if (tok->NameIs("Summary")) {
ScopedMem<char> value(GetTextContent(parser));
if (value)
Visit("/X-summary", value, json::Type_String);
}
else if (tok->NameIs("Writer")) {
ScopedMem<char> value(GetTextContent(parser));
if (value) {
Visit("/ComicBookInfo/1.0/credits[0]/person", value, json::Type_String);
Visit("/ComicBookInfo/1.0/credits[0]/primary", "true", json::Type_Bool);
}
}
else if (tok->NameIs("Penciller")) {
ScopedMem<char> value(GetTextContent(parser));
if (value) {
Visit("/ComicBookInfo/1.0/credits[1]/person", value, json::Type_String);
Visit("/ComicBookInfo/1.0/credits[1]/primary", "true", json::Type_Bool);
}
}
}
}
示例11: ExtractHttpCharset
// Determines the codepage declared by a
// <meta http-equiv="Content-Type" content="...; charset=..."> tag.
// Only the first min(htmlLen, 1024) bytes of html are parsed. The search
// ends at the first such tag whose content attribute could be parsed,
// whether or not its charset is known.
// Returns the matching codepage, or 0 if no charset is declared or the
// declared charset isn't in the table below.
// html is passed to strstr() and must thus be 0-terminated.
// cf. http://www.w3.org/TR/html4/charset.html#h-5.2.2
static UINT ExtractHttpCharset(const char *html, size_t htmlLen)
{
    // charset names (compared case-insensitively) and their codepages
    static const struct {
        const char *name;
        UINT codepage;
    } knownCharsets[] = {
        { "ISO-8859-1", 1252 }, { "Latin1", 1252 }, { "CP1252", 1252 }, { "Windows-1252", 1252 },
        { "ISO-8859-2", 28592 }, { "Latin2", 28592 },
        { "CP1251", 1251 }, { "Windows-1251", 1251 }, { "KOI8-R", 20866 },
        { "shift-jis", 932 }, { "x-euc", 932 }, { "euc-kr", 949 },
        { "Big5", 950 }, { "GB2312", 936 },
        { "UTF-8", CP_UTF8 },
    };
    const size_t knownCharsetCount = sizeof(knownCharsets) / sizeof(knownCharsets[0]);

    // cheap pre-check: without "charset=" there's nothing to extract
    const char *charsetHint = strstr(html, "charset=");
    if (!charsetHint)
        return 0;

    HtmlPullParser parser(html, min(htmlLen, 1024));
    for (HtmlToken *tok = parser.Next(); tok && !tok->IsError(); tok = parser.Next()) {
        if (Tag_Meta != tok->tag)
            continue;
        AttrInfo *httpEquiv = tok->GetAttrByName("http-equiv");
        if (!httpEquiv || !httpEquiv->ValIs("Content-Type"))
            continue;
        AttrInfo *content = tok->GetAttrByName("content");
        ScopedMem<char> mimetype, charset;
        if (!content || !str::Parse(content->val, content->valLen, "%S;%_charset=%S", &mimetype, &charset))
            continue;

        for (size_t i = 0; i < knownCharsetCount; i++) {
            if (str::EqI(charset, knownCharsets[i].name))
                return knownCharsets[i].codepage;
        }
        // a charset was declared but we don't know it: give up
        return 0;
    }
    return 0;
}
示例12: CrashIf
// Collects all anchors of the document's HTML data into two parallel lists:
// pageAnchorIds (the anchor names) and pageAnchorIdxs (the offset of the
// anchor's reparse point from the start of the HTML data).
// Does nothing if the lists have already been created.
void EbookController::ExtractPageAnchors()
{
if (pageAnchorIds || pageAnchorIdxs) {
// the two lists are always created together
CrashIf(!pageAnchorIds || !pageAnchorIdxs);
return;
}
pageAnchorIds = new WStrVec();
pageAnchorIdxs = new Vec<int>();
// path of the EPUB page currently being scanned (NULL until the first
// page marker has been seen)
ScopedMem<WCHAR> epubPagePath;
int fb2TitleCount = 0;
size_t len;
const char *data = doc.GetHtmlData(len);
HtmlPullParser parser(data, len);
HtmlToken *tok;
while ((tok = parser.Next()) != nullptr && !tok->IsError()) {
// only start tags and empty-element tags are of interest
if (!tok->IsStartTag() && !tok->IsEmptyElementEndTag())
continue;
// any tag's id attribute is an anchor; for <a> tags without an id, the
// name attribute is used instead (except in FB2 documents)
AttrInfo *attr = tok->GetAttrByName("id");
if (!attr && Tag_A == tok->tag && doc.Type() != Doc_Fb2)
attr = tok->GetAttrByName("name");
if (attr) {
// anchors are stored as "<page path>#<id>" (with an empty page path
// as long as no page marker has been seen)
ScopedMem<WCHAR> id(str::conv::FromUtf8(attr->val, attr->valLen));
pageAnchorIds->Append(str::Format(L"%s#%s", epubPagePath ? epubPagePath : L"", id.Get()));
pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
}
// update EPUB page paths and create an anchor per chapter
// (a pagebreak only counts as page marker if the raw data directly after
// the page_path value reads `" page_marker />`; presumably this tells
// markers inserted while loading the EPUB apart from pagebreak tags in
// the document's own content — confirm against the EPUB loading code)
if (Tag_Pagebreak == tok->tag &&
(attr = tok->GetAttrByName("page_path")) != nullptr &&
str::StartsWith(attr->val + attr->valLen, "\" page_marker />")) {
CrashIf(doc.Type() != Doc_Epub);
epubPagePath.Set(str::conv::FromUtf8(attr->val, attr->valLen));
pageAnchorIds->Append(str::Dup(epubPagePath));
pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
}
// create FB2 title anchors (cf. Fb2Doc::ParseToc)
// named FB2_TOC_ENTRY_MARK followed by the 1-based index of the title
if (Tag_Title == tok->tag && tok->IsStartTag() && Doc_Fb2 == doc.Type()) {
ScopedMem<WCHAR> id(str::Format(TEXT(FB2_TOC_ENTRY_MARK) L"%d", ++fb2TitleCount));
pageAnchorIds->Append(id.StealData());
pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
}
}
}
示例13:
// Handles t as if it were the HTML tag called name: a substitute token with
// t's type (start tag, end tag, ...) and the given name is passed to
// HtmlFormatter::HandleHtmlTag in place of t.
void Fb2Formatter::HandleTagAsHtml(HtmlToken *t, const char *name)
{
    const char *nameEnd = name + str::Len(name);
    HtmlToken htmlTok;
    htmlTok.SetTag(t->type, name, nameEnd);
    HtmlFormatter::HandleHtmlTag(&htmlTok);
}