本文整理汇总了Golang中code/google/com/p/go/net/html.Tokenizer类的典型用法代码示例。如果您正苦于以下问题:Golang Tokenizer类的具体用法?Golang Tokenizer怎么用?Golang Tokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Tokenizer类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: setEndTagRaw
// setEndTagRaw attaches the tokenizer's raw end-tag bytes to parent when
// parent exists and its name matches tagName, returning "" to signal the
// tag was consumed. Otherwise tagName is returned unchanged so a caller
// higher up the stack can try to claim it.
func setEndTagRaw(tokenizer *html.Tokenizer, parent *tagElement, tagName string) string {
	if parent == nil || parent.tagName != tagName {
		return tagName
	}
	parent.endTagRaw = string(tokenizer.Raw())
	return ""
}
示例2: readTag
// readTag reads the current tag (with attributes) from the tokenizer and
// returns a *Tag for the externally-referencing tags we care about:
//
//	script (must carry "src"), img, link (must carry "href"),
//	input (must carry "type").
//
// All other tags — and tags whose attributes could not be parsed
// (getAttrs returned nil) or lack the required attribute — yield nil.
func readTag(tk *html.Tokenizer) *Tag {
	// Tags of interest mapped to the attribute each one must carry;
	// "" means any attribute set qualifies (the img case).
	interesting := map[string]string{
		"script": "src",  // external script files
		"img":    "",     // external images
		"link":   "href", // external links (style sheets)
		"input":  "type", // input fields
	}
	tag, _ := tk.TagName()
	name := string(tag)
	required, ok := interesting[name]
	if !ok {
		// Ignore all other tags (no tag processed).
		return nil
	}
	attrs := getAttrs(tk)
	if attrs == nil {
		// Attribute parsing failed on a fragmented tag; drop it.
		return nil
	}
	if required != "" {
		if _, ok := attrs[required]; !ok {
			return nil
		}
	}
	return NewTag(name, attrs)
}
示例3: hasTablPluieClass
// hasTablPluieClass scans the current tag's remaining attributes and
// reports whether one of them is class="tablPluie".
func hasTablPluieClass(z *html.Tokenizer) bool {
	for {
		key, val, more := z.TagAttr()
		if string(key) == "class" && string(val) == "tablPluie" {
			return true
		}
		if !more {
			return false
		}
	}
}
示例4: traverse_html_token
// traverse_html_token walks the token stream to exhaustion, printing each
// token's type, data and attributes in a fixed-width table row.
func traverse_html_token(z *html.Tokenizer) {
	for z.Next() != html.ErrorToken {
		tok := z.Token()
		fmt.Printf("|token_type:%-20s|token.Data:%-10s|token.Attr:%-10s|\n", tok.Type, tok.Data, tok.Attr)
	}
}
示例5: getAttrVal
// getAttrVal returns the value of the named attribute on the current
// tag, or "" when the attribute is absent.
func getAttrVal(tokenizer *html.Tokenizer, attrName string) string {
	for more := true; more; {
		var key, val []byte
		key, val, more = tokenizer.TagAttr()
		if string(key) == attrName {
			return string(val)
		}
	}
	return ""
}
示例6: parseAnchorAttrs
// parseAnchorAttrs iterates over all of the attributes in the current anchor token.
// If a href is found and parses as a URL, the link is appended to links.
// Returns the (possibly grown) link slice.
func parseAnchorAttrs(tokenizer *html.Tokenizer, links []*URL) []*URL {
	//TODO: rework this to be cleaner, passing in `links` to be appended to
	//isn't great
	for {
		key, val, moreAttr := tokenizer.TagAttr()
		// string(key) == "href" is the idiomatic byte-slice comparison here;
		// the compiler elides the conversion's allocation for == against a
		// literal (replaces the old `bytes.Compare(...) == 0`).
		if string(key) == "href" {
			// Malformed URLs are silently skipped.
			if u, err := ParseURL(strings.TrimSpace(string(val))); err == nil {
				links = append(links, u)
			}
		}
		if !moreAttr {
			return links
		}
	}
}
示例7: burnTokensUntilEndTag
// burnTokensUntilEndTag discards tokens until the matching end tag for
// tagName is seen, or the stream ends (html.ErrorToken).
func burnTokensUntilEndTag(firewood *html.Tokenizer, tagName string) {
	want := []byte(tagName)
	for {
		tt := firewood.Next()
		if tt == html.ErrorToken {
			return
		}
		if tt != html.EndTagToken {
			continue
		}
		name, _ := firewood.TagName()
		if bytes.Equal(name, want) {
			return
		}
	}
}
示例8: getAttrs
// getAttrs collects the current tag's attributes into a map.
//
// If the tag sits at the end of a truncated HTML fragment the tokenizer
// may panic mid-parse; the deferred recover turns that into a nil return
// so the caller drops the tag (to an eavesdropper this looks like a
// cached resource).
func getAttrs(tk *html.Tokenizer) (list map[string]string) {
	// Convert a tokenizer panic on a fragmented tag into a nil result.
	defer func() {
		if r := recover(); r != nil {
			logger.Printf(logger.WARN, "[sid.html] Skipping fragmented tag: %v\n", r)
			list = nil
		}
	}()
	list = make(map[string]string)
	for more := true; more; {
		var key, val []byte
		key, val, more = tk.TagAttr()
		list[string(key)] = string(val)
	}
	return list
}
示例9: traverse_html_tokenizer
// traverse_html_tokenizer walks the token stream to exhaustion, printing
// the tokenizer's raw Text/TagName/TagAttr views for every token.
func traverse_html_tokenizer(z *html.Tokenizer) {
	for z.Next() != html.ErrorToken {
		text := string(z.Text())
		nameBytes, hasAttr := z.TagName()
		key, value, _ := z.TagAttr()
		fmt.Printf("|Tokenizer.Text:%-10s|Tokenizer.TagName:%-10s|hasAttri:%-10t|tag_attr_key:%-10s|tag_attr_value:%-10s|\n",
			text, string(nameBytes), hasAttr, string(key), string(value))
	}
}
示例10: parseToken
// parseToken consumes one token from the tokenizer and folds it into
// htmlDoc under parent. The three return values drive the recursion:
//   - errorToken:  true once the stream is exhausted (html.ErrorToken);
//   - parentEnded: true when an end tag closed the caller's element;
//   - unsetEndTag: a tag name whose raw end-tag bytes could not be
//     attached at this level and must be claimed by an ancestor.
func parseToken(tokenizer *html.Tokenizer, htmlDoc *htmlDocument, parent *tagElement) (bool, bool, string) {
	tokenType := tokenizer.Next()
	switch tokenType {
	case html.ErrorToken:
		// End of input (or parse error): unwind the whole recursion.
		return true, false, ""
	case html.TextToken:
		text := string(tokenizer.Text())
		if strings.TrimSpace(text) == "" {
			// Skip whitespace-only text nodes.
			break
		}
		textElement := &textElement{text: text}
		appendElement(htmlDoc, parent, textElement)
	case html.StartTagToken:
		// Open a new element, then recursively parse its children until
		// its end tag (or EOF) is reached.
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
		for {
			errorToken, parentEnded, unsetEndTag := parseToken(tokenizer, htmlDoc, tagElement)
			if errorToken {
				return true, false, ""
			}
			if parentEnded {
				if unsetEndTag != "" {
					// The end tag belongs to an ancestor; propagate upward.
					return false, false, unsetEndTag
				}
				break
			}
			if unsetEndTag != "" {
				// Try to attach the dangling end tag to this element; if
				// the names differ it keeps propagating upward.
				return false, false, setEndTagRaw(tokenizer, tagElement, unsetEndTag)
			}
		}
	case html.EndTagToken:
		// Close the parent when the names match; otherwise return the tag
		// name so an ancestor can claim it.
		return false, true, setEndTagRaw(tokenizer, parent, getTagName(tokenizer))
	case html.DoctypeToken, html.SelfClosingTagToken, html.CommentToken:
		// Childless nodes: stored as tag elements, no recursion.
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
	}
	return false, false, ""
}
示例11: next
// next skips forward to the next text token and returns its contents as
// a string; it returns "" when the stream ends first.
func next(z *html.Tokenizer) string {
	for {
		switch tt := z.Next(); tt {
		case html.TextToken:
			res := string(z.Text())
			if debug {
				fmt.Printf("next: %q\n", res)
			}
			return res
		case html.ErrorToken:
			return ""
		default:
			if debug {
				fmt.Println("skipping: ", tt)
			}
		}
	}
}
示例12: textUpToEndTag
// textUpToEndTag accumulates all text-token content until the matching
// end tag for tagName (or end of stream) and returns it as bytes.
func textUpToEndTag(tokenizer *html.Tokenizer, tagName string) []byte {
	var buf bytes.Buffer
	want := []byte(tagName)
loop:
	for {
		switch tokenizer.Next() {
		case html.TextToken:
			buf.Write(tokenizer.Text())
		case html.EndTagToken:
			if name, _ := tokenizer.TagName(); bytes.Equal(want, name) {
				break loop
			}
		case html.ErrorToken:
			break loop
		}
	}
	return buf.Bytes()
}
示例13: getTagName
// getTagName returns the current tag's name as a string.
func getTagName(tokenizer *html.Tokenizer) string {
	name, _ := tokenizer.TagName()
	return string(name)
}