当前位置: 首页>>代码示例>>Golang>>正文


Golang html.Tokenizer类代码示例

本文整理汇总了 Golang 中 golang.org/x/net/html.Tokenizer 的典型用法代码示例。如果您正苦于以下问题:Golang 的 Tokenizer 类型具体如何使用?有哪些 Tokenizer 的使用示例?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。


在下文中一共展示了Tokenizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: setEndTagRaw

// setEndTagRaw stores the tokenizer's current raw token text as the end tag
// of parent when parent is non-nil and its tag name equals tagName.
//
// It returns "" when the end tag was stored and tagName otherwise.
func setEndTagRaw(tokenizer *html.Tokenizer, parent *tagElement, tagName string) string {
	if parent == nil || parent.tagName != tagName {
		return tagName
	}
	parent.endTagRaw = string(tokenizer.Raw())
	return ""
}
开发者ID:blevesearch,项目名称:hugoidx,代码行数:8,代码来源:parser.go

示例2: flushTagToken

// flushTagToken re-serializes the tag the tokenizer is currently positioned
// on and appends it to *htmlBuf.
//
// Attributes listed in tagAttrToProxy for the tag are rewritten through
// GetProxiedUrl(value, url); all other attribute values are copied. Every
// value is HTML-escaped before being written: Tokenizer.TagAttr hands back
// the unescaped value, so writing it verbatim between double quotes would
// produce broken markup for values containing '"', '<' or '&'.
//
// When the tag is "head", the result of getJsHookTag() is appended right
// after the tag. The tag name is returned as a string.
func flushTagToken(htmlBuf *[]byte, tz *html.Tokenizer, url string) string {
	*htmlBuf = append(*htmlBuf, '<')
	tagName, hasAttr := tz.TagName()
	name := string(tagName)
	*htmlBuf = append(*htmlBuf, tagName...)
	if hasAttr {
		for {
			attrKey, attrValue, hasMore := tz.TagAttr()
			*htmlBuf = append(*htmlBuf, ' ')
			*htmlBuf = append(*htmlBuf, attrKey...)
			*htmlBuf = append(*htmlBuf, '=', '"')

			value := attrValue
			if tagAttrToProxy[name][string(attrKey)] {
				value = []byte(GetProxiedUrl(string(attrValue), url))
			}
			// Re-escape so the value cannot terminate the quoted attribute.
			*htmlBuf = append(*htmlBuf, html.EscapeString(string(value))...)

			*htmlBuf = append(*htmlBuf, '"')
			if !hasMore {
				break
			}
		}
	}
	*htmlBuf = append(*htmlBuf, '>')
	if name == "head" {
		*htmlBuf = append(*htmlBuf, []byte(getJsHookTag())...)
	}
	return name
}
开发者ID:gongshw,项目名称:lighthouse,代码行数:28,代码来源:html.go

示例3: getInclude

// getInclude builds the placeholder markers for an include element from its
// attributes.
//
// The "src" attribute is mandatory; its value is trimmed of surrounding
// whitespace and of one leading '#'. The optional "required" attribute is
// parsed with strconv.ParseBool and defaults to false.
//
// For a required include only a start marker is returned; for an optional
// include both a start and an end marker are returned. An error is returned
// when "src" is missing or "required" is not a valid boolean.
func getInclude(z *html.Tokenizer, attrs []html.Attribute) (startMarker, endMarker string, error error) {
	srcAttr, found := getAttr(attrs, "src")
	if !found {
		return "", "", fmt.Errorf("include definition without src %s", z.Raw())
	}
	target := strings.TrimPrefix(strings.TrimSpace(srcAttr.Val), "#")

	mandatory := false
	if reqAttr, ok := getAttr(attrs, "required"); ok {
		parsed, parseErr := strconv.ParseBool(reqAttr.Val)
		if parseErr != nil {
			return "", "", fmt.Errorf("error parsing bool in %s: %s", z.Raw(), parseErr.Error())
		}
		mandatory = parsed
	}

	if mandatory {
		return fmt.Sprintf("§[> %s]§", target), "", nil
	}
	return fmt.Sprintf("§[#> %s]§", target), fmt.Sprintf("§[/%s]§", target), nil
}
开发者ID:tarent,项目名称:lib-compose,代码行数:26,代码来源:html_content_parser.go

示例4: skipSubtreeIfUicRemove

// skipSubtreeIfUicRemove reports whether the current element carries the
// UicRemove attribute and, if so, advances the tokenizer past the element's
// whole subtree.
//
// It returns false without consuming anything when the attribute is absent.
// For a self-closing tag it returns true immediately. Otherwise it consumes
// tokens, tracking nesting depth, until the matching end tag or an error
// token is reached, and then returns true.
func skipSubtreeIfUicRemove(z *html.Tokenizer, tt html.TokenType, tagName string, attrs []html.Attribute) bool {
	if _, marked := getAttr(attrs, UicRemove); !marked {
		return false
	}
	if isSelfClosingTag(tagName, tt) {
		return true
	}

	nesting := 0
	for {
		next := z.Next()
		name, _ := z.TagName()

		switch next {
		case html.ErrorToken:
			return true
		case html.StartTagToken:
			if !isSelfClosingTag(string(name), next) {
				nesting++
			}
		case html.EndTagToken:
			nesting--
			// Dropping below zero means the end tag of the removed element itself.
			if nesting < 0 {
				return true
			}
		}
	}
}
开发者ID:tarent,项目名称:lib-compose,代码行数:28,代码来源:html_content_parser.go

示例5: advanceToTextToken

// advanceToTextToken consumes tokens until it reaches a text token and
// returns a pointer to it. It returns nil if an error token is hit first.
func advanceToTextToken(z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.TextToken {
			tok := z.Token()
			return &tok
		}
	}
	return nil
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:13,代码来源:scraper.go

示例6: readAttributes

// readAttributes drains the remaining attributes of the tokenizer's current
// token into buff and returns the resulting slice.
//
// buff is truncated to length zero first so its backing array is reused.
// Attributes reported with a nil key are skipped.
func readAttributes(z *html.Tokenizer, buff []html.Attribute) []html.Attribute {
	buff = buff[:0]
	for more := true; more; {
		var key, value []byte
		key, value, more = z.TagAttr()
		if key == nil {
			continue
		}
		buff = append(buff, html.Attribute{Key: string(key), Val: string(value)})
	}
	return buff
}
开发者ID:tarent,项目名称:lib-compose,代码行数:13,代码来源:html_content_parser.go

示例7: readNameAndLink

// readNameAndLink consumes the next two tokens from t.
//
// If the first is a start tag whose first attribute is "href"
// (case-insensitive), that attribute's value is stored in item.siteURL.
// If the second is a text token, its text is stored in item.name.
func (item *AnimeConventionItem) readNameAndLink(t *html.Tokenizer) {
	if t.Next() == html.StartTagToken {
		if _, hasAttr := t.TagName(); hasAttr {
			key, val, _ := t.TagAttr()
			if strings.EqualFold(string(key), "href") {
				item.siteURL = string(val)
			}
		}
	}

	if t.Next() == html.TextToken {
		item.name = string(t.Text())
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:13,代码来源:AnimeConventionCrawler.go

示例8: parse2

// parse2 scans the token stream and assembles a Schedule.
//
// A <font class="PageHeading"> start tag updates the current date from the
// data of the token that follows it. A <tr> start tag with bgcolor "#ffffff"
// or "#f5f5f5" is handed to parseGame together with the current date, and
// the resulting game is appended to the schedule.
//
// The schedule is returned when the tokenizer yields an error token; an
// error from parseGame aborts parsing and is returned as is.
func parse2(z *html.Tokenizer) (*Schedule, error) {
	result := &Schedule{}
	var date string

	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return result, nil
		}
		if kind != html.StartTagToken {
			continue
		}

		tok := z.Token()
		if isTokenTagWithAttr("font", "class", "PageHeading", &tok, z) {
			z.Next()
			date = z.Token().Data
			continue
		}

		isRow := isTokenTagWithAttr("tr", "bgcolor", "#ffffff", &tok, z) ||
			isTokenTagWithAttr("tr", "bgcolor", "#f5f5f5", &tok, z)
		if !isRow {
			continue
		}

		game, err := parseGame(date, z)
		if err != nil {
			return nil, err
		}
		result.Games = append(result.Games, game)
	}
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:26,代码来源:scraper.go

示例9: AttrMap

// AttrMap collects the attributes of the tokenizer's current element into a
// map from attribute key to attribute value.
//
// hasAttr is the flag returned by Tokenizer.TagName; when it is false an
// empty map is returned and the tokenizer is not touched. Calling this is
// only meaningful while processing a start or self-closing tag token.
func AttrMap(hasAttr bool, z *html.Tokenizer) map[string]string {
	result := make(map[string]string)
	for more := hasAttr; more; {
		var key, val []byte
		key, val, more = z.TagAttr()
		result[string(key)] = string(val)
	}
	return result
}
开发者ID:tborg,项目名称:metascraper,代码行数:16,代码来源:page.go

示例10: Parse

// Parse walks the token stream from t until the tokenizer reports an error
// token and fills item by delegating to the item.read* helpers.
//
// Dispatch rules:
//   - a text token equal to "Advance Rates:" or "At-Door Rates:" triggers the
//     matching rate reader;
//   - a <big> tag (case-insensitive) triggers readResgiterNowurl;
//   - any other tag that has attributes is inspected through its first
//     attribute only: if that attribute is "itemprop" (case-insensitive), its
//     value selects the reader to call.
//
// The method has no error return, so it simply stops on an error token
// (including normal end of input). Callers that need the cause can inspect
// t.Err() afterwards.
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) {
	for {
		label := t.Next()
		switch label {
		case html.ErrorToken:
			// The original code built an error with fmt.Errorf here and threw
			// the result away, which had no effect; it has been removed.
			return
		case html.TextToken:
			switch string(t.Text()) {
			case "Advance Rates:":
				item.readadvanceRate(t)
			case "At-Door Rates:":
				item.readatDoorRate(t)
			}
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			tag, hasmore := t.TagName()
			if strings.EqualFold(string(tag), "big") {
				item.readResgiterNowurl(t)
			} else if hasmore {
				// Only the first attribute is examined; hasmore is re-bound to
				// "more attributes follow" and forwarded to readCountry.
				key, val, hasmore := t.TagAttr()
				if strings.EqualFold(string(key), "itemprop") {
					switch string(val) {
					case "description":
						item.readDescription(t)
					case "latitude":
						item.readLatitude(t)
					case "longitude":
						item.readLongitude(t)
					case "startDate":
						item.readStartDate(t)
					case "endDate":
						item.readEndDate(t)
					case "location":
						item.readLocation(t)
					case "addressLocality":
						item.readCity(t)
					case "addressRegion":
						item.readState(t)
					case "addressCountry":
						item.readCountry(t, hasmore)
					case "name":
						item.readNameAndLink(t)
					}
				}
			}
		}
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:50,代码来源:AnimeConventionCrawler.go

示例11: getMatchInfoTitle

// getMatchInfoTitle scans for the first <title> start tag and returns the
// data of the token that follows it.
//
// If the tokenizer yields an error token before a <title> tag is seen, a
// message is logged through Error and "" is returned.
func getMatchInfoTitle(z *html.Tokenizer) string {
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			break
		}
		if kind != html.StartTagToken {
			continue
		}

		if tok := z.Token(); tok.Data == "title" {
			// The token right after <title> carries the title text.
			z.Next()
			return z.Token().Data
		}
	}

	// Reaching this point means no <title> tag was found.
	Error.Printf("Could not get title...")
	return ""
}
开发者ID:Newbrict,项目名称:EzSkins,代码行数:28,代码来源:scrape.go

示例12: advanceToStartTag

// advanceToStartTag consumes tokens until it finds a start tag whose name is
// tagName and returns a pointer to that token. It returns nil if an error
// token is reached first.
func advanceToStartTag(tagName string, z *html.Tokenizer) *html.Token {
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return nil
		}
		if kind != html.StartTagToken {
			continue
		}
		if tok := z.Token(); tok.Data == tagName {
			return &tok
		}
	}
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:15,代码来源:scraper.go

示例13: readLocation

// readLocation advances t to the first start tag whose first attribute has
// the value "name" (case-insensitive) and, if the token right after it is a
// text token, stores that text in item.location.
//
// If the tokenizer reports an error token (including end of input) before
// such a tag is found, the method returns and leaves item.location
// untouched. Without this check the search loop would never terminate,
// because the tokenizer keeps returning ErrorToken once it has failed.
func (item *AnimeConventionItem) readLocation(t *html.Tokenizer) {
	for {
		label := t.Next()
		if label == html.ErrorToken {
			return
		}
		if label != html.StartTagToken {
			continue
		}
		if _, hasmore := t.TagName(); !hasmore {
			continue
		}
		if _, val, _ := t.TagAttr(); strings.EqualFold(string(val), "name") {
			break
		}
	}
	if label := t.Next(); label == html.TextToken {
		item.location = string(t.Text())
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:15,代码来源:AnimeConventionCrawler.go

示例14: ParseToken

// ParseToken reads tokens from z until the tokenizer yields an error token
// (which includes end of document) and passes every start tag token, along
// with tag, to checkElement.
func ParseToken(z *html.Tokenizer, tag string) {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind != html.StartTagToken {
			continue
		}
		tok := z.Token()
		checkElement(tok, tag)
	}
}
开发者ID:hiromaily,项目名称:golibs,代码行数:17,代码来源:html.go

示例15: parseFragment

// parseFragment consumes tokens from z and accumulates their raw text into a
// fragment until the end tag of a UicFragment or UicTail element, or the end
// of input, is reached.
//
// While copying it applies two rewrites:
//   - a UicInclude start or self-closing tag is replaced by the markers
//     returned from getInclude;
//   - an element for which skipSubtreeIfUicRemove returns true is dropped
//     together with its subtree.
//
// It returns the collected text as a StringFragment and an empty, non-nil
// dependencies slice. A tokenizer error other than io.EOF, or an error from
// getInclude, is returned with nil results.
func parseFragment(z *html.Tokenizer) (f Fragment, dependencies []*FetchDefinition, err error) {
	attrs := make([]html.Attribute, 0, 10)
	dependencies = make([]*FetchDefinition, 0)

	buff := bytes.NewBuffer(nil)
forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an &
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return nil, nil, z.Err()
			}
			break forloop
		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if string(tag) == UicInclude {
				replaceTextStart, replaceTextEnd, err := getInclude(z, attrs)
				if err != nil {
					return nil, nil, err
				}
				// Write the markers verbatim. They embed the include's src
				// value, so using them as a Printf format string would mangle
				// any '%' they contain.
				buff.WriteString(replaceTextStart)
				// Enhancement: WriteOut sub tree, to allow alternative content
				//              for optional includes.
				buff.WriteString(replaceTextEnd)
				continue
			}

			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}

		case tt == html.EndTagToken:
			if string(tag) == UicFragment || string(tag) == UicTail {
				break forloop
			}
		}
		buff.Write(raw)
	}

	return StringFragment(buff.String()), dependencies, nil
}
开发者ID:tarent,项目名称:lib-compose,代码行数:45,代码来源:html_content_parser.go


注:本文中的golang.org/x/net/html.Tokenizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。