当前位置: 首页>>代码示例>>Golang>>正文


Golang Tokenizer.Token方法代码示例

本文整理汇总了Golang中golang.org/x/net/html.Tokenizer.Token方法的典型用法代码示例。如果您正苦于以下问题：Golang Tokenizer.Token方法的具体用法？Golang Tokenizer.Token怎么用？Golang Tokenizer.Token使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在golang.org/x/net/html.Tokenizer的用法示例。


在下文中一共展示了Tokenizer.Token方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: getMatchInfoTitle

// getMatchInfoTitle scans z for the first <title> start tag and returns the
// text token that immediately follows it.
//
// It returns "" and reports through Error when the input ends before a
// <title> tag is seen, or when the tag is not followed by a text token
// (for example an empty <title></title>, where the next token is the end tag
// and its Data would be the tag name rather than a title).
func getMatchInfoTitle(z *html.Tokenizer) string {
scan:
	for {
		switch z.Next() {
		case html.ErrorToken:
			// End of input (or a tokenizer error): no title was found.
			break scan

		case html.StartTagToken:
			if z.Token().Data != "title" {
				continue
			}
			// Only accept the following token when it really is text.
			if z.Next() != html.TextToken {
				break scan
			}
			return z.Token().Data
		}
	}
	// If we reached here something went wrong :^(
	Error.Printf("Could not get title...")
	return ""
}
开发者ID:Newbrict,项目名称:EzSkins,代码行数:28,代码来源:scrape.go

示例2: parse2

// parse2 walks every token of z and collects games into a Schedule.
//
// A <font class="PageHeading"> start tag updates the current date from the
// token that follows it. A <tr> start tag whose bgcolor is "#ffffff" or
// "#f5f5f5" is handed to parseGame together with that date. The schedule is
// returned once the tokenizer reports an ErrorToken; the first parseGame
// error aborts the walk and is returned with a nil schedule.
func parse2(z *html.Tokenizer) (*Schedule, error) {
	var (
		result = &Schedule{}
		day    string
	)

	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return result, nil
		}
		if kind != html.StartTagToken {
			continue
		}

		tok := z.Token()
		if isTokenTagWithAttr("font", "class", "PageHeading", &tok, z) {
			// The heading's content is taken from the very next token.
			z.Next()
			day = z.Token().Data
			continue
		}

		isGameRow := isTokenTagWithAttr("tr", "bgcolor", "#ffffff", &tok, z) ||
			isTokenTagWithAttr("tr", "bgcolor", "#f5f5f5", &tok, z)
		if !isGameRow {
			continue
		}

		game, err := parseGame(day, z)
		if err != nil {
			return nil, err
		}
		result.Games = append(result.Games, game)
	}
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:26,代码来源:scraper.go

示例3: advanceToTextToken

// advanceToTextToken consumes tokens from z until it reaches a text token and
// returns a pointer to a copy of it. It returns nil if the tokenizer reports
// an ErrorToken first.
func advanceToTextToken(z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.TextToken {
			tok := z.Token()
			return &tok
		}
	}
	return nil
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:13,代码来源:scraper.go

示例4: parseGame

// parseGame reads a single game from z, starting just after the row's <tr>
// start tag, and combines it with date to build a Game.
//
// The game number is the token following the next <td>; the game time is the
// token following the next <div> inside the next <td> (falling back to the
// next text token when that token is blank). Team names and scores are read
// with parseTeamName and parseScore.
//
// An error is returned when an expected tag or the time text cannot be found
// before the input ends, or when date+" "+time does not match the layout
// "1/2/2006 3:04 PM". On error the returned Game is the zero value.
func parseGame(date string, z *html.Tokenizer) (Game, error) {
	var game Game

	if advanceToStartTag("td", z) == nil {
		return game, errors.New("Unable to find Game Number")
	}
	z.Next()
	gameNum := strings.TrimSpace(z.Token().Data)

	if advanceToStartTag("td", z) == nil {
		return game, errors.New("Unable to find Game Time")
	}
	if advanceToStartTag("div", z) == nil {
		return game, errors.New("Unable to find Game Time")
	}
	z.Next()
	gameTime := strings.TrimSpace(z.Token().Data)
	if gameTime == "" {
		// The token right after <div> carried no text; use the next text token.
		t := advanceToTextToken(z)
		if t == nil {
			// Input ended before any text was found; dereferencing t would panic.
			return game, errors.New("Unable to find Game Time")
		}
		gameTime = strings.TrimSpace(t.Data)
	}

	var homeTeam, homeScore, awayTeam, awayScore string

	homeTeam = parseTeamName(z)
	homeScore = parseScore(z)
	if len(homeScore) > 3 {
		// A "score" longer than three characters is treated as the away team
		// name, and both scores are left empty.
		// NOTE(review): presumably rows for unplayed games have no score
		// cells — confirm against the scraped page.
		awayTeam, homeScore = homeScore, ""
	} else {
		awayTeam = parseTeamName(z)
		awayScore = parseScore(z)
	}

	gameDate, err := time.Parse("1/2/2006 3:04 PM", date+" "+gameTime)
	if err != nil {
		return game, err
	}

	return Game{gameDate, gameNum, homeTeam, homeScore, awayTeam, awayScore}, nil
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:50,代码来源:scraper.go

示例5: advanceToStartTag

// advanceToStartTag consumes tokens from z until it reaches a start tag whose
// name equals tagName and returns a pointer to a copy of that token. It
// returns nil if the tokenizer reports an ErrorToken first.
func advanceToStartTag(tagName string, z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind != html.StartTagToken {
			continue
		}
		if tok := z.Token(); tok.Data == tagName {
			return &tok
		}
	}
	return nil
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:15,代码来源:scraper.go

示例6: getMatchInfoBets

// getMatchInfoBets scans z to the end of input and returns one Bet per item
// element found.
//
// A <span class="user"> sets the current bettor from the second token after
// it; that bettor is attached to every item seen afterwards until the next
// such span. A <div> whose class contains "item" yields a Bet: the second
// token after it is inspected, its first attribute decides the StatTrak flag
// (value contains "clreff"), and the third attribute of the item token
// supplies the item name. For StatTrak items the item token is the one four
// tokens further on.
//
// Tokens that do not carry the expected number of attributes (for example
// when the input is truncated) are skipped instead of causing an
// index-out-of-range panic.
func getMatchInfoBets(z *html.Tokenizer) (bets []*Bet) {
	var bettor string

	for {
		switch z.Next() {
		case html.ErrorToken:
			return bets

		case html.StartTagToken:
			t := z.Token()

			switch t.Data {
			case "span":
				for _, a := range t.Attr {
					if a.Key == "class" && a.Val == "user" {
						z.Next()
						z.Next()
						bettor = strings.TrimSpace(z.Token().Data)
					}
				}

			case "div":
				for _, a := range t.Attr {
					if a.Key != "class" || !strings.Contains(a.Val, "item") {
						continue
					}
					z.Next()
					z.Next()
					itemTok := z.Token()
					if len(itemTok.Attr) == 0 {
						// Nothing to inspect; not the markup we expect.
						continue
					}

					// Get StatTrak status
					statTrak := strings.Contains(itemTok.Attr[0].Val, "clreff")
					if statTrak {
						// The item token sits four tokens further on.
						for i := 0; i < 4; i++ {
							z.Next()
						}
						itemTok = z.Token()
					}
					if len(itemTok.Attr) < 3 {
						// The item name is read from the third attribute.
						continue
					}

					item := itemTok.Attr[2].Val
					bets = append(bets, &Bet{bettor, item, statTrak})
				}
			}
		}
	}
}
开发者ID:Newbrict,项目名称:EzSkins,代码行数:58,代码来源:scrape.go

示例7: ParseToken

// ParseToken reads z until the tokenizer reports an ErrorToken (end of the
// document or a read error) and passes every start-tag token, together with
// tag, to checkElement.
func ParseToken(z *html.Tokenizer, tag string) {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.StartTagToken {
			checkElement(z.Token(), tag)
		}
	}
}
开发者ID:hiromaily,项目名称:golibs,代码行数:17,代码来源:html.go

示例8: getMatchInfoDateTime

// getMatchInfoDateTime scans z for the first <div> that has class="half" and
// a title attribute. The title value is returned as matchDate, and the
// whitespace-trimmed data of the token following the <div> is returned as
// matchTime.
//
// The two attributes may appear in either order on the tag; HTML attribute
// order carries no meaning, so the match does not depend on it.
//
// If the input ends before such a <div> is found, the failure is reported
// through Error and two empty strings are returned.
func getMatchInfoDateTime(z *html.Tokenizer) (matchDate, matchTime string) {
	for {
		switch z.Next() {
		case html.ErrorToken:
			Error.Printf("Could not get date and time...")
			return "", ""

		case html.StartTagToken:
			t := z.Token()
			if t.Data != "div" {
				continue
			}

			// Collect both attributes first, then decide.
			var (
				isHalf   bool
				hasTitle bool
				title    string
			)
			for _, a := range t.Attr {
				switch {
				case a.Key == "class" && a.Val == "half":
					isHalf = true
				case a.Key == "title" && !hasTitle:
					title, hasTitle = a.Val, true
				}
			}
			if !isHalf || !hasTitle {
				continue
			}

			// Definitely a date now, grab both date and time.
			z.Next()
			return title, strings.TrimSpace(z.Token().Data)
		}
	}
}
开发者ID:Newbrict,项目名称:EzSkins,代码行数:37,代码来源:scrape.go

示例9: DoParse

// DoParse extracts article text from the token stream of parser and stores
// it on the article through SetData.
//
// It first skips forward to the first <p class="story-body-text story-content">
// and then accumulates text until it reaches
// <footer class="story-footer story-content">, after which the collected
// body is printed and nil is returned. An error is returned if the input
// ends before either marker is found.
func (article *NYTArticle) DoParse(parser *html.Tokenizer) error {

	// Phase 1: advance to the opening paragraph of the story body.
articleOpeningTagLoop:
	for {
		token := parser.Next()

		switch {
		case token == html.ErrorToken:
			return fmt.Errorf("problem moving article %s to open tag", article.GetTitle())
		case token == html.StartTagToken:
			tmp := parser.Token()
			isStartArticle := tmp.Data == "p"
			if isStartArticle {
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && attr.Val == "story-body-text story-content" {
						break articleOpeningTagLoop
					}
				}
			}
		}
	}

	// Phase 2: collect text until the story footer.
	// isInParagraph gates whether the next non-tag token is appended.
	isInParagraph := true
articleClosingTagLoop:
	for {
		token := parser.Next()
		switch {
		case token == html.ErrorToken:
			return fmt.Errorf("problem scraping article %s", article.GetTitle())
		case token == html.StartTagToken:
			tmp := parser.Token()
			isEndArticle := tmp.Data == "footer"
			if isEndArticle {
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && attr.Val == "story-footer story-content" {
						break articleClosingTagLoop
					}
				}
			}

			if tmp.Data == "p" {
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && strings.Contains(attr.Val, "story-body-text") {
						isInParagraph = true
					}
				}
				if isInParagraph {
					continue
				}
			}

			// is a link
			if tmp.Data == "a" {
				shouldSkip := false
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && strings.Contains(attr.Val, "visually-hidden") {
						shouldSkip = true
					}
				}

				if shouldSkip {
					continue
				}

				// Append the token following <a>, padded with single spaces,
				// and keep collecting the text that follows the link.
				parser.Next()
				tmp = parser.Token()
				newBody := strings.TrimSpace(article.GetData()) + " " + strings.TrimSpace(tmp.Data) + " "
				article.SetData(newBody)
				isInParagraph = true
			}

		case token == html.EndTagToken:
			tmp := parser.Token()
			if tmp.Data == "p" {
				isInParagraph = false
			}

		default:
			if !isInParagraph {
				continue
			}
			tmp := parser.Token()

			newBody := article.GetData()
			// When the new chunk starts with punctuation, drop the padding
			// left at the end of the body so the punctuation attaches to the
			// preceding word. Only the first byte is examined. The length
			// check guards against tokens with empty Data (for example an
			// empty comment), which would otherwise panic on Data[0].
			if len(tmp.Data) > 0 && unicode.IsPunct(rune(tmp.Data[0])) {
				newBody = strings.TrimSpace(newBody)
			}
			newBody = newBody + strings.TrimSpace(tmp.Data)
			article.SetData(newBody)
			isInParagraph = false
		}
	}
	fmt.Println(article.GetData())
	return nil
}
开发者ID:opinionated,项目名称:scraper-core,代码行数:96,代码来源:NYT.go

示例10: DoParse

// DoParse pulls text out of the token stream of parser and appends it to the
// article through SetData.
//
// It skips forward to the first <p class="main-content">, then appends the
// trimmed data of the first non-start-tag token that follows each start tag,
// until <footer class="story-footer story-content"> is reached. Progress
// messages and the final body are printed to standard output. The function
// always returns nil, including when the input ends early.
func (article *ECONArticle) DoParse(parser *html.Tokenizer) error {
	// classIs reports whether tok carries a class attribute equal to want.
	classIs := func(tok html.Token, want string) bool {
		for _, a := range tok.Attr {
			if a.Key == "class" && a.Val == want {
				return true
			}
		}
		return false
	}

	// Phase 1: locate the paragraph that opens the article body.
	for found := false; !found; {
		switch parser.Next() {
		case html.ErrorToken:
			fmt.Println("Prollem")
			return nil
		case html.StartTagToken:
			tag := parser.Token()
			if tag.Data == "p" && classIs(tag, "main-content") {
				fmt.Println("Found it, bitch")
				found = true
			}
		}
	}

	// Phase 2: gather text. capture is re-armed by every start tag and
	// cleared once a token has been appended.
	capture := true
	for done := false; !done; {
		switch parser.Next() {
		case html.ErrorToken:
			fmt.Println("Prollem")
			return nil
		case html.StartTagToken:
			tag := parser.Token()
			if tag.Data == "footer" && classIs(tag, "story-footer story-content") {
				fmt.Println("Hit end")
				done = true
			} else {
				capture = true
			}
		default:
			if capture {
				tok := parser.Token()
				article.SetData(article.GetData() + strings.TrimSpace(tok.Data))
				capture = false
			}
		}
	}

	fmt.Println(article.GetData())
	return nil
}
开发者ID:opinionated,项目名称:scraper-core,代码行数:62,代码来源:Economist.go

示例11: DoParse

func (article *WSJArticle) DoParse(parser *html.Tokenizer) error {

	// find the start of the article
	// starts at the top of the html body, ends at the article tag
articleTagLoop:
	for {
		token := parser.Next()

		switch {
		case token == html.ErrorToken:
			fmt.Println("OH NOSE!!!! ERROR before we hit the end")
			return nil
		case token == html.StartTagToken:
			tmp := parser.Token()

			isStartArticle := tmp.Data == "article"
			if isStartArticle {
				break articleTagLoop
			}
		}
	}

	// find the article header, which has author, time etc
	// starts at the article tag, ends at the article header
	// TODO: get author info and such here
articleStartLoop:
	for {
		token := parser.Next()

		switch {
		case token == html.ErrorToken:
			return nil
		case token == html.StartTagToken:
			tmp := parser.Token()

			isStartArticleBody := tmp.Data == "div"
			// loop until we are at the first paragraph of the article body
			if isStartArticleBody {
				isStartArticleBody = false
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && attr.Val == "clearfix byline-wrap" {
						isStartArticleBody = true
						break
					}
				}
				if isStartArticleBody {
					break articleStartLoop
				}
			}
		}
	}

	// find the start of the article
	// starts at the end of the article header, ends at the first article paragraph
articleBodyStartLoop:
	for {
		token := parser.Next()
		switch {
		case token == html.ErrorToken:
			return nil
		case token == html.StartTagToken:
			tmp := parser.Token()
			isStartArticleBody := tmp.Data == "p"
			if isStartArticleBody {
				break articleBodyStartLoop
			}
		}
	}

	// pull the article out of the html
	// starts at first paragraph, returns at the end of the article
	isInParagraph := true // true because we start inside the first paragraph
	depth := 1            // one because this loop starts at first paragraph
	for {
		token := parser.Next()
		switch {
		case token == html.ErrorToken:
			fmt.Println("hit err, depth is:", depth)
			return nil
		case token == html.StartTagToken:
			depth++
			tmp := parser.Token()

			isParagraph := tmp.Data == "p"
			if isParagraph {
				// start of a new paragraph
				if depth != 1 {
					fmt.Println("ERROR: hit new paragraph while depth != 0")
				}
				if isInParagraph {
					fmt.Println("ERROR: hit unexpected new paragraph tag while in paragraph")
				}
				isInParagraph = true
			}

			// text can have embeded links
			isLink := tmp.Data == "a"
			if isLink {
				if !isInParagraph {
					fmt.Println("ERROR: hit unexpected link outside of a paragraph")
//.........这里部分代码省略.........
开发者ID:jpatsenker,项目名称:Opinionated,代码行数:101,代码来源:WSJScraper.go


注:本文中的golang.org/x/net/html.Tokenizer.Token方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。