本文整理汇总了Golang中golang.org/x/net/html.Tokenizer.Token方法的典型用法代码示例。如果您正苦于以下问题:Golang Tokenizer.Token方法的具体用法?Golang Tokenizer.Token怎么用?Golang Tokenizer.Token使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类golang.org/x/net/html.Tokenizer的用法示例。
在下文中一共展示了Tokenizer.Token方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: getMatchInfoTitle
// getMatchInfoTitle scans forward through z until it meets a <title> start
// tag, then returns the Data of the token that immediately follows it.
//
// If the tokenizer reports an ErrorToken before any <title> start tag is
// seen, a message is written through Error and the empty string is returned.
func getMatchInfoTitle(z *html.Tokenizer) string {
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			// Input exhausted (or unreadable) without finding a title.
			break
		}
		if kind != html.StartTagToken {
			continue
		}
		if z.Token().Data != "title" {
			continue
		}
		// The token right after <title> is taken to be the title itself.
		z.Next()
		return z.Token().Data
	}

	Error.Printf("Could not get title...")
	return ""
}
示例2: parse2
// parse2 walks the token stream in z and assembles a Schedule from it.
//
// A <font class="PageHeading"> start tag updates the current date to the Data
// of the token that follows it. A <tr> start tag whose bgcolor is "#ffffff"
// or "#f5f5f5" is handed to parseGame together with the current date, and the
// resulting game is appended to the schedule.
//
// The schedule is returned with a nil error once the tokenizer reports an
// ErrorToken. If parseGame fails, parsing stops and (nil, err) is returned.
func parse2(z *html.Tokenizer) (*Schedule, error) {
	var date string
	result := &Schedule{}

	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return result, nil
		}
		if kind != html.StartTagToken {
			continue
		}

		tok := z.Token()
		switch {
		case isTokenTagWithAttr("font", "class", "PageHeading", &tok, z):
			// The heading text follows the <font> tag directly.
			z.Next()
			date = z.Token().Data
		case isTokenTagWithAttr("tr", "bgcolor", "#ffffff", &tok, z),
			isTokenTagWithAttr("tr", "bgcolor", "#f5f5f5", &tok, z):
			g, err := parseGame(date, z)
			if err != nil {
				return nil, err
			}
			result.Games = append(result.Games, g)
		}
	}
}
示例3: advanceToTextToken
// advanceToTextToken consumes tokens from z until it reaches a text token and
// returns a pointer to a copy of that token. It returns nil if the tokenizer
// reports an ErrorToken first.
func advanceToTextToken(z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.TextToken {
			tok := z.Token()
			return &tok
		}
	}
	return nil
}
示例4: parseGame
// parseGame reads one game row from z, starting just after the row's <tr>
// start tag, and combines it with date to build a Game.
//
// The row is consumed in this order:
//  1. the first <td>, whose following token holds the game number;
//  2. the next <td> and the <div> inside it, whose following token holds the
//     game time (if that token is blank, the next text token is used instead);
//  3. home team name, home score, away team name, away score, read through
//     parseTeamName and parseScore.
//
// date and the game time are joined with a space and parsed using the layout
// "1/2/2006 3:04 PM".
//
// An error is returned when an expected tag or the game-time text cannot be
// found before the tokenizer reports an ErrorToken, or when the date/time
// string does not parse. On error the returned Game is the zero value.
func parseGame(date string, z *html.Tokenizer) (Game, error) {
	var game Game

	if advanceToStartTag("td", z) == nil {
		return game, errors.New("Unable to find Game Number")
	}
	z.Next()
	gameNum := strings.TrimSpace(z.Token().Data)

	if advanceToStartTag("td", z) == nil {
		return game, errors.New("Unable to find Game Time")
	}
	if advanceToStartTag("div", z) == nil {
		return game, errors.New("Unable to find Game Time")
	}
	z.Next()
	gameTime := strings.TrimSpace(z.Token().Data)
	if gameTime == "" {
		t := advanceToTextToken(z)
		if t == nil {
			// advanceToTextToken returns nil when the input ends first;
			// dereferencing it would panic, so report the failure instead.
			return game, errors.New("Unable to find Game Time")
		}
		gameTime = strings.TrimSpace(t.Data)
	}

	homeTeam := parseTeamName(z)
	homeScore := parseScore(z)

	var awayTeam, awayScore string
	if len(homeScore) > 3 {
		// A "score" longer than three bytes is treated as the away team's
		// name: both scores are left empty and no further cells are read.
		// NOTE(review): presumably this is a game with no score yet — confirm.
		awayTeam, homeScore = homeScore, ""
	} else {
		awayTeam = parseTeamName(z)
		awayScore = parseScore(z)
	}

	gameDate, err := time.Parse("1/2/2006 3:04 PM", date+" "+gameTime)
	if err != nil {
		return game, err
	}
	return Game{gameDate, gameNum, homeTeam, homeScore, awayTeam, awayScore}, nil
}
示例5: advanceToStartTag
// advanceToStartTag consumes tokens from z until it reaches a start tag whose
// name equals tagName and returns a pointer to a copy of that token. It
// returns nil if the tokenizer reports an ErrorToken first.
func advanceToStartTag(tagName string, z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind != html.StartTagToken {
			continue
		}
		tok := z.Token()
		if tok.Data == tagName {
			return &tok
		}
	}
	return nil
}
示例6: getMatchInfoBets
// getMatchInfoBets scans z to the end of input and collects one Bet per
// "item" element it finds.
//
// Two kinds of start tags are recognised:
//
//   - <span class="user">: the token two steps further on is read and its
//     trimmed Data becomes the current bettor.
//   - <div> whose class contains "item": the token two steps further on is
//     read. If its first attribute value contains "clreff" the bet is flagged
//     as StatTrak and the token four steps further on is used instead. The
//     value of that token's third attribute is the item.
//
// The attribute positions above are assumptions about the page markup. When
// the token reached does not carry enough attributes, the element is skipped
// rather than indexed out of range.
//
// Each Bet is built from the most recently seen bettor, the item and the
// StatTrak flag. The result is nil when no bets were found.
func getMatchInfoBets(z *html.Tokenizer) (bets []*Bet) {
	var bettor string

	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			return bets
		}
		if tt != html.StartTagToken {
			continue
		}

		t := z.Token()
		switch t.Data {
		case "span":
			for _, a := range t.Attr {
				if a.Key == "class" && a.Val == "user" {
					z.Next()
					z.Next()
					bettor = strings.TrimSpace(z.Token().Data)
				}
			}
		case "div":
			for _, a := range t.Attr {
				if a.Key != "class" || !strings.Contains(a.Val, "item") {
					continue
				}
				z.Next()
				z.Next()
				itemTok := z.Token()
				if len(itemTok.Attr) == 0 {
					// Unexpected markup: nothing to read the StatTrak
					// marker from, so this element cannot be a bet.
					continue
				}

				// Get StatTrak status.
				statTrak := strings.Contains(itemTok.Attr[0].Val, "clreff")
				if statTrak {
					z.Next()
					z.Next()
					z.Next()
					z.Next()
					itemTok = z.Token()
				}
				if len(itemTok.Attr) < 3 {
					// Unexpected markup: the item name attribute is missing.
					continue
				}

				bets = append(bets, &Bet{bettor, itemTok.Attr[2].Val, statTrak})
			}
		}
	}
}
示例7: ParseToken
// ParseToken reads z until the tokenizer reports an ErrorToken and passes
// every start-tag token it meets, together with tag, to checkElement. All
// other token kinds are ignored.
func ParseToken(z *html.Tokenizer, tag string) {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.StartTagToken {
			checkElement(z.Token(), tag)
		}
	}
}
示例8: getMatchInfoDateTime
// getMatchInfoDateTime scans z for a <div> start tag that has a class
// attribute equal to "half" followed, later in the same tag, by a title
// attribute.
//
// The title attribute's value is returned as matchDate, and the trimmed Data
// of the token right after the tag is returned as matchTime.
//
// If the tokenizer reports an ErrorToken before such a tag is found, a
// message is written through Error and two empty strings are returned.
func getMatchInfoDateTime(z *html.Tokenizer) (matchDate, matchTime string) {
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			break
		}
		if kind != html.StartTagToken {
			continue
		}

		tok := z.Token()
		if tok.Data != "div" {
			continue
		}

		// Attribute order matters: title only counts once class="half"
		// has already been seen on this tag.
		sawHalf := false
		for _, attr := range tok.Attr {
			if attr.Key == "class" && attr.Val == "half" {
				sawHalf = true
			}
			if !sawHalf || attr.Key != "title" {
				continue
			}
			// Definitely a date now; the time is in the next token.
			z.Next()
			return attr.Val, strings.TrimSpace(z.Token().Data)
		}
	}

	Error.Printf("Could not get date and time...")
	return "", ""
}
示例9: DoParse
// DoParse extracts the article body from parser and stores it on article via
// SetData, then prints the collected text to standard output.
//
// Parsing runs in two phases:
//
//  1. Skip forward to the first <p> whose class is exactly
//     "story-body-text story-content".
//  2. Accumulate content until a <footer> whose class is exactly
//     "story-footer story-content" is reached. Link text is appended with
//     surrounding spaces, except for links whose class contains
//     "visually-hidden". Other non-tag tokens are appended only while the
//     isInParagraph flag is set.
//
// An error is returned if the tokenizer reports an ErrorToken before either
// phase finds its terminating tag.
func (article *NYTArticle) DoParse(parser *html.Tokenizer) error {
articleOpeningTagLoop:
	for {
		token := parser.Next()
		switch {
		case token == html.ErrorToken:
			return fmt.Errorf("problem moving article %s to open tag", article.GetTitle())
		case token == html.StartTagToken:
			tmp := parser.Token()
			if tmp.Data != "p" {
				continue
			}
			for _, attr := range tmp.Attr {
				if attr.Key == "class" && attr.Val == "story-body-text story-content" {
					break articleOpeningTagLoop
				}
			}
		}
	}

	// The opening loop stops just inside the first body paragraph.
	isInParagraph := true

articleClosingTagLoop:
	for {
		token := parser.Next()
		switch {
		case token == html.ErrorToken:
			return fmt.Errorf("problem scraping article %s", article.GetTitle())
		case token == html.StartTagToken:
			tmp := parser.Token()
			if tmp.Data == "footer" {
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && attr.Val == "story-footer story-content" {
						break articleClosingTagLoop
					}
				}
			}
			if tmp.Data == "p" {
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && strings.Contains(attr.Val, "story-body-text") {
						isInParagraph = true
					}
				}
				if isInParagraph {
					continue
				}
			}
			// is a link
			if tmp.Data == "a" {
				shouldSkip := false
				for _, attr := range tmp.Attr {
					if attr.Key == "class" && strings.Contains(attr.Val, "visually-hidden") {
						shouldSkip = true
					}
				}
				if shouldSkip {
					continue
				}
				// The token after <a> is taken to be the link text.
				parser.Next()
				tmp = parser.Token()
				newBody := strings.TrimSpace(article.GetData()) + " " + strings.TrimSpace(tmp.Data) + " "
				article.SetData(newBody)
				isInParagraph = true
			}
		case token == html.EndTagToken:
			tmp := parser.Token()
			if tmp.Data == "p" {
				isInParagraph = false
			}
		default:
			if !isInParagraph {
				continue
			}
			tmp := parser.Token()
			newBody := article.GetData()
			// When the new text starts with punctuation, drop the space left
			// behind by a preceding link so the punctuation attaches to it.
			// Only the first byte is inspected, so only ASCII punctuation is
			// detected. The emptiness check matters because some tokens
			// (for example an empty comment) have no Data, and indexing
			// Data[0] on them would panic.
			if tmp.Data != "" && unicode.IsPunct(rune(tmp.Data[0])) {
				newBody = strings.TrimSpace(newBody)
			}
			newBody = newBody + strings.TrimSpace(tmp.Data)
			article.SetData(newBody)
			isInParagraph = false
		}
	}

	fmt.Println(article.GetData())
	return nil
}
示例10: DoParse
// DoParse extracts the article body from parser and stores it on article via
// SetData, then prints the collected text to standard output.
//
// It first skips forward to a <p class="main-content"> start tag. After that,
// every start tag re-arms collection, and the first non-start-tag token seen
// while collection is armed has its trimmed Data appended to the article
// data. Collection stops at a <footer> whose class is exactly
// "story-footer story-content".
//
// The returned error is always nil. If the tokenizer reports an ErrorToken in
// either phase, a diagnostic is printed and the function returns early
// without printing the article.
func (article *ECONArticle) DoParse(parser *html.Tokenizer) error {
	// Phase 1: find the paragraph that opens the article.
	for found := false; !found; {
		switch parser.Next() {
		case html.ErrorToken:
			fmt.Println("Prollem")
			return nil
		case html.StartTagToken:
			tag := parser.Token()
			if tag.Data != "p" {
				break
			}
			for _, attr := range tag.Attr {
				if attr.Key == "class" && attr.Val == "main-content" {
					fmt.Println("Found it, bitch")
					found = true
					break
				}
			}
		}
	}

	// Phase 2: gather text until the closing footer.
	collecting := true
	for {
		kind := parser.Next()
		if kind == html.ErrorToken {
			fmt.Println("Prollem")
			return nil
		}

		if kind == html.StartTagToken {
			tag := parser.Token()
			atEnd := false
			if tag.Data == "footer" {
				for _, attr := range tag.Attr {
					if attr.Key == "class" && attr.Val == "story-footer story-content" {
						atEnd = true
						break
					}
				}
			}
			if atEnd {
				fmt.Println("Hit end")
				break
			}
			// Any other start tag re-arms collection for the next token.
			collecting = true
			continue
		}

		if !collecting {
			continue
		}
		piece := parser.Token()
		article.SetData(article.GetData() + strings.TrimSpace(piece.Data))
		collecting = false
	}

	fmt.Println(article.GetData())
	return nil
}
示例11: DoParse
func (article *WSJArticle) DoParse(parser *html.Tokenizer) error {
// find the start of the article
// starts at the top of the html body, ends at the article tag
articleTagLoop:
for {
token := parser.Next()
switch {
case token == html.ErrorToken:
fmt.Println("OH NOSE!!!! ERROR before we hit the end")
return nil
case token == html.StartTagToken:
tmp := parser.Token()
isStartArticle := tmp.Data == "article"
if isStartArticle {
break articleTagLoop
}
}
}
// find the article header, which has author, time etc
// starts at the article tag, ends at the article header
// TODO: get author info and such here
articleStartLoop:
for {
token := parser.Next()
switch {
case token == html.ErrorToken:
return nil
case token == html.StartTagToken:
tmp := parser.Token()
isStartArticleBody := tmp.Data == "div"
// loop until we are at the first paragraph of the article body
if isStartArticleBody {
isStartArticleBody = false
for _, attr := range tmp.Attr {
if attr.Key == "class" && attr.Val == "clearfix byline-wrap" {
isStartArticleBody = true
break
}
}
if isStartArticleBody {
break articleStartLoop
}
}
}
}
// find the start of the article
// starts at the end of the article header, ends at the first article paragraph
articleBodyStartLoop:
for {
token := parser.Next()
switch {
case token == html.ErrorToken:
return nil
case token == html.StartTagToken:
tmp := parser.Token()
isStartArticleBody := tmp.Data == "p"
if isStartArticleBody {
break articleBodyStartLoop
}
}
}
// pull the article out of the html
// starts at first paragraph, returns at the end of the article
isInParagraph := true // true because we start inside the first paragraph
depth := 1 // one because this loop starts at first paragraph
for {
token := parser.Next()
switch {
case token == html.ErrorToken:
fmt.Println("hit err, depth is:", depth)
return nil
case token == html.StartTagToken:
depth++
tmp := parser.Token()
isParagraph := tmp.Data == "p"
if isParagraph {
// start of a new paragraph
if depth != 1 {
fmt.Println("ERROR: hit new paragraph while depth != 0")
}
if isInParagraph {
fmt.Println("ERROR: hit unexpected new paragraph tag while in paragraph")
}
isInParagraph = true
}
// text can have embeded links
isLink := tmp.Data == "a"
if isLink {
if !isInParagraph {
fmt.Println("ERROR: hit unexpected link outside of a paragraph")
//.........这里部分代码省略.........