當前位置: 首頁>>代碼示例>>Golang>>正文


Golang html.NewTokenizer函數代碼示例

本文整理匯總了Golang中code.google.com/p/go.net/html.NewTokenizer函數的典型用法代碼示例。如果您正苦於以下問題:Golang NewTokenizer函數的具體用法?Golang NewTokenizer怎麼用?Golang NewTokenizer使用的例子?那麼, 這裡精選的函數代碼示例或許可以為您提供幫助。


在下文中一共展示了NewTokenizer函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。

示例1: main

// main feeds one fixed HTML snippet to traverse_html_node (as a parsed
// document), then to traverse_html_tokenizer and traverse_html_token (each
// with its own fresh tokenizer over the same snippet).
func main() {
	const markup = `<p>Links:<a href="a1" class="test"/></p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`

	// The parse error is discarded; the input is a fixed in-memory literal.
	root, _ := html.Parse(strings.NewReader(markup))
	traverse_html_node(root, 0)

	traverse_html_tokenizer(html.NewTokenizer(strings.NewReader(markup)))

	traverse_html_token(html.NewTokenizer(strings.NewReader(markup)))
}
開發者ID:vvilp,項目名稱:go_test_example,代碼行數:12,代碼來源:html.go

示例2: TokenizePage

// TokenizePage scans the HTML read from r and returns the words collected
// from its text tokens together with the page title.
//
// Text seen between a <title> start tag and a </title> end tag is passed to
// cleanTitle and stored as the title (the last such text wins). All other
// text is split with bstrings.TokenizeWords and appended to the word list.
// The returned slice is never nil.
func TokenizePage(r io.Reader) ([]string, string) {
	var (
		words     = []string{}
		pageTitle string
		inTitle   bool
	)

	tokenizer := html.NewTokenizer(r)
	for {
		kind := tokenizer.Next()
		if kind == html.ErrorToken {
			// Reached on end of input as well as on a tokenizer error.
			return words, pageTitle
		}

		switch kind {
		case html.TextToken:
			content := string(tokenizer.Text())
			if inTitle {
				pageTitle = cleanTitle(content)
			} else {
				words = append(words, bstrings.TokenizeWords(content)...)
			}
		case html.StartTagToken, html.EndTagToken:
			name, _ := tokenizer.TagName()
			if string(name) == "title" {
				// Opening tag switches title mode on, closing tag switches it off.
				inTitle = kind == html.StartTagToken
			}
		}
	}
}
開發者ID:bonnefoa,項目名稱:gobot,代碼行數:32,代碼來源:html_helper.go

示例3: FindLinks

// FindLinks tokenizes body in a background goroutine and sends one link on
// the returned channel for every href attribute found on an <a> start tag;
// both fields of the link are set to the href value.
//
// When tokenizing stops (end of input or error) a final link{"", ""} is sent
// as an end-of-stream marker. The channel is unbuffered and is never closed.
func FindLinks(body io.Reader) chan link {
	out := make(chan link)

	go func() {
		tokenizer := html.NewTokenizer(body)
		for kind := tokenizer.Next(); kind != html.ErrorToken; kind = tokenizer.Next() {
			if kind != html.StartTagToken {
				continue
			}
			name, _ := tokenizer.TagName()
			if len(name) != 1 || name[0] != 'a' {
				continue
			}
			// TagAttr is called at least once, then again for as long as it
			// reports that more attributes follow.
			for more := true; more; {
				var key, val []byte
				key, val, more = tokenizer.TagAttr()
				if string(key) == "href" {
					href := string(val)
					out <- link{href, href}
				}
			}
		}
		out <- link{"", ""}
	}()

	return out
}
開發者ID:p,項目名稱:smf-mirror,代碼行數:33,代碼來源:mirror.go

示例4: getLinks

// getLinks parses the response for links, doing its best with bad HTML.
//
// contents is wrapped in a charset.NewReader (content type "text/html") and
// tokenized. For every start tag that carries attributes and whose name is
// enabled in the map returned by getIncludedTags, parseAnchorAttrs is called
// to extend the result. An error is returned only when the charset reader
// cannot be created; tokenizer errors end the scan silently.
func getLinks(contents []byte) ([]*URL, error) {
	decoded, err := charset.NewReader(bytes.NewReader(contents), "text/html")
	if err != nil {
		return nil, err
	}

	var found []*URL
	included := getIncludedTags()
	tokenizer := html.NewTokenizer(decoded)

	for {
		kind := tokenizer.Next()
		if kind == html.ErrorToken {
			//TODO: should use tokenizer.Err() to see if this is io.EOF
			//		(meaning success) or an actual error
			break
		}
		if kind != html.StartTagToken {
			continue
		}

		name, hasAttrs := tokenizer.TagName()
		if hasAttrs && included[string(name)] {
			found = parseAnchorAttrs(tokenizer, found)
		}
	}

	return found, nil
}
開發者ID:pombredanne,項目名稱:walker-2,代碼行數:29,代碼來源:fetcher.go

示例5: findMetaXrdsLocation

// findMetaXrdsLocation searches an HTML document for
//
//	<head>
//	   <meta http-equiv="X-XRDS-Location" content="....">
//
// and returns the value of the content attribute.
//
// Only <meta> tags seen after <head> has been opened are considered. Both
// the HTML form <meta ...> and the XHTML self-closing form <meta ... /> are
// accepted. The search ends at </head> (or an empty <head/>) with a
// "not found" error; if the input ends before that, the tokenizer's error is
// returned (io.EOF for a plain end of input).
func findMetaXrdsLocation(input io.Reader) (location string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			return "", tokenizer.Err()
		case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
			// SelfClosingTagToken must be handled too: XHTML pages write
			// <meta ... />, which the tokenizer does not report as a start tag.
			tk := tokenizer.Token()
			if tk.Data == "head" {
				if tt != html.StartTagToken {
					// The head is over; the tag cannot appear any more.
					return "", errors.New("Meta X-XRDS-Location not found")
				}
				inHead = true
			} else if inHead && tk.Data == "meta" {
				ok := false
				content := ""
				for _, attr := range tk.Attr {
					if attr.Key == "http-equiv" &&
						attr.Val == "X-XRDS-Location" {
						ok = true
					} else if attr.Key == "content" {
						content = attr.Val
					}
				}
				if ok && len(content) > 0 {
					return content, nil
				}
			}
		}
	}
}
開發者ID:JamesDunne,項目名稱:go-openid,代碼行數:38,代碼來源:yadis_discovery.go

示例6: ExtractText

// ExtractText concatenates every text token that appears between a <body>
// start tag and a </body> end tag in the HTML read from reader, then returns
// whatever remover produces for that text.
//
// A tokenizer error other than io.EOF aborts the scan and is returned with
// an empty string; remover is not called in that case.
func ExtractText(reader io.Reader, remover func(string) (string, error)) (string, error) {
	var (
		text   bytes.Buffer
		inBody bool
	)

	tokenizer := html.NewTokenizer(reader)
	for {
		kind := tokenizer.Next()

		if kind == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				return "", tokenizer.Err()
			}
			return remover(text.String())
		}

		switch kind {
		case html.StartTagToken, html.EndTagToken:
			if tokenizer.Token().DataAtom == atom.Body {
				// <body> opens the capture window, </body> closes it.
				inBody = kind == html.StartTagToken
			}
		case html.TextToken:
			if inBody {
				text.Write(tokenizer.Text())
			}
		}
	}
}
開發者ID:postfix,項目名稱:spamdefender,代碼行數:32,代碼來源:htmlcleaner.go

示例7: html_detect_content_type

// html_detect_content_type tokenizes head as HTML and returns the string
// reported by detect_charset_by_token for the first <meta> start or
// self-closing tag on which that helper succeeds.
//
// It returns "" when:
//   - the </head> end tag is reached without a match,
//   - the first start/self-closing tag that is not a <meta> is something
//     other than <html> (the input is treated as not being an HTML document),
//   - or the input ends (or fails to tokenize) before a match.
func html_detect_content_type(head []byte) string {
	reader := bytes.NewReader(head)
	z := html.NewTokenizer(reader)
	expectHTMLRoot := true
	for tt := z.Next(); tt != html.ErrorToken; tt = z.Next() {
		t := z.Token()
		opening := tt == html.StartTagToken || tt == html.SelfClosingTagToken
		switch {
		case t.Data == "meta" && opening:
			if ct, ok := detect_charset_by_token(t.Attr); ok {
				return ct
			}
		case t.Data == "head" && tt == html.EndTagToken:
			// Stop scanning at </head>. A bare break here would only leave
			// the switch and the loop would keep running, so return instead.
			return ""
		case expectHTMLRoot && opening:
			if t.Data != "html" {
				// un-html file
				return ""
			}
			expectHTMLRoot = false
		}
	}
	return ""
}
開發者ID:heartszhang,項目名稱:famous,代碼行數:25,代碼來源:utils.go

示例8: FindIcon

// FindIcon returns the href attribute of the first <link rel="shortcut icon">
// start or self-closing tag in b that has a non-empty href.
//
// ErrNoIcon is returned when no such tag is found before the end of input,
// and also when the tokenizer stops with an error other than io.EOF.
func FindIcon(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))
	for {
		if tokenizer.Next() == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				return "", ErrNoIcon
			}
			break
		}

		tok := tokenizer.Token()
		if tok.DataAtom != atom.Link {
			continue
		}
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			continue
		}

		// If an attribute is repeated, the last occurrence wins.
		var rel, href string
		for _, attr := range tok.Attr {
			switch attr.Key {
			case "rel":
				rel = attr.Val
			case "href":
				href = attr.Val
			}
		}
		if rel == "shortcut icon" && href != "" {
			return href, nil
		}
	}
	return "", ErrNoIcon
}
開發者ID:RenzoF,項目名稱:goread,代碼行數:28,代碼來源:autodiscover.go

示例9: linkParser

// linkParser starts a goroutine that reads HTML pages from page_chan and
// sends the value of every href attribute found on an <a> start tag on the
// returned channel (tag and attribute names are compared case-insensitively).
//
// A notice is printed to standard output after each page has been tokenized.
// The returned channel is closed once page_chan has been closed and drained.
func linkParser(page_chan chan string) <-chan string {
	links := make(chan string)
	go func() {
		for page := range page_chan {
			var src io.Reader = bytes.NewBufferString(page)
			tokenizer := html.NewTokenizer(src)
			for kind := tokenizer.Next(); kind != html.ErrorToken; kind = tokenizer.Next() {
				tok := tokenizer.Token()
				if kind != html.StartTagToken || !strings.EqualFold(tok.Data, "A") {
					continue
				}
				for _, attr := range tok.Attr {
					if strings.EqualFold(attr.Key, "HREF") {
						links <- attr.Val
					}
				}
			}
			fmt.Println("\nFinished to parse page")
		}
		close(links)
	}()
	return links
}
開發者ID:rodsenra,項目名稱:go_exercises,代碼行數:30,代碼來源:main.go

示例10: TestPushHTML

// TestPushHTML tokenizes every entry of xhtmlNsSamples and verifies the
// namespace state via checkState: after each push (start or self-closing tag)
// and before each pop (end tag), against the sample's expected prefix and uri
// tables at the current depth index.
func TestPushHTML(t *testing.T) {
	xmlns := NewXmlNamespace()

	// Iterate over the same table the loop body indexes. Ranging over a
	// different table risks an out-of-range index or silently skipped samples.
	for i := range xhtmlNsSamples {
		j := 0
		z := html.NewTokenizer(strings.NewReader(xhtmlNsSamples[i].sample))
		for {
			tt := z.Next()
			if tt == html.ErrorToken {
				if err := z.Err(); err != io.EOF {
					t.Fatal(err)
				}
				// End of this sample.
				break
			}
			switch tt {
			case html.StartTagToken, html.SelfClosingTagToken:
				xmlns.PushHTML(z.Token())
				checkState("push", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				j++
			case html.EndTagToken:
				j--
				checkState("pop", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				xmlns.Pop()
			}
		}
	}
}
開發者ID:jimrobinson,項目名稱:xml,代碼行數:29,代碼來源:xmlns_test.go

示例11: Sanitize

// Sanitize re-serializes the HTML in s while dropping <script> elements and
// returns the result together with a snippet.
//
// Tokens whose atom is Script are never emitted; a counter tracks script
// start and end tags, and other tokens are emitted only while it is zero.
// Text tokens that are emitted are also collected, and the snippet is
// snipper applied to that collected text.
//
// If the tokenizer stops with an error other than io.EOF, s is returned
// unchanged along with snipper(s).
func Sanitize(s string) (string, string) {
	tokenizer := html.NewTokenizer(bytes.NewReader([]byte(s)))

	var cleaned, text bytes.Buffer
	scriptDepth := 0
	for {
		if tokenizer.Next() == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				return s, snipper(s)
			}
			return cleaned.String(), snipper(text.String())
		}

		tok := tokenizer.Token()
		switch {
		case tok.DataAtom == atom.Script:
			switch tok.Type {
			case html.StartTagToken:
				scriptDepth++
			case html.EndTagToken:
				scriptDepth--
			}
		case scriptDepth == 0:
			rendered := tok.String()
			cleaned.WriteString(rendered)
			if tok.Type == html.TextToken {
				text.WriteString(rendered)
			}
		}
	}
}
開發者ID:baijum,項目名稱:goread,代碼行數:31,代碼來源:sanitize.go

示例12: Autodiscover

// Autodiscover returns the href of the first <link> start or self-closing tag
// in b that has rel="alternate", a non-empty href and a type of
// "application/rss+xml" or "application/atom+xml".
//
// A link is only considered while both the html and head flags are set; each
// flag is toggled by every token whose atom is Html or Head respectively.
// ErrNoRssLink is returned when nothing matches before the end of input, and
// also when the tokenizer stops with an error other than io.EOF.
func Autodiscover(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))

	var withinHTML, withinHead bool
	for {
		if tokenizer.Next() == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				return "", ErrNoRssLink
			}
			break
		}

		tok := tokenizer.Token()
		switch tok.DataAtom {
		case atom.Html:
			withinHTML = !withinHTML
		case atom.Head:
			withinHead = !withinHead
		case atom.Link:
			if !withinHead || !withinHTML {
				continue
			}
			if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
				continue
			}

			// If an attribute is repeated, the last occurrence wins.
			var rel, href, kind string
			for _, attr := range tok.Attr {
				switch attr.Key {
				case "rel":
					rel = attr.Val
				case "href":
					href = attr.Val
				case "type":
					kind = attr.Val
				}
			}
			isFeed := kind == "application/rss+xml" || kind == "application/atom+xml"
			if rel == "alternate" && href != "" && isFeed {
				return href, nil
			}
		}
	}

	return "", ErrNoRssLink
}
開發者ID:johnvilsack,項目名稱:golang-stuff,代碼行數:35,代碼來源:autodiscover.go

示例13: GetAllLinks

// GetAllLinks tokenizes the HTML in data and collects the href values of <a>
// start or self-closing tags that end with one of the comma-separated
// suffixes in *fileType. An href is appended once per matching suffix, and a
// value starting with "//" is prefixed with "http:".
//
// When *debug is set, the type and data of every start/self-closing tag are
// logged. err is never assigned, so it is always nil. data is not closed by
// this function.
func GetAllLinks(data io.ReadCloser) (links []string, err error) {
	tokenizer := html.NewTokenizer(data)
	for {
		tokenizer.Next()
		tok := tokenizer.Token()

		if tok.Type == html.ErrorToken {
			break
		}
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			// End tags, comments, text and anything else carry no links.
			continue
		}

		if *debug {
			log.Print("type ", tok.Type)
			log.Print("data ", tok.Data)
		}
		if tok.Data != "a" {
			continue
		}

		for _, attr := range tok.Attr {
			if attr.Key != "href" {
				continue
			}
			for _, ext := range strings.Split(*fileType, ",") {
				if !strings.HasSuffix(attr.Val, ext) {
					continue
				}
				target := attr.Val
				if strings.HasPrefix(target, "//") {
					target = "http:" + target
				}
				links = append(links, target)
			}
		}
	}
	return links, err
}
開發者ID:uovobw,項目名稱:multiget,代碼行數:35,代碼來源:multiget.go

示例14: bookshelfToBooks

// bookshelfToBooks extracts books from the HTML of a Goodreads bookshelf.
//
// For every token the first attribute value is inspected. If it contains
// "/book/show", the next attribute value is taken as the book title and the
// book is recorded. If it contains "staticStars", the next attribute value is
// passed to getRating.
//
// The returned slice always has at least 100 entries; entries that were not
// filled are zero-value Books. Scanning stops at the end of input (or on a
// tokenizer error) or once 1000 books have been recorded.
func bookshelfToBooks(body io.ReadCloser) (books []Book) {
	z := html.NewTokenizer(body)

	books = make([]Book, 100)
	for i := 0; i < 1000; {
		if z.Next() == html.ErrorToken {
			return books
		}

		var book Book
		_, atr, _ := z.TagAttr()
		switch {
		case strings.Contains(string(atr), "/book/show"):
			_, title, _ := z.TagAttr()
			book.title = string(title)
		case strings.Contains(string(atr), "staticStars"):
			// NOTE(review): this rating lands on a per-token Book that has no
			// title and is therefore never stored below. Confirm whether the
			// rating was meant to be attached to the preceding book.
			_, stars, _ := z.TagAttr()
			book.rating = getRating(string(stars))
		}

		if book.title == "" {
			continue
		}
		if i < len(books) {
			books[i] = book
		} else {
			// Beyond the 100 preallocated slots, grow the slice; indexing
			// books[i] directly would panic for the 101st book.
			books = append(books, book)
		}
		i++
	}

	return books
}
開發者ID:robertseaton,項目名稱:creep,代碼行數:31,代碼來源:main.go

示例15: Parse

// Parse builds a post.Post from the HTML read from reader.
//
// Every href attribute of an <a> start tag is handed to a LinkParser. Text
// tokens between <body> and </body> are fed to a fixed sequence of part
// parsers (receiver, sender, subject, post date, content). Each parser
// reports how to proceed: post.Next advances to the following parser,
// post.Stop ends parsing, and post.Error ends parsing with that parser's
// error.
//
// newPost is always non-nil. err is the first parser or tokenizer error
// (io.EOF is not an error). If there was none, it is a "malformed Post
// format" error unless parsing ended while the last parser was current.
func Parse(reader io.Reader) (newPost *post.Post, err error) {

	newPost = &post.Post{}
	currentIdx := 0
	parsers := []post.PartParser{&ReceiverParser{}, &SenderParser{}, &SubjectParser{}, &PostDateParser{}, &ContentParser{}}
	linkParser := &LinkParser{}
	bodyBlock := false

	z := html.NewTokenizer(reader)

loop:
	for {
		tokenType := z.Next()
		switch tokenType {
		case html.StartTagToken:
			tk := z.Token()
			if tk.DataAtom == atom.Body {
				bodyBlock = true
			} else if tk.DataAtom == atom.A {
				for _, attr := range tk.Attr {
					if attr.Key == "href" {
						linkParser.Parse(newPost, []byte(attr.Val))
					}
				}
			}
		case html.EndTagToken:
			if z.Token().DataAtom == atom.Body {
				bodyBlock = false
			}
		case html.TextToken:
			// Once the last parser has reported post.Next, currentIdx equals
			// len(parsers). Skip further text instead of indexing out of range.
			if !bodyBlock || currentIdx >= len(parsers) {
				continue
			}
			switch parsers[currentIdx].Parse(newPost, z.Text()) {
			case post.Next:
				currentIdx++
			case post.Error:
				err = parsers[currentIdx].Err()
				break loop
			case post.Stop:
				break loop
			}
		case html.ErrorToken:
			if z.Err() != io.EOF {
				err = z.Err()
			}
			break loop
		}
	}

	// Keep a more specific parser/tokenizer error rather than overwriting it.
	if err == nil && currentIdx != len(parsers)-1 {
		err = errors.New("malformed Post format")
	}

	return newPost, err
}
開發者ID:postfix,項目名稱:spamdefender,代碼行數:58,代碼來源:parse.go


注:本文中的code.google.com/p/go.net/html.NewTokenizer函數示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。