本文整理匯總了Golang中code.google.com/p/go.net/html.NewTokenizer函數的典型用法代碼示例。如果您正苦於以下問題:Golang NewTokenizer函數的具體用法?Golang NewTokenizer怎麼用?Golang NewTokenizer使用的例子?那麼,這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了NewTokenizer函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: main
// main walks the same HTML fragment three ways: once as a parsed node tree
// and twice through separate tokenizers, handing each to its own traversal
// helper.
func main() {
	const markup = `<p>Links:<a href="a1" class="test"/></p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`

	// The parse error is deliberately discarded in this example.
	root, _ := html.Parse(strings.NewReader(markup))
	traverse_html_node(root, 0)

	// Each traversal gets its own tokenizer over a fresh reader.
	byTokenizer := html.NewTokenizer(strings.NewReader(markup))
	traverse_html_tokenizer(byTokenizer)

	byToken := html.NewTokenizer(strings.NewReader(markup))
	traverse_html_token(byToken)
}
示例2: TokenizePage
// TokenizePage scans the HTML read from r and returns the words found in its
// text tokens together with the page title.
//
// Text seen between an opening and a closing title tag is passed to
// cleanTitle and becomes the title; every other text token is split with
// bstrings.TokenizeWords and appended to the word list. Scanning stops at the
// first error token reported by the tokenizer.
func TokenizePage(r io.Reader) ([]string, string) {
	var (
		words   = []string{}
		title   string
		inTitle bool
	)
	tokenizer := html.NewTokenizer(r)
	for {
		switch kind := tokenizer.Next(); kind {
		case html.ErrorToken:
			return words, title
		case html.StartTagToken, html.EndTagToken:
			name, _ := tokenizer.TagName()
			if string(name) == "title" {
				// The opening tag switches the flag on, the closing tag off.
				inTitle = kind == html.StartTagToken
			}
		case html.TextToken:
			text := string(tokenizer.Text())
			if inTitle {
				title = cleanTitle(text)
			} else {
				words = append(words, bstrings.TokenizeWords(text)...)
			}
		}
	}
}
示例3: FindLinks
// FindLinks tokenizes the HTML read from body in a background goroutine and
// sends one link value for every href attribute found on an <a> start tag.
// Both fields of each link are set to the href value.
//
// When the tokenizer reports an error token (which is also how the end of the
// input is signalled) a sentinel link{"", ""} is sent and the channel is then
// closed. Consumers may therefore either watch for the sentinel or range over
// the channel. The channel is unbuffered, so the goroutine blocks on every
// send until the caller receives.
func FindLinks(body io.Reader) chan link {
	c := make(chan link)
	go func() {
		// Closing after the sentinel lets `for l := range c` terminate
		// instead of blocking forever once the input is exhausted.
		defer close(c)

		z := html.NewTokenizer(body)
		for {
			tt := z.Next()
			if tt == html.ErrorToken {
				break
			}
			if tt != html.StartTagToken {
				continue
			}
			tn, hasAttr := z.TagName()
			if !hasAttr || string(tn) != "a" {
				continue
			}
			for {
				key, value, more := z.TagAttr()
				if string(key) == "href" {
					v := string(value)
					c <- link{v, v}
				}
				if !more {
					break
				}
			}
		}
		c <- link{"", ""}
	}()
	return c
}
示例4: getLinks
// getLinks tokenizes contents as HTML and gathers links from the start tags
// selected by getIncludedTags, doing its best with bad HTML.
//
// The bytes are first wrapped in a charset reader created for "text/html";
// failing to create that reader is the only error this function returns.
func getLinks(contents []byte) ([]*URL, error) {
	decoded, err := charset.NewReader(bytes.NewReader(contents), "text/html")
	if err != nil {
		return nil, err
	}

	var (
		z        = html.NewTokenizer(decoded)
		found    []*URL
		included = getIncludedTags()
	)
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			// TODO: should use z.Err() to see if this is io.EOF
			// (meaning success) or an actual error
			return found, nil
		}
		if kind != html.StartTagToken {
			continue
		}
		// Only tags that carry attributes can contribute a link.
		name, hasAttrs := z.TagName()
		if hasAttrs && included[string(name)] {
			found = parseAnchorAttrs(z, found)
		}
	}
}
示例5: findMetaXrdsLocation
// findMetaXrdsLocation searches the HTML read from input for
//
//	<head>
//	  <meta http-equiv="X-XRDS-Location" content="....">
//
// and returns the content attribute of the first such meta element that has a
// non-empty content. Both the plain (<meta ...>) and the self-closing
// (<meta ... />) spellings are recognized.
//
// If the closing head tag is reached first, a "not found" error is returned.
// If the tokenizer stops before that, the tokenizer's own error is returned
// unchanged.
func findMetaXrdsLocation(input io.Reader) (location string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			return "", tokenizer.Err()
		// Self-closing tags are reported as their own token type, so they
		// must be listed explicitly or <meta ... /> would never be inspected.
		case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
			tk := tokenizer.Token()
			switch {
			case tk.Data == "head":
				switch tt {
				case html.StartTagToken:
					inHead = true
				case html.EndTagToken:
					return "", errors.New("Meta X-XRDS-Location not found")
				}
			case inHead && tk.Data == "meta":
				ok := false
				content := ""
				for _, attr := range tk.Attr {
					if attr.Key == "http-equiv" &&
						attr.Val == "X-XRDS-Location" {
						ok = true
					} else if attr.Key == "content" {
						content = attr.Val
					}
				}
				if ok && len(content) > 0 {
					return content, nil
				}
			}
		}
	}
}
示例6: ExtractText
// ExtractText concatenates the text tokens that appear between the opening
// and closing body tags of the HTML read from reader, then hands the result
// to remover and returns whatever remover returns.
//
// A tokenizer error other than io.EOF aborts the scan and is returned
// together with an empty string; remover is not called in that case.
func ExtractText(reader io.Reader, remover func(string) (string, error)) (string, error) {
	var (
		text   bytes.Buffer
		inBody bool
	)
	tokenizer := html.NewTokenizer(reader)
	for {
		switch kind := tokenizer.Next(); kind {
		case html.ErrorToken:
			if err := tokenizer.Err(); err != io.EOF {
				return "", err
			}
			return remover(text.String())
		case html.StartTagToken, html.EndTagToken:
			if tokenizer.Token().DataAtom == atom.Body {
				// <body> opens the capture window, </body> closes it.
				inBody = kind == html.StartTagToken
			}
		case html.TextToken:
			if inBody {
				text.Write(tokenizer.Text())
			}
		}
	}
}
示例7: html_detect_content_type
// html_detect_content_type scans the leading bytes of a document for a meta
// tag from which detect_charset_by_token can derive a value, and returns that
// value. It returns "" when nothing is found.
//
// Scanning ends, with an empty result, as soon as one of the following holds:
//   - the closing head tag is reached,
//   - the first opening tag that is not a meta tag is something other than
//     html (the input is treated as not being an HTML document),
//   - the tokenizer reports an error token.
func html_detect_content_type(head []byte) string {
	z := html.NewTokenizer(bytes.NewReader(head))
	expect_html_root := true
	for tt := z.Next(); tt != html.ErrorToken; tt = z.Next() {
		t := z.Token()
		isOpening := tt == html.StartTagToken || tt == html.SelfClosingTagToken
		switch {
		case t.Data == "meta" && isOpening:
			if ct, ok := detect_charset_by_token(t.Attr); ok {
				return ct
			}
		case t.Data == "head" && tt == html.EndTagToken:
			// A bare `break` here would only leave the switch and keep
			// scanning the body; return to actually stop at </head>.
			return ""
		case expect_html_root && isOpening:
			if t.Data != "html" {
				// un-html file
				return ""
			}
			expect_html_root = false
		}
	}
	return ""
}
示例8: FindIcon
// FindIcon returns the href attribute of the first <link rel="shortcut icon">
// tag (plain or self-closing) that has a non-empty href. It returns ErrNoIcon
// when no such tag is seen before the tokenizer stops.
func FindIcon(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))
	for tokenizer.Next() != html.ErrorToken {
		tok := tokenizer.Token()
		if tok.DataAtom != atom.Link {
			continue
		}
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			continue
		}

		// When an attribute is repeated, the last occurrence wins.
		var rel, href string
		for _, attr := range tok.Attr {
			switch attr.Key {
			case "rel":
				rel = attr.Val
			case "href":
				href = attr.Val
			}
		}
		if rel == "shortcut icon" && href != "" {
			return href, nil
		}
	}
	// End of input and tokenizer failures are reported identically.
	return "", ErrNoIcon
}
示例9: linkParser
// linkParser starts a goroutine that reads HTML pages from page_chan,
// tokenizes each one, and sends the value of every href attribute found on an
// <a> start tag (names compared case-insensitively) to the returned channel.
//
// A line is printed to standard output after each page has been scanned. The
// returned channel is closed once page_chan is closed and drained.
func linkParser(page_chan chan string) <-chan string {
	links := make(chan string)
	go func() {
		for page := range page_chan {
			tokenizer := html.NewTokenizer(bytes.NewBufferString(page))
			for kind := tokenizer.Next(); kind != html.ErrorToken; kind = tokenizer.Next() {
				if kind != html.StartTagToken {
					continue
				}
				tag := tokenizer.Token()
				if !strings.EqualFold(tag.Data, "A") {
					continue
				}
				for _, attr := range tag.Attr {
					if strings.EqualFold(attr.Key, "HREF") {
						links <- attr.Val
					}
				}
			}
			fmt.Println("\nFinished to parse page")
		}
		close(links)
	}()
	return links
}
示例10: TestPushHTML
// TestPushHTML feeds every XHTML sample through the tokenizer, pushing each
// start or self-closing tag onto the namespace stack and popping on each end
// tag, and checks the expected prefix/uri at every depth with checkState.
func TestPushHTML(t *testing.T) {
	xmlns := NewXmlNamespace()
	// Range over the table that is actually indexed below; ranging over a
	// different sample table could skip samples or index out of range.
	for i := range xhtmlNsSamples {
		j := 0 // current nesting depth, used to index the expected values
		z := html.NewTokenizer(strings.NewReader(xhtmlNsSamples[i].sample))
		for {
			tt := z.Next()
			if tt == html.ErrorToken {
				// io.EOF is the normal end of the sample.
				if err := z.Err(); err != io.EOF {
					t.Fatal(err)
				}
				break
			}
			switch tt {
			case html.StartTagToken, html.SelfClosingTagToken:
				xmlns.PushHTML(z.Token())
				checkState("push", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				j++
			case html.EndTagToken:
				j--
				checkState("pop", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				xmlns.Pop()
			}
		}
	}
}
示例11: Sanitize
// Sanitize re-serializes the HTML in s with script elements removed and
// returns the result together with a snippet.
//
// The first result is every token outside a script element, written back
// with Token.String. The second result is snipper applied to the
// concatenation of the text tokens only.
//
// NOTE(review): if the tokenizer fails with anything other than io.EOF the
// input is returned unchanged (and snipper is applied to it), i.e. without
// any script removal — confirm callers are fine with that.
func Sanitize(s string) (string, string) {
	z := html.NewTokenizer(bytes.NewReader([]byte(s)))
	var buf, snip bytes.Buffer
	scripts := 0 // number of script start tags not yet closed
	for {
		if z.Next() == html.ErrorToken {
			if z.Err() != io.EOF {
				return s, snipper(s)
			}
			break
		}
		t := z.Token()
		if t.DataAtom == atom.Script {
			switch t.Type {
			case html.StartTagToken:
				scripts++
			case html.EndTagToken:
				// A stray </script> must not drive the counter negative,
				// otherwise everything after it would be suppressed.
				if scripts > 0 {
					scripts--
				}
			}
			continue
		}
		if scripts != 0 {
			continue
		}
		rendered := t.String()
		buf.WriteString(rendered)
		if t.Type == html.TextToken {
			snip.WriteString(rendered)
		}
	}
	return buf.String(), snipper(snip.String())
}
示例12: Autodiscover
// Autodiscover returns the href of the first feed link found in the HTML in
// b, or ErrNoRssLink when there is none.
//
// A feed link is a link start or self-closing tag with rel="alternate", a
// non-empty href, and a type of "application/rss+xml" or
// "application/atom+xml". It only counts while both the html and head flags
// are set; each flag is flipped by every token carrying the matching tag name.
func Autodiscover(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))
	var inHtml, inHead bool
	for tokenizer.Next() != html.ErrorToken {
		tok := tokenizer.Token()
		switch tok.DataAtom {
		case atom.Html:
			inHtml = !inHtml
		case atom.Head:
			inHead = !inHead
		case atom.Link:
			if !inHead || !inHtml {
				continue
			}
			if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
				continue
			}

			// When an attribute is repeated, the last occurrence wins.
			var rel, href, kind string
			for _, attr := range tok.Attr {
				switch attr.Key {
				case "rel":
					rel = attr.Val
				case "href":
					href = attr.Val
				case "type":
					kind = attr.Val
				}
			}
			isFeed := kind == "application/rss+xml" || kind == "application/atom+xml"
			if rel == "alternate" && href != "" && isFeed {
				return href, nil
			}
		}
	}
	// End of input and tokenizer failures are reported identically.
	return "", ErrNoRssLink
}
示例13: GetAllLinks
// GetAllLinks tokenizes the HTML read from data and returns the href of every
// <a> tag (plain or self-closing) whose value ends with one of the
// comma-separated suffixes in *fileType. Values starting with "//" are
// prefixed with "http:".
//
// Each matching href is returned once, even if it matches several suffixes.
// When the tokenizer stops for a reason other than io.EOF, that error is
// returned along with the links collected so far. data is not closed here.
func GetAllLinks(data io.ReadCloser) (links []string, err error) {
	tokenizer := html.NewTokenizer(data)
	// The suffix list does not change while scanning; split it once.
	extensions := strings.Split(*fileType, ",")
	for {
		tokenizer.Next()
		token := tokenizer.Token()
		switch token.Type {
		case html.ErrorToken:
			// io.EOF is the normal end of input; anything else is a real
			// failure that the caller needs to see.
			if tokErr := tokenizer.Err(); tokErr != io.EOF {
				err = tokErr
			}
			return links, err
		case html.StartTagToken, html.SelfClosingTagToken:
			if *debug {
				log.Print("type ", token.Type)
				log.Print("data ", token.Data)
			}
			if token.Data != "a" {
				continue
			}
			for _, a := range token.Attr {
				if a.Key != "href" {
					continue
				}
				for _, ext := range extensions {
					if !strings.HasSuffix(a.Val, ext) {
						continue
					}
					if strings.HasPrefix(a.Val, "//") {
						links = append(links, "http:"+a.Val)
					} else {
						links = append(links, a.Val)
					}
					// One entry per href is enough, however many
					// suffixes it matches.
					break
				}
			}
		}
	}
}
示例14: bookshelfToBooks
// bookshelfToBooks scans the HTML of a Goodreads bookshelf and returns the
// books it finds.
//
// For every token, the first attribute value is inspected: if it contains
// "/book/show", the next attribute value becomes a book title and the book is
// stored. At most 1000 books are collected.
//
// The result always has at least 100 elements; slots that were not filled
// hold zero-value Books. It grows beyond 100 only when more books are found.
//
// NOTE(review): a rating is computed when the first attribute value contains
// "staticStars", but it is stored on a per-token Book that is only kept when
// a title was set for the same token, so it does not reach the result —
// confirm the intended behavior.
func bookshelfToBooks(body io.ReadCloser) (books []Book) {
	z := html.NewTokenizer(body)
	books = make([]Book, 100)
	for i := 0; i < 1000; {
		if z.Next() == html.ErrorToken {
			return books
		}

		var book Book
		_, atr, _ := z.TagAttr()
		switch {
		case strings.Contains(string(atr), "/book/show"):
			_, next, _ := z.TagAttr()
			book.title = string(next)
		case strings.Contains(string(atr), "staticStars"):
			_, next, _ := z.TagAttr()
			book.rating = getRating(string(next))
		}
		if book.title == "" {
			continue
		}

		// The slice starts with 100 slots while the loop allows up to 1000
		// books; grow it instead of indexing past the end.
		if i < len(books) {
			books[i] = book
		} else {
			books = append(books, book)
		}
		i++
	}
	return books
}
示例15: Parse
// Parse builds a post.Post from the HTML read from reader.
//
// Every href attribute on an <a> start tag is handed to a LinkParser. Text
// tokens between the opening and closing body tags are fed to a fixed
// sequence of part parsers (receiver, sender, subject, post date, content).
// Each parser's result decides what happens next: post.Next advances to the
// following parser, post.Error stops with that parser's error, and post.Stop
// ends parsing.
//
// The returned error is the parser or tokenizer error if there was one;
// otherwise it is "malformed Post format" when parsing ended before the last
// part parser was reached. newPost is always non-nil.
func Parse(reader io.Reader) (newPost *post.Post, err error) {
	newPost = &post.Post{}
	currentIdx := 0
	parsers := []post.PartParser{&ReceiverParser{}, &SenderParser{}, &SubjectParser{}, &PostDateParser{}, &ContentParser{}}
	linkParser := &LinkParser{}
	bodyBlock := false
	z := html.NewTokenizer(reader)
loop:
	for {
		switch z.Next() {
		case html.StartTagToken:
			tk := z.Token()
			if tk.DataAtom == atom.Body {
				bodyBlock = true
			} else if tk.DataAtom == atom.A {
				for _, attr := range tk.Attr {
					if attr.Key == "href" {
						linkParser.Parse(newPost, []byte(attr.Val))
					}
				}
			}
		case html.EndTagToken:
			if z.Token().DataAtom == atom.Body {
				bodyBlock = false
			}
		case html.TextToken:
			if !bodyBlock {
				continue
			}
			switch parsers[currentIdx].Parse(newPost, z.Text()) {
			case post.Next:
				// Stay on the last parser: advancing past it would make
				// the next text token index out of range.
				if currentIdx < len(parsers)-1 {
					currentIdx++
				}
			case post.Error:
				err = parsers[currentIdx].Err()
				break loop
			case post.Stop:
				break loop
			}
		case html.ErrorToken:
			// io.EOF is the normal end of input, not a failure.
			if tokErr := z.Err(); tokErr != io.EOF {
				err = tokErr
			}
			break loop
		}
	}
	// Keep a more specific error if one was already recorded.
	if err == nil && currentIdx != len(parsers)-1 {
		err = errors.New("malformed Post format")
	}
	return newPost, err
}