当前位置: 首页>>代码示例>>Golang>>正文


Golang Node.Data方法代码示例

本文整理汇总了Golang中golang.org/x/net/html.Node.Data方法的典型用法代码示例(注:Data实际上是html.Node结构体的一个字段,而非方法,下文示例均为对该字段的读取与赋值)。如果您正苦于以下问题:Golang Node.Data方法的具体用法?Golang Node.Data怎么用?Golang Node.Data使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在golang.org/x/net/html.Node的用法示例。


在下文中一共展示了Node.Data方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: FindTitleAndBody_Ria

// FindTitleAndBody_Ria locates the article title and body nodes in
// ria.ru-style HTML. They are recognised by their itemprop attribute:
// "name" marks the title and "articleBody" marks the body.
// Worked cleanly against the site's markup as of 15.12.2015.
//
// A matching node is renamed in place: its Data becomes "title" or "body".
// The subtree under node is searched depth-first, and a match reported by a
// child replaces whatever was found before it. Scanning of the children
// stops as soon as both results are known. Either result may be nil.
func FindTitleAndBody_Ria(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
	attrs:
		for _, attr := range node.Attr {
			if attr.Key != "itemprop" {
				continue
			}
			switch attr.Val {
			case "articleBody":
				node.Data = "body"
				fulltext = node
				break attrs
			case "name":
				node.Data = "title"
				title = node
				break attrs
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_Ria(child)
		if childTitle != nil {
			title = childTitle
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:37,代码来源:cleaner.go

示例2: mergeNodes

// mergeNodes tidies the boundary between prev and next, two children of
// parent, optionally putting a single space between them.
//
//   - Both are text nodes: next is folded into prev and removed from parent;
//     the node that now follows prev is returned.
//   - Only prev is a text node: the separator is appended to prev.
//   - Only next is a text node: the separator is prepended to next.
//   - Neither is a text node and addSeparator is set: a new text node holding
//     the separator is inserted before next.
//
// In every case except the first, next is returned. prev and next may be nil.
func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	var sep string
	if addSeparator {
		sep = " "
	}

	prevIsText := prev != nil && prev.Type == html.TextNode
	nextIsText := next != nil && next.Type == html.TextNode

	switch {
	case prevIsText && nextIsText:
		prev.Data += sep + next.Data
		parent.RemoveChild(next)
		return prev.NextSibling
	case prevIsText:
		prev.Data += sep
	case nextIsText:
		next.Data = sep + next.Data
	case addSeparator:
		parent.InsertBefore(&html.Node{Type: html.TextNode, Data: sep}, next)
	}

	return next
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:27,代码来源:parser_utils.go

示例3: FindTitleAndBody_MK

// FindTitleAndBody_MK locates the article title and body nodes in markup
// where the body is the element whose class attribute is exactly "content".
//
// When node itself is that element, FindTitleMK is asked for the title and
// node is renamed in place (Data becomes "body"). The children are then
// searched recursively; a title reported by a child replaces the current one
// and has its Data set to "title", and a body reported by a child replaces
// the current body. Scanning stops once both results are known. Either
// result may be nil.
func FindTitleAndBody_MK(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key != "class" || attr.Val != "content" {
				continue
			}
			// Look up the title before the node is renamed.
			title = FindTitleMK(node)
			node.Data = "body"
			fulltext = node
			break
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_MK(child)
		if childTitle != nil {
			childTitle.Data = "title"
			title = childTitle
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:32,代码来源:cleaner.go

示例4: reIndent

// reIndent recursively walks the subtree rooted at n and rewrites its
// whitespace: newline/tab text nodes are placed around elements and comments,
// and the content of text nodes is normalized. lvl is the depth of n; each
// recursion into a child passes lvl+1.
//
// NOTE(review): the indent is built with strings.Repeat("\t", lvl-2), which
// panics for a negative count — presumably cScaffoldLvls is always >= 1 so
// that lvl-2 cannot go negative; confirm against its definition.
func reIndent(n *html.Node, lvl int) {

	// Deeper than the scaffold levels a node without a parent is unexpected:
	// log it (with a hint when the node is known to ml3) and skip the subtree.
	if lvl > cScaffoldLvls && n.Parent == nil {
		bb := dom.PrintSubtree(n)
		_ = bb
		// log.Printf("%s", bb.Bytes())
		hint := ""
		if ml3[n] > 0 {
			hint = "   from ml3"
		}
		log.Print("reIndent: no parent ", hint)
		return
	}

	// Processing that happens before the children are visited.
	switch n.Type {
	case html.ElementNode:
		// Elements nested inside another element get a newline plus
		// lvl-2 tabs passed to dom.InsertBefore (presumably inserted as a
		// sibling in front of n — the helper is defined elsewhere).
		if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
			ind := strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n" + ind})
		}
	case html.CommentNode:
		// Comments only get a bare newline, without indentation.
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	case html.TextNode:
		// Collapse surrounding whitespace to exactly one trailing space ...
		n.Data = strings.TrimSpace(n.Data) + " "
		// ... and one leading space, unless the text starts with "," or ".".
		if !strings.HasPrefix(n.Data, ",") && !strings.HasPrefix(n.Data, ".") {
			n.Data = " " + n.Data
		}
		// Link texts carry no surrounding whitespace at all.
		if n.Parent != nil && n.Parent.Data == "a" {
			n.Data = strings.TrimSpace(n.Data)
		}
	}

	// Children
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		reIndent(c, lvl+1)
	}

	// Processing that happens after the children have been visited.
	switch n.Type {
	case html.ElementNode:
		// The reason is not understood (per the original author), but this
		// step has to happen AFTER the children were processed.
		if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
			ind := strings.Repeat("\t", lvl-2)
			ind = "\n" + ind
			// Links get an empty text node instead of a newline + indent.
			if n.Data == "a" {
				ind = ""
			}
			// Only elements that have children get the trailing text node,
			// passed to dom.InsertAfter relative to the last child.
			if n.LastChild != nil {
				dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: ind})
			}
		}
	}

}
开发者ID:aarzilli,项目名称:tools,代码行数:58,代码来源:09_reformat_indent.go

示例5: toDiv

// toDiv rewrites node in place as a bare <div>: its tag name and atom are
// replaced and all of its attributes are dropped. The node is then passed to
// m.parseChildren, whose results are returned unchanged.
func (m *minificationHTML) toDiv(node *html.Node) (*html.Node, error) {
	node.Data, node.DataAtom = "div", atom.Div
	node.Attr = nil
	return m.parseChildren(node)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:7,代码来源:minification_html.go

示例6: getSiblingsContent

// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is itself a <p> with non-empty text,
// the sibling is returned as the only result. Otherwise every descendant <p>
// with non-empty text is scored: it qualifies when its stop-word count
// exceeds 0.30 * baselinescoreSiblingsPara and isHighLinkDensity reports
// false for it. For each qualifying paragraph a new, detached selection is
// returned that wraps a single text node holding the paragraph's text.
//
// The returned slice is never nil.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	paragraphs := []*goquery.Selection{}

	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(paragraphs, currentSibling)
	}

	const siblingBaselineScore = 0.30

	currentSibling.Find("p").Each(func(_ int, candidate *goquery.Selection) {
		text := candidate.Text()
		if len(text) == 0 {
			return
		}

		stopWords := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		stopWordTotal := stopWords.stopWordCount
		linkHeavy := this.isHighLinkDensity(candidate)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara

		if float64(stopWordTotal) > threshold && !linkHeavy {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			paragraphs = append(paragraphs, &goquery.Selection{
				Nodes: []*html.Node{textNode},
			})
		}
	})

	return paragraphs
}
开发者ID:hotei,项目名称:GoOse,代码行数:32,代码来源:extractor.go

示例7: cleanseDom

// cleanseDom performs a brute-force reduction and simplification of the
// subtree rooted at n. lvl is the depth of n; each recursion passes lvl+1.
//
// For every node, in this order: unwanted attributes are stripped, the
// children are cleansed recursively, the node is passed to removeUnwanted or
// convertUnwanted (chosen by directlyRemoveUnwanted), then to convertExotic,
// and finally — for text nodes only — its inner whitespace is normalized.
func cleanseDom(n *html.Node, lvl int) {
	n.Attr = removeAttr(n.Attr, unwantedAttrs)

	// Descend first, so the steps below run on already-cleansed children.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		cleanseDom(child, lvl+1)
	}

	if !directlyRemoveUnwanted {
		convertUnwanted(n)
	} else {
		removeUnwanted(n)
	}

	convertExotic(n)

	// One-time text normalization; nothing more to do for other node types.
	if n.Type != html.TextNode {
		return
	}
	n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
}
开发者ID:aarzilli,项目名称:tools,代码行数:27,代码来源:01_cleanse.go

示例8: copyNode

// copyNode overwrites the Type, DataAtom, Data, Namespace and Attr fields of
// to with those of from. Parent, child and sibling links are left untouched.
// Attr is assigned, not duplicated, so both nodes end up referring to the
// same attribute slice.
func copyNode(to, from *html.Node) {
	to.Type, to.DataAtom, to.Data = from.Type, from.DataAtom, from.Data
	to.Namespace = from.Namespace
	to.Attr = from.Attr
}
开发者ID:documize,项目名称:html-diff,代码行数:7,代码来源:nodes.go

示例9: img2Link

// img2Link converts an <img> element in place into an <a> element with a
// textual description. Nodes whose Data is not "img" are left untouched.
//
// Every "src" attribute is renamed to "href", a marker attribute
// cfrom="img" is set, and a text child of the form
// "[img] <label> <beautified href> | " is appended. The label is the title
// attribute, or "[ctdr]" when closureTextNodeExists reports true for the
// node (per the original note: content title double removed).
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}

	img.Data = "a"
	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	hasDuplicateText := closureTextNodeExists(img)
	label := attrX(img.Attr, "title")
	if hasDuplicateText {
		label = "[ctdr]" // content title double removed
	}
	linkText := fmt.Sprintf("[img] %v %v | ",
		label,
		urlBeautify(attrX(img.Attr, "href")))

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", linkText))
}
开发者ID:aarzilli,项目名称:tools,代码行数:32,代码来源:06_img2link.go

示例10: parseText

// parseText runs node.Data through m.processText. A non-empty result
// replaces the node's data; an empty result removes the node from its
// parent. The node that followed the input node is returned so the caller
// can continue its traversal; the error is always nil.
func (m *minificationText) parseText(node *html.Node) (*html.Node, error) {
	// Remember the successor up front: removing the node from its parent
	// resets the node's sibling links.
	following := node.NextSibling

	if cleaned := m.processText(node.Data); cleaned == "" {
		node.Parent.RemoveChild(node)
	} else {
		node.Data = cleaned
	}

	return following, nil
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:10,代码来源:minification_text.go

示例11: Nd

// Nd creates a new, detached html.Node.
//
// With ntype "text" the result is a text node whose Data is content[0] (or
// empty when no content is given). Any other ntype yields an element node
// with ntype as its tag name; element nodes cannot carry content, so passing
// any is reported with a stack trace and a log line, and the content is
// ignored.
func Nd(ntype string, content ...string) *html.Node {
	if ntype == "text" {
		textNode := &html.Node{Type: html.TextNode}
		if len(content) > 0 {
			textNode.Data = content[0]
		}
		return textNode
	}

	if len(content) > 0 {
		runtimepb.StackTrace(4)
		log.Printf("Element nodes can't have content")
	}
	return &html.Node{Type: html.ElementNode, Data: ntype}
}
开发者ID:aarzilli,项目名称:tools,代码行数:21,代码来源:mini_go_query.go

示例12: walkPrint

// walkPrint writes an HTML-flavoured debug dump of n, its following siblings
// and all of their descendants to w. i is the indentation depth (number of
// leading tabs) for this level; children are printed with i+1.
//
// Whitespace-only text nodes are skipped. Element nodes are printed as an
// escaped tag name followed by "(MaxChild/Count = percent%)" and their
// attributes; every other node is printed as its quoted, HTML-escaped data.
// A node that is recorded as the Child of its parent's data is wrapped in a
// tinted <div>, and a node flagged Chosen or ChosenBy is wrapped in a
// coloured <div id="chosen">.
//
// Node data longer than 40 runes is shortened for display only: the
// truncation is applied to a local copy, so the tree being printed is never
// modified. Write errors are ignored; the dump is best-effort.
func walkPrint(w io.Writer, i int, n *html.Node) {
	const maxLabelRunes = 40

	for ; n != nil; n = n.NextSibling {
		if n.Type == html.TextNode && strings.TrimSpace(n.Data) == "" {
			continue
		}

		d := getData(n)
		isMostChild := getData(n.Parent).Child == n
		if isMostChild {
			io.WriteString(w, `<div style="background: rgba(0, 0, 100, 0.1)">`)
		}
		if d.Chosen || d.ChosenBy {
			color := "rgb(40, 79, 40)"
			if d.ChosenBy {
				color = "rgba(90, 60, 30, 0.8)"
			}
			io.WriteString(w, `<div id="chosen" style="background: `+color+`;color: #fff">`)
		}

		// Share of the largest child in percent; 0 when there is no count,
		// which also avoids dividing by zero.
		factor := 0
		if d.Count > 0 {
			factor = d.MaxChild * 100 / d.Count
		}

		// Shorten by runes, not bytes, so multi-byte characters are never
		// cut in half. Work on a copy to leave n.Data intact.
		label := n.Data
		if runes := []rune(label); len(runes) > maxLabelRunes {
			label = string(runes[:maxLabelRunes])
		}

		indent := strings.Repeat("\t", i)
		if n.Type == html.ElementNode {
			fmt.Fprintf(w, "%v&lt;%v&gt;", indent, label)
			fmt.Fprintf(w, " (%v/%v = <b>%v%%</b>) - %v\n",
				d.MaxChild,
				d.Count,
				factor,
				n.Attr,
			)
		} else {
			fmt.Fprintf(w, "%v%v\n", indent, strconv.Quote(ghtml.EscapeString(label)))
		}

		if n.FirstChild != nil {
			walkPrint(w, i+1, n.FirstChild)
		}

		// Close the wrappers in the reverse order of the opening tags' intent:
		// the "most child" tint first, then the "chosen" highlight, exactly as
		// the original output did.
		if isMostChild {
			io.WriteString(w, `</div>`)
		}
		if d.Chosen || d.ChosenBy {
			io.WriteString(w, "</div>")
		}
	}
}
开发者ID:chzyer,项目名称:pocket,代码行数:52,代码来源:utils.go

示例13: CloneNode

// CloneNode returns a deep copy of n: the node's Type, DataAtom and Data are
// copied, its attributes are duplicated into a fresh slice, and every
// descendant is cloned recursively and appended in the original order. The
// returned node has no parent or siblings.
func CloneNode(n *exphtml.Node) *exphtml.Node {
	attrs := make([]exphtml.Attribute, len(n.Attr))
	copy(attrs, n.Attr)

	clone := &exphtml.Node{
		Type:     n.Type,
		DataAtom: n.DataAtom,
		Data:     n.Data,
		Attr:     attrs,
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		clone.AppendChild(CloneNode(child))
	}
	return clone
}
开发者ID:kristofer,项目名称:go-html-transform,代码行数:14,代码来源:node.go

示例14: TestParseScriptTagNoSrc

func TestParseScriptTagNoSrc(t *testing.T) {
	node := new(html.Node)
	node.Data = "script"

	page := newWebPage(startUrl)
	page.parseScriptTag(node)

	expected1 := 0
	val1 := page.scriptFiles.Len()
	if val1 != expected1 {
		t.Error("Expected:", expected1, " Got:", val1)
	}
}
开发者ID:zlisinski,项目名称:go_crawl,代码行数:13,代码来源:go_crawl_test.go

示例15: TestParseATagNoHref

func TestParseATagNoHref(t *testing.T) {
	node := new(html.Node)
	node.Data = "a"

	page := newWebPage(startUrl)
	page.parseATag(node)

	expected1 := 0
	val1 := page.links.Len()
	if val1 != expected1 {
		t.Error("Expected:", expected1, " Got:", val1)
	}
}
开发者ID:zlisinski,项目名称:go_crawl,代码行数:13,代码来源:go_crawl_test.go


注:本文中的golang.org/x/net/html.Node.Data方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。