当前位置: 首页>>代码示例>>Golang>>正文


Golang html.Node类代码示例

本文整理汇总了Golang中golang.org/x/net/html.Node的典型用法代码示例。如果您正苦于以下问题:Golang Node类的具体用法?Golang Node怎么用?Golang Node使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Node类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: removeNegativeAttributeMatches

// removeNegativeAttributeMatches prunes, in place, the children of n whose
// "id" or "class" attribute value contains a token matched by negativeregex,
// and returns n.
//
// Only non-text children whose Data matches containerregrex are examined.
// The attribute value is lower-cased and split on nonwordregex; the child is
// removed as soon as one token matches negativeregex. A surviving child that
// carries an id or class attribute is processed recursively. Children without
// either attribute are left untouched, which matches the previous behaviour.
//
// Removal goes through (*html.Node).RemoveChild so that Parent, PrevSibling,
// NextSibling and LastChild all stay consistent. RemoveChild panics if the
// child's Parent is not n, so n must be a well-formed tree.
func removeNegativeAttributeMatches(n *html.Node) *html.Node {
	var next *html.Node
	for c := n.FirstChild; c != nil; c = next {
		// Remember the successor first: RemoveChild clears c.NextSibling.
		next = c.NextSibling

		if c.Type == html.TextNode || !containerregrex.MatchString(c.Data) {
			continue
		}

		hasIDOrClass := false
		negative := false
	attrs:
		for _, attr := range c.Attr {
			key := strings.ToLower(attr.Key)
			if key != "id" && key != "class" {
				continue
			}
			hasIDOrClass = true
			for _, value := range nonwordregex.Split(strings.ToLower(attr.Val), -1) {
				if negativeregex.MatchString(value) {
					negative = true
					break attrs
				}
			}
		}

		switch {
		case negative:
			n.RemoveChild(c)
		case hasIDOrClass:
			// The child stays in place; only its own subtree is pruned.
			removeNegativeAttributeMatches(c)
		}
	}
	return n
}
开发者ID:hygerth,项目名称:brooklet,代码行数:34,代码来源:siteparser.go

示例2: wrapText

// wrapText returns nodes with every run of inline content gathered into a
// synthetic <p> element.
//
// Block elements (per isBlockElement) and whitespace-only text nodes that
// appear outside of a pending paragraph are passed through unchanged. Any
// other node is appended as a child of the pending paragraph, which is
// flushed when the next block element is met and once more at the end.
func wrapText(nodes []*html.Node) []*html.Node {
	var (
		pending *html.Node
		out     = make([]*html.Node, 0, len(nodes))
	)

	// flush emits the pending paragraph, if any. It is rendered and parsed
	// again so that p-inline-p expands.
	flush := func() {
		if pending == nil {
			return
		}
		out = append(out, ParseDepth(Render(pending), 0)...)
		pending = nil
	}

	for _, node := range nodes {
		switch {
		case node.Type == html.ElementNode && isBlockElement[node.DataAtom]:
			flush()
			out = append(out, node)

		case pending == nil && node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
			out = append(out, node)

		default:
			if pending == nil {
				pending = &html.Node{
					Type:     html.ElementNode,
					Data:     "p",
					DataAtom: atom.P,
				}
			}
			pending.AppendChild(node)
		}
	}

	flush()
	return out
}
开发者ID:BenLubar,项目名称:htmlcleaner,代码行数:33,代码来源:cleaner.go

示例3: getSiblingsContent

// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is itself a <p> with non-empty text,
// that selection is returned as the only element. Otherwise every descendant
// <p> with non-empty text is scored: its stop-word count must exceed
// 0.30 * baselinescoreSiblingsPara and it must not have a high link density.
// Each accepted paragraph is returned as a new single-node selection holding
// a text node with the paragraph's text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return []*goquery.Selection{currentSibling}
	}

	paragraphs := []*goquery.Selection{}
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		content := para.Text()
		if content == "" {
			return
		}

		counts := this.config.stopWords.stopWordsCount(this.config.targetLanguage, content)
		linkHeavy := this.isHighLinkDensity(para)
		threshold := 0.30 * baselinescoreSiblingsPara

		if threshold < float64(counts.stopWordCount) && !linkHeavy {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     content,
				DataAtom: atom.P,
			}
			paragraphs = append(paragraphs, &goquery.Selection{
				Nodes: []*html.Node{textNode},
			})
		}
	})
	return paragraphs
}
开发者ID:hotei,项目名称:GoOse,代码行数:32,代码来源:extractor.go

示例4: cleanseDom

// cleanseDom performs brute reduction and simplification of the subtree
// rooted at n; lvl is the depth of n and grows by one per recursion step.
//
// Order matters: unwanted attributes are stripped first, then the children
// are cleansed depth-first, and only afterwards is n itself handed to
// removeUnwanted or convertUnwanted (selected by directlyRemoveUnwanted)
// and to convertExotic. Text nodes finally get their inner whitespace
// normalized once.
func cleanseDom(n *html.Node, lvl int) {
	n.Attr = removeAttr(n.Attr, unwantedAttrs)

	// Children first; the sibling pointer is read after the recursive call.
	child := n.FirstChild
	for child != nil {
		cleanseDom(child, lvl+1)
		child = child.NextSibling
	}

	if !directlyRemoveUnwanted {
		convertUnwanted(n)
	} else {
		removeUnwanted(n)
	}

	convertExotic(n)

	// One-time text normalization.
	if n.Type != html.TextNode {
		return
	}
	n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
}
开发者ID:aarzilli,项目名称:tools,代码行数:27,代码来源:01_cleanse.go

示例5: FindTitleAndBody_Ria

// FindTitleAndBody_Ria locates the article title and body in ria.ru-style
// markup (reported to work as of 15.12.2015).
//
// An element whose itemprop attribute is "articleBody" becomes the body and
// has its Data rewritten to "body"; one whose itemprop is "name" becomes the
// title and has its Data rewritten to "title". The children are then searched
// recursively; a non-nil result from a child overrides the current value, and
// the search stops once both nodes are known. Either result may be nil.
func FindTitleAndBody_Ria(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
	scan:
		for _, attr := range node.Attr {
			if attr.Key != "itemprop" {
				continue
			}
			switch attr.Val {
			case "articleBody":
				node.Data = "body"
				fulltext = node
				break scan
			case "name":
				node.Data = "title"
				title = node
				break scan
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_Ria(child)
		if childTitle != nil {
			title = childTitle
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:37,代码来源:cleaner.go

示例6: FindTitleAndBody_MK

// FindTitleAndBody_MK locates the article title and body in markup where the
// body is the element whose class attribute is exactly "content".
//
// For that element the title is looked up with FindTitleMK before the
// element's Data is rewritten to "body". The children are then searched
// recursively; a non-nil title coming back from a child replaces the current
// one and has its Data set to "title". The search stops once both nodes are
// known. Either result may be nil.
func FindTitleAndBody_MK(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key == "class" && attr.Val == "content" {
				// Title lookup must see the node before it is renamed.
				title = FindTitleMK(node)
				node.Data = "body"
				fulltext = node
				break
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_MK(child)
		if childTitle != nil {
			title = childTitle
			title.Data = "title"
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:32,代码来源:cleaner.go

示例7: copyNode

// copyNode copies the identity of from (type, namespace, tag data and
// attributes) onto to. The tree links of to (parent, siblings, children) are
// not touched. The Attr slice is assigned, not cloned, so both nodes share
// the same backing array afterwards.
func copyNode(to, from *html.Node) {
	to.Type = from.Type
	to.Namespace = from.Namespace
	to.DataAtom = from.DataAtom
	to.Data = from.Data
	to.Attr = from.Attr
}
开发者ID:documize,项目名称:html-diff,代码行数:7,代码来源:nodes.go

示例8: img2Link

// img2Link turns an <img> node into an <a> node in place; nodes whose Data
// is not "img" are left alone.
//
// Every "src" attribute key is renamed to "href", the attribute cfrom="img"
// is set, and a text child of the form "[img] <title> <url> | " is appended.
// When closureTextNodeExists reports true, the title is replaced by the
// marker "[ctdr]" (content title double removed).
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}

	img.Data = "a"
	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	double := closureTextNodeExists(img)
	title := attrX(img.Attr, "title")
	href := urlBeautify(attrX(img.Attr, "href"))

	var imgContent string
	if double {
		imgContent = fmt.Sprintf("[img] %v %v | ", "[ctdr]", href)
	} else {
		imgContent = fmt.Sprintf("[img] %v %v | ", title, href)
	}

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", imgContent))
}
开发者ID:aarzilli,项目名称:tools,代码行数:32,代码来源:06_img2link.go

示例9: CompactNode

// CompactNode simplifies the subtree rooted at n in place.
//
// Each child is compacted recursively first. A child listed in
// _mergeTextElements is removed and its own children are re-attached at the
// end of n; an element child without children that is not listed in
// _voidElements is removed as well. RemoveNode is assumed to return the node
// with which iteration should continue (defined elsewhere; verify).
//
// Afterwards, if n is left with exactly one child:
//   - a child with the same tag as n, or a <br> inside <p>/<div>, is
//     replaced by its own children;
//   - an <img> inside <a> replaces the <a>: n takes over the image's type,
//     tag, namespace and attributes while keeping its position in the tree.
func CompactNode(n *html.Node) {
	var appendNodes []*html.Node
	for c := n.FirstChild; c != nil; {
		CompactNode(c)
		if _mergeTextElements[c.Data] {
			appendNodes = append(appendNodes, GetChildNodes(c)...)
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		} else if c.Type == html.ElementNode && c.FirstChild == nil && !_voidElements[c.Data] {
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		} else {
			c = c.NextSibling
		}
	}

	DetachNodes(appendNodes)
	AppendChildNodes(n, appendNodes)
	if n.FirstChild != nil && n.FirstChild.NextSibling == nil {
		if n.FirstChild.Data == n.Data || (n.FirstChild.Data == "br" && (n.Data == "p" || n.Data == "div")) {
			childNodes := GetChildNodes(n.FirstChild)
			log.Info("delete", n.FirstChild.Data)
			n.RemoveChild(n.FirstChild)
			DetachNodes(childNodes)
			AppendChildNodes(n, childNodes)
		} else if n.FirstChild.Data == "img" && n.Data == "a" {
			// Copy only the identity fields. A whole-struct copy
			// (*n = *child) would also overwrite n's Parent and sibling
			// pointers with the child's, making n its own parent and
			// cutting off the siblings that follow n.
			img := n.FirstChild
			n.RemoveChild(img)
			n.Type = img.Type
			n.DataAtom = img.DataAtom
			n.Data = img.Data
			n.Namespace = img.Namespace
			n.Attr = img.Attr
			// Carry over any children of the image, re-parented onto n.
			for gc := img.FirstChild; gc != nil; gc = img.FirstChild {
				img.RemoveChild(gc)
				n.AppendChild(gc)
			}
		}
	}
}
开发者ID:justintan,项目名称:gox,代码行数:30,代码来源:node.go

示例10: toDiv

// toDiv rewrites node in place into an attribute-less <div> and then hands
// it to parseChildren, whose results are returned unchanged.
func (m *minificationHTML) toDiv(node *html.Node) (*html.Node, error) {
	node.Attr = nil
	node.Data, node.DataAtom = "div", atom.Div

	return m.parseChildren(node)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:7,代码来源:minification_html.go

示例11: convert

// convert nodes to /x/net/html.Node siblings.
// Document node children are integrated as siblings.
// Nils are skipped.
//
// Every converted node gets parent as its Parent and is linked to its
// neighbours in both directions. The first and last node of the resulting
// chain are returned; both are nil when nothing was converted.
func (s Siblings) convert(parent *html.Node) (first, last *html.Node) {
	var prev *html.Node
	for _, n := range s {
		if n == nil {
			continue
		}

		// start..end is the run of html nodes contributed by n.
		var start, end *html.Node
		if n.Type == html.DocumentNode {
			start, end = n.Children.convert(parent)
			if start == nil {
				// An empty document contributes nothing. Keep prev, so the
				// chain built so far is not dropped.
				continue
			}
		} else {
			start = n.convert()
			start.Parent = parent
			end = start
		}

		// Splice the run behind prev, linking both directions.
		start.PrevSibling = prev
		if prev != nil {
			prev.NextSibling = start
		} else {
			first = start
		}
		prev = end
	}
	return first, prev
}
开发者ID:arnehormann,项目名称:hck,代码行数:31,代码来源:nodes.go

示例12: reIndent

// reIndent inserts whitespace text nodes into the subtree rooted at n so the
// rendered markup is indented by nesting level; lvl is the depth of n.
//
// Nodes deeper than cScaffoldLvls that have no parent are logged and skipped.
// Before the children are visited, an element nested in another element gets
// a newline plus lvl-2 tabs in front of it, a comment gets a newline in front
// of it, and a text node is trimmed and padded with single spaces (no leading
// space before "," or "."; no padding at all inside <a>). After the children
// are visited, an element nested in another element gets the same newline and
// tabs appended after its last child, except for <a>, which gets an empty
// text node instead.
func reIndent(n *html.Node, lvl int) {
	if lvl > cScaffoldLvls && n.Parent == nil {
		_ = dom.PrintSubtree(n)
		hint := ""
		if ml3[n] > 0 {
			hint = "   from ml3"
		}
		log.Print("reIndent: no parent ", hint)
		return
	}

	// Before children processing.
	if n.Type == html.ElementNode {
		if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
			lead := "\n" + strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: lead})
		}
	} else if n.Type == html.CommentNode {
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	} else if n.Type == html.TextNode {
		text := strings.TrimSpace(n.Data) + " "
		if !(strings.HasPrefix(text, ",") || strings.HasPrefix(text, ".")) {
			text = " " + text
		}
		// Link texts carry no surrounding space.
		if n.Parent != nil && n.Parent.Data == "a" {
			text = strings.TrimSpace(text)
		}
		n.Data = text
	}

	// Children.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		reIndent(child, lvl+1)
	}

	// After children processing. Per the original author this has to happen
	// AFTER the children have been visited.
	if n.Type == html.ElementNode && lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
		tail := "\n" + strings.Repeat("\t", lvl-2)
		// Link texts get no new line.
		if n.Data == "a" {
			tail = ""
		}
		if last := n.LastChild; last != nil {
			dom.InsertAfter(last, &html.Node{Type: html.TextNode, Data: tail})
		}
	}
}
开发者ID:aarzilli,项目名称:tools,代码行数:58,代码来源:09_reformat_indent.go

示例13: topDownV1

/*
topDownV1 flattens one level of nesting: when n is a couple[1] element whose
parent is a couple[0] element, the children of n are moved up to take the
place of n and n itself is removed.

   div                     div
       div                     p
           p         TO        img
           img                 p
           p


	Operates from the *middle* div.
	Collects all children and walks them in inverted order.
	Removes each child and reattaches it one level higher,
	in front of the previously moved one, so that order is kept.
	Finally the intermediary, now childless div is removed
	and the parent's Data is set to parentType.

	Text nodes and <a> elements are wrapped into <p cfrm="div">
	so that the break the div used to cause is preserved:


   \                  /
    \       /\       /
     \_____/  \_____/

     \              /
      \_____/\_____/

       \__________/     => Breaks are gone


       \p1___p2___/     => Wrapping preserves breaks


Nothing happens if n has no parent (per noParent), if the tag pair does not
match, or if n has no children.
*/
func topDownV1(n *html.Node, couple []string, parentType string) {

	if noParent(n) {
		return
	}
	p := n.Parent

	parDiv := p.Type == html.ElementNode && p.Data == couple[0] // Parent is a div
	iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // I am a div

	if !parDiv || !iAmDiv || n.FirstChild == nil {
		return
	}

	// Snapshot the children first; they are unlinked from n below.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}

	// Walk last to first, always inserting in front of the node moved
	// before, so the original order ends up in place of n.
	insertionPoint := n.NextSibling
	for i := len(children) - 1; i >= 0; i-- {
		c1 := children[i]
		n.RemoveChild(c1)

		moved := c1
		if c1.Type == html.TextNode || c1.Data == "a" {
			wrap := &html.Node{
				Type: html.ElementNode,
				Data: "p",
				Attr: []html.Attribute{{Key: "cfrm", Val: "div"}},
			}
			// AppendChild sets FirstChild, LastChild and c1.Parent together,
			// keeping the wrapper a consistent node.
			wrap.AppendChild(c1)
			moved = wrap
		}

		p.InsertBefore(moved, insertionPoint)
		insertionPoint = moved
	}

	p.RemoveChild(n)
	p.Data = parentType
}
开发者ID:aarzilli,项目名称:tools,代码行数:89,代码来源:03_top_down_v1.go

示例14: runMergeNodes

// runMergeNodes is a test helper: it appends prev and next (each only when
// non-nil, in that order) to parent and returns what mergeNodes yields for
// them on a zero-value parserUtils.
func runMergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	for _, child := range []*html.Node{prev, next} {
		if child != nil {
			parent.AppendChild(child)
		}
	}

	var u parserUtils
	return u.mergeNodes(parent, prev, next, addSeparator)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:10,代码来源:parser_utils_test.go

示例15: setNodeText

// setNodeText replaces all children of node with a single text node whose
// content is s.
func setNodeText(node *html.Node, s string) {
	// Drop the existing children one by one, always taking the current head.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}
开发者ID:albertjin,项目名称:goquery,代码行数:12,代码来源:mutate.go


注:本文中的golang.org/x/net/html.Node类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。