本文整理汇总了Golang中golang.org/x/net/html.Node.Data方法的典型用法代码示例。如果您正苦于以下问题：Golang Node.Data方法的具体用法？Golang Node.Data怎么用？Golang Node.Data使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类golang.org/x/net/html.Node的用法示例。
在下文中一共展示了Node.Data方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: FindTitleAndBody_Ria
// FindTitleAndBody_Ria searches the subtree rooted at node for an article's
// title and body as marked up in ria.ru pages (the original author verified
// this against the site on 15.12.2015).
//
// An element carrying itemprop="articleBody" has its Data rewritten to "body"
// and is reported as the body; an element carrying itemprop="name" has its
// Data rewritten to "title" and is reported as the title. Children are then
// searched recursively, and any non-nil result from a child replaces the
// current candidate. The child scan stops as soon as both results are known.
// Either returned node may be nil when no match was found.
func FindTitleAndBody_Ria(node *html.Node) (*html.Node, *html.Node) {
	var titleNode, bodyNode *html.Node

	if node.Type == html.ElementNode {
	attrScan:
		for _, attr := range node.Attr {
			if attr.Key != "itemprop" {
				continue
			}
			// Only the first recognised itemprop value is honoured.
			switch attr.Val {
			case "articleBody":
				node.Data = "body"
				bodyNode = node
				break attrScan
			case "name":
				node.Data = "title"
				titleNode = node
				break attrScan
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_Ria(child)
		if childTitle != nil {
			titleNode = childTitle
		}
		if childBody != nil {
			bodyNode = childBody
		}
		if titleNode != nil && bodyNode != nil {
			break
		}
	}

	return titleNode, bodyNode
}
示例2: mergeNodes
// mergeNodes joins the text around the boundary between prev and next, two
// children of parent, optionally putting a single space between them.
//
//   - Both are text nodes: next's text (preceded by the separator) is appended
//     to prev, next is removed from parent, and the node now following prev is
//     returned.
//   - Only prev is a text node: the separator is appended to prev.
//   - Only next is a text node: the separator is prepended to next.
//   - Neither is a text node and addSeparator is set: a new text node holding
//     the separator is inserted before next.
//
// In every case except the first, next is returned unchanged. prev and next
// may be nil; a nil node is treated as "not a text node".
func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	sep := ""
	if addSeparator {
		sep = " "
	}

	prevIsText := prev != nil && prev.Type == html.TextNode
	nextIsText := next != nil && next.Type == html.TextNode

	switch {
	case prevIsText && nextIsText:
		prev.Data += sep + next.Data
		parent.RemoveChild(next)
		return prev.NextSibling
	case prevIsText:
		prev.Data += sep
	case nextIsText:
		next.Data = sep + next.Data
	case addSeparator:
		parent.InsertBefore(&html.Node{Type: html.TextNode, Data: sep}, next)
	}
	return next
}
示例3: FindTitleAndBody_MK
// FindTitleAndBody_MK searches the subtree rooted at node for an article's
// title and body.
//
// An element with the attribute class="content" is treated as the body: the
// title is first looked up inside it via FindTitleMK, then the element's Data
// is rewritten to "body". Children are then searched recursively; a non-nil
// title coming back from a child replaces the current one and has its Data
// rewritten to "title", and a non-nil body replaces the current body. The
// child scan stops once both results are known. Either returned node may be
// nil.
func FindTitleAndBody_MK(node *html.Node) (*html.Node, *html.Node) {
	var titleNode, bodyNode *html.Node

	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key != "class" || attr.Val != "content" {
				continue
			}
			// The title lookup runs before the node is renamed.
			titleNode = FindTitleMK(node)
			node.Data = "body"
			bodyNode = node
			break
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_MK(child)
		if childTitle != nil {
			titleNode = childTitle
			titleNode.Data = "title"
		}
		if childBody != nil {
			bodyNode = childBody
		}
		if titleNode != nil && bodyNode != nil {
			break
		}
	}

	return titleNode, bodyNode
}
示例4: reIndent
// reIndent inserts whitespace text nodes around n and its descendants so that
// the tree is indented when rendered. lvl is the depth of n; indentation is
// only applied to nodes deeper than cScaffoldLvls.
//
// Before descending:
//   - an element whose parent is an element gets "\n" plus lvl-2 tabs inserted
//     before it (deep levels only);
//   - a comment gets "\n" inserted before it;
//   - a text node is trimmed, given one trailing space and, unless it starts
//     with "," or ".", one leading space; text directly inside an "a" element
//     is trimmed completely.
//
// After descending, a deep element with an element parent gets the same
// newline-plus-tabs string appended after its last child ("a" elements get an
// empty string instead).
//
// A deep node without a parent is logged and skipped.
func reIndent(n *html.Node, lvl int) {
	deep := lvl > cScaffoldLvls

	if deep && n.Parent == nil {
		dump := dom.PrintSubtree(n)
		_ = dump
		// log.Printf("%s", dump.Bytes())
		origin := ""
		if ml3[n] > 0 {
			origin = " from ml3"
		}
		log.Print("reIndent: no parent ", origin)
		return
	}

	// Pre-order step.
	switch n.Type {
	case html.ElementNode:
		if deep && n.Parent.Type == html.ElementNode {
			lead := "\n" + strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: lead})
		}
	case html.CommentNode:
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	case html.TextNode:
		text := strings.TrimSpace(n.Data) + " "
		if !(strings.HasPrefix(text, ",") || strings.HasPrefix(text, ".")) {
			text = " " + text
		}
		// Link texts carry no surrounding space.
		if n.Parent != nil && n.Parent.Data == "a" {
			text = strings.TrimSpace(text)
		}
		n.Data = text
	}

	// Descend.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		reIndent(child, lvl+1)
	}

	// Post-order step: the closing indentation has to be appended after the
	// children were processed (the original author notes it does not work
	// otherwise).
	if n.Type != html.ElementNode {
		return
	}
	if !deep || n.Parent.Type != html.ElementNode {
		return
	}
	tail := "\n" + strings.Repeat("\t", lvl-2)
	// Link texts get no new line.
	if n.Data == "a" {
		tail = ""
	}
	if n.LastChild != nil {
		dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: tail})
	}
}
示例5: toDiv
// toDiv turns node into a bare <div>: its attributes are dropped and its tag
// name and atom are set to "div". The result of m.parseChildren(node) is
// returned unchanged.
func (m *minificationHTML) toDiv(node *html.Node) (*html.Node, error) {
	node.Attr = nil
	node.Data, node.DataAtom = "div", atom.Div
	return m.parseChildren(node)
}
示例6: getSiblingsContent
// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is itself a <p> with non-empty text, the
// sibling is returned as the only result. Otherwise every <p> found beneath it
// is examined: a paragraph with non-empty text is kept when 0.30 times
// baselinescoreSiblingsPara is below its stop-word count and it does not have
// a high link density. Each kept paragraph is returned as a new selection that
// wraps a single freshly created text node holding the paragraph's text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	const siblingBaselineScore = 0.30

	paragraphs := make([]*goquery.Selection, 0)

	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(paragraphs, currentSibling)
	}

	currentSibling.Find("p").Each(func(i int, s *goquery.Selection) {
		text := s.Text()
		if len(text) == 0 {
			return
		}

		stats := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		paragraphScore := stats.stopWordCount
		linkHeavy := this.isHighLinkDensity(s)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold >= float64(paragraphScore) || linkHeavy {
			return
		}

		textNode := &html.Node{
			Type:     html.TextNode,
			Data:     text,
			DataAtom: atom.P,
		}
		wrapped := new(goquery.Selection)
		wrapped.Nodes = []*html.Node{textNode}
		paragraphs = append(paragraphs, wrapped)
	})

	return paragraphs
}
示例7: cleanseDom
// cleanseDom performs a brute reduction and simplification of the subtree
// rooted at n. lvl is the depth of n and is only passed on to the recursion.
//
// For each node, in order: unwanted attributes are stripped, all children are
// cleansed recursively, the node is handed to removeUnwanted or
// convertUnwanted (chosen by directlyRemoveUnwanted), then to convertExotic,
// and finally, for text nodes, inner whitespace is normalized.
func cleanseDom(n *html.Node, lvl int) {
	n.Attr = removeAttr(n.Attr, unwantedAttrs)

	// Children are handled before the node itself.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		cleanseDom(child, lvl+1)
	}

	switch {
	case directlyRemoveUnwanted:
		removeUnwanted(n)
	default:
		convertUnwanted(n)
	}

	convertExotic(n)

	// One-time text normalization.
	if n.Type != html.TextNode {
		return
	}
	n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
}
示例8: copyNode
// copyNode overwrites the descriptive fields of to (Type, DataAtom, Data,
// Namespace and Attr) with those of from. Tree links (parent, siblings,
// children) are not touched.
//
// Attr is assigned, not cloned, so both nodes refer to the same attribute
// slice afterwards.
func copyNode(to, from *html.Node) {
	to.Type, to.DataAtom, to.Data = from.Type, from.DataAtom, from.Data
	to.Namespace = from.Namespace
	to.Attr = from.Attr
}
示例9: img2Link
// img2Link converts an "img" node into an "a" node in place; nodes with any
// other tag name are left untouched.
//
// Every "src" attribute key is renamed to "href", the attribute cfrom="img" is
// set, and a text child of the form "[img] <label> <url> | " is appended,
// where <url> is the beautified href. <label> is the image's title attribute,
// or the marker "[ctdr]" when closureTextNodeExists reports true for the
// image.
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}
	img.Data = "a"

	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	const format = "[img] %v %v | "

	double := closureTextNodeExists(img)
	title := attrX(img.Attr, "title")
	href := urlBeautify(attrX(img.Attr, "href"))

	var imgContent string
	if double {
		// "ctdr": content title double removed.
		imgContent = fmt.Sprintf(format, "[ctdr]", href)
	} else {
		imgContent = fmt.Sprintf(format, title, href)
	}

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", imgContent))
}
示例10: parseText
// parseText minifies the text held by node and returns the node's original
// next sibling, so the caller can continue its traversal even when node is
// removed.
//
// The text is passed through m.processText. A non-empty result replaces
// node.Data; an empty result causes node to be removed from its parent. A node
// that has no parent cannot be removed and is left in place, where the
// unguarded call would have panicked on the nil parent. The returned error is
// always nil.
func (m *minificationText) parseText(node *html.Node) (*html.Node, error) {
	// Capture the sibling first: removal from the parent resets it.
	next := node.NextSibling

	text := m.processText(node.Data)
	switch {
	case len(text) != 0:
		node.Data = text
	case node.Parent != nil:
		node.Parent.RemoveChild(node)
	}
	return next, nil
}
示例11: Nd
// Nd creates a detached node.
//
// With ntype == "text" it returns a text node whose Data is content[0], or
// empty when no content is given. Any other ntype yields an element node whose
// Data (tag name) is ntype; content is not applicable there, and passing it
// only produces a stack trace and a log message.
func Nd(ntype string, content ...string) *html.Node {
	hasContent := len(content) > 0

	if ntype == "text" {
		textNode := &html.Node{Type: html.TextNode}
		if hasContent {
			textNode.Data = content[0]
		}
		return textNode
	}

	if hasContent {
		runtimepb.StackTrace(4)
		log.Printf("Element nodes can't have content")
	}
	return &html.Node{Type: html.ElementNode, Data: ntype}
}
示例12: walkPrint
// walkPrint writes an HTML-flavoured debug dump of n, its following siblings
// and all of their descendants to w. i is the nesting depth and determines the
// number of leading tabs on each line.
//
// Whitespace-only text nodes are skipped. A node that is the Child recorded in
// its parent's data is wrapped in a tinted <div>; a node flagged Chosen or
// ChosenBy is wrapped in a coloured <div id="chosen">. Elements are printed as
// "<tag> (MaxChild/Count = percent%) - attrs"; all other nodes are printed as
// their escaped, quoted text. Names and texts are shortened to 40 runes for
// display.
//
// The shortening is done on a local copy: walkPrint never modifies the tree it
// prints. Write errors are ignored; the output is best-effort diagnostics.
func walkPrint(w io.Writer, i int, n *html.Node) {
	const maxLabelRunes = 40

	for ; n != nil; n = n.NextSibling {
		if n.Type == html.TextNode && strings.TrimSpace(n.Data) == "" {
			continue
		}

		d := getData(n)
		isMostChild := getData(n.Parent).Child == n
		isChosen := d.Chosen || d.ChosenBy

		if isMostChild {
			w.Write([]byte(`<div style="background: rgba(0, 0, 100, 0.1)">`))
		}
		if isChosen {
			color := "rgb(40, 79, 40)"
			if d.ChosenBy {
				color = "rgba(90, 60, 30, 0.8)"
			}
			w.Write([]byte(`<div id="chosen" style="background: ` + color + `;color: #fff">`))
		}

		factor := 0
		if d.Count > 0 {
			factor = d.MaxChild * 100 / d.Count
		}

		// Shorten by runes, not bytes, so multi-byte characters are never
		// cut in half; n.Data itself stays untouched.
		label := n.Data
		if runes := []rune(label); len(runes) > maxLabelRunes {
			label = string(runes[:maxLabelRunes])
		}

		indent := strings.Repeat("\t", i)
		if n.Type == html.ElementNode {
			fmt.Fprintf(w, "%v<%v>", indent, label)
			fmt.Fprintf(w, " (%v/%v = <b>%v%%</b>) - %v\n",
				d.MaxChild,
				d.Count,
				factor,
				n.Attr,
			)
		} else {
			fmt.Fprintf(w, "%v%v\n", indent, strconv.Quote(ghtml.EscapeString(label)))
		}

		if n.FirstChild != nil {
			walkPrint(w, i+1, n.FirstChild)
		}

		if isMostChild {
			w.Write([]byte(`</div>`))
		}
		if isChosen {
			w.Write([]byte("</div>"))
		}
	}
}
示例13: CloneNode
// CloneNode makes a deep copy of n and all of its descendants.
//
// The clone carries n's Type, DataAtom, Data and Namespace, plus an
// independent copy of the attribute slice, so changing the clone's attributes
// does not affect n. Namespace is copied so that foreign-content nodes (e.g.
// svg or math) keep their namespace in the clone. Children are cloned
// recursively and appended in their original order. The returned node is
// detached: it has no parent and no siblings.
func CloneNode(n *exphtml.Node) *exphtml.Node {
	clone := new(exphtml.Node)
	clone.Type = n.Type
	clone.DataAtom = n.DataAtom
	clone.Data = n.Data
	clone.Namespace = n.Namespace

	clone.Attr = make([]exphtml.Attribute, len(n.Attr))
	copy(clone.Attr, n.Attr)

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		clone.AppendChild(CloneNode(c))
	}
	return clone
}
示例14: TestParseScriptTagNoSrc
func TestParseScriptTagNoSrc(t *testing.T) {
node := new(html.Node)
node.Data = "script"
page := newWebPage(startUrl)
page.parseScriptTag(node)
expected1 := 0
val1 := page.scriptFiles.Len()
if val1 != expected1 {
t.Error("Expected:", expected1, " Got:", val1)
}
}
示例15: TestParseATagNoHref
func TestParseATagNoHref(t *testing.T) {
node := new(html.Node)
node.Data = "a"
page := newWebPage(startUrl)
page.parseATag(node)
expected1 := 0
val1 := page.links.Len()
if val1 != expected1 {
t.Error("Expected:", expected1, " Got:", val1)
}
}