本文整理汇总了 Golang 中 code.google.com/p/go.net/html 包 Node.DataAtom 字段的典型用法代码示例。如果您正苦于以下问题：Golang Node.DataAtom 的具体用法？Golang Node.DataAtom 怎么用？Golang Node.DataAtom 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该字段所属类型 code.google.com/p/go.net/html.Node 的用法示例。
在下文中一共展示了 Node.DataAtom 字段的 3 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Golang 代码示例。
示例1: getSiblingsContent
// getSiblingsContent gathers paragraph selections from currentSibling.
//
// If the first node of currentSibling is a "p" element and the selection has
// non-empty text, the sibling itself is returned as the only element.
// Otherwise every descendant "p" with non-empty text is scored by its
// stop-word count for the configured target language; a paragraph whose count
// exceeds 0.30 * baselinescoreSiblingsPara and that isHighLinkDensity does not
// flag is copied into a new single-node selection (a text node carrying the
// paragraph text, tagged atom.P) and appended to the result.
//
// The returned slice is never nil; it is empty when nothing qualifies.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	collected := []*goquery.Selection{}

	// Fast path: the sibling is already a non-empty paragraph.
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(collected, currentSibling)
	}

	const siblingBaselineScore = 0.30
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		content := para.Text()
		if len(content) == 0 {
			return
		}

		stopWords := this.config.stopWords.stopWordsCount(this.config.targetLanguage, content)
		linkHeavy := this.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold < float64(stopWords.stopWordCount) && !linkHeavy {
			// Wrap the paragraph text in a detached node so the caller
			// receives a selection independent of the source document.
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     content,
				DataAtom: atom.P,
			}
			collected = append(collected, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})
	return collected
}
示例2: CloneNode
// CloneNode returns a deep copy of n. The node's Type, DataAtom and Data are
// duplicated, its attributes are copied into a fresh slice, and every child is
// cloned recursively and appended in its original order. n must not be nil.
func CloneNode(n *exphtml.Node) *exphtml.Node {
	// Copy the attributes so the clone does not share n's backing array.
	attrs := make([]exphtml.Attribute, len(n.Attr))
	copy(attrs, n.Attr)

	dup := &exphtml.Node{
		Type:     n.Type,
		DataAtom: n.DataAtom,
		Data:     n.Data,
		Attr:     attrs,
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		dup.AppendChild(CloneNode(child))
	}
	return dup
}
示例3: convertDivsToParagraphs
// convertDivsToParagraphs walks every element of doc matched by the selector
// domType and returns doc.
//
// An element for which parser.getElementsByTags finds none of the listed tags
// (a, blockquote, dl, div, img, ol, p, pre, table, ul) is handed to
// replaceWithPara and counted as a bad div. For any other element, each child
// whose node Data equals its text (or is literally "#text") is treated as a
// text node: newlines and tabsRegEx matches are stripped, and if more than one
// byte remains the text is queued for a replacement paragraph and the node is
// scheduled for removal. When the node's previous sibling is an <a>, the HTML
// of kid.HasNodes(prev) is queued ahead of the text. The queued pieces are
// joined into a new node tagged atom.P that is passed to
// div.First().AddNodes, after which the scheduled nodes are detached from
// their parents.
//
// NOTE(review): the new node is an ElementNode whose Data holds the joined
// text rather than a tag name, and the result of AddNodes is discarded —
// confirm this is the intended way to attach the paragraph.
func (this *cleaner) convertDivsToParagraphs(doc *goquery.Document, domType string) *goquery.Document {
	if this.config.debug {
		log.Println("Starting to replace bad divs...")
	}

	var (
		badDivs            int
		convertedTextNodes int
	)
	divs := doc.Find(domType)
	blockTags := []string{"a", "blockquote", "dl", "div", "img", "ol", "p", "pre", "table", "ul"}

	divs.Each(func(_ int, div *goquery.Selection) {
		// No block-level descendants: the whole div becomes a paragraph.
		if this.config.parser.getElementsByTags(div, blockTags).Size() == 0 {
			this.replaceWithPara(div)
			badDivs++
			return
		}

		var pieces []string
		pending := list.New()
		children := div.Contents()
		if this.config.debug {
			log.Printf("Found %d children of div\n", children.Size())
		}

		children.EachWithBreak(func(_ int, kid *goquery.Selection) bool {
			content := kid.Text()
			kidNode := kid.Get(0)
			// A child counts as text when its Data is its own text or "#text".
			if kidNode.Data != content && kidNode.Data != "#text" {
				return true
			}
			const tag = "#text"

			content = strings.Replace(content, "\n", "", -1)
			content = tabsRegEx.ReplaceAllString(content, "")
			// Skip empty and single-byte leftovers.
			if len(content) <= 1 {
				return true
			}

			prev := kidNode.PrevSibling
			if this.config.debug {
				log.Printf("PARENT CLASS: %s NODENAME: %s\n", this.config.parser.name("class", div), tag)
				log.Printf("TEXTREPLACE: %s\n", strings.Replace(content, "\n", "", -1))
			}
			if prev != nil && prev.DataAtom == atom.A {
				// The error from Html is ignored, as in the original;
				// on failure an empty string is queued.
				anchorHTML, _ := kid.HasNodes(prev).Html()
				pieces = append(pieces, anchorHTML)
				if this.config.debug {
					log.Printf("SIBLING NODENAME ADDITION: %s TEXT: %s\n", prev.Data, anchorHTML)
				}
			}
			pieces = append(pieces, content)
			pending.PushBack(kidNode)
			convertedTextNodes++
			return true
		})

		para := &html.Node{
			Type:     html.ElementNode,
			Data:     strings.Join(pieces, ""),
			DataAtom: atom.P,
		}
		div.First().AddNodes(para)

		// Detach the text nodes that were folded into the paragraph.
		for el := pending.Front(); el != nil; el = el.Next() {
			if node := el.Value.(*html.Node); node != nil && node.Parent != nil {
				node.Parent.RemoveChild(node)
			}
		}
	})

	if this.config.debug {
		log.Printf("Found %d total divs with %d bad divs replaced and %d textnodes converted inside divs", divs.Size(), badDivs, convertedTextNodes)
	}
	return doc
}