本文整理汇总了Golang中golang.org/x/net/html.Node.Type方法的典型用法代码示例。如果您正苦于以下问题：Golang Node.Type方法的具体用法？Golang Node.Type怎么用？Golang Node.Type使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类golang.org/x/net/html.Node的用法示例。
在下文中一共展示了Node.Type方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: copyNode
// copyNode overwrites the payload fields of to (Type, DataAtom, Data,
// Namespace and Attr) with the values held by from. The tree links of to
// (parent, children, siblings) are left untouched. Attr is assigned rather
// than cloned, so both nodes share one attribute slice afterwards.
func copyNode(to, from *html.Node) {
	to.Type, to.DataAtom, to.Data = from.Type, from.DataAtom, from.Data
	to.Namespace = from.Namespace
	to.Attr = from.Attr
}
示例2: getSiblingsContent
// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is a <p> with non-empty text, the
// sibling itself is returned as the only result. Otherwise every descendant
// <p> with non-empty text is scored: its stop-word count must exceed
// 0.30 * baselinescoreSiblingsPara and it must not be flagged by
// isHighLinkDensity. Each qualifying paragraph is returned as a fresh
// single-node selection holding a detached text node with the paragraph text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	collected := make([]*goquery.Selection, 0)

	// Fast path: the sibling is itself a non-empty paragraph.
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(collected, currentSibling)
	}

	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		text := para.Text()
		if len(text) == 0 {
			return
		}

		stopWords := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		linkHeavy := this.isHighLinkDensity(para)
		siblingBaselineScore := 0.30
		threshold := siblingBaselineScore * baselinescoreSiblingsPara

		if threshold < float64(stopWords.stopWordCount) && !linkHeavy {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			collected = append(collected, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})

	return collected
}
示例3: Nd
// Nd builds a detached html.Node.
//
// When ntype is "text" the result is a text node whose Data is content[0]
// (or empty if no content was passed). For any other ntype the result is an
// element node whose Data (tag name) is ntype; passing content in that case
// is not supported and only triggers a stack trace and a log message.
func Nd(ntype string, content ...string) *html.Node {
	hasContent := len(content) > 0

	if ntype != "text" {
		if hasContent {
			runtimepb.StackTrace(4)
			log.Printf("Element nodes can't have content")
		}
		return &html.Node{Type: html.ElementNode, Data: ntype}
	}

	textNode := &html.Node{Type: html.TextNode}
	if hasContent {
		textNode.Data = content[0]
	}
	return textNode
}
示例4: CloneNode
// CloneNode makes a deep copy of n and all of its descendants.
//
// The returned node is detached (no parent or siblings). Type, DataAtom,
// Data and Namespace are copied, and the attribute slice is duplicated so
// that later edits to the clone's attributes do not affect the original.
// Namespace must be carried over; otherwise foreign-content elements
// (e.g. svg or math) would lose their namespace in the copy.
//
// A nil n yields nil.
func CloneNode(n *exphtml.Node) *exphtml.Node {
	if n == nil {
		return nil
	}

	clone := &exphtml.Node{
		Type:      n.Type,
		DataAtom:  n.DataAtom,
		Data:      n.Data,
		Namespace: n.Namespace,
		Attr:      make([]exphtml.Attribute, len(n.Attr)),
	}
	copy(clone.Attr, n.Attr)

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		clone.AppendChild(CloneNode(c))
	}
	return clone
}
示例5: removeEmptyNodes
// removeEmptyNodes prunes "empty" elements from the subtree rooted at n,
// working bottom-up so that a parent emptied by the removal of its children
// is itself considered for removal.
//
// An element is detached from its parent when it is:
//   - an <img> without a src attribute value,
//   - a childless <a> whose href is "" or "#",
//   - a childless <em>, <strong>, <div>, <span>, <li> or <p>.
//
// In addition, a <span> whose only child is a text node of fewer than three
// bytes (after trimming whitespace) is flattened into a text node in place.
//
// lvl is the current recursion depth; it is only passed along.
//
// If n has no parent (it is the root handed in by the caller) it cannot be
// detached and is left as is instead of causing a nil-pointer panic.
func removeEmptyNodes(n *html.Node, lvl int) {
	// Snapshot the children first: the recursive calls may detach them,
	// which would cut a live NextSibling walk short.
	var kids []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		kids = append(kids, c)
	}
	for _, c := range kids {
		removeEmptyNodes(c, lvl+1)
	}

	if n.Type != html.ElementNode {
		return
	}

	childless := n.FirstChild == nil
	drop := false
	switch n.Data {
	case "img":
		drop = attrX(n.Attr, "src") == ""
	case "a":
		if childless {
			href := attrX(n.Attr, "href")
			drop = href == "#" || href == ""
		}
	case "em", "strong", "div", "span", "li", "p":
		drop = childless
	}
	if drop {
		if n.Parent != nil {
			n.Parent.RemoveChild(n)
		}
		return
	}

	// A span holding nothing but a very short text is replaced by that text.
	only := n.FirstChild
	if n.Data == "span" &&
		only != nil && only == n.LastChild &&
		only.Type == html.TextNode &&
		len(strings.TrimSpace(only.Data)) < 3 {
		n.Type = html.TextNode
		n.Data = only.Data
		n.RemoveChild(only)
	}
}
示例6: dedupApply
// dedupApply walks the subtree rooted at n depth-first and neutralizes
// every element whose "ol" attribute value, suffixed with ".", is marked
// true in dedups. A matching element is turned into a comment node and
// " replaced" is appended to its Data. Children are visited before their
// parent.
func dedupApply(n *html.Node, dedups map[string]bool) {
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		dedupApply(child, dedups)
	}

	if n.Type != html.ElementNode {
		return
	}
	if key := attrX(n.Attr, "ol") + "."; dedups[key] {
		n.Type = html.CommentNode
		n.Data += " replaced"
	}
}
示例7: forceMaxDepth
// forceMaxDepth truncates the tree below n once depth levels have been
// descended.
//
// When depth reaches 0, n is rewritten into a text node reading "[omitted]":
// its child links and attributes are cleared and all of its following
// siblings are removed from the parent. Above that depth only element nodes
// are descended into; other node types are left untouched.
func forceMaxDepth(n *html.Node, depth int) {
	switch {
	case depth == 0:
		n.Type = html.TextNode
		n.Data = "[omitted]"
		n.Attr = nil
		n.FirstChild, n.LastChild = nil, nil
		// Drop everything to the right of n; RemoveChild updates
		// n.NextSibling, so the loop ends when n is the last child.
		for sib := n.NextSibling; sib != nil; sib = n.NextSibling {
			n.Parent.RemoveChild(sib)
		}
	case n.Type == html.ElementNode:
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			forceMaxDepth(child, depth-1)
		}
	}
}
示例8: convertDivsToParagraphs
// convertDivsToParagraphs visits every element of doc matched by the
// selector domType and returns the same doc pointer it was given.
//
// An element whose inner HTML matches divToPElementsPattern is handed to
// c.replaceWithPara. For every other element, its direct text children are
// gathered into a single replacement string, a new node tagged atom.P is
// built from that string, and the gathered text nodes are removed from the
// tree. Counters for both cases are logged when c.config.debug is set.
func (c *Cleaner) convertDivsToParagraphs(doc *goquery.Document, domType string) *goquery.Document {
if c.config.debug {
log.Println("Starting to replace bad divs...")
}
badDivs := 0
convertedTextNodes := 0
divs := doc.Find(domType)
divs.Each(func(i int, div *goquery.Selection) {
// NOTE(review): the error from Html() is discarded; on failure divHTML is
// presumably empty and the element takes the else branch — confirm.
divHTML, _ := div.Html()
if divToPElementsPattern.Match([]byte(divHTML)) {
c.replaceWithPara(div)
badDivs++
} else {
var replacementText []string
// Removal is deferred until after the iteration over the children.
nodesToRemove := list.New()
children := div.Contents()
if c.config.debug {
log.Printf("Found %d children of div\n", children.Size())
}
// The callback always returns true, so every child is visited.
children.EachWithBreak(func(i int, kid *goquery.Selection) bool {
text := kid.Text()
kidNode := kid.Get(0)
tag := kidNode.Data
// A node whose Data equals its own text is treated as a text node.
// NOTE(review): an element whose tag name equals its text content would
// also match this test — confirm that is acceptable.
if tag == text {
tag = "#text"
}
if tag == "#text" {
// Strip newlines and whatever tabsRegEx matches before measuring the text.
text = strings.Replace(text, "\n", "", -1)
text = tabsRegEx.ReplaceAllString(text, "")
if text == "" {
return true
}
// Only text longer than one byte is collected.
if len(text) > 1 {
prev := kidNode.PrevSibling
if c.config.debug {
log.Printf("PARENT CLASS: %s NODENAME: %s\n", c.config.parser.name("class", div), tag)
log.Printf("TEXTREPLACE: %s\n", strings.Replace(text, "\n", "", -1))
}
// When the text directly follows an <a>, HTML derived from that anchor is
// placed in front of the text.
if prev != nil && prev.DataAtom == atom.A {
nodeSelection := kid.HasNodes(prev)
// This local shadows the html package inside this if-block only.
html, _ := nodeSelection.Html()
replacementText = append(replacementText, html)
if c.config.debug {
log.Printf("SIBLING NODENAME ADDITION: %s TEXT: %s\n", prev.Data, html)
}
}
replacementText = append(replacementText, text)
nodesToRemove.PushBack(kidNode)
convertedTextNodes++
}
}
return true
})
// NOTE(review): the node is an ElementNode but its Data carries the joined
// text rather than a tag name — confirm this is what downstream code expects.
newNode := new(html.Node)
newNode.Type = html.ElementNode
newNode.Data = strings.Join(replacementText, "")
newNode.DataAtom = atom.P
// NOTE(review): the result of AddNodes is discarded; verify that this call
// actually attaches newNode to the document.
div.First().AddNodes(newNode)
// Detach the text nodes that were folded into the replacement string.
for s := nodesToRemove.Front(); s != nil; s = s.Next() {
node := s.Value.(*html.Node)
if node != nil && node.Parent != nil {
node.Parent.RemoveChild(node)
}
}
}
})
if c.config.debug {
log.Printf("Found %d total divs with %d bad divs replaced and %d textnodes converted inside divs", divs.Size(), badDivs, convertedTextNodes)
}
return doc
}
示例9: findNodeformNodesbyIndexOrPro
// findNodeformNodesbyIndexOrPro applies the attributes in m to one of the
// given nodes, choosing the node by attribute value or by index depending
// on Type.
//
// For Type OPTION or RADIO, the first node carrying a VALUE attribute equal
// to m[VALUE] is marked ("checked" for RADIO, "selected" otherwise) and the
// function returns. If no node matches and visible is true, a new node is
// modelled on nodes[0] (same Type and Data) with the value, the mark and the
// TYPE attribute, and is passed to AppendNodes on the parent selection of
// nodes[0]. index is not touched in this mode.
//
// For any other Type, every key/value pair of m is set on nodes[*index] and
// *index is advanced by one; nothing happens when *index is past the end.
//
// An empty nodes slice is a no-op in both modes (it previously caused an
// index-out-of-range panic when visible was true).
func findNodeformNodesbyIndexOrPro(nodes []*goquery.Selection, index *int, m map[string]string, Type string, visible bool) {
	if Type == OPTION || Type == RADIO {
		mark := "selected"
		if Type == RADIO {
			mark = "checked"
		}

		for _, sel := range nodes {
			for _, a := range sel.Get(0).Attr {
				if a.Key == VALUE && a.Val == m[VALUE] {
					sel.SetAttr(mark, mark)
					return
				}
			}
		}

		// No existing node carries the wanted value: synthesize one, but
		// only if there is a node to use as a template.
		if visible && len(nodes) > 0 {
			template := nodes[0].Get(0)
			node := &html.Node{
				Type: template.Type,
				Data: template.Data,
				Attr: []html.Attribute{
					{Key: VALUE, Val: m[VALUE]},
					{Key: mark, Val: mark},
					{Key: TYPE, Val: Type},
				},
			}
			nodes[0].Parent().AppendNodes(node)
		}
		return
	}

	if len(nodes) <= *index {
		return
	}
	for k, v := range m {
		nodes[*index].SetAttr(k, v)
	}
	*index++
}
示例10: convertUnwanted
// convertUnwanted neutralizes n when its Data is listed in unwanteds: the
// node becomes a comment node and " replaced" is appended to its Data.
//
// The node is deliberately not removed or replaced here, because doing so
// would break the recursion one level above. Actual removal is done later
// by a horizontal traversal.
//
// Note: removeUnwanted() has since been introduced and makes the
// convertUnwanted/removeComment pair obsolete.
func convertUnwanted(n *html.Node) {
	if !unwanteds[n.Data] {
		return
	}
	n.Data += " replaced"
	n.Type = html.CommentNode
}
示例11: ModifyHTML
// r is the request to the proxy
// u is the url, that the proxy has called
func ModifyHTML(r *http.Request, u *url.URL, s string) string {
var nums int // counter
// needed to get the current request into the
// "static" recursive functions
var PackageProxyHost = r.Host // port included!
var PackageRemoteHost = fetch.HostFromUrl(u)
fCondenseNode = func(n *html.Node, depth int) (ret string) {
if n.Type == html.ElementNode && n.Data == "script" {
ret += fmt.Sprintf(" var script%v = '[script]'; ", nums)
nums++
return
}
if n.Type == html.ElementNode && n.Data == "style" {
ret += fmt.Sprintf(" .xxx {margin:2px;} ")
return
}
if n.Type == html.ElementNode && n.Data == "img" {
ret += fmt.Sprintf(" [img] %v %v | ", getAttrVal(n.Attr, "alt"), getAttrVal(n.Attr, "src"))
}
if n.Type == html.ElementNode && n.Data == "a" {
ret += "[a]"
}
if n.Type == html.TextNode {
s := n.Data
// s = replTabsNewline.Replace(s)
// s = strings.TrimSpace(s)
if len(s) < 4 {
ret += s
} else if s != "" {
if depth > 0 {
ret += fmt.Sprintf(" [txt%v] %v", depth, s)
} else {
ret += " [txt] " + s
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
ret += fCondenseNode(c, depth+1)
}
return
}
// --------------------------
// ----------------------
fRecurse = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "form" {
hidFld := new(html.Node)
hidFld.Type = html.ElementNode
hidFld.Data = "input"
hidFld.Attr = []html.Attribute{
html.Attribute{Key: "name", Val: "redirect-to"},
html.Attribute{Key: "value", Val: absolutize(getAttrVal(n.Attr, "action"), PackageRemoteHost)},
}
n.AppendChild(hidFld)
submt := new(html.Node)
submt.Type = html.ElementNode
submt.Data = "input"
submt.Attr = []html.Attribute{
html.Attribute{Key: "type", Val: "submit"},
html.Attribute{Key: "value", Val: "subm"},
html.Attribute{Key: "accesskey", Val: "f"},
}
n.AppendChild(submt)
n.Attr = rewriteAttributes(n.Attr, PackageProxyHost, PackageRemoteHost)
}
if n.Type == html.ElementNode && n.Data == "script" {
for i := 0; i < len(n.Attr); i++ {
if n.Attr[i].Key == "src" {
n.Attr[i].Val = emptySrc
}
}
}
if n.Type == html.ElementNode &&
(n.Data == "a" || n.Data == "img" || n.Data == "script" || n.Data == "style") {
s := fCondenseNode(n, 0)
//fmt.Printf("found %v\n", s)
textReplacement := new(html.Node)
textReplacement.Type = html.TextNode
textReplacement.Data = s
attrStore := []html.Attribute{}
if n.Data == "a" || n.Data == "img" {
attrStore = rewriteAttributes(n.Attr, PackageProxyHost, PackageRemoteHost)
}
//.........这里部分代码省略.........