本文整理汇总了Golang中code.google.com/p/go.net/html包的Node类的典型用法代码示例。如果您正苦于以下问题:Golang Node类的具体用法?Golang Node怎么用?Golang Node使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Node类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: convert_flowdocument
// convert_flowdocument rewrites the subtree rooted at frag, in place, into
// FlowDocument-style markup.
//
// Text nodes are left untouched. Every other node is dispatched on frag.Data:
//   - img, object, video, audio, embed: turned into a comment node with its
//     children removed and attributes cleared;
//   - a: renamed to "Hyperlink", attributes replaced by extract_ahref_attr;
//   - article: renamed to "FlowDocument" with a single xmlns attribute;
//   - anything else (including p): renamed to "Paragraph" with no attributes.
//     The first such node seen is remembered in first_paragraph.
//
// After the node itself is rewritten, its remaining children are converted
// recursively.
func (m *flowdocument_maker) convert_flowdocument(frag *html.Node) {
	if frag.Type == html.TextNode {
		return
	}

	switch frag.Data {
	case "img", "object", "video", "audio", "embed":
		// Media is not carried over: the node is downgraded to a comment.
		frag.Type = html.CommentNode
		node_clear_children(frag)
		frag.Attr = nil
	case "a":
		frag.Data = "Hyperlink"
		frag.Attr = extract_ahref_attr(frag.Attr)
	case "article":
		frag.Data = "FlowDocument"
		// Original author's note: setting the namespace on the node did not
		// work, so xmlns is written as an ordinary attribute instead.
		frag.Attr = []html.Attribute{{Key: "xmlns", Val: fdocns}}
	default:
		// "p" and every tag not listed above end up here.
		frag.Data = "Paragraph"
		frag.Attr = nil
		if m.first_paragraph == nil {
			m.first_paragraph = frag
		}
	}

	for child := frag.FirstChild; child != nil; child = child.NextSibling {
		m.convert_flowdocument(child)
	}
}
示例2: getSiblingsContent
// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is a <p> with non-empty text, the
// sibling itself is the only result. Otherwise every <p> found beneath it
// with non-empty text is scored by its stop-word count; a paragraph is kept
// when 0.30*baselinescoreSiblingsPara is below that count and
// isHighLinkDensity does not flag it. Each kept paragraph is returned as a
// fresh single-node selection wrapping a text node that holds the paragraph
// text.
//
// The returned slice is never nil.
func (ce *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	ps := []*goquery.Selection{}

	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(ps, currentSibling)
	}

	const siblingBaselineScore = 0.30
	threshold := siblingBaselineScore * baselinescoreSiblingsPara

	currentSibling.Find("p").Each(func(i int, s *goquery.Selection) {
		text := s.Text()
		if len(text) == 0 {
			return
		}

		wordStats := ce.config.stopWords.stopWordsCount(ce.config.targetLanguage, text)
		linkHeavy := ce.isHighLinkDensity(s)
		if threshold < float64(wordStats.stopWordCount) && !linkHeavy {
			// The replacement node is a text node, yet it carries the
			// atom of <p>, exactly as the original code built it.
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			ps = append(ps, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})

	return ps
}
示例3: node_clear_children
// node_clear_children detaches every child of frag, leaving it childless.
func node_clear_children(frag *html.Node) {
	// Removing the head repeatedly promotes the next sibling to FirstChild,
	// so the loop ends once nothing is left.
	for frag.FirstChild != nil {
		frag.RemoveChild(frag.FirstChild)
	}
}
示例4: clean_element_before_header
// clean_element_before_header removes the leading element children of body
// whose tag differs from name.
//
// Removal stops at the first child that is either an element named name or
// not an element at all (for example a text node); that child and everything
// after it are kept.
func clean_element_before_header(body *html.Node, name string) {
	for {
		head := body.FirstChild
		if head == nil || head.Type != html.ElementNode || head.Data == name {
			return
		}
		body.RemoveChild(head)
	}
}
示例5: remove_decentant
// remove_decentant deletes every element named tag from the subtree below n.
//
// A matching element is removed together with its whole subtree; any other
// child is searched recursively. n itself is never removed.
func remove_decentant(n *html.Node, tag string) {
	var following *html.Node
	for cur := n.FirstChild; cur != nil; cur = following {
		// Remember the sibling first: RemoveChild clears cur's links.
		following = cur.NextSibling

		if cur.Type == html.ElementNode && cur.Data == tag {
			n.RemoveChild(cur)
			continue
		}
		remove_decentant(cur, tag)
	}
}
示例6: html_clean_root
// html_clean_root runs the html_cleaner pipeline over root and returns the
// node chosen as the article together with the iframes the cleaner collected.
//
// uribase is parsed and stored as the cleaner's current_url. The article is
// located through the document's headers: a lone h1 is tried first, then a
// lone h2 (only when there is no h1), then a lone h3 and a lone h4 while no
// earlier attempt has succeeded. If no article has been set afterwards, an
// empty <body> element is appended to root and used as the article.
func html_clean_root(root *html.Node, uribase string) (*html.Node, []*html.Node) {
	cleaner := new(html_cleaner)
	// The parse error is discarded; current_url gets whatever url.Parse
	// returned.
	cleaner.current_url, _ = url.Parse(uribase)
	cleaner.html_drop_unprintable(root)
	cleaner.remove_head()

	// Header counts are captured once, before any article lookup runs.
	h1Count := len(cleaner.header1s)
	h2Count := len(cleaner.header2s)
	h3Count := len(cleaner.header3s)
	h4Count := len(cleaner.header4s)

	found := false

	// If the document has exactly one h1, the div containing that h1 is
	// usually the document content.
	if h1Count == 1 {
		candidate := find_article_via_header_i(cleaner.header1s[0])
		found = cleaner.try_update_article(candidate)
		if !found && cleaner.title_similar(cleaner.header1s[0].Data) {
			found = true
			cleaner.article = candidate
		}
	}

	// If the document has exactly one h2 and no h1, that h2 is the title
	// and the div containing it is the document content. found is still
	// false here because the h1 branch cannot have run.
	if h1Count == 0 && h2Count == 1 {
		found = cleaner.try_update_article(find_article_via_header_i(cleaner.header2s[0]))
	}

	if !found && h3Count == 1 {
		found = cleaner.try_update_article(find_article_via_header_i(cleaner.header3s[0]))
	}

	if !found && h4Count == 1 {
		// The outcome is not consulted afterwards; only the call matters.
		cleaner.try_update_article(find_article_via_header_i(cleaner.header4s[0]))
	}

	if cleaner.article == nil {
		cleaner.article = &html.Node{
			Type:     html.ElementNode,
			DataAtom: atom.Body,
			Data:     "body",
		}
		root.AppendChild(cleaner.article)
	}

	// fix_forms may turn a form into a div, so it has to run before
	// try_catch_phpwnd.
	cleaner.fix_forms()
	cleaner.try_catch_phpwnd()
	cleaner.clean_body()
	cleaner.clean_empty_nodes(cleaner.article)
	cleaner.clean_attributes(cleaner.article)

	return cleaner.article, cleaner.iframes
}
示例7: node_append_children
// node_append_children copies the content below src into target while
// flattening intermediate wrappers.
//
// For each child of src:
//   - a text node is re-created with create_text and appended to target;
//   - an <a> element, or a node for which node_is_object reports true, is
//     shallow-cloned, filled recursively from the child, and appended;
//   - any other node is dropped and its own children are flattened straight
//     into target.
func node_append_children(src *html.Node, target *html.Node) {
	foreach_child(src, func(child *html.Node) {
		if child.Type == html.TextNode {
			target.AppendChild(create_text(child.Data))
			return
		}

		if child.Data == "a" || node_is_object(child) {
			// Keep the element itself, but rebuild its content flattened.
			clone := shallow_clone_element(child)
			node_append_children(child, clone)
			target.AppendChild(clone)
			return
		}

		// Wrapper element: skip it and hoist whatever it contains.
		node_append_children(child, target)
	})
}
示例8: trim_small_image
// trim_small_image reports whether img should be dropped and, if so, marks
// it by renaming the node to "input".
//
// An image without a parent is never dropped. Otherwise it is dropped when
// either
//   - both dimensions are positive, its area is below
//     small_image_t*small_image_t, and its parent is an <a>; or
//   - it is exactly 1x1.
//
// Dimensions come from media_get_dim; its third result is ignored.
func trim_small_image(img *html.Node) (drop bool) {
	width, height, _ := media_get_dim(img)
	if img.Parent == nil {
		return false
	}

	smallInsideLink := width > 0 && height > 0 &&
		width*height < small_image_t*small_image_t &&
		img.Parent.Data == "a"
	singlePixel := width == 1 && height == 1

	if !smallInsideLink && !singlePixel {
		return false
	}

	img.Data = "input"
	return true
}
示例9: clean_inline_node
// clean_inline_node replaces the children of n with the flat list of inline
// nodes produced by flatten_inline_node(n).
//
// n is expected to be an element node. Per the original author's notes, the
// inline content in question is text, image, a, object, video and audio, and
// an inline node may itself contain a div element.
//
// The list is computed first, then n is emptied, and finally each inline is
// detached from whatever parent it still has and appended to n in order.
func (c *html_cleaner) clean_inline_node(n *html.Node) {
	inlines := c.flatten_inline_node(n)

	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}

	for _, inline := range inlines {
		// A node nested deeper than a direct child is still attached to
		// its own parent and must be detached before it is appended.
		if owner := inline.Parent; owner != nil {
			owner.RemoveChild(inline)
		}
		n.AppendChild(inline)
	}
}
示例10: trim_empty_spaces_func
// trim_empty_spaces_func strips blank content from the front of n
// (original note: remove the whitespace before and after lines).
//
// Children are visited from the first one onwards. A text node has trim
// applied to its Data; any other node is processed recursively. A child that
// node_is_not_empty then reports as non-empty ends the walk; otherwise the
// child is removed from n and the walk continues with its next sibling.
func (c *html_cleaner) trim_empty_spaces_func(n *html.Node, trim func(string) string) {
	var following *html.Node
	for cur := n.FirstChild; cur != nil; cur = following {
		switch cur.Type {
		case html.TextNode:
			cur.Data = trim(cur.Data)
		default:
			c.trim_empty_spaces_func(cur, trim)
		}

		if node_is_not_empty(cur) {
			return
		}

		// Grab the sibling before removal; RemoveChild clears the link.
		following = cur.NextSibling
		n.RemoveChild(cur)
	}
}
示例11: cleanupTree
// cleanupTree removes every text node with empty Data from the subtree below
// node. Such nodes can be left behind by the splitting process, which keeps
// them around to simplify its own data manipulation.
//
// Element children are cleaned recursively; all other node types are left
// alone.
func cleanupTree(node *html.Node) {
	cur := node.FirstChild
	for cur != nil {
		// Saved up front because RemoveChild resets cur's sibling links.
		following := cur.NextSibling

		if cur.Type == html.TextNode {
			if cur.Data == "" {
				node.RemoveChild(cur)
			}
		} else if cur.Type == html.ElementNode {
			cleanupTree(cur)
		}

		cur = following
	}
}
示例12: trim_display_none
// trim_display_none marks n by renaming it to "input" when its inline style
// sets display to none.
//
// The style attribute (read through get_attribute) is treated as a list of
// ';'-separated "property: value" declarations. Property names and values
// are compared case-insensitively with surrounding whitespace ignored, and a
// trailing "!important" is disregarded. When display is declared more than
// once, the last declaration decides.
//
// This replaces an earlier substring test (style contains "display" and
// contains "none"), which also fired on unrelated combinations such as
// "display:block; border:none".
func trim_display_none(n *html.Node) {
	st := get_attribute(n, "style")

	hidden := false
	for _, decl := range strings.Split(st, ";") {
		parts := strings.SplitN(decl, ":", 2)
		if len(parts) != 2 {
			continue
		}
		if strings.ToLower(strings.TrimSpace(parts[0])) != "display" {
			continue
		}

		value := strings.ToLower(strings.TrimSpace(parts[1]))
		// Cut off a priority marker such as "!important".
		if bang := strings.Index(value, "!"); bang >= 0 {
			value = strings.TrimSpace(value[:bang])
		}
		hidden = value == "none"
	}

	if hidden {
		n.Data = "input"
	}
}
示例13: processTextNode
// processTextNode scans node.Data for the first shortcode and hands it to
// handleShortcode.
//
// The text is walked rune by rune. At each '[' the remainder is given to
// parseShortcode:
//   - a zero size means no shortcode starts here, and scanning moves on;
//   - a non-zero size with an empty tag is an escape code: the outer
//     brackets are removed from node.Data by substituting rest, and scanning
//     resumes after the substituted text;
//   - otherwise the results of handleShortcode are returned as they are.
//
// When no shortcode is found, tags is returned unchanged together with
// node.NextSibling and a nil error.
func processTextNode(node *html.Node, tags []openTag) (outTags []openTag, next *html.Node, err error) {
	for pos := 0; pos < len(node.Data); {
		r, width := utf8.DecodeRuneInString(node.Data[pos:])
		if r != '[' {
			pos += width
			continue
		}

		size, openClose, tag, rest := parseShortcode(node.Data[pos+1:])
		switch {
		case size == 0:
			// Just a literal bracket.
			pos += width
		case tag == "":
			// Escape code: drop the outer [] and carry on after it.
			node.Data = node.Data[:pos] + rest + node.Data[pos+1+size:]
			pos += len(rest)
		default:
			return handleShortcode(node, tags, pos, pos+1+size, openClose, tag, rest)
		}
	}

	// No shortcode anywhere in this node.
	return tags, node.NextSibling, nil
}
示例14: try_update_class_attr
// try_update_class_attr appends a class attribute with value class to b.
//
// Nothing happens when class is empty. Otherwise b.Attr is replaced by a
// newly allocated slice holding the previous attributes followed by the new
// one; the old backing array is never written to. No check is made for a
// class attribute that is already present.
func try_update_class_attr(b *html.Node, class string) {
	if class == "" {
		return
	}

	attrs := make([]html.Attribute, 0, len(b.Attr)+1)
	attrs = append(attrs, b.Attr...)
	attrs = append(attrs, html.Attribute{Key: "class", Val: class})
	b.Attr = attrs
}
示例15: addFiles
// addFiles appends one element per entry of files to parent.
//
// form selects the kind of element:
//   - SCRIPT: <script src="file">
//   - STYLE:  <link rel="stylesheet" href="file">
//
// Any other form panics with "Type not understood" as soon as the first file
// is processed; with an empty files slice nothing happens at all.
func addFiles(form uint8, parent *html.Node, files []string) {
	for _, file := range files {
		var (
			tag   string
			attrs []html.Attribute
		)

		switch form {
		case SCRIPT:
			tag = "script"
			attrs = []html.Attribute{
				{Key: "src", Val: file},
			}
		case STYLE:
			tag = "link"
			attrs = []html.Attribute{
				{Key: "rel", Val: "stylesheet"},
				{Key: "href", Val: file},
			}
		default:
			panic("Type not understood")
		}

		parent.AppendChild(&html.Node{
			Type: html.ElementNode,
			Data: tag,
			Attr: attrs,
		})
	}
}