本文整理汇总了Golang中github.com/PuerkitoBio/goquery.Selection.Get方法的典型用法代码示例。如果您正苦于以下问题:Golang Selection.Get方法的具体用法?Golang Selection.Get怎么用?Golang Selection.Get使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类github.com/PuerkitoBio/goquery.Selection的用法示例。
在下文中一共展示了Selection.Get方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: replaceWithPara
// replaceWithPara retags the first node of div as a <p> element in place.
// An empty selection is left untouched.
func (this *cleaner) replaceWithPara(div *goquery.Selection) {
	if div.Size() == 0 {
		return
	}
	first := div.Get(0)
	// Keep the atom and its string form in sync on the node.
	first.DataAtom = atom.P
	first.Data = atom.P.String()
}
示例2: isBoostable
// isBoostable reports whether node is followed closely enough by a
// content-bearing paragraph to justify boosting its parent.
//
// A lot of times the first paragraph might be the caption under an image, so
// before boosting a parent node we want to make sure it is connected to other
// paragraphs, at least for the first n paragraphs: a following sibling must be
// a paragraph and must have at least some substantial weight to it.
//
// The following siblings of node are walked in document order. The first <p>
// sibling containing more than 5 stop words yields true. If a <p> sibling is
// reached 3 or more steps away, the walk gives up and returns false. If the
// siblings run out, the result is false.
func (this *contentExtractor) isBoostable(node *goquery.Selection) bool {
	// Loop-invariant upper bound on the number of siblings to visit.
	maxSteps := node.Siblings().Length()
	stepsAway := 0
	// Next yields an empty (non-nil) selection once the siblings are
	// exhausted, so the loop tests Length rather than comparing against nil.
	for next := node.Next(); next.Length() > 0 && stepsAway < maxSteps; next = next.Next() {
		// Inspect the sibling being visited, not the starting node.
		if next.Get(0).DataAtom.String() == "p" {
			if stepsAway >= 3 {
				if this.config.debug {
					log.Println("Next paragraph is too far away, not boosting")
				}
				return false
			}
			ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, next.Text())
			if ws.stopWordCount > 5 {
				if this.config.debug {
					log.Println("We're gonna boost this node, seems content")
				}
				return true
			}
		}
		stepsAway++
	}
	return false
}
示例3: getSiblingsContent
// getSiblingsContent gathers paragraph selections from currentSibling.
//
// When currentSibling's first node is itself a non-empty <p>, it is returned
// as the only element. Otherwise every descendant <p> with non-empty text is
// scored: its stop-word count must exceed 0.30 * baselinescoreSiblingsPara and
// it must not have a high link density. Each qualifying paragraph is returned
// as a new single-node selection wrapping a text node that carries the
// paragraph's text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return []*goquery.Selection{currentSibling}
	}

	const siblingBaselineScore = 0.30
	collected := []*goquery.Selection{}
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		content := para.Text()
		if len(content) == 0 {
			return
		}
		stopWords := this.config.stopWords.stopWordsCount(this.config.targetLanguage, content)
		linkHeavy := this.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold < float64(stopWords.stopWordCount) && !linkHeavy {
			// Wrap the paragraph text in a synthetic text node tagged with the P atom.
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     content,
				DataAtom: atom.P,
			}
			wrapped := new(goquery.Selection)
			wrapped.Nodes = []*html.Node{textNode}
			collected = append(collected, wrapped)
		}
	})
	return collected
}
示例4: node
// node returns a string representation of the selection, based on its first
// node.
//
// Text nodes are rendered as their whitespace-trimmed content. Element nodes
// are rendered according to their tag name (or the "highlight" class); the
// headings use ANSI escape sequences for colour. Any other node type, an
// unrecognised element, or an empty selection yields "".
func node(i int, s *goquery.Selection) string {
	if s.Length() == 0 {
		return ""
	}
	n := s.Get(0)

	// For text nodes Data holds the text itself, not a tag name, so the node
	// type must be settled before Data is compared against tag names.
	// Otherwise a text node whose content is e.g. "p" or "a" would be
	// rendered as that element.
	switch n.Type {
	case html.TextNode:
		return strings.TrimSpace(n.Data)
	case html.ElementNode:
		// Rendered by tag name below.
	default:
		return ""
	}

	switch {
	case n.Data == "h1":
		return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s))
	case n.Data == "h2":
		return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s))
	case n.Data == "h3":
		return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s))
	case n.Data == "p":
		return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1))
	case n.Data == "pre" || s.HasClass("highlight"):
		return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2))
	case n.Data == "a":
		return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link"))
	case n.Data == "li":
		return fmt.Sprintf(" • %s\n", contents(s))
	case n.Data == "ul":
		return fmt.Sprintf("%s\n", nodes(s))
	case n.Data == "code":
		return fmt.Sprintf("\033[1m%s\033[0m ", s.Text())
	default:
		return ""
	}
}
示例5: delAttr
// delAttr removes the attribute named attr from the first node of selection.
// It does nothing when indexOfAttribute reports the attribute as absent.
func (this *parser) delAttr(selection *goquery.Selection, attr string) {
	pos := this.indexOfAttribute(selection, attr)
	if pos < 0 {
		return
	}
	target := selection.Get(0)
	// Splice the attribute out in place, reusing the existing backing array.
	target.Attr = append(target.Attr[:pos], target.Attr[pos+1:]...)
}
示例6: removeNode
// removeNode detaches the first node of selection from its parent.
// A nil or empty selection, or a node without a parent, is a no-op.
func (this *parser) removeNode(selection *goquery.Selection) {
	// Get(0) indexes the underlying node slice directly and panics on an
	// empty selection, so emptiness has to be ruled out up front.
	if selection == nil || selection.Length() == 0 {
		return
	}
	node := selection.Get(0)
	if node != nil && node.Parent != nil {
		node.Parent.RemoveChild(node)
	}
}
示例7: isNodescoreThresholdMet
// isNodescoreThresholdMet reports whether e scores high enough relative to
// node. The threshold is 8% of node's gravity score. A candidate below the
// threshold still passes when its first node is a <td>.
func (this *contentExtractor) isNodescoreThresholdMet(node *goquery.Selection, e *goquery.Selection) bool {
	reference := this.getNodeGravityScore(node)
	candidate := this.getNodeGravityScore(e)
	cutoff := float64(reference) * 0.08

	belowCutoff := float64(candidate) < cutoff
	if !belowCutoff {
		return true
	}
	// Below the cutoff only table cells are still accepted.
	return e.Get(0).DataAtom.String() == "td"
}
示例8: indexOfAttribute
// indexOfAttribute returns the position of the first attribute whose key
// equals attr on the first node of selection, or -1 when there is none.
func (this *parser) indexOfAttribute(selection *goquery.Selection, attr string) int {
	attrs := selection.Get(0).Attr
	for pos := 0; pos < len(attrs); pos++ {
		if attrs[pos].Key == attr {
			return pos
		}
	}
	return -1
}
示例9: removeNodes
// removeNodes detaches each node in s from its parent. Nodes for which
// Parent() yields an empty selection are left in place.
func removeNodes(s *goquery.Selection) {
	s.Each(func(_ int, item *goquery.Selection) {
		holder := item.Parent()
		if holder.Length() > 0 {
			holder.Get(0).RemoveChild(item.Get(0))
		}
		// TODO???: nothing is done when the node has no parent selection.
	})
}
示例10: getVideo
// getVideo builds a video value describing node: its embed code, the tag
// name of its first node as the embed type, its width and height, its source
// and the provider derived from that source.
func (ve *VideoExtractor) getVideo(node *goquery.Selection) video {
	// The source is resolved first because the provider is derived from it.
	source := ve.getSrc(node)
	return video{
		embedCode: ve.getEmbedCode(node),
		embedType: node.Get(0).DataAtom.String(),
		width:     ve.getWidth(node),
		height:    ve.getHeight(node),
		src:       source,
		provider:  ve.getProvider(source),
	}
}
示例11: isTableAndNoParaExist
// isTableAndNoParaExist first prunes every descendant <p> of selection whose
// text is shorter than 25 bytes. It then reports true when no <p> descendants
// remain and the first node of selection is not a <td>.
func (this *contentExtractor) isTableAndNoParaExist(selection *goquery.Selection) bool {
	selection.Find("p").Each(func(_ int, para *goquery.Selection) {
		if len(para.Text()) >= 25 {
			return
		}
		n := para.Get(0)
		n.Parent.RemoveChild(n)
	})

	// Query again: the pruning above may have removed paragraphs.
	if selection.Find("p").Length() > 0 {
		return false
	}
	return selection.Get(0).DataAtom.String() != "td"
}
示例12: setAttr
// setAttr sets attribute attr to value on the first node of selection.
// The node's attribute list is rebuilt: every existing attribute with a
// different key is copied (key and value only), and the new attribute is
// appended last. An empty selection is left untouched.
func (this *parser) setAttr(selection *goquery.Selection, attr string, value string) {
	if selection.Size() == 0 {
		return
	}
	target := selection.Get(0)
	kept := make([]html.Attribute, 0, len(target.Attr)+1)
	for _, existing := range target.Attr {
		if existing.Key == attr {
			continue
		}
		kept = append(kept, html.Attribute{Key: existing.Key, Val: existing.Val})
	}
	target.Attr = append(kept, html.Attribute{Key: attr, Val: value})
}
示例13: extractData
// extractData resolves the link carried by the first node of tds against
// parsed_url and either recurses into it or reports it.
//
// The value of the node's first attribute is parsed as a URL reference
// (presumably the href; confirm against the caller's selector). If the
// resolved URL has not been visited yet, then:
//   - a path without a "." is recorded in visited_urls and searched via
//     newSearch;
//   - any other path is sent on result_chan.
//
// A nil or empty selection, or a node without attributes, is a no-op.
// A reference that cannot be parsed panics.
func extractData(tds *goquery.Selection, parsed_url *url.URL, visited_urls map[string]string, result_chan chan string) {
	// Get(0) and Attr[0] both index directly, so guard against empty input
	// instead of panicking with an index-out-of-range error.
	if tds == nil || tds.Length() == 0 {
		return
	}
	attrs := tds.Get(0).Attr
	if len(attrs) == 0 {
		return
	}

	newPath, err := url.Parse(attrs[0].Val)
	if err != nil {
		panic(err)
	}
	resolved := parsed_url.ResolveReference(newPath)
	fullURL := resolved.String()
	if _, seen := visited_urls[fullURL]; seen {
		return
	}
	if !strings.Contains(resolved.Path, ".") {
		visited_urls[fullURL] = fullURL
		newSearch(fullURL, &visited_urls, result_chan)
	} else {
		result_chan <- fullURL
	}
}
示例14: cleanConditionally
// cleanConditionally removes the descendants of s matching selector that look
// like non-content. It does nothing unless d.CleanConditionally is set.
//
// A match is removed outright when its class weight plus its candidate content
// score is negative. Otherwise, if its text has fewer than 10 commas, it is
// removed when any of the heuristics below fires (image, list-item, input and
// embed counts, content length, link density). Every removal is logged
// through Logger.
func (d *Document) cleanConditionally(s *goquery.Selection, selector string) {
	if !d.CleanConditionally {
		return
	}

	s.Find(selector).Each(func(_ int, el *goquery.Selection) {
		node := el.Get(0)
		weight := float32(d.classWeight(el))
		var contentScore float32
		if candidate, found := d.candidates[node]; found {
			contentScore = candidate.score
		}

		if weight+contentScore < 0 {
			removeNodes(el)
			Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f\n", node.Data, getName(el), weight, contentScore)
			return
		}

		text := el.Text()
		if strings.Count(text, ",") >= 10 {
			// Ten or more commas: the element is kept as is.
			return
		}

		tally := map[string]int{
			"p":     el.Find("p").Length(),
			"img":   el.Find("img").Length(),
			"li":    el.Find("li").Length() - 100,
			"a":     el.Find("a").Length(),
			"embed": el.Find("embed").Length(),
			"input": el.Find("input").Length(),
		}
		contentLength := len(strings.TrimSpace(text))
		linkDensity := d.getLinkDensity(el)

		// The first matching heuristic decides the reason; an empty reason
		// means the element stays.
		var reason string
		switch {
		case tally["img"] > tally["p"]:
			reason = "too many images"
		case tally["li"] > tally["p"] && !el.Is("ul,ol"):
			reason = "more <li>s than <p>s"
		case tally["input"] > tally["p"]/3:
			reason = "less than 3x <p>s than <input>s"
		case contentLength < d.MinTextLength && (tally["img"] == 0 || tally["img"] > 2):
			reason = "too short content length without a single image"
		case weight < 25 && linkDensity > 0.2:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case weight >= 25 && linkDensity > 0.5:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case (tally["embed"] == 1 && contentLength < 75) || tally["embed"] > 1:
			reason = "<embed>s with too short a content length, or too many <embed>s"
		}
		if reason == "" {
			return
		}

		Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f because it has %s\n", node.Data, getName(el), weight, contentScore, reason)
		removeNodes(el)
	})
}