本文整理汇总了Golang中github.com/PuerkitoBio/goquery.Selection.Next方法的典型用法代码示例。如果您正苦于以下问题：Golang Selection.Next方法的具体用法？Golang Selection.Next怎么用？Golang Selection.Next使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类型github.com/PuerkitoBio/goquery.Selection的用法示例。
在下文中一共展示了Selection.Next方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: isBoostable
// isBoostable reports whether node is followed closely enough by a paragraph
// with real content to justify boosting its parent.
//
// The first paragraph is often just the caption under an image, so before
// boosting we want evidence that it is connected to other paragraphs: one of
// the following siblings must be a <p> element whose text contains more than
// five stop words. If the first <p> with enough stop words has not been found
// by the time a <p> three or more siblings away is reached, the node is
// considered too isolated and false is returned.
//
// It returns false when node has no following siblings.
func (this *contentExtractor) isBoostable(node *goquery.Selection) bool {
	const (
		maxStepsAway = 3 // a paragraph this many siblings away (or more) is too far
		minStopWords = 5 // a paragraph needs more stop words than this to count as content
	)

	stepsAway := 0
	// goquery's Next never returns nil; running out of siblings yields an empty
	// selection, so the loop must stop on Length() == 0. This also keeps Get(0)
	// from indexing into an empty node list.
	for next := node.Next(); next.Length() > 0; next = next.Next() {
		// Inspect the sibling being visited, not the starting node.
		if next.Get(0).DataAtom.String() == "p" {
			if stepsAway >= maxStepsAway {
				if this.config.debug {
					log.Println("Next paragraph is too far away, not boosting")
				}
				return false
			}
			ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, next.Text())
			if ws.stopWordCount > minStopWords {
				if this.config.debug {
					log.Println("We're gonna boost this node, seems content")
				}
				return true
			}
		}
		stepsAway++
	}
	return false
}
示例2: guessCaption
func guessCaption(sel *goquery.Selection, imgTag string) string {
possibleCapTags := []string{
"alt",
"title",
"data-caption", // bbc.com
}
possibleCreditClasses := []string{
"credit",
"caption-credit",
}
possibleCapClasses := []string{
".caption",
".caption.source",
"p.caption",
".media-caption",
".caption-credit",
".caption-left",
".caption-left",
".caption-right",
".caption-center",
".photoCaption",
".pb-caption",
"figurecaption",
"figcaption",
".imageCaption", // evolution institute
"wp-caption-text", // mcntyr.com
".wp-caption-text", // quantamagazin.com
".article__image__caption", // aeon
"p", // technologie review
}
// collect all captions
var captionsFromTags []string
for _, capTag := range possibleCapTags {
// Since sescapeDocLatexMetaChars only looks for Text() elements,
// these attributes are typically not covered, so we must do it here by
// hand
//log.Println("tag: ", capTag)
// first try to find caption tag from img environment (sel)
caption, exist := sel.Attr(capTag)
if !exist {
// if no caption is found, try to find it within the imgTag
tagSel := sel.Find(imgTag)
caption, exist = tagSel.Attr(capTag)
}
if exist {
//log.Println("tags: ", caption)
EscapeLatexMetaChars(&caption)
if captionIsValid(caption, "") {
captionsFromTags = append(captionsFromTags, caption)
}
}
}
var captionsFromClass []string
for _, capClass := range possibleCapClasses {
caption, _ := sel.Find(capClass).Html()
// for aeon, where the image caption is not with the fig environment
// but in the next sibling we append this selection to the figEnv
if caption == "" {
classAttr, _ := sel.Next().Attr("class")
if strings.Contains(classAttr, capClass) {
caption, _ = sel.Next().Html()
sel.AppendSelection(sel.Next())
}
}
if captionIsValid(caption, capClass) {
captionsFromClass = append(captionsFromClass, caption)
}
}
// collect all credits
var credits []string
for _, creditClass := range possibleCreditClasses {
credit, _ := sel.Find(creditClass).Html()
if credit != "" {
credits = append(credits, credit)
}
}
// prefer captions from a caption class over the caption from an 'alt' tag
var captions []string
if captionsFromClass != nil {
captions = captionsFromClass
} else {
captions = captionsFromTags
}
_, longestCap := longestElement(captions)
_, longestCredit := longestElement(credits)
longestCap = strings.TrimSpace(longestCap)
longestCredit = strings.TrimSpace(longestCredit)
total := longestCap + " " + longestCredit
total = sanatizeCaptions(total)
capDoc, err := goquery.NewDocumentFromReader(strings.NewReader(total))
//.........这里部分代码省略.........