本文整理汇总了 Golang 中 visual_spider_go/spider/core/common/page.Page.GetPageItems 方法的典型用法代码示例。如果您正苦于以下问题：Golang Page.GetPageItems 方法的具体用法？Golang Page.GetPageItems 怎么用？Golang Page.GetPageItems 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 visual_spider_go/spider/core/common/page.Page 的用法示例。
在下文中一共展示了Page.GetPageItems方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: Process
// Process extracts follow-up links and the configured fields from a
// downloaded page.
//
// A page that did not download successfully only has its error message
// printed. Otherwise every element matched by the this.page["rule"] selector
// contributes one URL (its text when this.page["fun"] is "text", else the
// attribute named by this.page["fun"]), prefixed with this.page["pre"], and
// the URLs are handed to AddMyTargetRequests together with the request
// settings from this.conf. Each entry of this.rule is then evaluated and
// stored with AddField; an empty value marks the page as skipped.
func (this *PageProcesserHtml) Process(p *page.Page) {
	if !p.IsSucc() {
		println(p.Errormsg())
		return
	}

	doc := p.GetHtmlParser()

	// Collect the links to crawl next.
	linkSource := this.page["fun"]
	linkPrefix := this.page["pre"]
	var links []string
	doc.Find(this.page["rule"]).Each(func(_ int, sel *goquery.Selection) {
		if linkSource == "text" {
			links = append(links, linkPrefix+sel.Text())
			return
		}
		// A missing attribute yields "", so the bare prefix is appended.
		target, _ := sel.Attr(linkSource)
		links = append(links, linkPrefix+target)
	})
	p.AddMyTargetRequests(
		links,
		this.conf["texttype"],
		"",
		this.conf["resqType"],
		this.conf["postdata"],
		this.conf["proxy"],
		this.conf["heardefile"],
		this.conf["cookie"],
	)

	// Evaluate every configured field rule against the document.
	for name, selector := range this.rule {
		source := this.fun[name]
		var value string

		if this.num[name] != "ALL" {
			// Single-value mode: read from the whole selection and trim
			// surrounding spaces, tabs and newlines.
			matched := doc.Find(selector)
			if source == "text" {
				value = matched.Text()
			} else {
				value, _ = matched.Attr(source)
			}
			value = strings.Trim(value, " \t\n")
		} else {
			// "ALL" mode: one piece per matched element, joined with "|"
			// and left untrimmed.
			var pieces []string
			doc.Find(selector).Each(func(_ int, sel *goquery.Selection) {
				if source == "text" {
					pieces = append(pieces, sel.Text())
					return
				}
				piece, _ := sel.Attr(source)
				pieces = append(pieces, piece)
			})
			value = strings.Join(pieces, "|")
		}

		// An empty field flags the page as skipped; the field is still recorded.
		if value == "" {
			p.SetSkip(true)
		}
		p.AddField(name, value)
	}

	// Dump everything that was collected for this page.
	for key, item := range p.GetPageItems().GetAll() {
		println(key, item)
	}
}
示例2: pageProcess
// pageProcess is the core processing step for a single request: it downloads
// the page for req, runs the page processer on it, queues the target requests
// the page produced and, unless the page was marked as skipped, passes the
// collected page items to every pipeline.
//
// The download is attempted at most three times in total, calling
// this.sleep() before each attempt. A page that still reports failure is
// dropped without further processing. Any panic raised inside this function
// is recovered and logged so that it does not affect other requests.
func (this *Spider) pageProcess(req *request.Request) {
	var p *page.Page

	defer func() {
		// Log whatever the panic carried. Runtime panics (for example a nil
		// dereference) carry an error value rather than a string, so report
		// its message instead of discarding it.
		switch v := recover().(type) {
		case nil:
			// no panic occurred
		case string:
			mlog.LogInst().LogError(v)
		case error:
			mlog.LogInst().LogError("pageProcess error: " + v.Error())
		default:
			mlog.LogInst().LogError("pageProcess error")
		}
	}()

	// download page: up to three attempts, stopping at the first success
	for i := 0; i < 3; i++ {
		this.sleep()
		p = this.pDownloader.Download(req)
		if p.IsSucc() {
			break
		}
	}
	if !p.IsSucc() { // every attempt failed, nothing to process
		return
	}

	this.pPageProcesser.Process(p)

	// Queue follow-up requests; the loop variable is named so that it does
	// not shadow the req parameter.
	for _, next := range p.GetTargetRequests() {
		this.AddRequest(next)
	}

	// output
	if p.GetSkip() {
		return
	}
	for _, pip := range this.pPiplelines {
		pip.Process(p.GetPageItems(), this)
	}
}