本文整理匯總了Golang中git/oschina/net/ciweilao/game_spider/git/page.Page.AddNewUrl方法的典型用法代碼示例。如果您正苦於以下問題:Golang Page.AddNewUrl方法的具體用法?Golang Page.AddNewUrl怎麽用?Golang Page.AddNewUrl使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類git/oschina/net/ciweilao/game_spider/git/page.Page的用法示例。
在下文中一共展示了Page.AddNewUrl方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Golang代碼示例。
示例1: parseNewsLinkListInfo
// linkAnchorRe matches a single `<a href...>...</a>` element. The lazy `.*?`
// stops at the first closing tag, so several anchors on the same line are
// reported as separate matches instead of being merged into one.
var linkAnchorRe = regexp.MustCompile(`<a href.*?</a>`)

// hrefValue returns the value of the first href attribute found in anchor.
// Single-quoted, double-quoted and unquoted values are supported. It returns
// "" when there is no href attribute or a quoted value is never closed.
func hrefValue(anchor string) string {
	const attr = "href="

	i := strings.Index(anchor, attr)
	if i < 0 {
		return ""
	}
	rest := anchor[i+len(attr):]
	if rest == "" {
		return ""
	}
	if quote := rest[0]; quote == '"' || quote == '\'' {
		end := strings.IndexByte(rest[1:], quote)
		if end < 0 {
			return ""
		}
		return rest[1 : 1+end]
	}
	// Unquoted value: it ends at the first whitespace or at the end of the tag.
	if end := strings.IndexAny(rest, " \t>"); end >= 0 {
		return rest[:end]
	}
	return rest
}

// parseNewsLinkListInfo scans content for anchor elements and registers each
// link target on p as a new URL tagged "list".
//
// Nothing is done when p.IsBreak() reports true. Links whose href contains
// "http://" are skipped; every other non-empty href is resolved against the
// URL of the page's request with util.GetRealUrl before being added.
// NOTE(review): "https://" links are not skipped, matching the previous
// behavior; confirm whether that is intended.
//
// The same page p is returned.
func (this *YouxiduoProcesser) parseNewsLinkListInfo(content string, p *page.Page) *page.Page {
	if p.IsBreak() {
		return p
	}

	for _, anchor := range linkAnchorRe.FindAllString(content, -1) {
		link := hrefValue(anchor)
		if link == "" || strings.Contains(link, "http://") {
			continue
		}
		p.AddNewUrl(util.GetRealUrl(p.GetRequest().GetUrl(), link), "list")
	}
	return p
}
示例2: parseNewsBreifInfo
// Patterns used by parseNewsBreifInfo. They are compiled once at package
// initialization instead of once per list item.
var (
	briefDatedSpanRe = regexp.MustCompile(`<span>(.)*[\d]{4}-[\d]{2}-[\d]{2}`)
	briefDateRe      = regexp.MustCompile(`[\d]{4}-[\d]{2}-[\d]{2}`)
	briefTitleRe     = regexp.MustCompile("title=\"(.)*\"")
	briefImgRe       = regexp.MustCompile("<img src=(.)*alt")
	briefInfoRe      = regexp.MustCompile("<p>(.)*</p>")
	briefLinkSpanRe  = regexp.MustCompile("<span(.)*<a(.)*</span>")
	briefHrefRe      = regexp.MustCompile("href(.)*\">")
)

// firstQuoted returns the text between the first two double quotes in s, or
// "" when s holds fewer than two double quotes.
func firstQuoted(s string) string {
	open := strings.Index(s, "\"")
	if open < 0 {
		return ""
	}
	s = s[open+1:]
	end := strings.Index(s, "\"")
	if end < 0 {
		return ""
	}
	return s[:end]
}

// parseNewsBreifInfo walks every `<li>...</li>` entry in content and, for each
// one, collects the fields "time", "title", "pic", "info" and "detailurl" into
// a page item that is added to p. The detail URL is also used as the item key
// and queued on p as a new URL tagged "content".
//
// An entry whose date string sorts before this.exitDate (plain string
// comparison) marks the page with SetBreak(true) and is skipped. An entry
// without a quoted detail URL is skipped with a warning. A missing title or
// picture is stored as an empty string rather than causing a slice-bounds
// panic.
//
// The same page p is returned.
func (this *YouxiduoProcesser) parseNewsBreifInfo(content string, p *page.Page) *page.Page {
	const (
		openTag  = "<li>"
		closeTag = "</li>"
	)

	logs.GetFirstLogger().Trace("B TEST LIST ITEMS")
	count := 1
	for {
		start := strings.Index(content, openTag)
		if start < 0 {
			break
		}
		// Look for the closing tag after the opening one so that a stray
		// "</li>" earlier in the text does not end the scan prematurely.
		end := strings.Index(content[start:], closeTag)
		if end < 0 {
			break
		}
		end += start + len(closeTag)

		item := page.NewPageItems("")
		tmpStr := content[start:end]
		content = content[end:]

		logs.GetFirstLogger().Trace("B================>")

		// Narrow to the dated <span> first, then pull the bare date out of it.
		timeStr := briefDateRe.FindString(briefDatedSpanRe.FindString(tmpStr))
		if this.exitDate > timeStr {
			p.SetBreak(true)
			continue
		}
		item.AddItem("time", timeStr)

		title := firstQuoted(briefTitleRe.FindString(tmpStr))
		logs.GetFirstLogger().Trace("title = " + title)
		item.AddItem("title", title)

		pic := firstQuoted(briefImgRe.FindString(tmpStr))
		if pic != "" && util.IsRelativePath(pic) {
			pic = util.GetRealUrl(p.GetRequest().GetUrl(), pic)
		}
		logs.GetFirstLogger().Trace("pic = " + pic)
		item.AddItem("pic", pic)

		info := briefInfoRe.FindString(tmpStr)
		logs.GetFirstLogger().Trace("info = " + info)
		info = strings.Replace(info, "'", "\"", -1)
		item.AddItem("info", info)

		detailurl := firstQuoted(briefHrefRe.FindString(briefLinkSpanRe.FindString(tmpStr)))
		logs.GetFirstLogger().Trace("detailurl = " + detailurl)
		if detailurl == "" {
			// Without a detail URL there is neither an item key nor a page to
			// crawl, so the entry is dropped.
			logs.GetFirstLogger().Warn("skip list item without detail url: " + title)
			continue
		}
		item.AddItem("detailurl", detailurl)
		item.SetKey(detailurl)
		p.AddNewUrl(detailurl, "content")

		logs.GetFirstLogger().Trace("E================>")
		logs.GetFirstLogger().Tracef("count = %d", count)
		count++
		logs.GetFirstLogger().Warn(title)
		p.AddPageItems(item)
	}
	return p
}