當前位置: 首頁>>代碼示例>>Golang>>正文


Golang Page.AddPageItems方法代碼示例

本文整理匯總了Golang中git/oschina/net/ciweilao/game_spider/git/page.Page.AddPageItems方法的典型用法代碼示例。如果您正苦於以下問題:Golang Page.AddPageItems方法的具體用法?Golang Page.AddPageItems怎麼用?Golang Page.AddPageItems使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在git/oschina/net/ciweilao/game_spider/git/page.Page的用法示例。


在下文中一共展示了Page.AddPageItems方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。

示例1: parseNewsBreifInfo

// parseNewsBreifInfo scans content for "<li>...</li>" fragments and converts
// each one into a page item carrying the fields "time", "title", "pic",
// "info" and "detailurl". The detail URL is also used as the item key and is
// queued on p through AddNewUrl with the "content" tag.
//
// Scanning stops as soon as no "<li>" is left, or the next "</li>" does not
// come after the next "<li>". A fragment whose date string is lower than
// this.exitDate (plain string comparison) is not added; p.SetBreak(true) is
// called for it instead. A fragment that yields no detail URL is skipped,
// because there would be nothing to key the item on or to crawl next. Missing
// title or picture attributes result in empty field values rather than a
// panic.
//
// The function returns p.
func (this *YouxiduoProcesser) parseNewsBreifInfo(content string, p *page.Page) *page.Page {
	const closeTag = "</li>"

	// The patterns are constant, so compile them once per call instead of
	// once per list entry.
	var (
		spanDateRe = regexp.MustCompile(`<span>(.)*[\d]{4}-[\d]{2}-[\d]{2}`)
		dateRe     = regexp.MustCompile(`[\d]{4}-[\d]{2}-[\d]{2}`)
		titleRe    = regexp.MustCompile("title=\"(.)*\"")
		picRe      = regexp.MustCompile("<img src=(.)*alt")
		infoRe     = regexp.MustCompile("<p>(.)*</p>")
		linkSpanRe = regexp.MustCompile("<span(.)*<a(.)*</span>")
		hrefRe     = regexp.MustCompile("href(.)*\">")
	)

	// firstQuoted returns the text between the first and the second double
	// quote of s, or "" when s does not contain such a pair. Slicing with a
	// -1 index (quote not found) would panic, hence the explicit checks.
	firstQuoted := func(s string) string {
		open := strings.Index(s, "\"")
		if open < 0 {
			return ""
		}
		rest := s[open+1:]
		end := strings.Index(rest, "\"")
		if end < 0 {
			return ""
		}
		return rest[:end]
	}

	logs.GetFirstLogger().Trace("B TEST LIST ITEMS")
	count := 1

	for {
		start := strings.Index(content, "<li>")
		end := strings.Index(content, closeTag)
		if start < 0 || end <= start {
			break
		}

		// Cut the current entry out and advance past it, so that every exit
		// from this iteration (including continue) makes progress.
		entry := content[start : end+len(closeTag)]
		content = content[end+len(closeTag):]

		logs.GetFirstLogger().Trace("B================>")

		// The date is taken from inside the leading <span> only.
		timeStr := dateRe.FindString(spanDateRe.FindString(entry))
		if this.exitDate > timeStr {
			p.SetBreak(true)
			continue
		}

		title := firstQuoted(titleRe.FindString(entry))
		logs.GetFirstLogger().Trace("title = " + title)

		pic := firstQuoted(picRe.FindString(entry))
		if util.IsRelativePath(pic) {
			pic = util.GetRealUrl(p.GetRequest().GetUrl(), pic)
		}
		logs.GetFirstLogger().Trace("pic = " + pic)

		info := infoRe.FindString(entry)
		logs.GetFirstLogger().Trace("info = " + info)
		// Replace single quotes (literal and HTML-escaped) with double quotes.
		info = strings.Replace(info, "'", "\"", -1)
		info = strings.Replace(info, "&#39;", "\"", -1)

		detailurl := firstQuoted(hrefRe.FindString(linkSpanRe.FindString(entry)))
		logs.GetFirstLogger().Trace("detailurl = " + detailurl)
		if detailurl == "" {
			logs.GetFirstLogger().Warn("skip list item without detail url")
			continue
		}

		item := page.NewPageItems("")
		item.AddItem("time", timeStr)
		item.AddItem("title", title)
		item.AddItem("pic", pic)
		item.AddItem("info", info)
		item.AddItem("detailurl", detailurl)
		item.SetKey(detailurl)
		p.AddNewUrl(detailurl, "content")

		logs.GetFirstLogger().Trace("E================>")
		logs.GetFirstLogger().Tracef("count = %d", count)
		count++
		logs.GetFirstLogger().Warn(title)

		p.AddPageItems(item)
	}

	return p
}
開發者ID:dulumao,項目名稱:game_spider,代碼行數:77,代碼來源:main.go

示例2: parseNewsDetail


//.........這裏部分代碼省略.........

	}
	newsIndex1 := strings.Index(news, ">")
	newsIndex2 := strings.Index(news, "</div>")
	if newsIndex1 >= 0 && newsIndex2 >= 0 {
		news = news[newsIndex1+1 : newsIndex2]
	}

	//p.AddResultItem("news_content", news)
	news = strings.Replace(news, "'", "\"", -1)
	news = strings.Replace(news, "&#39;", "\"", -1)
	//	imgSrcIndex := strings.Index(news, "<img src=\"/")
	//	if imgSrcIndex >= 0 {
	//		news = strings.Replace(news, "<img src=\"/", "<img src=\""+util.GetUrlDomain(p.GetRequest().GetUrl())+"/", -1)
	//	}
	////////////////////
	imgSrcIndex := strings.Index(news, "<img ")
	if imgSrcIndex >= 0 {
		news = strings.Replace(news, "<img src=\"/", "<img src=\""+util.GetUrlDomain(p.GetRequest().GetUrl())+"/", -1)
		news = strings.Replace(news, "<img alt=\"[^\"]\" src=\"/", "<img src=\""+util.GetUrlDomain(p.GetRequest().GetUrl())+"/", -1)
		//println(news_content)

		//	println("===============")
		reg, _ = regexp.Compile(`<img[^>]*>`)
		imgList := reg.FindAllString(news, -1)
		for _, img := range imgList {
			//strings.Replace(news_content, img)
			//println("old img ==>" + img)
			newImg := img
			styleIndex := strings.Index(newImg, "style=\"")
			if styleIndex >= 0 {
				styleStr := newImg[styleIndex+len("style=\""):]
				endIndex := strings.Index(styleStr, "\"")
				if endIndex > 0 {
					styleStr = styleStr[0:endIndex]
				}
				newstyleStr := changeImgSize(styleStr)
				newImg = strings.Replace(img, styleStr, newstyleStr, -1)

			} else {
				//找width,找height
				reg2, _ := regexp.Compile(`width=\"[0-9]+\"`)
				tmpWidthStr := reg2.FindString(img)

				reg2, _ = regexp.Compile(`height=\"[0-9]+\"`)
				tmpHeightStr := reg2.FindString(img)
				//println("tmp height str = " + tmpHeightStr)
				var f float32 = 1.0
				if len(tmpWidthStr) > 0 {
					tmpStr1 := tmpWidthStr[strings.Index(tmpWidthStr, "\"")+1:]
					tmpStr1 = tmpStr1[0:strings.Index(tmpStr1, "\"")]

					tmpWidth, _ := strconv.Atoi(tmpStr1)
					if tmpWidth > 360 {
						f = float32(tmpWidth) / 360.0
						if len(tmpHeightStr) > 0 {
							tmpStr2 := tmpHeightStr[strings.Index(tmpHeightStr, "\"")+1:]
							tmpStr2 = tmpStr2[0:strings.Index(tmpStr2, "\"")]
							tmpHeight, _ := strconv.Atoi(tmpStr2)

							newImg = strings.Replace(img, tmpWidthStr, "width=\"360\"", -1)
							tmpHeight = int(float32(tmpHeight) / f)
							newImg = strings.Replace(newImg, tmpHeightStr, "height=\""+strconv.Itoa(tmpHeight)+"\"", -1)

						} else {
							newImg = strings.Replace(img, tmpWidthStr, "width=\"360\"", -1)
						}

					}
				}

			}

			//有沒有STYLE,有style的處理style
			//有沒有width
			//有沒有height
			//println("new img ==>" + newImg)
			if img != newImg {
				news = strings.Replace(news, img, newImg, -1)
			}
		}
	}
	//////
	news = strings.Replace(news, "<a[^>]*>官方網站</a>", "", -1)

	logs.GetFirstLogger().Trace("news = " + news)
	//判斷是否有視頻在新聞中,如有則過濾到哦
	reg, _ = regexp.Compile(`<[^>]*shockwave-flash[^>]*>`)
	tmpN := reg.FindString(news)

	item.AddItem("news_content", news)
	//p.AddResultItem("key", p.GetRequest().GetUrl())
	if len(tmpN) <= 0 {
		p.AddPageItems(item)
	}

	logs.GetFirstLogger().Trace("E TEST ARTICLE")

	return p
}
開發者ID:dulumao,項目名稱:game_spider,代碼行數:101,代碼來源:main.go


注:本文中的git/oschina/net/ciweilao/game_spider/git/page.Page.AddPageItems方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。