本文整理匯總了Golang中github.com/PuerkitoBio/goquery.NewDocument函數的典型用法代碼示例。如果您正苦於以下問題:Golang NewDocument函數的具體用法?Golang NewDocument怎麼用?Golang NewDocument使用的例子?那麼,這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了NewDocument函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: GetContentishe
// GetContentishe picks a random image from a random page of a paginated
// category and returns its URL with "?.jpg" appended.
//
// It loads category_url, reads the number shown in the pagination widget's
// "current" element (used here as the page count), loads a randomly chosen
// page "<category_url>/<n>" and selects a random element matching
// `.image [src$=...]` for gif/png/jpg/jpeg sources.
//
// An error is returned when a page cannot be fetched, the page count cannot
// be parsed, no matching image is found, or the chosen element has neither a
// non-empty src nor a non-empty href attribute.
func GetContentishe(category_url string) (url string, err error) {
	doc, err := goquery.NewDocument(category_url)
	if err != nil {
		return "", err
	}

	pNav := doc.Find("div.pagination_expanded > span.current").First()
	pageCount, err := strconv.Atoi(pNav.Text())
	if err != nil {
		// Previously this error was dropped, pageCount became 0 and
		// rand.Intn(-1) panicked below.
		return "", errors.New("failed to parse page count: " + err.Error())
	}

	rand.Seed(time.Now().Unix())

	// rand.Intn panics for n <= 0, so only randomize when there are at least
	// two pages; otherwise fall back to the first page.
	page := 1
	if pageCount > 1 {
		page = rand.Intn(pageCount-1) + 1
	}

	doc, err = goquery.NewDocument(category_url + "/" + strconv.Itoa(page))
	if err != nil {
		return "", err
	}

	contentishe := doc.Find(".image [src$='.gif']," +
		" .image [src$='.png']," +
		" .image [src$='.jpg']," +
		" .image [src$='.jpeg'] ")
	if contentishe.Length() == 0 {
		return "", errors.New("failed to find contentishe")
	}

	// Pick one element and try both attributes on it; re-rolling the random
	// index for the href fallback would inspect an unrelated element.
	picked := contentishe.Eq(rand.Intn(contentishe.Length()))
	imgSrc, exist := picked.Attr("src")
	if !exist || imgSrc == "" {
		imgSrc, exist = picked.Attr("href")
		if !exist || imgSrc == "" {
			return "", errors.New("bad src and href")
		}
	}
	return imgSrc + "?.jpg", nil
}
示例2: posts
// posts walks the paginated listing at url and returns every post whose
// postDate is strictly after lastModified.
//
// The number of pages is taken from the last link of the <ul> whose class is
// exactly "pagination" (its href must end in "page_num=<n>"). Pages are then
// requested with "?comment_order=DESC&page_num=<n>" and every <div> whose
// class is exactly "post-sla" is converted with post().
//
// Fetch errors are handed to errNotNilToPanic (presumably panics on a non-nil
// error — confirm against its definition).
func posts(url string, lastModified time.Time) []Post {
	doc, err := goquery.NewDocument(url)
	errNotNilToPanic(err)

	// Compiled once up front instead of inside the selection callback.
	pageNumRe := regexp.MustCompile(".*page_num=([0-9]+)$")

	lastPage := 0
	doc.Find("ul").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		if class, exist := s.Attr("class"); !exist || class != "pagination" {
			return true
		}
		if href, exist := s.Find("li").Find("a").Last().Attr("href"); exist {
			if m := pageNumRe.FindStringSubmatch(href); len(m) > 1 {
				// m[1] matched [0-9]+, so Atoi can only fail on overflow,
				// in which case lastPage stays 0 and no page is fetched.
				lastPage, _ = strconv.Atoi(m[1])
			}
		}
		// Only the first pagination list is considered.
		return false
	})

	pList := []Post{}
	for page := 1; page <= lastPage; page++ {
		pageDoc, err := goquery.NewDocument(url + "?comment_order=DESC&page_num=" + strconv.Itoa(page))
		errNotNilToPanic(err)

		reachedKnown := false
		pageDoc.Find("div").EachWithBreak(func(_ int, s *goquery.Selection) bool {
			if class, exist := s.Attr("class"); exist && class == "post-sla" {
				p := post(s)
				if !lastModified.Before(p.postDate) {
					reachedKnown = true
					return false
				}
				pList = append(pList, p)
			}
			return true
		})

		// The listing is requested newest-first, so once a post that is not
		// newer than lastModified shows up, later pages cannot contribute
		// anything; stop instead of downloading them all.
		if reachedKnown {
			break
		}
	}
	return pList
}
示例3: BroadcastIds
// BroadcastIds returns the broadcast identifiers listed in the BBC episode
// guide of the programme showId.
//
// Only the first guide page is read unless all is true, in which case every
// page up to the number shown in the ".pagination__page--last" link is
// fetched. An identifier is the third "/"-separated segment of each
// ".programme__titles a" href (e.g. "/programmes/<id>" yields "<id>").
//
// A failed page download terminates the program via log.Fatal.
func BroadcastIds(showId string, all bool) []string {
	continueUntilPage := 1
	showUrl := "https://www.bbc.co.uk/programmes/" + showId + "/episodes/guide?page="
	var broadcastIds []string

	showDoc, err := goquery.NewDocument(showUrl + strconv.Itoa(continueUntilPage))
	if err != nil {
		log.Fatal(err)
	}

	if all {
		// A missing or non-numeric pagination label leaves maxPage at 0,
		// which keeps the single-page default.
		maxPage, _ := strconv.Atoi(showDoc.Find(".pagination__page--last a").Text())
		if maxPage > 0 {
			continueUntilPage = maxPage
		}
	}

	for pageCount := 1; pageCount <= continueUntilPage; pageCount++ {
		// Page 1 is already loaded above.
		if pageCount > 1 {
			showDoc, err = goquery.NewDocument(showUrl + strconv.Itoa(pageCount))
			if err != nil {
				log.Fatal(err)
			}
		}

		showDoc.Find(".programme__titles a").Each(func(_ int, s *goquery.Selection) {
			broadcastLink, ok := s.Attr("href")
			if !ok {
				return
			}
			// Indexing [2] unconditionally panicked for anchors without an
			// href or with fewer than three path segments; skip those.
			parts := strings.Split(broadcastLink, "/")
			if len(parts) < 3 {
				return
			}
			broadcastIds = append(broadcastIds, parts[2])
		})
	}
	return broadcastIds
}
示例4: parsePttBoardIndex
// parsePttBoardIndex prints the post list of a PTT board index page and
// returns the absolute links of the posts on it.
//
// page is the distance from the entry page: 0 (or less) shows EntryAddress
// itself; a positive value subtracts page from the index number found in the
// "上頁" (previous page) button of the entry page and loads
// "https://www.ptt.cc/bbs/Beauty/index<N>.html".
//
// Each post is printed as "<i>:[<likes>★]<title>", followed by a pager line
// and a key-binding hint. Download failures terminate the program via
// log.Fatal.
func parsePttBoardIndex(page int) (hrefs []string) {
	doc, err := goquery.NewDocument(EntryAddress)
	if err != nil {
		log.Fatal(err)
	}

	hrefs = make([]string, 0)

	if page > 0 {
		// Read the index number out of the "previous page" button.
		maxPageNumberString := ""
		doc.Find(".btn-group a").Each(func(i int, s *goquery.Selection) {
			if !strings.Contains(s.Text(), "上頁") {
				return
			}
			href, exist := s.Attr("href")
			if !exist {
				return
			}
			// Guard the split: indexing [1] panicked when the href did not
			// contain "index".
			parts := strings.Split(href, "index")
			if len(parts) < 2 {
				return
			}
			targetString := strings.Split(parts[1], ".html")[0]
			fmt.Println("total page:", targetString)
			maxPageNumberString = targetString
		})

		pageNum, convErr := strconv.Atoi(maxPageNumberString)
		if convErr != nil {
			// Previously the error was dropped and a negative index URL was
			// requested; keep the already loaded entry page instead.
			log.Printf("cannot determine board page index (%v); showing the entry page", convErr)
		} else {
			PageWebSide := fmt.Sprintf("https://www.ptt.cc/bbs/Beauty/index%d.html", pageNum-page)
			doc, err = goquery.NewDocument(PageWebSide)
			if err != nil {
				log.Fatal(err)
			}
		}
	}
	// For page <= 0 the entry page loaded above is reused rather than
	// downloaded a second time.

	doc.Find(".r-ent").Each(func(i int, s *goquery.Selection) {
		title := strings.TrimSpace(s.Find(".title").Text())
		// A non-numeric like counter simply shows as 0.
		likeCount, _ := strconv.Atoi(s.Find(".nrec span").Text())
		// A row without a title link yields an empty href, so the stored
		// link is just BasePttAddress.
		href, _ := s.Find(".title a").Attr("href")
		link := BasePttAddress + href
		hrefs = append(hrefs, link)
		fmt.Printf("%d:[%d★]%s\n", i, likeCount, title)
	})

	// Print a small pager window around the current page.
	fmt.Printf("Pages: ")
	for i := page - 3; i <= page+2; i++ {
		if i < 0 {
			continue
		}
		if i == page {
			fmt.Printf("[%v] ", i)
		} else {
			fmt.Printf("%v ", i)
		}
	}
	fmt.Printf("(o: open file in fider, s: top page, n:next, p:prev, quit: quit program)\n")

	return hrefs
}
示例5: ParseCK101PageByIndex
// ParseCK101PageByIndex loads the board page with the given zero-based index,
// stores the URL, title and star count of every ".cl_box" entry on p, and
// returns the number of stored titles.
//
// Index 0 maps to p.entryAddress; any higher index maps to
// "http://ck101.com/forum-1345-<index+1>.html". A failed download terminates
// the program via log.Fatal.
func (p *CK101) ParseCK101PageByIndex(page int) int {
	page = page + 1 // the site numbers pages starting at 1

	PageWebSide := p.entryAddress
	if page > 1 {
		PageWebSide = fmt.Sprintf("http://ck101.com/forum-1345-%d.html", page)
	}

	// Only the target page is fetched. The entry page used to be downloaded
	// first and then discarded unused, costing a request and aborting the
	// program if that unrelated request failed.
	doc, err := goquery.NewDocument(PageWebSide)
	if err != nil {
		log.Fatal(err)
	}

	urlList := make([]string, 0)
	postList := make([]string, 0)
	starList := make([]int, 0)

	doc.Find(".cl_box").Each(func(i int, s *goquery.Selection) {
		title := ""
		url := ""
		starInt := 0

		// Every anchor overwrites the previous one, so the last anchor in
		// the box decides the stored title and URL.
		s.Find("a").Each(func(i int, tQ *goquery.Selection) {
			title, _ = tQ.Attr("title")
			url, _ = tQ.Attr("href")
		})

		s.Find("em").Each(func(i int, starC *goquery.Selection) {
			star_c, _ := starC.Attr("title")
			fmt.Println("star_c:", star_c)
			if strings.Contains(star_c, "查看") {
				star := strings.Replace(star_c, "查看", "", -1)
				fmt.Println("star:", star)
				star = strings.TrimSpace(star)
				// A non-numeric remainder leaves the count at 0.
				starInt, _ = strconv.Atoi(star)
			}
		})

		urlList = append(urlList, url)
		starList = append(starList, starInt)
		postList = append(postList, title)
	})

	p.storedPostURLList = urlList
	p.storedStarList = starList
	p.storedPostTitleList = postList

	return len(p.storedPostTitleList)
}
示例6: getPagina
// getPagina downloads and parses the page at url, retrying on failure.
//
// One initial attempt plus up to 1000 immediate retries are made. When every
// attempt fails, the last error is passed to mataPrograma (presumably this
// terminates the program — confirm against its definition) and whatever
// document the last attempt produced is returned.
//
// NOTE(review): retries are issued back to back without any delay.
func getPagina(url string) (doc *goquery.Document) {
	const maxIntentos = 1000

	var err error
	// intento 0 is the initial request; 1..maxIntentos are the retries.
	for intento := 0; intento <= maxIntentos; intento++ {
		if doc, err = goquery.NewDocument(url); err == nil {
			return doc
		}
	}

	mataPrograma(">>Error al obtener la url: "+url, err)
	return doc
}
示例7: lsyj
// lsyj crawls the consular safety alert listing and writes it to a
// spreadsheet.
//
// It visits "/default.shtml" and "/default_1.shtml" .. "/default_11.shtml"
// under URL_LSYJ, hands every linked article to parseLSYJ together with the
// sheet, and finally saves the workbook under a name that contains the number
// of links processed. A failed listing download terminates the program via
// log.Fatal; a failed article is reported on stdout and still counted.
func lsyj() {
	file := xlsx.NewFile()
	sheet := file.AddSheet("領事安全預警")

	totalUrl := 0

	// crawlIndex processes every article link of one listing page. It
	// replaces two verbatim copies of the same callback.
	crawlIndex := func(indexURL string) {
		doc, err := goquery.NewDocument(indexURL)
		if err != nil {
			log.Fatal(err)
		}
		doc.Find(".ct3_m .news_list li a").Each(func(n int, contentSelection *goquery.Selection) {
			href, exists := contentSelection.Attr("href")
			if !exists {
				return
			}
			name := contentSelection.Text()
			// Drop the first "." of the href (the leading dot of "./...")
			// before appending it to the base URL.
			href = URL_LSYJ + strings.Replace(href, ".", "", 1)
			if err := parseLSYJ(sheet, href); err != nil {
				fmt.Printf("[read error]第%d個:%s。url:%s。 %v", n+1, name, href, err)
			}
			totalUrl++
		})
	}

	crawlIndex(URL_LSYJ + "/default.shtml")

	for i := 1; i <= 11; i++ {
		url := fmt.Sprintf("/default_%d.shtml", i)
		fmt.Printf("第%d個URL:%s", i, url)
		crawlIndex(URL_LSYJ + url)
	}

	fileName := fmt.Sprintf("領事安全預警(%d個).xlsx", totalUrl)
	if err := file.Save(fileName); err != nil {
		// Print, not Printf: the error text is data, not a format string,
		// and may legitimately contain '%'.
		fmt.Print(err.Error())
	}
}
示例8: Rong360bbsCrawler
// Rong360bbsCrawler crawls the first five listing pages of rong360 forum 76
// and stores every thread as a models.Topic.
//
// For each thread row it fetches the thread page, takes the first post body
// ("div#postlist" -> "td.t_f"), rewrites each <img> so that src points at the
// absolute form of its "file" attribute, and passes the topic to
// TopicDao.InsertOrUpdate. Pages or threads that fail to download are logged
// and skipped.
func Rong360bbsCrawler() {
	beego.Info("Process rong360 bbs-yangmao.")
	for i := 1; i < 6; i++ {
		u := "http://bbs.rong360.com/forum-76-" + strconv.Itoa(i) + ".html"
		beego.Info("Process rong360 bbs-yangmao url: " + u)

		document, err := goquery.NewDocument(u)
		if err != nil {
			// The error used to be discarded, after which document was nil
			// and the Find call below panicked.
			beego.Error("Fetch rong360 bbs-yangmao url failed: " + u + ": " + err.Error())
			continue
		}

		// All threads of the listing.
		document.Find("table#threadlisttableid").Find("tbody").Each(func(_ int, selection *goquery.Selection) {
			topic := &models.Topic{}
			topic.Node_id = 4
			topic.Uid = 1
			topic.Ord = time.Now().Unix()

			t := selection.Find("th").First().Find("a.s.xst")
			title := t.Text()
			if len(title) == 0 {
				return
			}
			topic.Title = title

			titleUrl, f := t.Attr("href")
			if !f {
				return
			}

			// Fetch the thread body.
			c, err := goquery.NewDocument(titleUrl)
			if err != nil {
				beego.Error("Fetch rong360 thread failed: " + titleUrl + ": " + err.Error())
				return
			}
			content := c.Find("div#postlist").First().Find("td.t_f").First()

			// Point each image's src at the absolute form of its "file"
			// attribute.
			content.Find("img").Each(func(_ int, se *goquery.Selection) {
				if src, exists := se.Attr("file"); exists {
					se.SetAttr("src", "http://bbs.rong360.com/"+src)
				}
			})

			html, err := content.Html()
			if err != nil {
				beego.Error("Render rong360 thread failed: " + titleUrl + ": " + err.Error())
				return
			}
			topic.Content = html
			topic.Addtime = time.Now().Unix()
			topic.Updatetime = time.Now().Unix()
			(&models.TopicDao{}).InsertOrUpdate(topic)
		})
	}
}
示例9: GetMovie
// GetMovie looks up keyword on kat.cr and returns the first matching movie.
//
// The search page must contain a ".torrentMediaInfo" element and an
// "h1 > a.plain" link; that link's text becomes the title and its href is
// followed to load the cover image and the magnet links of the 1080p, 720p
// and HDRiP tabs (stored under "1080p", "720p" and "hdtv"; a missing tab
// yields an empty string).
//
// Errors: ErrMissingArgument for an empty keyword, ErrMovieNotFound when the
// search page has no media info, ErrParsingFailure when title, link or cover
// cannot be extracted, or the download error itself.
//
// NOTE(review): keyword is appended to the URL without escaping.
func GetMovie(keyword string) (*Movie, error) {
	if len(keyword) == 0 {
		return nil, ErrMissingArgument
	}

	searchDoc, err := goquery.NewDocument("https://kat.cr/usearch/" + keyword)
	if err != nil {
		return nil, err
	}
	if searchDoc.Find(".torrentMediaInfo").Length() < 1 {
		return nil, ErrMovieNotFound
	}

	heading := searchDoc.Find("h1 > a.plain")
	name := heading.Text()
	if name == "" {
		return nil, ErrParsingFailure
	}
	path, found := heading.Attr("href")
	if !found {
		return nil, ErrParsingFailure
	}

	detailDoc, err := goquery.NewDocument("https://kat.cr" + path)
	if err != nil {
		return nil, err
	}

	poster, found := detailDoc.Find(".movieCover > img").Attr("src")
	if !found {
		return nil, ErrParsingFailure
	}

	// Quality label -> selector of the magnet icon inside that quality's tab.
	tabs := []struct{ label, icon string }{
		{"1080p", "#tab-1080p i.ka-magnet"},
		{"720p", "#tab-720p i.ka-magnet"},
		{"hdtv", "#tab-HDRiP i.ka-magnet"},
	}
	links := make(map[string]string, 3)
	for _, tab := range tabs {
		// The link is the href of the icon's parent element.
		links[tab.label], _ = detailDoc.Find(tab.icon).Parent().Attr("href")
	}

	movie := &Movie{
		Title:   name,
		URL:     path,
		Cover:   poster,
		Sources: links,
	}
	return movie, nil
}
示例10: SelfPage
// SelfPage collects the images of the gallery page cururl and of the page
// linked from each of its ".link_pages" blocks.
//
// The page title (text of ".main-tags") is printed and used as the label for
// images until an image carries its own title attribute, which then replaces
// the label for that and all following images. Every ".size-full" element
// with a src not ending in "grey.jpg" is passed to AddSpiderData together
// with the current label. Pages that cannot be downloaded are reported on
// stdout and skipped.
//
// NOTE(review): only the first <a> inside each ".link_pages" element is
// followed — confirm this is intended.
func SelfPage(cururl string) {
	x, err := goquery.NewDocument(cururl)
	if err != nil {
		// The error used to be discarded, which led to a nil dereference.
		fmt.Println("SelfPage: failed to load", cururl, err)
		return
	}

	// Page title.
	title := x.Find(".main-tags").Text()
	fmt.Println("標題:", title)

	// collect registers the visible images of one document. It replaces two
	// verbatim copies of the same callback and deliberately shares title
	// across documents, as the copies did.
	collect := func(doc *goquery.Document) {
		doc.Find(".size-full").Each(func(_ int, s *goquery.Selection) {
			if t, ok := s.Attr("title"); ok {
				title = t
			}
			if v, ok := s.Attr("src"); ok && !strings.HasSuffix(v, "grey.jpg") {
				AddSpiderData(v, title)
			}
		})
	}

	// Images visible on the current page.
	collect(x)

	// Images on the pages reachable through the pagination links.
	x.Find(".link_pages").Each(func(_ int, s *goquery.Selection) {
		iurl, ok := s.Find("a").Attr("href")
		if !ok {
			return
		}
		z, err := goquery.NewDocument(iurl)
		if err != nil {
			fmt.Println("SelfPage: failed to load", iurl, err)
			return
		}
		collect(z)
	})
}
示例11: main
// main prints exchange-rate data scraped from kuaiyilicai.com.
//
// It loads the currency overview, follows every link whose href contains
// "uprate", and for each "div.rate" block on the linked page prints one line
// of the form "<rate>;<date digits>". Blocks that show a "a.b/c.d" pair use
// the second number as the rate; all other blocks print the reciprocal of the
// single number found, formatted with four decimals.
func main() {
	const (
		urlMain         = `http://www.kuaiyilicai.com`
		urlUpayCurrency = `http://www.kuaiyilicai.com/upcurrency.html`
	)

	// Compiled once here; they used to be recompiled for every link and
	// every rate block inside the nested callbacks.
	var (
		rateURLRe        = regexp.MustCompile(`.*uprate.*`)
		whitespaceRe     = regexp.MustCompile(`\s`)
		pairRe           = regexp.MustCompile(`\d+\.\d+\/\d+\.\d+.*`)
		pairPartsRe      = regexp.MustCompile(`\d+\.\d+\/(\d+\.\d+)[^0-9]*(\d+)-(\d+).*`)
		reciprocalRateRe = regexp.MustCompile(`[^0-9]*(\d+\.\d+)[^0-9]*(\d+-\d+).*`)
		reciprocalDateRe = regexp.MustCompile(`[^0-9]*(\d+\.\d+)[^0-9]*(\d+)-(\d+).*`)
		swapRe           = regexp.MustCompile(`(.*);(.*)`)
	)

	docUpayCurrency, err := goquery.NewDocument(urlUpayCurrency)
	checkError(err)

	// Every currency entry of the overview list.
	docUpayCurrency.Find(`ul.list-inline > li.itm`).Each(func(_ int, selUpayCcurrency *goquery.Selection) {
		// Every link inside that entry.
		selUpayCcurrency.Find(`a`).Each(func(_ int, sel_sort *goquery.Selection) {
			// A missing href yields "", which fails the match below.
			href, _ := sel_sort.Attr(`href`)
			if !rateURLRe.MatchString(href) {
				return
			}
			href = urlMain + href
			fmt.Println(href + ` | ` + selUpayCcurrency.Text())

			// Load the rate page of this currency.
			docEachCurrency, err := goquery.NewDocument(href)
			checkError(err)

			docEachCurrency.Find(`div.rate`).Each(func(_ int, selEachCurrency *goquery.Selection) {
				eachCurrency := whitespaceRe.ReplaceAllString(selEachCurrency.Text(), ``)

				if pairRe.MatchString(eachCurrency) {
					// "a.b/c.d ... m-d": keep c.d as rate, "md" as date.
					eachCurrency = pairPartsRe.ReplaceAllString(eachCurrency, `$2$3;$1`)
				} else {
					// Single number: print its reciprocal instead.
					eachReciprocalCurrency := reciprocalRateRe.ReplaceAllString(eachCurrency, `$1`)
					eachCurrencyDate := reciprocalDateRe.ReplaceAllString(eachCurrency, `$2$3`)
					f, err := strconv.ParseFloat(eachReciprocalCurrency, 32)
					checkError(err)
					eachReciprocalCurrency = strconv.FormatFloat(1/f, 'f', 4, 32)
					eachCurrency = eachCurrencyDate + `;` + eachReciprocalCurrency
				}

				// Swap "date;rate" into "rate;date" for output.
				fmt.Println(swapRe.ReplaceAllString(eachCurrency, `$2;$1`))
			})
		})
	})
}
示例12: getBuildStatus
// getBuildStatus loads the page at src and derives a build status from the
// first table cell under "#repo-info-tab" whose text is exactly "Finished"
// or "Error".
//
// It returns "passing" for "Finished", "failing" for "Error", and an empty
// string when no such cell exists. The only error returned is a failure to
// download or parse the page.
func getBuildStatus(src string) (string, error) {
	doc, err := goquery.NewDocument(src)
	if err != nil {
		return "", err
	}

	const cssPath = "#repo-info-tab > div.repository > table > tbody tr > td"

	var status string
	// Stop at the first decisive cell. The previous Each kept scanning the
	// rest of the table and carried an unreachable default branch.
	doc.Find(cssPath).EachWithBreak(func(_ int, s *goquery.Selection) bool {
		switch s.Text() {
		case "Finished":
			status = "passing"
		case "Error":
			status = "failing"
		default:
			return true
		}
		return false
	})
	return status, nil
}
示例13: GetSlideList
// GetSlideList downloads presentationURL and returns the value of the
// "data-full" attribute of every ".slide_image" element that has one, in
// document order.
//
// It returns the download error if the page cannot be loaded, and a
// TagNotFoundError when no element yields a URL.
func GetSlideList(presentationURL string) ([]string, error) {
	const (
		slideImgSelector = ".slide_image"
		imgURLAttribute  = "data-full"
	)

	doc, err := goquery.NewDocument(presentationURL)
	if err != nil {
		return nil, err
	}

	// Walk the matched nodes by index and keep those carrying the attribute.
	var slideList []string
	images := doc.Find(slideImgSelector)
	for idx := range images.Nodes {
		full, ok := images.Eq(idx).Attr(imgURLAttribute)
		if !ok {
			continue
		}
		slideList = append(slideList, full)
	}

	if len(slideList) == 0 {
		return nil, TagNotFoundError{"No slide sections in the HTML page!"}
	}
	return slideList, nil
}
示例14: Search
// Search queries thepiratebay.org for query and returns one Result per table
// row that has a magnet link and a parsable, non-negative seeder count
// (third column). Rows failing either check are dropped silently.
//
// options is accepted but not consulted by this implementation. The only
// error returned is a failure to download or parse the result page.
func (tpb *Thepiratebay) Search(query string, options Options) ([]*Result, error) {
	endpoint := &url.URL{
		Scheme: "https",
		Host:   "thepiratebay.org",
		Path:   fmt.Sprintf("/search/%s/0/7/0", query),
	}

	page, err := goquery.NewDocument(endpoint.String())
	if err != nil {
		return nil, err
	}

	results := make([]*Result, 0)
	rows := page.Find("#SearchResults table#searchResult > tbody > tr")
	rows.Each(func(_ int, row *goquery.Selection) {
		link, hasMagnet := row.Find("a[href^=magnet]").Attr("href")
		if !hasMagnet {
			return
		}

		count, convErr := strconv.Atoi(row.Find("td:nth-child(3)").Text())
		if convErr != nil || count < 0 {
			return
		}

		entry := &Result{
			Name:      row.Find(".detName a.detLink").Text(),
			MagnetURL: link,
			Seeders:   uint(count),
		}
		results = append(results, entry)
	})

	return results, nil
}
示例15: parseStartLink
// parseStartLink asks for a start URL on stdin (stored in the package-level
// url variable), loads it, and sorts the first link of every ".description"
// cell inside each <tbody> into two files:
//
//   - links matching the exploit-db exploit pattern are appended to
//     "temp.txt", one per line;
//   - links matching the "/docs/......pdf" pattern are appended to
//     "waste.txt".
//
// Every I/O error is passed to checkerr.
//
// NOTE(review): entries in "waste.txt" are written without a separating
// newline, unlike "temp.txt" — confirm whether that is intended.
func parseStartLink() {
	fmt.Println("Input url: ")
	fmt.Scanf("%s", &url)

	firstDoc, err := goquery.NewDocument(url)
	checkerr(err)

	// Compiled once; they used to be recompiled for every link.
	exploitRe := regexp.MustCompile(`https://www.exploit-db.com/exploits/.....`)
	docsRe := regexp.MustCompile(`/docs/......pdf`)

	// appendTo appends text to the named file, creating it when needed. The
	// Close error is checked as well because a failed close can lose the
	// data that was just written.
	appendTo := func(name, text string) {
		f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
		checkerr(err)
		_, err = f.WriteString(text)
		checkerr(err)
		checkerr(f.Close())
	}

	firstDoc.Find("tbody").Each(func(_ int, tbody *goquery.Selection) {
		tbody.Find(".description").Each(func(_ int, s *goquery.Selection) {
			// A cell without a link yields "", which matches neither
			// pattern.
			link, _ := s.Find("a").Attr("href")

			if exploitRe.MatchString(link) {
				appendTo("temp.txt", link+"\n")
			}
			if docsRe.MatchString(link) {
				appendTo("waste.txt", link)
			}
		})
	})
}