當前位置: 首頁>>代碼示例>>Golang>>正文


Golang goquery.Document類代碼示例

本文整理匯總了Golang中github.com/PuerkitoBio/goquery.Document的典型用法代碼示例。如果您正苦於以下問題:Golang Document類的具體用法?Golang Document怎麼用?Golang Document使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了Document類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。

示例1: doWork

// doWork consumes page URLs from links, scrapes each page and sends one
// joined row per page to results.
//
// Each page is fetched with goquery.NewDocument. A failed fetch is retried
// until attempt number maxRetries, sleeping a randomized, attempt-scaled
// interval between attempts. For every "td.desc" element the text of its next
// sibling is trimmed of spaces and newlines, its first "," is replaced by
// ".", and the resulting cells are joined with *sep. Problems are reported on
// stderr. The function returns once links is closed and drained.
func doWork(links <-chan string, results chan<- string) {
nextLink:
	for link := range links {
		var doc *goquery.Document
		for i := 1; ; i++ {
			var err error
			doc, err = goquery.NewDocument(link)
			if err == nil {
				break
			}
			fmt.Fprintf(os.Stderr, "[Tentativa %d] Erro tentando processar página de servidor: %s. Erro: %q\n", i, link, err)
			if i == maxRetries {
				fmt.Fprintf(os.Stderr, "Página não processada: %s\n", link)
				// Give up on this page only. Returning here would stop the
				// worker and leave the remaining links unprocessed.
				continue nextLink
			}
			// The random factor may be 0, in which case the retry is immediate.
			time.Sleep(time.Duration(i) * time.Duration(rand.Intn(5)) * time.Second)
		}

		var row []string
		doc.Find("td.desc").Each(func(_ int, s *goquery.Selection) {
			cell := strings.Trim(s.Next().Text(), " \n")
			// Only the first comma is replaced.
			row = append(row, strings.Replace(cell, ",", ".", 1))
		})
		if len(row) == 0 {
			fmt.Fprintf(os.Stderr, "Não achou td.desc: %s\n", link)
			continue
		}
		results <- strings.Join(row, *sep)
	}
}
開發者ID:danielfireman,項目名稱:phd,代碼行數:32,代碼來源:main.go

示例2: parseTrendingRepos

// parseTrendingRepos builds one GithubRepo per "li.repo-list-item" element in
// doc.
//
// The title is the text of the "h3.repo-list-name a" link with surrounding
// whitespace trimmed and all spaces and newlines removed. The URL is that
// link's href prefixed with "https://github.com". Stars is the first run of
// digits found in the "p.repo-list-meta" text after removing commas, or 0
// when there is none. Forks is always 0 and Date is the current UTC Unix
// time.
func parseTrendingRepos(doc *goquery.Document) []GithubRepo {
	var (
		result  []GithubRepo
		numeric = regexp.MustCompile("[0-9]+")
	)

	doc.Find("li.repo-list-item").Each(func(_ int, entry *goquery.Selection) {
		nameLink := entry.Find("h3.repo-list-name a")

		name := strings.Trim(nameLink.Text(), "\n\t ")
		name = strings.NewReplacer(" ", "", "\n", "").Replace(name)

		summary := strings.Trim(entry.Find("p.repo-list-description").Text(), "\n\t ")

		// A missing href simply yields the bare host prefix.
		href, _ := nameLink.Attr("href")

		meta := strings.Replace(entry.Find("p.repo-list-meta").Text(), ",", "", -1)
		count := numeric.FindString(meta)
		if count == "" {
			count = "0"
		}
		// count is all digits; a conversion error is ignored.
		stars, _ := strconv.Atoi(count)

		result = append(result, GithubRepo{
			Title:       name,
			Description: summary,
			Url:         "https://github.com" + href,
			Stars:       stars,
			Forks:       0,
			Date:        time.Now().UTC().Unix(),
		})
	})

	return result
}
開發者ID:hypebeast,項目名稱:gostats,代碼行數:33,代碼來源:github.go

示例3: cleanCites

// cleanCites passes every <cite> element found in doc to the configured
// parser's removeNode and returns the same document.
func (this *cleaner) cleanCites(doc *goquery.Document) *goquery.Document {
	doc.Find("cite").Each(func(_ int, cite *goquery.Selection) {
		this.config.parser.removeNode(cite)
	})
	return doc
}
開發者ID:ngs,項目名稱:GoOse,代碼行數:7,代碼來源:cleaner.go

示例4: defaultHTML

// defaultHTML fills i from the <title> element and the Open Graph meta tags
// (og:title, og:image, og:url, og:description) found in doc.
//
// The caption is first taken from the first child node of the first <title>
// element and is then replaced by og:title when such a tag is present.
// og:image and og:url are stored in i.ImageURL and i.URL only when
// govalidator.IsRequestURL accepts them; otherwise the problem is logged with
// sourceURL and the tag is skipped. og:description is stored unchanged.
func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) {
	fmt.Println("Running OG extract.")

	selection := doc.Find("title")
	// An empty <title></title> has no child node, so FirstChild must be
	// checked before it is dereferenced.
	if len(selection.Nodes) != 0 && selection.Nodes[0].FirstChild != nil {
		i.Caption = selection.Nodes[0].FirstChild.Data
	}

	selection = doc.Find("meta[property*='og']")

	for _, e := range selection.Nodes {
		// NOTE(review): htmlAttributeToMap presumably maps attribute names to
		// their values - confirm against its definition.
		m := htmlAttributeToMap(e.Attr)

		switch m["property"] {
		case "og:title":
			i.Caption = m["content"]
		case "og:image":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:image. " + sourceURL)
				continue
			}
			i.ImageURL = m["content"]
		case "og:url":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:url. " + sourceURL)
				continue
			}
			i.URL = m["content"]
		case "og:description":
			i.Description = m["content"]
		}
	}
}
開發者ID:koffeinsource,項目名稱:notreddit,代碼行數:36,代碼來源:defaultHTML.go

示例5: parseOrderListPage

// parseOrderListPage extracts the orders linked from an order list page.
//
// The first <div> inside the first ".container" element must carry one of the
// two expected headings; otherwise an error is returned. Every anchor whose
// href starts with "../otc/C003.html?" and has a non-empty query string
// contributes one Order built from its "order_id" and "order_method" query
// parameters. The boolean result reports whether the container holds exactly
// one anchor with accesskey "#".
func parseOrderListPage(s *goquery.Document) ([]Order, bool, error) {
	container := s.Find(".container").First()

	heading := container.Find("div").First().Text()
	switch heading {
	case ">注文情報(一覧)<", ">注文情報(検索)<":
		// Expected page; carry on.
	default:
		return nil, false, fmt.Errorf("cannot open \"注文情報(一覧)\", but %#v", heading)
	}

	// Remove the title row.
	container.Find("hr").First().Next().PrevAll().Remove()

	// Non-nil even when no order is found.
	orders := make([]Order, 0)
	container.Find("a").Each(func(_ int, anchor *goquery.Selection) {
		href, hasHref := anchor.Attr("href")
		if !hasHref || !strings.HasPrefix(href, "../otc/C003.html?") {
			return
		}
		parsed, err := url.Parse(href)
		if err != nil || parsed.RawQuery == "" {
			return
		}
		// Query silently discards malformed pairs, as the ignored ParseQuery
		// error did.
		params := parsed.Query()
		orders = append(orders, Order{
			OrderId:     params.Get("order_id"),
			OrderMethod: params.Get("order_method"),
		})
	})

	// NOTE(review): presumably the accesskey "#" anchor is a pagination
	// link - confirm against the page markup.
	hasHashKey := container.Find("a[accesskey=\"#\"]").Length() == 1
	return orders, hasHashKey, nil
}
開發者ID:imos,項目名稱:fxtools,代碼行數:29,代碼來源:order.go

示例6: GetShopName

// GetShopName returns the shop name found in p with surrounding whitespace
// removed. The text of ".tb-shop-name" is used when it is non-empty;
// otherwise the text of ".slogo-shopname" is used.
func GetShopName(p *goquery.Document) string {
	for _, selector := range []string{".tb-shop-name", ".slogo-shopname"} {
		if text := p.Find(selector).Text(); text != "" {
			return strings.TrimSpace(text)
		}
	}
	return ""
}
開發者ID:qgweb,項目名稱:new,代碼行數:8,代碼來源:tao.go

示例7: feedsFromDoc

// feedsFromDoc returns candidate feed URLs for a page.
//
// When doc contains <link> elements of type application/rss+xml or
// application/atom+xml, their href values are returned (an element without
// href contributes an empty string). Otherwise text is scanned for href
// attributes whose value contains "rss", "atom", "feed" or "xml" and those
// values are returned. The result is never nil.
func feedsFromDoc(doc *goquery.Document, text string) []string {
	const selector = "link[type='application/rss+xml'], link[type='application/atom+xml']"

	links := doc.Find(selector)
	if n := links.Length(); n > 0 {
		feeds := make([]string, 0, n)
		links.Each(func(_ int, link *goquery.Selection) {
			href, _ := link.Attr("href")
			feeds = append(feeds, href)
		})
		return feeds
	}

	// Fall back to scanning the raw text.
	pattern := regexp.MustCompile(`href=['"]([^'"]*(rss|atom|feed|xml)[^'"]*)['"]`)
	hits := pattern.FindAllStringSubmatch(text, -1)
	if len(hits) == 0 {
		return []string{}
	}

	feeds := make([]string, 0, len(hits))
	for _, hit := range hits {
		// hit[1] is the attribute value without the surrounding quotes.
		feeds = append(feeds, hit[1])
	}
	return feeds
}
開發者ID:golibri,項目名稱:website,代碼行數:26,代碼來源:website.go

示例8: perseHTML

// perseHTML collects date strings from the "a.bt-open" anchors in htmldata.
//
// For each such anchor that has an id attribute, the id value is decoded in
// place by htmlStringDecode and parsed as a JSON object. The string stored
// under "field19" (e.g. "2016-02-27 03:30:00") is appended to the result when
// isTimeApplicable accepts it. Anchors whose id is not valid JSON, or whose
// "field19" is missing or not a string, are skipped.
func perseHTML(htmldata *goquery.Document) []string {
	var dates []string

	htmldata.Find("a.bt-open").Each(func(_ int, s *goquery.Selection) {
		jsonData, ok := s.Attr("id")
		if !ok {
			return
		}

		// Decode the attribute value in place.
		htmlStringDecode(&jsonData)

		// Parse the JSON object. A malformed payload is skipped because the
		// lookup below cannot succeed without a decoded object.
		var jsonObject map[string]interface{}
		if err := json.Unmarshal([]byte(jsonData), &jsonObject); err != nil {
			return
		}

		// Checked assertion: an unchecked one panics when the key is absent
		// or holds a non-string value.
		strDate, ok := jsonObject["field19"].(string)
		if !ok {
			return
		}
		if isTimeApplicable(strDate) {
			dates = append(dates, strDate)
		}
	})

	return dates
}
開發者ID:hiromaily,項目名稱:go-book-teacher,代碼行數:26,代碼來源:analyzehtml.go

示例9: garfield

// garfield fills i with the comic strip image for Garfield pages.
//
// It does nothing unless sourceURL contains "www.gocomics.com/garfield". It
// reads the attributes of the first ".strip" element in doc. When its "src"
// attribute is accepted by govalidator.IsRequestURL, i.Description is set to
// an <img> tag pointing at it. Whenever a ".strip" element exists,
// i.ImageURL is cleared. Diagnostics are printed to stdout.
func garfield(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.gocomics.com/garfield") {
		return
	}

	fmt.Println("Running Garfield plugin.")

	// Replace the description with the strip image.
	selection := doc.Find(".strip")
	if len(selection.Nodes) == 0 {
		fmt.Println("Garfield plugin found no .strip. " + sourceURL)
		return
	}
	if len(selection.Nodes) > 1 {
		fmt.Println("Garfield plugin found >1 .strip. " + sourceURL)
	}

	// Only the first match is used.
	m := htmlAttributeToMap(selection.Nodes[0].Attr)

	if govalidator.IsRequestURL(m["src"]) {
		i.Description = "<img src =\"" + m["src"] + "\" />"
	} else {
		// This message used to name the Amazon plugin (copy-paste error).
		fmt.Println("Garfield plugin invalid url. " + m["src"])
	}
	i.ImageURL = ""
}
開發者ID:koffeinsource,項目名稱:notreddit,代碼行數:29,代碼來源:garfield.go

示例10: getItems

// getItems builds the item list from the "td.title a" links in doc.
//
// Iteration stops when the link index reaches maxItems. Links whose text is
// "More" are skipped. For every other link the title, href and the text of
// the <span> elements in the row after the link's grandparent are recorded.
// maxWidth is the largest byte length seen among the "title (points)" labels
// and the hrefs.
func getItems(doc *goquery.Document) (items []item, maxWidth int) {
	links := doc.Find("td.title a")
	links.EachWithBreak(func(idx int, link *goquery.Selection) bool {
		if idx == maxItems {
			return false
		}

		title := link.Text()
		if title == "More" {
			return true
		}

		target, _ := link.Attr("href")
		score := link.Parent().Parent().Next().Find("span").Text()

		// Widths are measured in bytes, not runes.
		label := title + " (" + score + ")"
		maxWidth = max(len(label), len(target), maxWidth)

		items = append(items, item{
			title:  title,
			url:    target,
			points: score,
		})
		return true
	})
	return items, maxWidth
}
開發者ID:coolhacks,項目名稱:gohn,代碼行數:26,代碼來源:main.go

示例11: getTeamsId

// getTeamsId returns the ids of the two teams in a match.
//
// The team links are read from "div.container.left h3 a" and
// "div.container.right h3 a"; each href is appended to BASE and resolved with
// parseTeam. On any failure the zero array is returned with the error.
func getTeamsId(d *goquery.Document) ([2]int, error) {
	// Returned unchanged on every error path so a partial result never leaks.
	var none [2]int

	sides := [2]struct {
		selector string
		missing  string
	}{
		{"div.container.left h3 a", "could not find team a"},
		{"div.container.right h3 a", "could not find team b"},
	}

	var ids [2]int
	for idx, side := range sides {
		href, found := d.Find(side.selector).Attr("href")
		if !found {
			return none, errors.New(side.missing)
		}

		id, err := parseTeam(BASE + href)
		if err != nil {
			return none, err
		}
		ids[idx] = id
	}
	return ids, nil
}
開發者ID:trtstm,項目名稱:zeejongparser,代碼行數:30,代碼來源:matchPage.go

示例12: GetFFInfo

// GetFFInfo extracts the name and sex of a friend's friend from a profile
// page and hands the resulting User to w.putItems.
//
// The profile text is taken from the third "div.c" element. The name and sex
// fields are cut out with GetBetweenStr and split on ":". When the text does
// not have the expected shape, a warning is logged and nothing is stored; the
// unchecked indexing used before panicked in that case. uid and friendid come
// from the package-level crawlUrl.
func (w *SocialWorker) GetFFInfo(query *goquery.Document) {
	uStr := query.Find("div.c").Eq(2).Text()

	nameField := GetBetweenStr(GetBetweenStr(uStr, ":", "性別"), ":", "認證")
	nameParts := strings.Split(nameField, ":")
	if len(nameParts) < 2 {
		glog.Warningln("GetFFInfo: name not found in profile text, skipping:", uStr)
		return
	}

	sexParts := strings.Split(GetBetweenStr(uStr, "性別", "地區"), ":")
	if len(sexParts) < 2 {
		glog.Warningln("GetFFInfo: sex not found in profile text, skipping:", uStr)
		return
	}

	// NOTE(review): "famale" looks like a typo for "female". It is kept
	// because the value is passed on to putItems and other code may compare
	// against it - confirm before correcting.
	usex := "famale"
	if sexParts[1] == "男" {
		usex = "male"
	}

	var user User
	user.uid = crawlUrl.FatherId
	user.friendid = crawlUrl.Id
	user.uname = nameParts[1]
	user.usex = usex
	glog.Infoln(user)
	w.putItems(user)
}
開發者ID:luzh0422,項目名稱:spider-docker,代碼行數:37,代碼來源:spider.go

示例13: GetFriendsUrl

// GetFriendsUrl collects friend info URLs from a friends list page.
//
// It walks the anchors matched by the chained lookups div.c, table, tbody,
// tr, a:last-child and handles only those at odd indices. The "uid" query
// parameter of each href is extracted ("1" is used when there is none) and
// "http://weibo.cn/<uid>/info" is appended to the package-level Urls slice,
// with a corresponding entry appended to UrlsLevel. The p parameter is not
// used.
func (w *SocialWorker) GetFriendsUrl(query *goquery.Document, p *page.Page) {
	// Compiled once per call instead of once per anchor.
	uidPattern := regexp.MustCompile(`(^|&|\?)uid=([^&]*)(&|$)`)

	query.Find("div.c").Find("table").Find("tbody").Find("tr").Find("a:last-child").Each(func(j int, s *goquery.Selection) {
		if j%2 == 0 {
			return
		}

		href, _ := s.Attr("href")
		uid := "1"
		if m := uidPattern.FindStringSubmatch(href); m != nil {
			uid = m[2]
		}

		Urls = append(Urls, "http://weibo.cn/"+uid+"/info")
		// NOTE(review): i is not declared in this function; presumably it is
		// a package-level index of the URL being crawled - confirm.
		UrlsLevel = append(UrlsLevel, UrlsLevel[i]+1)
	})
}
開發者ID:luzh0422,項目名稱:spider-docker,代碼行數:29,代碼來源:spider.go

示例14: Parse

// Parse fetches the page at redditUrl and processes each resource found in
// it.
//
// An empty redditUrl falls back to this.domain + this.golang. A URL with an
// "http://" scheme is upgraded to "https://", and a URL without an "https"
// prefix gets "https://" prepended. Resources matching "#siteTable .link" are
// handed to dealRedditOneResource from last to first, so the bottom of the
// page is processed first. Per-resource errors are logged; the returned
// error is the fetch error, or otherwise the result of the last resource
// processed (the first on the page).
func (this *RedditLogic) Parse(redditUrl string) error {
	redditUrl = strings.TrimSpace(redditUrl)
	switch {
	case redditUrl == "":
		redditUrl = this.domain + this.golang
	case strings.HasPrefix(redditUrl, "http://"):
		// Without this case the URL became "https://http://...".
		redditUrl = "https://" + strings.TrimPrefix(redditUrl, "http://")
	case !strings.HasPrefix(redditUrl, "https"):
		redditUrl = "https://" + redditUrl
	}

	doc, err := this.newDocumentFromResp(redditUrl)
	if err != nil {
		logger.Errorln("goquery reddit newdocument error:", err)
		return err
	}

	// Process the last entries first.
	resourcesSelection := doc.Find("#siteTable .link")

	for i := resourcesSelection.Length() - 1; i >= 0; i-- {
		err = this.dealRedditOneResource(goquery.NewDocumentFromNode(resourcesSelection.Get(i)).Selection)

		if err != nil {
			logger.Errorln(err)
		}
	}

	return err
}
開發者ID:studygolang,項目名稱:studygolang,代碼行數:33,代碼來源:reddit.go

示例15: ARSOPotresi

// ARSOPotresi returns a slice of Potres records scraped from the ARSO page.
//
// A result cached in cacheArso under "potresi" is returned directly.
// Otherwise the page is fetched; on a fetch error a nil slice is returned and
// nothing is cached. Each table row whose fourth cell parses as a positive
// float yields one Potres: cell 1 is Datum, cells 2 and 3 are Lat and Lon,
// cell 4 is Magnituda and cell 6 is Lokacija. The result is stored in the
// cache with the default expiration.
func ARSOPotresi() []Potres {
	var potresi []Potres
	var doc *goquery.Document
	var e error

	if res, found := cacheArso.Get("potresi"); found {
		return res.([]Potres)
	}

	if doc, e = goquery.NewDocument("http://www.arso.gov.si/potresi/obvestila%20o%20potresih/aip/"); e != nil {
		return potresi
	}

	doc.Find("#glavna td.vsebina table tr").Each(func(i int, s *goquery.Selection) {
		// bitSize must be 32 or 64; it is a float width, not a number of
		// decimal places.
		magnituda, err := strconv.ParseFloat(s.Find("td:nth-child(4)").Text(), 64)
		if magnituda > 0 && err == nil {
			potres := Potres{}
			potres.Magnituda = magnituda
			// Unparsable coordinates are left at 0.
			potres.Lat, _ = strconv.ParseFloat(s.Find("td:nth-child(2)").Text(), 64)
			potres.Lon, _ = strconv.ParseFloat(s.Find("td:nth-child(3)").Text(), 64)
			potres.Lokacija = s.Find("td:nth-child(6)").Text()
			potres.Datum = s.Find("td:nth-child(1)").Text()
			potresi = append(potresi, potres)
		}
	})
	cacheArso.Set("potresi", potresi, cache.DefaultExpiration)
	return potresi
}
開發者ID:ubuntu-si,項目名稱:arso-api,代碼行數:29,代碼來源:scrapers.go


注:本文中的github.com/PuerkitoBio/goquery.Document類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。