当前位置: 首页>>代码示例>>Golang>>正文


Golang scrape.Attr函数代码示例

本文整理汇总了Golang中github.com/yhat/scrape.Attr函数的典型用法代码示例。如果您正苦于以下问题:Golang Attr函数的具体用法?Golang Attr怎么用?Golang Attr使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了Attr函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: main

func main() {
	// request and parse the front page
	resp, err := http.Get("https://news.ycombinator.com/")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
			return scrape.Attr(n.Parent.Parent, "class") == "athing"
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for i, article := range articles {
		fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
开发者ID:abejenaru,项目名称:vagrant-boxes,代码行数:25,代码来源:first.go

示例2: parseBroadcastsFromNode

func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
	ret = make([]*r.Broadcast, len(nodes))
	for index, tim := range nodes {
		// prepare response
		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		// set start time
		{
			div_t := strings.TrimSpace(scrape.Text(tim))
			if 5 != len(div_t) {
				continue
			}
			hour := r.MustParseInt(div_t[0:2])
			minute := r.MustParseInt(div_t[3:5])
			bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
			if index > 0 {
				ret[index-1].DtEnd = &bc.Time
			}
		}
		for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
			return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
		}) {
			// Title
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			href := scrape.Attr(tit, "href")
			if "" != href {
				u, _ := url.Parse(href)
				bc.Subject = day.Source.ResolveReference(u)
			}

			desc_node := tit.Parent
			desc_node.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{desc_node})
			bc.Description = &description
			// fmt.Fprintf(os.Stderr, "\n")
		}
		ret[index] = &bc
	}
	// fmt.Fprintf(os.Stderr, "len(ret) = %d '%s'\n", len(ret), day.Source.String())
	if len(nodes) > 0 {
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[len(nodes)-1].DtEnd = &midnight
	}
	return
}
开发者ID:mro,项目名称:internet-radio-recorder,代码行数:52,代码来源:m945.go

示例3: NewListing

func NewListing(ctx appengine.Context, url string) (*Listing, error) {
	client := urlfetch.Client(ctx)
	resp, err := client.Get("http://167.88.16.61:2138/" + url)
	if err != nil {
		ctx.Errorf("%s", err)
	}
	ctx.Debugf("Craigslist request came back with status: %s", resp.Status)
	if err != nil {
		ctx.Errorf("%s", err)
		return nil, errors.New("Get listing failed")
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		ctx.Errorf("%s", "Parsing Error")
		return nil, errors.New("Parse body failed")
	}

	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if !ok {
		ctx.Errorf("%s", "Error getting title")
		return nil, errors.New("Get title failed")
	}
	price, ok := scrape.Find(root, scrape.ByClass("price"))
	if !ok {
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	intPrice, err := strconv.Atoi(scrape.Text(price)[1:])
	if err != nil {
		ctx.Errorf("Error casting price: %s", scrape.Text(price))
		return nil, err
	}
	images := scrape.FindAll(root, scrape.ByTag(atom.Img))
	imageUrl := ""
	for _, image := range images {
		if scrape.Attr(image, "title") == "image 1" {
			imageUrl = scrape.Attr(image, "src")
		}
	}

	ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, scrape.Text(title))

	return &Listing{
		Url:      url,
		Title:    scrape.Text(title),
		Price:    intPrice,
		ImageUrl: imageUrl,
	}, nil
}
开发者ID:matthewdu,项目名称:powerplug,代码行数:49,代码来源:craigslist.go

示例4: findOpenGraphTitle

func findOpenGraphTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "property") == "og:title" && scrape.Attr(n, "content") != ""
		}

		return false
	})

	if !found {
		return ""
	}

	return scrape.Attr(el, "content")
}
开发者ID:mcmillan,项目名称:socialite,代码行数:15,代码来源:title.go

示例5: findTwitterTitle

func findTwitterTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "name") == "twitter:title" && scrape.Attr(n, "content") != ""
		}

		return false
	})

	if !found {
		return ""
	}

	return scrape.Attr(el, "content")
}
开发者ID:mcmillan,项目名称:socialite,代码行数:15,代码来源:title.go

示例6: parseBroadcastURLsNode

func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5
	for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
		year, month, day_, err := timeForH4(scrape.Text(h4), &day.Time)
		if nil != err {
			panic(err)
		}
		// fmt.Printf("%d-%d-%d %s\n", year, month, day, err)
		for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				panic(errors.New("Couldn't parse <a>"))
			}
			ur, _ := url.Parse(scrape.Attr(a, "href"))
			hour := r.MustParseInt(m[1])
			dayOffset := 0
			if hour < closeDownHour {
				dayOffset = 1
			}
			// fmt.Printf("%s %s\n", b.r.TimeURL.String(), b.Title)
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					Time:    time.Date(year, month, day_+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ur),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return
}
开发者ID:mro,项目名称:internet-radio-recorder,代码行数:33,代码来源:br.go

示例7: Scrape

// Scrape scrapes a site for a keyword
func (q *query) Scrape() []*match {

	// Request the URL
	resp, err := http.Get(q.SiteURL)
	if err != nil {
		panic(err)
		log.Fatal("Couldn't GET ", q.SiteURL)
	}

	// Parse the contents of the URL
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
		log.Fatal("Unable to parse response")
	}

	// Grab all the posts and print them
	posts := scrape.FindAll(root, scrape.ByClass("description"))
	matches := make([]*match, len(posts))
	for i, post := range posts {
		matches[i] = &match{
			Title:       scrape.Text(post.FirstChild.NextSibling),
			Description: scrape.Text(post),
			Link:        "http://kijiji.ca" + scrape.Attr(post.FirstChild.NextSibling, "href"),
			Price:       scrape.Text(post.NextSibling.NextSibling),
			Matched:     false,
		}
	}

	return matches
}
开发者ID:bentranter,项目名称:kijiji-scrape,代码行数:32,代码来源:main.go

示例8: main

func main() {

	resp, err := http.Get("https://www.reddit.com")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "id") == "siteTable"
		}
		return false
	}
	table, ok := scrape.Find(root, matcher)
	if !ok {
		panic(ok)
	}
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "data-type") == "link"
		}
		return false
	}

	articles := scrape.FindAll(table, matcher)
	var posts []Post

	for i := 0; i < len(articles); i++ {
		wg.Add(1)
		go func(n *html.Node) {
			post := parsepost(n)
			posts = append(posts, post)
			wg.Done()
		}(articles[i])
	}

	wg.Wait()

	for i := 0; i < len(posts); i++ {
		printpost(posts[i])
	}

}
开发者ID:jalavosus,项目名称:redditscraper,代码行数:47,代码来源:reddit_scraper.go

示例9: parsepost

func parsepost(n *html.Node) Post {
	post := Post{}

	// get the title. uses a scrape inbuilt matcher
	title_scrape, _ := scrape.Find(n, scrape.ByClass("title"))
	title := scrape.Text(title_scrape.FirstChild)

	// get the subreddit. This requires a custom matcher.
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent != nil {
			return scrape.Attr(n, "class") == "subreddit hover may-blank"
		}
		return false
	}
	sub, _ := scrape.Find(n, matcher)
	subreddit := scrape.Text(sub)

	// get the url to the comments. requires custom matcher.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Ul && n.FirstChild != nil {
			return scrape.Attr(n, "class") == "flat-list buttons" && scrape.Attr(n.FirstChild, "class") == "first"
		}
		return false
	}
	ul, _ := scrape.Find(n, matcher)          // ul is a list of two buttons: one that links to a post's comments page, one a "share" function
	li := ul.FirstChild                       // the first list item of ul -- this will always be the comments page link.
	url := scrape.Attr(li.FirstChild, "href") // finally, the url found in the list item.

	// get the author. Uses custom matcher and magic.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent.DataAtom == atom.P {
			return strings.Contains(scrape.Attr(n, "href"), "/user/")
		}
		return false
	}
	author_scrape, _ := scrape.Find(n, matcher)
	author := scrape.Text(author_scrape)

	post.title = title
	post.subreddit = subreddit
	post.url = url
	post.author = author

	return post
}
开发者ID:jalavosus,项目名称:redditscraper,代码行数:45,代码来源:reddit_scraper.go

示例10: getLink

func getLink(r *html.Node) (s string) {
	buttons := scrape.FindAll(r, scrape.ByClass("downloadbtn"))
	for _, button := range buttons {
		windowLocation := scrape.Attr(button, "onclick")
		link := strings.Split(windowLocation, "=")[1]
		s := strings.Trim(link, "'")
		return s
	}
	return
}
开发者ID:jmonmane,项目名称:scrape,代码行数:10,代码来源:main.go

示例11: parseBroadcastSeedNode

// Get Time, Source and Image from json html snippet
func (item *calendarItem) parseBroadcastSeedNode(root *html.Node) (bc *broadcastURL, err error) {
	bc = &broadcastURL{}
	bc.Station = *item.Station
	bc.Time = time.Time(item.DateTime)
	for _, a := range scrape.FindAll(root, func(n *html.Node) bool {
		if atom.A != n.DataAtom {
			return false
		}
		href := scrape.Attr(n, "href")
		return strings.HasPrefix(href, "/programm/radio/ausstrahlung-") && strings.HasSuffix(href, ".html")
	}) {
		ru, _ := url.Parse(scrape.Attr(a, "href"))
		bc.Source = *item.Station.ProgramURL.ResolveReference(ru)
	}
	for _, img := range scrape.FindAll(root, func(n *html.Node) bool { return atom.Img == n.DataAtom }) {
		ru, _ := url.Parse(scrape.Attr(img, "src"))
		bc.Image = item.Station.ProgramURL.ResolveReference(ru)
	}
	return
}
开发者ID:mro,项目名称:internet-radio-recorder,代码行数:21,代码来源:b4.go

示例12: parseBroadcastFromHtmlNode

func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
开发者ID:mro,项目名称:internet-radio-recorder,代码行数:60,代码来源:wdr.go

示例13: TweetsToUser

func TweetsToUser(u user.User) []tweet.Tweet {
	reqURL := SearchURL
	_url.SetQueryParams(&reqURL, map[string]string{
		"q": "to:" + u.ScreenName,
		"f": "tweets",
	})

	res, err := http.Get(reqURL.String())
	PanicIf(err)
	root, err := html.Parse(res.Body)
	PanicIf(err)

	tweetsMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Div && strings.HasPrefix(scrape.Attr(n, "class"), "tweet original-tweet")
	}
	tweetScreenNameMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Span && strings.HasPrefix(scrape.Attr(n, "class"), "username")
	}
	tweetTextMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.P && strings.HasSuffix(scrape.Attr(n, "class"), "tweet-text")
	}

	tweetNodes := scrape.FindAll(root, tweetsMatcher)
	tweets := make([]tweet.Tweet, len(tweetNodes))
	for i, n := range tweetNodes {
		t := tweet.Tweet{
			ID: scrape.Attr(n, "data-user-id"),
		}
		if child, ok := scrape.Find(n, tweetScreenNameMatcher); ok {
			t.Author = *user.NewUser(scrape.Text(child))
		}
		if child, ok := scrape.Find(n, tweetTextMatcher); ok {
			t.Text = scrape.Text(child)
		}
		tweets[i] = t
	}

	return tweets
}
开发者ID:mrap,项目名称:twitterget,代码行数:39,代码来源:search.go

示例14: parseVideoInfo

func parseVideoInfo(element *html.Node) *YoutubeVideoInfo {
	var info YoutubeVideoInfo

	info.ID = scrape.Attr(element, "data-context-item-id")

	thumbnailContainer, ok := scrape.Find(element, scrape.ByClass("yt-thumb-simple"))
	if ok {
		thumbnailImage, ok := scrape.Find(thumbnailContainer, scrape.ByTag(atom.Img))
		if ok {
			info.ThumbnailURL, _ = url.Parse(scrape.Attr(thumbnailImage, "src"))
		}
	}

	videoTimeElement, ok := scrape.Find(element, scrape.ByClass("video-time"))
	if ok {
		durationStr := strings.TrimSpace(scrape.Text(videoTimeElement))
		info.Length, _ = parseVideoDuration(durationStr)
	}

	linkFieldClasses := []string{"yt-lockup-title", "yt-lockup-byline"}
	linkFieldPtrs := []*string{&info.Title, &info.Author}
	for i, class := range linkFieldClasses {
		linkContainer, ok := scrape.Find(element, scrape.ByClass(class))
		if ok {
			link, ok := scrape.Find(linkContainer, scrape.ByTag(atom.A))
			if ok {
				*linkFieldPtrs[i] = strings.TrimSpace(scrape.Text(link))
			}
		}
	}

	descBox, ok := scrape.Find(element, scrape.ByClass("yt-lockup-description"))
	if ok {
		info.Description = strings.TrimSpace(scrape.Text(descBox))
	}

	return &info
}
开发者ID:unixpickle,项目名称:gscrape,代码行数:38,代码来源:youtube.go

示例15: eventDetailsToStrArr

func eventDetailsToStrArr(eventDetails []*html.Node, eventID int) []string {
	return []string{
		strconv.Itoa(eventID),
		scrape.Text(eventDetails[0]),
		scrape.Text(eventDetails[1]),
		scrape.Text(eventDetails[2]),
		scrape.Text(eventDetails[3]),
		scrape.Text(eventDetails[4]),
		scrape.Text(eventDetails[5]),
		strings.TrimPrefix(
			scrape.Attr(eventDetails[5].FirstChild, "href"),
			"mailto:"),
	}
}
开发者ID:jamesma,项目名称:html-scraper,代码行数:14,代码来源:chamberorganizer.go


注:本文中的github.com/yhat/scrape.Attr函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。