本文整理汇总了Golang中github.com/yhat/scrape.Attr函数的典型用法代码示例。如果您正苦于以下问题：Golang Attr函数的具体用法？Golang Attr怎么用？Golang Attr使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了Attr函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: main
// main downloads the Hacker News front page and prints, for every matching
// <a> element, its index, text and href.
//
// It panics if the page cannot be fetched or parsed.
func main() {
	// request and parse the front page
	resp, err := http.Get("https://news.ycombinator.com/")
	if err != nil {
		panic(err)
	}
	// The body must be closed so the underlying connection is released.
	defer resp.Body.Close()

	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	// A link matches when it is an <a> whose grandparent has class "athing".
	// Both parent pointers are nil-checked before they are followed.
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
			return scrape.Attr(n.Parent.Parent, "class") == "athing"
		}
		return false
	}

	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for i, article := range articles {
		fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
示例2: parseBroadcastsFromNode
// parseBroadcastsFromNode builds one broadcast for every <div class="time">
// found below root.
//
// The trimmed text of the div must be exactly five bytes long; bytes 0-1 are
// parsed as the hour and bytes 3-4 as the minute (presumably "HH:MM" — confirm
// against the scraped site). Divs that do not fit are skipped and produce no
// entry, so the result never contains nil elements. Each broadcast's DtEnd
// points at the start time of the broadcast that follows it; the last one ends
// at hour 24 of the day.
//
// Title, Subject and Description are taken from <a> elements whose parent is a
// <div class="descr"> below the time div's parent. The anchor is removed from
// its parent before the description text is collected.
//
// The returned error is never set by this function.
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
	ret = make([]*r.Broadcast, 0, len(nodes))
	for _, tim := range nodes {
		// prepare response
		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher

		// set start time
		startText := strings.TrimSpace(scrape.Text(tim))
		if 5 != len(startText) {
			// Not a usable start time: drop the entry instead of leaving a
			// nil slot that later DtEnd assignments would dereference.
			continue
		}
		hour := r.MustParseInt(startText[0:2])
		minute := r.MustParseInt(startText[3:5])
		bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
		// The previous broadcast ends when this one starts.
		if len(ret) > 0 {
			ret[len(ret)-1].DtEnd = &bc.Time
		}

		for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
			return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
		}) {
			// Title
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			href := scrape.Attr(tit, "href")
			if "" != href {
				// An unparsable href leaves Subject unset; passing a nil URL
				// to ResolveReference would panic.
				if u, perr := url.Parse(href); nil == perr {
					bc.Subject = day.Source.ResolveReference(u)
				}
			}
			// Detach the title link so only the remaining text of the
			// container ends up in the description.
			descNode := tit.Parent
			descNode.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{descNode})
			bc.Description = &description
		}
		ret = append(ret, &bc)
	}
	if len(ret) > 0 {
		// Hour 24 is normalised by time.Date to 00:00 of the following day.
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[len(ret)-1].DtEnd = &midnight
	}
	return
}
示例3: NewListing
// NewListing fetches the page for url through the host 167.88.16.61:2138 and
// builds a Listing from its <title>, the element with class "price" and the
// src of the <img> whose title attribute is "image 1" (the last such image
// wins; ImageUrl stays empty when there is none).
//
// An error is returned when the request fails, the body cannot be parsed, the
// title or price element is missing, the price text is empty, or the price
// text without its first byte is not an integer.
func NewListing(ctx appengine.Context, url string) (*Listing, error) {
	client := urlfetch.Client(ctx)
	resp, err := client.Get("http://167.88.16.61:2138/" + url)
	if err != nil {
		// resp must not be touched when the request failed.
		ctx.Errorf("%s", err)
		return nil, errors.New("Get listing failed")
	}
	defer resp.Body.Close()
	ctx.Debugf("Craigslist request came back with status: %s", resp.Status)

	root, err := html.Parse(resp.Body)
	if err != nil {
		ctx.Errorf("%s", "Parsing Error")
		return nil, errors.New("Parse body failed")
	}

	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if !ok {
		ctx.Errorf("%s", "Error getting title")
		return nil, errors.New("Get title failed")
	}

	price, ok := scrape.Find(root, scrape.ByClass("price"))
	if !ok {
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	priceText := scrape.Text(price)
	if priceText == "" {
		// Slicing an empty string at [1:] would panic.
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	// The first byte is dropped before conversion (presumably a currency
	// symbol — confirm against the scraped pages).
	intPrice, err := strconv.Atoi(priceText[1:])
	if err != nil {
		ctx.Errorf("Error casting price: %s", priceText)
		return nil, err
	}

	imageUrl := ""
	for _, image := range scrape.FindAll(root, scrape.ByTag(atom.Img)) {
		if scrape.Attr(image, "title") == "image 1" {
			imageUrl = scrape.Attr(image, "src")
		}
	}

	titleText := scrape.Text(title)
	ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, titleText)
	return &Listing{
		Url:      url,
		Title:    titleText,
		Price:    intPrice,
		ImageUrl: imageUrl,
	}, nil
}
示例4: findOpenGraphTitle
// findOpenGraphTitle looks in doc for a <meta> element whose property
// attribute is "og:title" and whose content attribute is non-empty, and
// returns that content. It returns "" when scrape.Find reports no match.
func findOpenGraphTitle(doc *html.Node) string {
	isOpenGraphTitle := func(node *html.Node) bool {
		switch {
		case node.DataAtom != atom.Meta:
			return false
		case scrape.Attr(node, "property") != "og:title":
			return false
		default:
			return scrape.Attr(node, "content") != ""
		}
	}

	if meta, ok := scrape.Find(doc, isOpenGraphTitle); ok {
		return scrape.Attr(meta, "content")
	}
	return ""
}
示例5: findTwitterTitle
// findTwitterTitle looks in doc for a <meta> element whose name attribute is
// "twitter:title" and whose content attribute is non-empty, and returns that
// content. It returns "" when scrape.Find reports no match.
func findTwitterTitle(doc *html.Node) string {
	meta, ok := scrape.Find(doc, func(node *html.Node) bool {
		isMeta := node.DataAtom == atom.Meta
		return isMeta &&
			scrape.Attr(node, "name") == "twitter:title" &&
			scrape.Attr(node, "content") != ""
	})
	if ok {
		return scrape.Attr(meta, "content")
	}
	return ""
}
示例6: parseBroadcastURLsNode
// parseBroadcastURLsNode collects one broadcastURL for every <a> whose parent
// is a <dt>, searched below the parent of each <h4> under root.
//
// The <h4> text is turned into a date by timeForH4. The anchor text is matched
// against hourMinuteTitleRegExp, whose submatches 1, 2 and 3 are used as hour,
// minute and title. Hours below closeDownHour (5) are placed on the day after
// the <h4> date.
//
// Instead of panicking, failures are reported through the error result: a
// timeForH4 error, an anchor text the regular expression does not match, or an
// href that url.Parse rejects. ret is nil whenever err is non-nil.
func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5
	for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
		year, month, dayOfMonth, terr := timeForH4(scrape.Text(h4), &day.Time)
		if nil != terr {
			return nil, terr
		}
		for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				return nil, errors.New("Couldn't parse <a>")
			}
			// A nil URL must never reach ResolveReference, which would
			// dereference it.
			ur, perr := url.Parse(scrape.Attr(a, "href"))
			if nil != perr {
				return nil, perr
			}
			hour := r.MustParseInt(m[1])
			dayOffset := 0
			if hour < closeDownHour {
				dayOffset = 1
			}
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					Time:    time.Date(year, month, dayOfMonth+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ur),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return ret, nil
}
示例7: Scrape
// Scrape fetches q.SiteURL and returns one match for every element with class
// "description" found in the page.
//
// Title and Link are read from the node after the element's first child, Price
// from the node two siblings after the element itself. When one of those nodes
// does not exist the corresponding text is left empty (Link is then just the
// "http://kijiji.ca" prefix). Matched is always false.
//
// It logs a message and panics if the page cannot be fetched or parsed.
func (q *query) Scrape() []*match {
	// Request the URL
	resp, err := http.Get(q.SiteURL)
	if err != nil {
		// Log before panicking; a statement placed after panic never runs.
		log.Print("Couldn't GET ", q.SiteURL)
		panic(err)
	}
	defer resp.Body.Close()

	// Parse the contents of the URL
	root, err := html.Parse(resp.Body)
	if err != nil {
		log.Print("Unable to parse response")
		panic(err)
	}

	// next returns n.NextSibling and tolerates a nil n, so sibling chains can
	// be followed without risking a nil dereference.
	next := func(n *html.Node) *html.Node {
		if n == nil {
			return nil
		}
		return n.NextSibling
	}

	// Grab all the posts
	posts := scrape.FindAll(root, scrape.ByClass("description"))
	matches := make([]*match, len(posts))
	for i, post := range posts {
		title, href, price := "", "", ""
		if titleNode := next(post.FirstChild); titleNode != nil {
			title = scrape.Text(titleNode)
			href = scrape.Attr(titleNode, "href")
		}
		if priceNode := next(post.NextSibling); priceNode != nil {
			price = scrape.Text(priceNode)
		}
		matches[i] = &match{
			Title:       title,
			Description: scrape.Text(post),
			Link:        "http://kijiji.ca" + href,
			Price:       price,
			Matched:     false,
		}
	}
	return matches
}
示例8: main
// main downloads the reddit front page, locates the <div id="siteTable">,
// parses every <div data-type="link"> below it concurrently with parsepost and
// prints the resulting posts with printpost in document order.
//
// It panics if the page cannot be fetched or parsed, or if the site table is
// missing.
func main() {
	resp, err := http.Get("https://www.reddit.com")
	if err != nil {
		panic(err)
	}
	// The body must be closed so the underlying connection is released.
	defer resp.Body.Close()

	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "id") == "siteTable"
		}
		return false
	}
	table, ok := scrape.Find(root, matcher)
	if !ok {
		panic("siteTable div not found")
	}

	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "data-type") == "link"
		}
		return false
	}
	articles := scrape.FindAll(table, matcher)

	// Every goroutine writes only its own slot, so no two goroutines touch the
	// same memory; appending to one shared slice from several goroutines is a
	// data race and can lose posts.
	posts := make([]Post, len(articles))
	for i := range articles {
		// wg is declared outside this function (presumably a package-level
		// sync.WaitGroup — confirm).
		wg.Add(1)
		go func(i int, n *html.Node) {
			defer wg.Done()
			posts[i] = parsepost(n)
		}(i, articles[i])
	}
	wg.Wait()

	for _, post := range posts {
		printpost(post)
	}
}
示例9: parsepost
// parsepost extracts title, subreddit, comments URL and author from the post
// element n.
//
//   - title: text of the first child of the element with class "title"
//   - subreddit: text of the <a> with class "subreddit hover may-blank"
//   - url: href of the first child of the first item of the
//     <ul class="flat-list buttons"> whose first child has class "first"
//   - author: text of an <a> inside a <p> whose href contains "/user/"
//
// A field whose element cannot be found is left as its zero value instead of
// causing a nil dereference.
func parsepost(n *html.Node) Post {
	post := Post{}

	// get the title. uses a scrape inbuilt matcher
	if titleNode, ok := scrape.Find(n, scrape.ByClass("title")); ok && titleNode.FirstChild != nil {
		post.title = scrape.Text(titleNode.FirstChild)
	}

	// get the subreddit. This requires a custom matcher.
	isSubreddit := func(node *html.Node) bool {
		if node.DataAtom == atom.A && node.Parent != nil {
			return scrape.Attr(node, "class") == "subreddit hover may-blank"
		}
		return false
	}
	if sub, ok := scrape.Find(n, isSubreddit); ok {
		post.subreddit = scrape.Text(sub)
	}

	// get the url to the comments. requires custom matcher.
	isButtonList := func(node *html.Node) bool {
		if node.DataAtom == atom.Ul && node.FirstChild != nil {
			return scrape.Attr(node, "class") == "flat-list buttons" && scrape.Attr(node.FirstChild, "class") == "first"
		}
		return false
	}
	if ul, ok := scrape.Find(n, isButtonList); ok {
		// The matcher guarantees ul.FirstChild is non-nil; that list item's
		// first child is expected to be the link to the comments page.
		if link := ul.FirstChild.FirstChild; link != nil {
			post.url = scrape.Attr(link, "href")
		}
	}

	// get the author. scrape.Find also tests n itself, whose Parent may be
	// nil, so the parent is checked before it is followed.
	isAuthor := func(node *html.Node) bool {
		if node.DataAtom == atom.A && node.Parent != nil && node.Parent.DataAtom == atom.P {
			return strings.Contains(scrape.Attr(node, "href"), "/user/")
		}
		return false
	}
	if author, ok := scrape.Find(n, isAuthor); ok {
		post.author = scrape.Text(author)
	}

	return post
}
示例10: getLink
// getLink returns the link stored in the onclick attribute of the first
// element with class "downloadbtn" below r that contains an "=".
//
// The link is everything after the first "=" with surrounding single quotes
// trimmed, so a target that itself contains "=" (for example a query string)
// is returned whole. Elements whose onclick has no "=" are skipped. The result
// is "" when no element yields a link.
func getLink(r *html.Node) (s string) {
	for _, button := range scrape.FindAll(r, scrape.ByClass("downloadbtn")) {
		parts := strings.SplitN(scrape.Attr(button, "onclick"), "=", 2)
		if len(parts) < 2 {
			// No assignment in this handler; indexing parts[1] would panic.
			continue
		}
		return strings.Trim(parts[1], "'")
	}
	return ""
}
示例11: parseBroadcastSeedNode
// parseBroadcastSeedNode builds a broadcastURL from item and the HTML snippet
// root.
//
// Station and Time are copied from item. Source is resolved against
// item.Station.ProgramURL from the href of an <a> whose href starts with
// "/programm/radio/ausstrahlung-" and ends with ".html". Image is resolved the
// same way from the src of an <img>. When several elements qualify the last
// one wins. An href or src that url.Parse rejects is skipped, because a nil
// URL must not be handed to ResolveReference.
//
// The returned error is always nil.
func (item *calendarItem) parseBroadcastSeedNode(root *html.Node) (bc *broadcastURL, err error) {
	bc = &broadcastURL{}
	bc.Station = *item.Station
	bc.Time = time.Time(item.DateTime)

	for _, a := range scrape.FindAll(root, func(n *html.Node) bool {
		if atom.A != n.DataAtom {
			return false
		}
		href := scrape.Attr(n, "href")
		return strings.HasPrefix(href, "/programm/radio/ausstrahlung-") && strings.HasSuffix(href, ".html")
	}) {
		ru, perr := url.Parse(scrape.Attr(a, "href"))
		if nil != perr {
			continue
		}
		bc.Source = *item.Station.ProgramURL.ResolveReference(ru)
	}

	for _, img := range scrape.FindAll(root, func(n *html.Node) bool { return atom.Img == n.DataAtom }) {
		ru, perr := url.Parse(scrape.Attr(img, "src"))
		if nil != perr {
			continue
		}
		bc.Image = item.Station.ProgramURL.ResolveReference(ru)
	}
	return bc, nil
}
示例12: parseBroadcastFromHtmlNode
// parseBroadcastFromHtmlNode enriches bc from the document root and returns it
// as a single-element slice.
//
// Author comes from <meta name="Author">. The remaining fields come from the
// <div class="epg-content-right">; an error is returned (with a nil slice) if
// more than one such div exists, after the first one has already been applied
// to bc. Within that div:
//
//   - TitleEpisode is a text node inside an <h3> that is directly followed by
//     a <br>; both nodes are removed afterwards.
//   - Subject is the href of an <a> inside <div class="sendungsLink">,
//     resolved against bc.Source. An href that url.Parse rejects is ignored.
//   - <h2> elements and elements with class "mod modSharing", "modGalery",
//     "sendungsLink" or "tabs-container" are removed.
//   - Description is built by r.TextWithBrFromNodeSet from the div's remaining
//     direct children.
//
// The statement order matters: the tree is modified before the description is
// collected.
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				// Parent and NextSibling may be nil (for example a text node
				// that is the last child), so check before following them.
				return html.TextNode == n.Type &&
					nil != n.Parent && atom.H3 == n.Parent.DataAtom &&
					nil != n.NextSibling && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				// Remove the <br> first, while txt.NextSibling still points
				// at it.
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return nil != n.Parent && atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				// A nil URL must not reach ResolveReference.
				if u, perr := url.Parse(scrape.Attr(a, "href")); nil == perr {
					bc.Subject = bc.Source.ResolveReference(u)
				}
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
示例13: TweetsToUser
// TweetsToUser requests SearchURL with the query "to:<u.ScreenName>" (and
// f=tweets) and returns one Tweet per <div> whose class starts with
// "tweet original-tweet".
//
// For each tweet, ID is the div's data-user-id attribute, Author is built from
// the text of the first <span> whose class starts with "username", and Text is
// the text of the first <p> whose class ends with "tweet-text". Author and
// Text keep their zero value when the element is missing.
//
// Request and parse errors are passed to PanicIf.
func TweetsToUser(u user.User) []tweet.Tweet {
	reqURL := SearchURL
	_url.SetQueryParams(&reqURL, map[string]string{
		"q": "to:" + u.ScreenName,
		"f": "tweets",
	})

	res, err := http.Get(reqURL.String())
	PanicIf(err)
	// The body must be closed so the underlying connection is released.
	defer res.Body.Close()

	root, err := html.Parse(res.Body)
	PanicIf(err)

	tweetsMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Div && strings.HasPrefix(scrape.Attr(n, "class"), "tweet original-tweet")
	}
	tweetScreenNameMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Span && strings.HasPrefix(scrape.Attr(n, "class"), "username")
	}
	tweetTextMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.P && strings.HasSuffix(scrape.Attr(n, "class"), "tweet-text")
	}

	tweetNodes := scrape.FindAll(root, tweetsMatcher)
	tweets := make([]tweet.Tweet, len(tweetNodes))
	for i, n := range tweetNodes {
		t := tweet.Tweet{
			ID: scrape.Attr(n, "data-user-id"),
		}
		if child, ok := scrape.Find(n, tweetScreenNameMatcher); ok {
			t.Author = *user.NewUser(scrape.Text(child))
		}
		if child, ok := scrape.Find(n, tweetTextMatcher); ok {
			t.Text = scrape.Text(child)
		}
		tweets[i] = t
	}
	return tweets
}
示例14: parseVideoInfo
// parseVideoInfo reads a video description out of element.
//
// ID is the element's data-context-item-id attribute. The other fields are
// filled only when the corresponding node is found:
//
//   - ThumbnailURL: src of the <img> inside the "yt-thumb-simple" element
//   - Length: parseVideoDuration of the trimmed "video-time" text
//   - Title / Author: trimmed text of the <a> inside "yt-lockup-title" /
//     "yt-lockup-byline"
//   - Description: trimmed text of "yt-lockup-description"
//
// Errors from url.Parse and parseVideoDuration are discarded; whatever value
// they return alongside the error is stored.
func parseVideoInfo(element *html.Node) *YoutubeVideoInfo {
	info := new(YoutubeVideoInfo)
	info.ID = scrape.Attr(element, "data-context-item-id")

	if thumb, found := scrape.Find(element, scrape.ByClass("yt-thumb-simple")); found {
		if img, found := scrape.Find(thumb, scrape.ByTag(atom.Img)); found {
			info.ThumbnailURL, _ = url.Parse(scrape.Attr(img, "src"))
		}
	}

	if timeNode, found := scrape.Find(element, scrape.ByClass("video-time")); found {
		info.Length, _ = parseVideoDuration(strings.TrimSpace(scrape.Text(timeNode)))
	}

	// Each entry pairs a container class with the field its link text fills.
	linkFields := []struct {
		class string
		dst   *string
	}{
		{"yt-lockup-title", &info.Title},
		{"yt-lockup-byline", &info.Author},
	}
	for _, field := range linkFields {
		container, found := scrape.Find(element, scrape.ByClass(field.class))
		if !found {
			continue
		}
		if anchor, found := scrape.Find(container, scrape.ByTag(atom.A)); found {
			*field.dst = strings.TrimSpace(scrape.Text(anchor))
		}
	}

	if desc, found := scrape.Find(element, scrape.ByClass("yt-lockup-description")); found {
		info.Description = strings.TrimSpace(scrape.Text(desc))
	}

	return info
}
示例15: eventDetailsToStrArr
// eventDetailsToStrArr flattens an event into an eight-element string row:
// the decimal eventID, the text of eventDetails[0] through eventDetails[5],
// and the href of eventDetails[5]'s first child with a leading "mailto:"
// removed.
//
// eventDetails must hold at least six nodes.
func eventDetailsToStrArr(eventDetails []*html.Node, eventID int) []string {
	const textColumns = 6

	row := make([]string, 0, textColumns+2)
	row = append(row, strconv.Itoa(eventID))
	for i := 0; i < textColumns; i++ {
		row = append(row, scrape.Text(eventDetails[i]))
	}

	mailHref := scrape.Attr(eventDetails[textColumns-1].FirstChild, "href")
	return append(row, strings.TrimPrefix(mailHref, "mailto:"))
}