當前位置: 首頁>>代碼示例>>Golang>>正文


Golang scrape.Attr函數代碼示例

本文整理匯總了Golang中github.com/yhat/scrape.Attr函數的典型用法代碼示例。如果您正苦於以下問題:Golang Attr函數的具體用法?Golang Attr怎麼用?Golang Attr使用的例子?那麼,這裏精選的函數代碼示例或許可以為您提供幫助。


在下文中一共展示了Attr函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。

示例1: main

func main() {
    // Request and parse the Hacker News front page.
    resp, err := http.Get("https://news.ycombinator.com/")
    if err != nil {
        panic(err)
    }
    // Close the body so the underlying connection can be reused
    // (the original leaked it).
    defer resp.Body.Close()
    root, err := html.Parse(resp.Body)
    if err != nil {
        panic(err)
    }

    // Matcher: anchors whose grandparent carries class="athing"
    // (the HN story rows).
    matcher := func(n *html.Node) bool {
        // must check for nil values before dereferencing parents
        if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
            return scrape.Attr(n.Parent.Parent, "class") == "athing"
        }
        return false
    }
    // grab all articles and print title plus link
    articles := scrape.FindAll(root, matcher)
    for i, article := range articles {
        fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
    }
}
開發者ID:abejenaru,項目名稱:vagrant-boxes,代碼行數:25,代碼來源:first.go

示例2: parseBroadcastsFromNode

// parseBroadcastsFromNode extracts one r.Broadcast per <div class="time">
// node in the parsed schedule page. A broadcast's end time is the start
// time of the next broadcast; the last one of the day ends at midnight.
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
    nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
    ret = make([]*r.Broadcast, len(nodes))
    for index, tim := range nodes {
        // prepare response, inheriting URL/station data from the day
        bc := r.Broadcast{
            BroadcastURL: r.BroadcastURL{
                TimeURL: r.TimeURL(*day),
            },
        }
        // some defaults
        bc.Language = &lang_de
        bc.Publisher = &publisher
        // set start time
        {
            div_t := strings.TrimSpace(scrape.Text(tim))
            // Expect exactly "HH:MM" (5 chars); skip malformed cells.
            // NOTE(review): a skipped cell leaves ret[index] nil, and if the
            // LAST cell were malformed the DtEnd assignment at the bottom
            // would dereference nil — confirm the feed never emits one.
            if 5 != len(div_t) {
                continue
            }
            hour := r.MustParseInt(div_t[0:2])
            minute := r.MustParseInt(div_t[3:5])
            bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
            // The previous broadcast ends when this one starts.
            if index > 0 {
                ret[index-1].DtEnd = &bc.Time
            }
        }
        // Title, subject link and description come from the sibling
        // <div class="descr"><a …> of the time cell.
        for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
            return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
        }) {
            // Title
            bc.Title = strings.TrimSpace(scrape.Text(tit))
            href := scrape.Attr(tit, "href")
            if "" != href {
                u, _ := url.Parse(href)
                bc.Subject = day.Source.ResolveReference(u)
            }

            // Remove the title anchor so the remaining node text is the
            // plain description.
            desc_node := tit.Parent
            desc_node.RemoveChild(tit)
            description := r.TextWithBrFromNodeSet([]*html.Node{desc_node})
            bc.Description = &description
            // fmt.Fprintf(os.Stderr, "\n")
        }
        ret[index] = &bc
    }
    // fmt.Fprintf(os.Stderr, "len(ret) = %d '%s'\n", len(ret), day.Source.String())
    // Hour 24 normalizes to 00:00 of the following day, closing the
    // last broadcast at midnight.
    if len(nodes) > 0 {
        midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
        ret[len(nodes)-1].DtEnd = &midnight
    }
    return
}
開發者ID:mro,項目名稱:internet-radio-recorder,代碼行數:52,代碼來源:m945.go

示例3: NewListing

// NewListing fetches a Craigslist listing page (proxied through a fixed
// host) and scrapes its title, price and first image URL into a Listing.
// It returns an error when the fetch, parse, or any required field fails.
func NewListing(ctx appengine.Context, url string) (*Listing, error) {
    client := urlfetch.Client(ctx)
    resp, err := client.Get("http://167.88.16.61:2138/" + url)
    if err != nil {
        // Return before touching resp: it is nil when err != nil.
        // (The original logged resp.Status first, which would panic.)
        ctx.Errorf("%s", err)
        return nil, errors.New("Get listing failed")
    }
    defer resp.Body.Close()
    ctx.Debugf("Craigslist request came back with status: %s", resp.Status)
    root, err := html.Parse(resp.Body)
    if err != nil {
        ctx.Errorf("%s", "Parsing Error")
        return nil, errors.New("Parse body failed")
    }

    title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
    if !ok {
        ctx.Errorf("%s", "Error getting title")
        return nil, errors.New("Get title failed")
    }
    price, ok := scrape.Find(root, scrape.ByClass("price"))
    if !ok {
        ctx.Errorf("%s", "Error getting price")
        return nil, errors.New("Get price failed")
    }
    // Strip the leading currency symbol before parsing the amount.
    intPrice, err := strconv.Atoi(scrape.Text(price)[1:])
    if err != nil {
        ctx.Errorf("Error casting price: %s", scrape.Text(price))
        return nil, err
    }
    // The primary photo is the <img> tagged title="image 1".
    images := scrape.FindAll(root, scrape.ByTag(atom.Img))
    imageUrl := ""
    for _, image := range images {
        if scrape.Attr(image, "title") == "image 1" {
            imageUrl = scrape.Attr(image, "src")
        }
    }

    ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, scrape.Text(title))

    return &Listing{
        Url:      url,
        Title:    scrape.Text(title),
        Price:    intPrice,
        ImageUrl: imageUrl,
    }, nil
}
開發者ID:matthewdu,項目名稱:powerplug,代碼行數:49,代碼來源:craigslist.go

示例4: findOpenGraphTitle

// findOpenGraphTitle returns the document's Open Graph title, i.e. the
// content of the first non-empty <meta property="og:title"> tag, or ""
// when no such tag exists.
func findOpenGraphTitle(doc *html.Node) string {
    meta, ok := scrape.Find(doc, func(n *html.Node) bool {
        if n.DataAtom != atom.Meta {
            return false
        }
        return scrape.Attr(n, "property") == "og:title" && scrape.Attr(n, "content") != ""
    })
    if !ok {
        return ""
    }
    return scrape.Attr(meta, "content")
}
開發者ID:mcmillan,項目名稱:socialite,代碼行數:15,代碼來源:title.go

示例5: findTwitterTitle

// findTwitterTitle returns the document's Twitter Card title, i.e. the
// content of the first non-empty <meta name="twitter:title"> tag, or ""
// when no such tag exists.
func findTwitterTitle(doc *html.Node) string {
    meta, ok := scrape.Find(doc, func(n *html.Node) bool {
        if n.DataAtom != atom.Meta {
            return false
        }
        return scrape.Attr(n, "name") == "twitter:title" && scrape.Attr(n, "content") != ""
    })
    if !ok {
        return ""
    }
    return scrape.Attr(meta, "content")
}
開發者ID:mcmillan,項目名稱:socialite,代碼行數:15,代碼來源:title.go

示例6: parseBroadcastURLsNode

// parseBroadcastURLsNode collects one broadcastURL per schedule entry:
// each <h4> holds a date heading, and each <a> inside a <dt> below it
// holds "HH:MM title" plus the link to the broadcast page. Entries that
// start before closeDownHour belong to the NEXT calendar day.
func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
    const closeDownHour int = 5
    for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
        year, month, day_, err := timeForH4(scrape.Text(h4), &day.Time)
        if nil != err {
            // NOTE(review): panicking on a malformed heading takes the whole
            // scrape down — confirm this is intended for this crawler.
            panic(err)
        }
        // fmt.Printf("%d-%d-%d %s\n", year, month, day, err)
        for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
            // m[1] = hour, m[2] = minute, m[3] = title.
            m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
            if nil == m {
                panic(errors.New("Couldn't parse <a>"))
            }
            ur, _ := url.Parse(scrape.Attr(a, "href"))
            hour := r.MustParseInt(m[1])
            // Broadcasts before the close-down hour are listed under the
            // previous day's heading but air on the following day.
            dayOffset := 0
            if hour < closeDownHour {
                dayOffset = 1
            }
            // fmt.Printf("%s %s\n", b.r.TimeURL.String(), b.Title)
            bcu := broadcastURL(r.BroadcastURL{
                TimeURL: r.TimeURL{
                    Time:    time.Date(year, month, day_+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
                    Source:  *day.Source.ResolveReference(ur),
                    Station: day.Station,
                },
                Title: strings.TrimSpace(m[3]),
            })
            ret = append(ret, &bcu)
        }
    }
    return
}
開發者ID:mro,項目名稱:internet-radio-recorder,代碼行數:33,代碼來源:br.go

示例7: Scrape

// Scrape fetches q.SiteURL, parses it, and returns one *match per
// element with class "description". Fetch or parse failures terminate
// the program via log.Fatal.
func (q *query) Scrape() []*match {

    // Request the URL. The original code panicked and then had an
    // unreachable log.Fatal; a single fatal exit replaces both.
    resp, err := http.Get(q.SiteURL)
    if err != nil {
        log.Fatal("Couldn't GET ", q.SiteURL, ": ", err)
    }
    // Release the connection when done.
    defer resp.Body.Close()

    // Parse the contents of the URL
    root, err := html.Parse(resp.Body)
    if err != nil {
        log.Fatal("Unable to parse response: ", err)
    }

    // Grab all the posts and convert each into a match record.
    posts := scrape.FindAll(root, scrape.ByClass("description"))
    matches := make([]*match, len(posts))
    for i, post := range posts {
        matches[i] = &match{
            Title:       scrape.Text(post.FirstChild.NextSibling),
            Description: scrape.Text(post),
            Link:        "http://kijiji.ca" + scrape.Attr(post.FirstChild.NextSibling, "href"),
            Price:       scrape.Text(post.NextSibling.NextSibling),
            Matched:     false,
        }
    }

    return matches
}
開發者ID:bentranter,項目名稱:kijiji-scrape,代碼行數:32,代碼來源:main.go

示例8: main

// main scrapes the reddit front page, parses each "link" entry
// concurrently, and prints the resulting posts.
func main() {

    resp, err := http.Get("https://www.reddit.com")
    if err != nil {
        panic(err)
    }
    // Close the body so the connection can be reused.
    defer resp.Body.Close()
    root, err := html.Parse(resp.Body)
    if err != nil {
        panic(err)
    }

    // The div with id="siteTable" wraps all front-page entries.
    matcher := func(n *html.Node) bool {
        if n.DataAtom == atom.Div && n.Parent != nil {
            return scrape.Attr(n, "id") == "siteTable"
        }
        return false
    }
    table, ok := scrape.Find(root, matcher)
    if !ok {
        // panic(ok) in the original panicked with a bare `false`.
        panic("could not find siteTable")
    }
    // Each post is a div with data-type="link".
    matcher = func(n *html.Node) bool {
        if n.DataAtom == atom.Div && n.Parent != nil {
            return scrape.Attr(n, "data-type") == "link"
        }
        return false
    }

    articles := scrape.FindAll(table, matcher)
    // Pre-size the slice and write by index: the original appended to a
    // shared slice from several goroutines, which is a data race.
    posts := make([]Post, len(articles))

    for i := 0; i < len(articles); i++ {
        wg.Add(1)
        go func(i int, n *html.Node) {
            posts[i] = parsepost(n)
            wg.Done()
        }(i, articles[i])
    }

    wg.Wait()

    for i := 0; i < len(posts); i++ {
        printpost(posts[i])
    }

}
開發者ID:jalavosus,項目名稱:redditscraper,代碼行數:47,代碼來源:reddit_scraper.go

示例9: parsepost

// parsepost extracts title, subreddit, comments-page URL and author
// from one reddit front-page entry node.
//
// NOTE(review): every scrape.Find below discards the ok flag and
// dereferences the result — any missing element nil-panics. Confirm the
// page structure is guaranteed before relying on this in production.
func parsepost(n *html.Node) Post {
    post := Post{}

    // get the title. uses a scrape inbuilt matcher
    title_scrape, _ := scrape.Find(n, scrape.ByClass("title"))
    title := scrape.Text(title_scrape.FirstChild)

    // get the subreddit. This requires a custom matcher.
    matcher := func(n *html.Node) bool {
        if n.DataAtom == atom.A && n.Parent != nil {
            return scrape.Attr(n, "class") == "subreddit hover may-blank"
        }
        return false
    }
    sub, _ := scrape.Find(n, matcher)
    subreddit := scrape.Text(sub)

    // get the url to the comments. requires custom matcher.
    matcher = func(n *html.Node) bool {
        if n.DataAtom == atom.Ul && n.FirstChild != nil {
            return scrape.Attr(n, "class") == "flat-list buttons" && scrape.Attr(n.FirstChild, "class") == "first"
        }
        return false
    }
    ul, _ := scrape.Find(n, matcher)          // ul is a list of two buttons: one that links to a post's comments page, one a "share" function
    li := ul.FirstChild                       // the first list item of ul -- this will always be the comments page link.
    url := scrape.Attr(li.FirstChild, "href") // finally, the url found in the list item.

    // get the author: an <a> inside a <p> whose href contains "/user/".
    matcher = func(n *html.Node) bool {
        if n.DataAtom == atom.A && n.Parent.DataAtom == atom.P {
            return strings.Contains(scrape.Attr(n, "href"), "/user/")
        }
        return false
    }
    author_scrape, _ := scrape.Find(n, matcher)
    author := scrape.Text(author_scrape)

    post.title = title
    post.subreddit = subreddit
    post.url = url
    post.author = author

    return post
}
開發者ID:jalavosus,項目名稱:redditscraper,代碼行數:45,代碼來源:reddit_scraper.go

示例10: getLink

// getLink returns the URL embedded in the first download button's
// onclick handler (of the form window.location='<url>'), or "" when no
// well-formed button is found.
func getLink(r *html.Node) (s string) {
    for _, button := range scrape.FindAll(r, scrape.ByClass("downloadbtn")) {
        windowLocation := scrape.Attr(button, "onclick")
        // Guard the split: the original indexed [1] unconditionally and
        // would panic on an onclick without "=".
        parts := strings.Split(windowLocation, "=")
        if len(parts) < 2 {
            continue
        }
        // The original shadowed the named return with `s :=`; assign
        // directly instead.
        return strings.Trim(parts[1], "'")
    }
    return
}
開發者ID:jmonmane,項目名稱:scrape,代碼行數:10,代碼來源:main.go

示例11: parseBroadcastSeedNode

// parseBroadcastSeedNode fills Time, Source and Image of a broadcastURL
// from the html snippet embedded in a calendar JSON item. When several
// candidate anchors or images match, the last one wins.
func (item *calendarItem) parseBroadcastSeedNode(root *html.Node) (bc *broadcastURL, err error) {
    bc = &broadcastURL{}
    bc.Station = *item.Station
    bc.Time = time.Time(item.DateTime)

    // Anchors pointing at a broadcast detail page.
    isBroadcastLink := func(n *html.Node) bool {
        if n.DataAtom != atom.A {
            return false
        }
        href := scrape.Attr(n, "href")
        return strings.HasPrefix(href, "/programm/radio/ausstrahlung-") && strings.HasSuffix(href, ".html")
    }
    for _, anchor := range scrape.FindAll(root, isBroadcastLink) {
        rel, _ := url.Parse(scrape.Attr(anchor, "href"))
        bc.Source = *item.Station.ProgramURL.ResolveReference(rel)
    }

    // Any image in the snippet supplies the broadcast image.
    for _, image := range scrape.FindAll(root, func(n *html.Node) bool { return n.DataAtom == atom.Img }) {
        rel, _ := url.Parse(scrape.Attr(image, "src"))
        bc.Image = item.Station.ProgramURL.ResolveReference(rel)
    }
    return
}
開發者ID:mro,項目名稱:internet-radio-recorder,代碼行數:21,代碼來源:b4.go

示例12: parseBroadcastFromHtmlNode

// parseBroadcastFromHtmlNode fills the broadcast's Author, TitleEpisode,
// Subject and Description from the parsed detail page and returns it as
// a single-element slice. It errors when more than one
// <div class="epg-content-right"> is present.
//
// NOTE(review): the node removals below are order-dependent — the
// description is built from whatever children remain after the title
// and cruft nodes have been removed.
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
    {
        // Author from <meta name="Author" content="…">.
        meta, _ := scrape.Find(root, func(n *html.Node) bool {
            return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
        })
        if nil != meta {
            content := scrape.Attr(meta, "content")
            bc.Author = &content
        }
    }
    for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
        return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
    }) {
        if idx != 0 {
            err = errors.New("There was more than 1 <div class='epg-content-right'/>")
            return
        }
        {
            // TitleEpisode: the text node inside <h3> directly before a <br>;
            // both the text and the <br> are removed once read.
            txt, _ := scrape.Find(epg, func(n *html.Node) bool {
                return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
            })
            if nil != txt {
                t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
                bc.TitleEpisode = &t
                txt.Parent.RemoveChild(txt.NextSibling)
                txt.Parent.RemoveChild(txt)
            }
        }
        {
            // Subject: the anchor inside <div class="sendungsLink">, resolved
            // against the broadcast's source URL.
            a, _ := scrape.Find(epg, func(n *html.Node) bool {
                return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
            })
            if nil != a {
                u, _ := url.Parse(scrape.Attr(a, "href"))
                bc.Subject = bc.Source.ResolveReference(u)
            }
        }
        // purge some cruft (headings, share/gallery widgets, tab bars) so it
        // does not leak into the description text
        for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
            clz := scrape.Attr(n, "class")
            return atom.H2 == n.DataAtom ||
                "mod modSharing" == clz ||
                "modGalery" == clz ||
                "sendungsLink" == clz ||
                "tabs-container" == clz
        }) {
            nn.Parent.RemoveChild(nn)
        }
        {
            // Description: text of the remaining direct children of epg.
            description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
            bc.Description = &description
        }
    }
    bc_ := r.Broadcast(*bc)
    ret = append(ret, &bc_)
    return
}
開發者ID:mro,項目名稱:internet-radio-recorder,代碼行數:60,代碼來源:wdr.go

示例13: TweetsToUser

// TweetsToUser scrapes the Twitter search results page for tweets
// addressed to the given user ("to:<screen name>") and returns them.
func TweetsToUser(u user.User) []tweet.Tweet {
    searchURL := SearchURL
    _url.SetQueryParams(&searchURL, map[string]string{
        "q": "to:" + u.ScreenName,
        "f": "tweets",
    })

    res, err := http.Get(searchURL.String())
    PanicIf(err)
    root, err := html.Parse(res.Body)
    PanicIf(err)

    // Matchers for a tweet container, its author span, and its text.
    isTweet := func(n *html.Node) bool {
        return n.DataAtom == atom.Div && strings.HasPrefix(scrape.Attr(n, "class"), "tweet original-tweet")
    }
    isScreenName := func(n *html.Node) bool {
        return n.DataAtom == atom.Span && strings.HasPrefix(scrape.Attr(n, "class"), "username")
    }
    isTweetText := func(n *html.Node) bool {
        return n.DataAtom == atom.P && strings.HasSuffix(scrape.Attr(n, "class"), "tweet-text")
    }

    nodes := scrape.FindAll(root, isTweet)
    tweets := make([]tweet.Tweet, len(nodes))
    for i, node := range nodes {
        cur := tweet.Tweet{
            ID: scrape.Attr(node, "data-user-id"),
        }
        if nameNode, ok := scrape.Find(node, isScreenName); ok {
            cur.Author = *user.NewUser(scrape.Text(nameNode))
        }
        if textNode, ok := scrape.Find(node, isTweetText); ok {
            cur.Text = scrape.Text(textNode)
        }
        tweets[i] = cur
    }

    return tweets
}
開發者ID:mrap,項目名稱:twitterget,代碼行數:39,代碼來源:search.go

示例14: parseVideoInfo

// parseVideoInfo reads the metadata of a single YouTube search-result
// element (ID, thumbnail, duration, title, author, description) into a
// freshly allocated YoutubeVideoInfo. Missing fields are left zero.
func parseVideoInfo(element *html.Node) *YoutubeVideoInfo {
    info := &YoutubeVideoInfo{
        ID: scrape.Attr(element, "data-context-item-id"),
    }

    // Thumbnail: the <img> inside the "yt-thumb-simple" container.
    if thumbBox, ok := scrape.Find(element, scrape.ByClass("yt-thumb-simple")); ok {
        if img, ok := scrape.Find(thumbBox, scrape.ByTag(atom.Img)); ok {
            info.ThumbnailURL, _ = url.Parse(scrape.Attr(img, "src"))
        }
    }

    // Duration: the "video-time" badge, e.g. "12:34".
    if timeEl, ok := scrape.Find(element, scrape.ByClass("video-time")); ok {
        info.Length, _ = parseVideoDuration(strings.TrimSpace(scrape.Text(timeEl)))
    }

    // Title and author sit in analogous containers, each holding an <a>.
    for _, field := range []struct {
        class string
        dest  *string
    }{
        {"yt-lockup-title", &info.Title},
        {"yt-lockup-byline", &info.Author},
    } {
        if container, ok := scrape.Find(element, scrape.ByClass(field.class)); ok {
            if link, ok := scrape.Find(container, scrape.ByTag(atom.A)); ok {
                *field.dest = strings.TrimSpace(scrape.Text(link))
            }
        }
    }

    if descBox, ok := scrape.Find(element, scrape.ByClass("yt-lockup-description")); ok {
        info.Description = strings.TrimSpace(scrape.Text(descBox))
    }

    return info
}
開發者ID:unixpickle,項目名稱:gscrape,代碼行數:38,代碼來源:youtube.go

示例15: eventDetailsToStrArr

// eventDetailsToStrArr flattens the six detail nodes of one event row
// into a string slice, prefixed with the event ID. The final column is
// the e-mail address taken from the mailto: link in the sixth cell.
func eventDetailsToStrArr(eventDetails []*html.Node, eventID int) []string {
    fields := []string{strconv.Itoa(eventID)}
    for _, cell := range eventDetails[:6] {
        fields = append(fields, scrape.Text(cell))
    }
    email := scrape.Attr(eventDetails[5].FirstChild, "href")
    return append(fields, strings.TrimPrefix(email, "mailto:"))
}
開發者ID:jamesma,項目名稱:html-scraper,代碼行數:14,代碼來源:chamberorganizer.go


注:本文中的github.com/yhat/scrape.Attr函數示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。