This article collects typical usage examples of the FindAll function from the Golang package github.com/yhat/scrape. If you are wondering how the FindAll function is used in practice, how to call it, or what real-world examples look like, the curated code samples below should help.
The following presents 15 code examples of the FindAll function, sorted by popularity by default.
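Before the examples, here is a minimal, self-contained sketch of the basic call pattern: fetch a page, parse it with golang.org/x/net/html, and hand the root node plus a matcher function to scrape.FindAll, which returns every node the matcher accepts. The URL and the anchor-tag matcher here are illustrative placeholders only and are not taken from any of the examples below.

package main

import (
	"fmt"
	"net/http"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

func main() {
	// placeholder URL; any HTML page works
	resp, err := http.Get("https://example.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// FindAll walks the whole tree and returns every node for which the matcher returns true.
	links := scrape.FindAll(root, func(n *html.Node) bool { return n.DataAtom == atom.A })
	for _, a := range links {
		fmt.Println(scrape.Text(a), scrape.Attr(a, "href"))
	}
}

The examples that follow all use this same pattern; they differ only in how the matcher is written and in how the matched nodes are post-processed.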
Example 1: parseBroadcastFromHtmlNode
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
Example 2: TextWithBrFromNodeSet
func TextWithBrFromNodeSet(nodes []*html.Node) string {
	parts := make([]string, len(nodes))
	for i, node := range nodes {
		for _, tag := range []atom.Atom{atom.Br, atom.Tr} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		for _, tag := range []atom.Atom{atom.P, atom.Div} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker + lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		tmp := []string{}
		for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return html.TextNode == n.Type }) {
			tmp = append(tmp, n.Data)
		}
		parts[i] = strings.Join(tmp, "")
	}
	ret := strings.Join(parts, lineFeedMarker+lineFeedMarker)
	ret = NormaliseWhiteSpace(ret)
	ret = strings.Replace(ret, lineFeedMarker, "\n", -1)
	re := regexp.MustCompile("[ ]*(\\s)[ ]*") // collapse whitespace, keep \n
	ret = re.ReplaceAllString(ret, "$1")      // collapse whitespace (not the \n\n however)
	{
		re := regexp.MustCompile("\\s*\\n\\s*\\n\\s*") // collapse linefeeds
		ret = re.ReplaceAllString(ret, "\n\n")
	}
	return strings.TrimSpace(ret)
}
Example 3: parseBroadcastURLsNode
func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5
	for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
		year, month, day_, err := timeForH4(scrape.Text(h4), &day.Time)
		if nil != err {
			panic(err)
		}
		// fmt.Printf("%d-%d-%d %s\n", year, month, day, err)
		for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				panic(errors.New("Couldn't parse <a>"))
			}
			ur, _ := url.Parse(scrape.Attr(a, "href"))
			hour := r.MustParseInt(m[1])
			dayOffset := 0
			if hour < closeDownHour {
				dayOffset = 1
			}
			// fmt.Printf("%s %s\n", b.r.TimeURL.String(), b.Title)
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					Time:    time.Date(year, month, day_+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ur),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return
}
Example 4: parseBroadcastsFromNode
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
	ret = make([]*r.Broadcast, len(nodes))
	for index, tim := range nodes {
		// prepare response
		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		// set start time
		{
			div_t := strings.TrimSpace(scrape.Text(tim))
			if 5 != len(div_t) {
				continue
			}
			hour := r.MustParseInt(div_t[0:2])
			minute := r.MustParseInt(div_t[3:5])
			bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
			if index > 0 {
				ret[index-1].DtEnd = &bc.Time
			}
		}
		for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
			return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
		}) {
			// Title
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			href := scrape.Attr(tit, "href")
			if "" != href {
				u, _ := url.Parse(href)
				bc.Subject = day.Source.ResolveReference(u)
			}
			desc_node := tit.Parent
			desc_node.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{desc_node})
			bc.Description = &description
			// fmt.Fprintf(os.Stderr, "\n")
		}
		ret[index] = &bc
	}
	// fmt.Fprintf(os.Stderr, "len(ret) = %d '%s'\n", len(ret), day.Source.String())
	if len(nodes) > 0 {
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[len(nodes)-1].DtEnd = &midnight
	}
	return
}
Example 5: Scrape
// Scrape scrapes a site for a keyword
func (q *query) Scrape() []*match {
	// Request the URL
	resp, err := http.Get(q.SiteURL)
	if err != nil {
		log.Fatal("Couldn't GET ", q.SiteURL, ": ", err)
	}
	// Parse the contents of the URL
	root, err := html.Parse(resp.Body)
	if err != nil {
		log.Fatal("Unable to parse response: ", err)
	}
	// Grab all the posts and build the match entries
	posts := scrape.FindAll(root, scrape.ByClass("description"))
	matches := make([]*match, len(posts))
	for i, post := range posts {
		matches[i] = &match{
			Title:       scrape.Text(post.FirstChild.NextSibling),
			Description: scrape.Text(post),
			Link:        "http://kijiji.ca" + scrape.Attr(post.FirstChild.NextSibling, "href"),
			Price:       scrape.Text(post.NextSibling.NextSibling),
			Matched:     false,
		}
	}
	return matches
}
Example 6: main
func main() {
	// request and parse the front page
	resp, err := http.Get("https://torguard.net/downloads.php")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		// if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
		if n.DataAtom == atom.Tr {
			return true
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for _, article := range articles {
		if strings.Contains(scrape.Text(article), "DEBIAN x64Bit") {
			fmt.Printf("%s\n", scrape.Text(article))
		}
		// fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Example 7: Search
func Search(s JobSearch) []*Job {
	jobSlice := []*Job{}
	fmt.Println("before loop in search")
	for i := 0; i < 1000; i++ {
		go getPage(urlCh, respCh)
	}
	for s.root = fetchByKeyword(s.Keyword); checkNextPage(s); s.root = fetchNextPage(s.Keyword) {
		fmt.Println("in loop in search")
		jobs := scrape.FindAll(s.root, allJobMatcher)
		fmt.Println(len(jobs))
		for i, job := range jobs {
			fmt.Println(i)
			fmt.Println(job)
			j := fillJobStruct(job)
			jobSlice = append(jobSlice, j)
			fmt.Println(pager)
		}
		fmt.Println("before if")
		if len(jobs) < 50 {
			break
		}
	}
	return jobSlice
}
Example 8: TorrentList
func TorrentList(url string) ([]Torrent, error) {
	// request and parse the front page
	resp, err := http.Get(url)
	if err != nil {
		return make([]Torrent, 0), err
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		return make([]Torrent, 0), err
	}
	var torrents []Torrent
	if content, ok := scrape.Find(root, scrape.ById("searchResult")); ok {
		// define a matcher
		matcher := func(n *html.Node) bool {
			// must check for nil values
			if n.DataAtom == atom.Tr && n.Parent.DataAtom == atom.Tbody {
				return true
			}
			return false
		}
		// grab all result rows and parse them
		trs := scrape.FindAll(content, matcher)
		for _, tr := range trs {
			torrents = append(torrents, ParseRecord(tr))
		}
	}
	return torrents, nil
}
Example 9: indexPage
func indexPage(page string) (ind map[string]int, branches []string, err error) {
	resp, err := http.Get(page)
	if err != nil {
		return
	}
	root, err := html.Parse(resp.Body)
	resp.Body.Close()
	if err != nil {
		return
	}
	content, ok := scrape.Find(root, scrape.ById("bodyContent"))
	if !ok {
		return nil, nil, errors.New("no bodyContent element")
	}
	paragraphs := scrape.FindAll(content, scrape.ByTag(atom.P))
	pageText := ""
	for _, p := range paragraphs {
		pageText += elementInnerText(p) + " "
	}
	words := strings.Fields(strings.ToLower(pageText))
	ind = map[string]int{}
	for _, word := range words {
		ind[word] = ind[word] + 1
	}
	links := findWikiLinks(content)
	branches = make([]string, len(links))
	for i, link := range links {
		branches[i] = "https://en.wikipedia.org" + link
	}
	return
}
Example 10: main
func main() {
	// request and parse the front page
	resp, err := http.Get("https://news.ycombinator.com/")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
			return scrape.Attr(n.Parent.Parent, "class") == "athing"
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for i, article := range articles {
		fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Example 11: Auth
// Auth attempts to access a given URL, then enters the given
// credentials when the URL redirects to a login page.
func (s *Session) Auth(serviceURL, email, password string) error {
	resp, err := s.Get(serviceURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	parsed, err := html.ParseFragment(resp.Body, nil)
	if err != nil || len(parsed) == 0 {
		return err
	}
	root := parsed[0]
	form, ok := scrape.Find(root, scrape.ById("gaia_loginform"))
	if !ok {
		return errors.New("failed to process login page")
	}
	submission := url.Values{}
	for _, input := range scrape.FindAll(form, scrape.ByTag(atom.Input)) {
		submission.Add(getAttribute(input, "name"), getAttribute(input, "value"))
	}
	submission["Email"] = []string{email}
	submission["Passwd"] = []string{password}
	postResp, err := s.PostForm(resp.Request.URL.String(), submission)
	if err != nil {
		return err
	}
	postResp.Body.Close()
	if postResp.Request.Method == "POST" {
		return errors.New("login incorrect")
	}
	return nil
}
Example 12: parseGenericLoginForm
// parseGenericLoginForm takes a login page and parses the first form it finds, treating it as the
// login form.
func parseGenericLoginForm(res *http.Response) (result *loginFormInfo, err error) {
	parsed, err := html.ParseFragment(res.Body, nil)
	if err != nil {
		return
	} else if len(parsed) != 1 {
		return nil, errors.New("wrong number of root elements")
	}
	root := parsed[0]
	var form loginFormInfo
	htmlForm, ok := scrape.Find(root, scrape.ByTag(atom.Form))
	if !ok {
		return nil, errors.New("no form element found")
	}
	if actionStr := getNodeAttribute(htmlForm, "action"); actionStr == "" {
		form.action = res.Request.URL.String()
	} else {
		actionURL, err := url.Parse(actionStr)
		if err != nil {
			return nil, err
		}
		if actionURL.Host == "" {
			actionURL.Host = res.Request.URL.Host
		}
		if actionURL.Scheme == "" {
			actionURL.Scheme = res.Request.URL.Scheme
		}
		if !path.IsAbs(actionURL.Path) {
			actionURL.Path = path.Join(res.Request.URL.Path, actionURL.Path)
		}
		form.action = actionURL.String()
	}
	inputs := scrape.FindAll(root, scrape.ByTag(atom.Input))
	form.otherFields = url.Values{}
	for _, input := range inputs {
		inputName := getNodeAttribute(input, "name")
		switch getNodeAttribute(input, "type") {
		case "text":
			form.usernameField = inputName
		case "password":
			form.passwordField = inputName
		default:
			form.otherFields.Add(inputName, getNodeAttribute(input, "value"))
		}
	}
	if form.usernameField == "" {
		return nil, errors.New("no username field found")
	} else if form.passwordField == "" {
		return nil, errors.New("no password field found")
	}
	return &form, nil
}
Example 13: parseSchedule
// parseSchedule parses the courses from the schedule list view page.
//
// If fetchMoreInfo is true, this will perform a request for each component to find out information
// about it.
func parseSchedule(rootNode *html.Node) ([]Course, error) {
	courseTables := scrape.FindAll(rootNode, scrape.ByClass("PSGROUPBOXWBO"))
	result := make([]Course, 0, len(courseTables))
	for _, classTable := range courseTables {
		println("found course")
		titleElement, ok := scrape.Find(classTable, scrape.ByClass("PAGROUPDIVIDER"))
		if !ok {
			// This will occur at least once, since the filter options are a PSGROUPBOXWBO.
			continue
		}
		infoTables := scrape.FindAll(classTable, scrape.ByClass("PSLEVEL3GRIDNBO"))
		if len(infoTables) != 2 {
			return nil, errors.New("expected exactly 2 info tables but found " +
				strconv.Itoa(len(infoTables)))
		}
		courseInfoTable := infoTables[0]
		course, err := parseCourseInfoTable(courseInfoTable)
		if err != nil {
			return nil, err
		}
		// NOTE: there isn't really a standard way to parse the department/number.
		course.Name = nodeInnerText(titleElement)
		componentsInfoTable := infoTables[1]
		componentMaps, err := tableEntriesAsMaps(componentsInfoTable)
		if err != nil {
			return nil, err
		}
		course.Components = make([]Component, len(componentMaps))
		for i, componentMap := range componentMaps {
			course.Components[i], err = parseComponentInfoMap(componentMap)
			if err != nil {
				return nil, err
			}
		}
		result = append(result, course)
	}
	return result, nil
}
Example 14: getLink
func getLink(r *html.Node) (s string) {
	buttons := scrape.FindAll(r, scrape.ByClass("downloadbtn"))
	for _, button := range buttons {
		// the download URL is embedded in the button's onclick handler,
		// e.g. window.location='<url>', so split on '=' and strip the quotes
		windowLocation := scrape.Attr(button, "onclick")
		link := strings.Split(windowLocation, "=")[1]
		return strings.Trim(link, "'")
	}
	return
}
Example 15: parseHistoryItems
func parseHistoryItems(rootNode *html.Node) []*YoutubeVideoInfo {
	videoElements := scrape.FindAll(rootNode, scrape.ByClass("yt-lockup-video"))
	res := make([]*YoutubeVideoInfo, len(videoElements))
	for i, element := range videoElements {
		res[i] = parseVideoInfo(element)
	}
	return res
}