This article collects typical usage examples of the FindAll function from the Golang package github.com/yhat/scrape. If you are wondering how the FindAll function is used in practice, how to call it, or what real-world examples look like, the curated code samples below should help.
The following presents 15 code examples of the FindAll function, sorted by popularity by default.
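Before the examples, here is a minimal, self-contained sketch of the basic call pattern: fetch a page, parse it with golang.org/x/net/html, and hand the root node plus a matcher function to scrape.FindAll, which returns every node the matcher accepts. The URL and the anchor-tag matcher here are illustrative placeholders only and are not taken from any of the examples below.

package main

import (
	"fmt"
	"net/http"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

func main() {
	// placeholder URL; any HTML page works
	resp, err := http.Get("https://example.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// FindAll walks the whole tree and returns every node for which the matcher returns true.
	links := scrape.FindAll(root, func(n *html.Node) bool { return n.DataAtom == atom.A })
	for _, a := range links {
		fmt.Println(scrape.Text(a), scrape.Attr(a, "href"))
	}
}

The examples that follow all use this same pattern; they differ only in how the matcher is written and in how the matched nodes are post-processed.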
Example 1: parseBroadcastFromHtmlNode
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
Example 2: TextWithBrFromNodeSet
func TextWithBrFromNodeSet(nodes []*html.Node) string {
	parts := make([]string, len(nodes))
	for i, node := range nodes {
		for _, tag := range []atom.Atom{atom.Br, atom.Tr} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		for _, tag := range []atom.Atom{atom.P, atom.Div} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker + lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		tmp := []string{}
		for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return html.TextNode == n.Type }) {
			tmp = append(tmp, n.Data)
		}
		parts[i] = strings.Join(tmp, "")
	}
	ret := strings.Join(parts, lineFeedMarker+lineFeedMarker)
	ret = NormaliseWhiteSpace(ret)
	ret = strings.Replace(ret, lineFeedMarker, "\n", -1)
	re := regexp.MustCompile("[ ]*(\\s)[ ]*") // collapse whitespace, keep \n
	ret = re.ReplaceAllString(ret, "$1")      // collapse whitespace (not the \n\n however)
	{
		re := regexp.MustCompile("\\s*\\n\\s*\\n\\s*") // collapse linefeeds
		ret = re.ReplaceAllString(ret, "\n\n")
	}
	return strings.TrimSpace(ret)
}
Example 3: parseBroadcastURLsNode
func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5
	for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
		year, month, day_, err := timeForH4(scrape.Text(h4), &day.Time)
		if nil != err {
			panic(err)
		}
		// fmt.Printf("%d-%d-%d %s\n", year, month, day, err)
		for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				panic(errors.New("Couldn't parse <a>"))
			}
			ur, _ := url.Parse(scrape.Attr(a, "href"))
			hour := r.MustParseInt(m[1])
			dayOffset := 0
			if hour < closeDownHour {
				dayOffset = 1
			}
			// fmt.Printf("%s %s\n", b.r.TimeURL.String(), b.Title)
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					Time:    time.Date(year, month, day_+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ur),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return
}
Example 4: parseBroadcastsFromNode
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
	ret = make([]*r.Broadcast, len(nodes))
	for index, tim := range nodes {
		// prepare response
		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		// set start time
		{
			div_t := strings.TrimSpace(scrape.Text(tim))
			if 5 != len(div_t) {
				continue
			}
			hour := r.MustParseInt(div_t[0:2])
			minute := r.MustParseInt(div_t[3:5])
			bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
			if index > 0 {
				ret[index-1].DtEnd = &bc.Time
			}
		}
		for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
			return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
		}) {
			// Title
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			href := scrape.Attr(tit, "href")
			if "" != href {
				u, _ := url.Parse(href)
				bc.Subject = day.Source.ResolveReference(u)
			}
			desc_node := tit.Parent
			desc_node.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{desc_node})
			bc.Description = &description
			// fmt.Fprintf(os.Stderr, "\n")
		}
		ret[index] = &bc
	}
	// fmt.Fprintf(os.Stderr, "len(ret) = %d '%s'\n", len(ret), day.Source.String())
	if len(nodes) > 0 {
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[len(nodes)-1].DtEnd = &midnight
	}
	return
}
Example 5: Scrape
// Scrape scrapes a site for a keyword
func (q *query) Scrape() []*match {
	// Request the URL
	resp, err := http.Get(q.SiteURL)
	if err != nil {
		log.Fatal("Couldn't GET ", q.SiteURL, ": ", err)
	}
	// Parse the contents of the URL
	root, err := html.Parse(resp.Body)
	if err != nil {
		log.Fatal("Unable to parse response: ", err)
	}
	// Grab all the posts and build the match entries
	posts := scrape.FindAll(root, scrape.ByClass("description"))
	matches := make([]*match, len(posts))
	for i, post := range posts {
		matches[i] = &match{
			Title:       scrape.Text(post.FirstChild.NextSibling),
			Description: scrape.Text(post),
			Link:        "http://kijiji.ca" + scrape.Attr(post.FirstChild.NextSibling, "href"),
			Price:       scrape.Text(post.NextSibling.NextSibling),
			Matched:     false,
		}
	}
	return matches
}
Example 6: main
func main() {
	// request and parse the front page
	resp, err := http.Get("https://torguard.net/downloads.php")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		// if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
		if n.DataAtom == atom.Tr {
			return true
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for _, article := range articles {
		if strings.Contains(scrape.Text(article), "DEBIAN x64Bit") {
			fmt.Printf("%s\n", scrape.Text(article))
		}
		// fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Example 7: Search
func Search(s JobSearch) []*Job {
	jobSlice := []*Job{}
	fmt.Println("before loop in search")
	for i := 0; i < 1000; i++ {
		go getPage(urlCh, respCh)
	}
	for s.root = fetchByKeyword(s.Keyword); checkNextPage(s); s.root = fetchNextPage(s.Keyword) {
		fmt.Println("in loop in search")
		jobs := scrape.FindAll(s.root, allJobMatcher)
		fmt.Println(len(jobs))
		for i, job := range jobs {
			fmt.Println(i)
			fmt.Println(job)
			j := fillJobStruct(job)
			jobSlice = append(jobSlice, j)
			fmt.Println(pager)
		}
		fmt.Println("before if")
		if len(jobs) < 50 {
			break
		}
	}
	return jobSlice
}
Example 8: TorrentList
func TorrentList(url string) ([]Torrent, error) {
	// request and parse the front page
	resp, err := http.Get(url)
	if err != nil {
		return make([]Torrent, 0), err
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		return make([]Torrent, 0), err
	}
	var torrents []Torrent
	if content, ok := scrape.Find(root, scrape.ById("searchResult")); ok {
		// define a matcher
		matcher := func(n *html.Node) bool {
			// must check for nil values
			if n.DataAtom == atom.Tr && n.Parent.DataAtom == atom.Tbody {
				return true
			}
			return false
		}
		// grab all result rows and parse them
		trs := scrape.FindAll(content, matcher)
		for _, tr := range trs {
			torrents = append(torrents, ParseRecord(tr))
		}
	}
	return torrents, nil
}
Example 9: indexPage
func indexPage(page string) (ind map[string]int, branches []string, err error) {
	resp, err := http.Get(page)
	if err != nil {
		return
	}
	root, err := html.Parse(resp.Body)
	resp.Body.Close()
	if err != nil {
		return
	}
	content, ok := scrape.Find(root, scrape.ById("bodyContent"))
	if !ok {
		return nil, nil, errors.New("no bodyContent element")
	}
	paragraphs := scrape.FindAll(content, scrape.ByTag(atom.P))
	pageText := ""
	for _, p := range paragraphs {
		pageText += elementInnerText(p) + " "
	}
	words := strings.Fields(strings.ToLower(pageText))
	ind = map[string]int{}
	for _, word := range words {
		ind[word] = ind[word] + 1
	}
	links := findWikiLinks(content)
	branches = make([]string, len(links))
	for i, link := range links {
		branches[i] = "https://en.wikipedia.org" + link
	}
	return
}
Example 10: main
func main() {
	// request and parse the front page
	resp, err := http.Get("https://news.ycombinator.com/")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
			return scrape.Attr(n.Parent.Parent, "class") == "athing"
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for i, article := range articles {
		fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Example 11: Auth
// Auth attempts to access a given URL, then enters the given
// credentials when the URL redirects to a login page.
func (s *Session) Auth(serviceURL, email, password string) error {
	resp, err := s.Get(serviceURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	parsed, err := html.ParseFragment(resp.Body, nil)
	if err != nil || len(parsed) == 0 {
		return err
	}
	root := parsed[0]
	form, ok := scrape.Find(root, scrape.ById("gaia_loginform"))
	if !ok {
		return errors.New("failed to process login page")
	}
	submission := url.Values{}
	for _, input := range scrape.FindAll(form, scrape.ByTag(atom.Input)) {
		submission.Add(getAttribute(input, "name"), getAttribute(input, "value"))
	}
	submission["Email"] = []string{email}
	submission["Passwd"] = []string{password}
	postResp, err := s.PostForm(resp.Request.URL.String(), submission)
	if err != nil {
		return err
	}
	postResp.Body.Close()
	if postResp.Request.Method == "POST" {
		return errors.New("login incorrect")
	}
	return nil
}
Example 12: parseGenericLoginForm
// parseGenericLoginForm takes a login page and parses the first form it finds, treating it as the
// login form.
func parseGenericLoginForm(res *http.Response) (result *loginFormInfo, err error) {
	parsed, err := html.ParseFragment(res.Body, nil)
	if err != nil {
		return
	} else if len(parsed) != 1 {
		return nil, errors.New("wrong number of root elements")
	}
	root := parsed[0]
	var form loginFormInfo
	htmlForm, ok := scrape.Find(root, scrape.ByTag(atom.Form))
	if !ok {
		return nil, errors.New("no form element found")
	}
	if actionStr := getNodeAttribute(htmlForm, "action"); actionStr == "" {
		form.action = res.Request.URL.String()
	} else {
		actionURL, err := url.Parse(actionStr)
		if err != nil {
			return nil, err
		}
		if actionURL.Host == "" {
			actionURL.Host = res.Request.URL.Host
		}
		if actionURL.Scheme == "" {
			actionURL.Scheme = res.Request.URL.Scheme
		}
		if !path.IsAbs(actionURL.Path) {
			actionURL.Path = path.Join(res.Request.URL.Path, actionURL.Path)
		}
		form.action = actionURL.String()
	}
	inputs := scrape.FindAll(root, scrape.ByTag(atom.Input))
	form.otherFields = url.Values{}
	for _, input := range inputs {
		inputName := getNodeAttribute(input, "name")
		switch getNodeAttribute(input, "type") {
		case "text":
			form.usernameField = inputName
		case "password":
			form.passwordField = inputName
		default:
			form.otherFields.Add(inputName, getNodeAttribute(input, "value"))
		}
	}
	if form.usernameField == "" {
		return nil, errors.New("no username field found")
	} else if form.passwordField == "" {
		return nil, errors.New("no password field found")
	}
	return &form, nil
}
Example 13: parseSchedule
// parseSchedule parses the courses from the schedule list view page.
//
// If fetchMoreInfo is true, this will perform a request for each component to find out information
// about it.
func parseSchedule(rootNode *html.Node) ([]Course, error) {
	courseTables := scrape.FindAll(rootNode, scrape.ByClass("PSGROUPBOXWBO"))
	result := make([]Course, 0, len(courseTables))
	for _, classTable := range courseTables {
		println("found course")
		titleElement, ok := scrape.Find(classTable, scrape.ByClass("PAGROUPDIVIDER"))
		if !ok {
			// This will occur at least once, since the filter options are a PSGROUPBOXWBO.
			continue
		}
		infoTables := scrape.FindAll(classTable, scrape.ByClass("PSLEVEL3GRIDNBO"))
		if len(infoTables) != 2 {
			return nil, errors.New("expected exactly 2 info tables but found " +
				strconv.Itoa(len(infoTables)))
		}
		courseInfoTable := infoTables[0]
		course, err := parseCourseInfoTable(courseInfoTable)
		if err != nil {
			return nil, err
		}
		// NOTE: there isn't really a standard way to parse the department/number.
		course.Name = nodeInnerText(titleElement)
		componentsInfoTable := infoTables[1]
		componentMaps, err := tableEntriesAsMaps(componentsInfoTable)
		if err != nil {
			return nil, err
		}
		course.Components = make([]Component, len(componentMaps))
		for i, componentMap := range componentMaps {
			course.Components[i], err = parseComponentInfoMap(componentMap)
			if err != nil {
				return nil, err
			}
		}
		result = append(result, course)
	}
	return result, nil
}
Example 14: getLink
func getLink(r *html.Node) (s string) {
	buttons := scrape.FindAll(r, scrape.ByClass("downloadbtn"))
	for _, button := range buttons {
		// the download URL is embedded in the button's onclick handler,
		// e.g. window.location='<url>', so split on '=' and strip the quotes
		windowLocation := scrape.Attr(button, "onclick")
		link := strings.Split(windowLocation, "=")[1]
		return strings.Trim(link, "'")
	}
	return
}
Example 15: parseHistoryItems
func parseHistoryItems(rootNode *html.Node) []*YoutubeVideoInfo {
	videoElements := scrape.FindAll(rootNode, scrape.ByClass("yt-lockup-video"))
	res := make([]*YoutubeVideoInfo, len(videoElements))
	for i, element := range videoElements {
		res[i] = parseVideoInfo(element)
	}
	return res
}