当前位置: 首页>>代码示例>>Golang>>正文


Golang goquery.NewDocumentFromReader函数代码示例

本文整理汇总了Golang中github.com/PuerkitoBio/goquery.NewDocumentFromReader函数的典型用法代码示例。如果您正苦于以下问题:Golang NewDocumentFromReader函数的具体用法?Golang NewDocumentFromReader怎么用?Golang NewDocumentFromReader使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了NewDocumentFromReader函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: crawl

func crawl(exe_dir string, db *sql.DB) {
	res, _ := http.PostForm("http://shirodanuki.cs.shinshu-u.ac.jp/cgi-bin/olts/sys/exercise.cgi",
		url.Values{
			"name":    {"hoge"},
			"id":      {"hogehoge"},
			"email":   {""},
			"exe_dir": {exe_dir},
			"chapter": {""},
			"url":     {"http://webmizar.cs.shinshu-u.ac.jp/learn/infomath/"},
		},
	)
	defer res.Body.Close()
	utf8 := euc2utf8(res.Body)
	doc, _ := goquery.NewDocumentFromReader(utf8)
	html, _ := doc.Find("blockquote").Html()
	question := strings.TrimSpace(html)
	tmp, _ := doc.Find("input[name=tmp]").Attr("value")
	res, _ = http.PostForm("http://shirodanuki.cs.shinshu-u.ac.jp/cgi-bin/olts/sys/answer.cgi",
		url.Values{
			"answer":  {""},
			"subject": {""},
			"chapter": {""},
			"url":     {"http://webmizar.cs.shinshu-u.ac.jp/learn/infomath/"},
			"tmp":     {tmp},
		},
	)
	defer res.Body.Close()
	utf8 = euc2utf8(res.Body)
	doc, _ = goquery.NewDocumentFromReader(utf8)
	answer := strings.TrimSpace(doc.Find("blockquote tt b").Text())
	stmt, _ := db.Prepare("INSERT INTO `cai` (`exe_dir`, `question`, `answer`) VALUES (?, ?, ?)")
	stmt.Exec(exe_dir, question, answer)
}
开发者ID:bgpat,项目名称:autocai,代码行数:33,代码来源:crawl.go

示例2: ExtractNews

// ExtractNews converts RSS items into NewStruct values.
//
// For every item it gathers the Href of each link, picks the HTML body
// (Content.Text when Content is set, otherwise Description), collects the src
// of every <img> and reduces the body to plain text. The extracted text is
// then parsed once more as HTML, so any markup that is still present after the
// first pass is handled the same way. Finally the title and text are passed
// through analyzeTitleDescrip; the item's Title is overwritten with the result.
func ExtractNews(newitems []*rss.Item) []NewStruct {
	var out []NewStruct

	// collectImages appends the src attribute of every <img> in doc to dst.
	collectImages := func(doc *goquery.Document, dst []string) []string {
		doc.Find("img").Each(func(_ int, img *goquery.Selection) {
			if src, found := img.Attr("src"); found {
				dst = append(dst, src)
			}
		})
		return dst
	}

	for _, item := range newitems {
		var (
			hrefs  []string
			images []string
			text   string
		)

		// Ranging over a nil Links slice is a no-op.
		for _, link := range item.Links {
			hrefs = append(hrefs, link.Href)
		}

		body := item.Description
		if item.Content != nil {
			body = item.Content.Text
		}

		if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
			images = collectImages(doc, images)
			text = doc.Text()

			// Second pass over the already extracted text.
			if inner, innerErr := goquery.NewDocumentFromReader(strings.NewReader(text)); innerErr == nil {
				images = collectImages(inner, images)
				text = inner.Text()
			}
		}

		item.Title, text = analyzeTitleDescrip(item.Title, text)

		out = append(out, NewStruct{"", images, item.Title, text, item.PubDate, item.Author.Name, "", hrefs})
	}
	return out
}
开发者ID:3ehzad,项目名称:go-bots,代码行数:61,代码来源:news.go

示例3: load

// load returns the parsed document for url.
//
// If the local file that c.urlToLocal maps the URL to can be opened, the
// document is parsed from that file. Otherwise load blocks until c.ticker
// fires (presumably to rate-limit network access — confirm with the ticker's
// setup), downloads the page, writes the raw bytes to the local path with mode
// 0644 and parses the downloaded bytes.
func (c *webCache) load(url string) (*goquery.Document, error) {
	cachePath := c.urlToLocal(url)

	cached, openErr := os.Open(cachePath)
	if openErr == nil {
		defer cached.Close()
		return goquery.NewDocumentFromReader(cached)
	}

	// Not available locally: wait for the next tick before fetching.
	<-c.ticker.C

	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var payload bytes.Buffer
	if _, err = payload.ReadFrom(resp.Body); err != nil {
		return nil, err
	}

	// Persist before parsing; parsing consumes the buffer.
	if err = ioutil.WriteFile(cachePath, payload.Bytes(), 0644); err != nil {
		return nil, err
	}

	return goquery.NewDocumentFromReader(&payload)
}
开发者ID:postfix,项目名称:search,代码行数:27,代码来源:webcache.go

示例4: Preprocess

// Preprocess prepares the crawler's HTML for content extraction and returns it
// as a parsed document.
//
// When c.RawHTML is empty it is fetched from c.url first; if it is still empty
// afterwards, Preprocess returns (nil, nil). The markup is passed through
// addSpacesBetweenTags and parsed. If GetCharset reports a charset other than
// "" or "UTF-8", c.RawHTML is re-encoded with UTF8encode and parsed again,
// because the net/html parser and goquery require UTF-8 input.
func (c *Crawler) Preprocess() (*goquery.Document, error) {
	if c.RawHTML == "" {
		c.RawHTML = c.fetchHTML(c.url, c.config.timeout)
		if c.RawHTML == "" {
			return nil, nil
		}
	}

	c.RawHTML = c.addSpacesBetweenTags(c.RawHTML)

	document, err := goquery.NewDocumentFromReader(strings.NewReader(c.RawHTML))
	if err != nil {
		return nil, err
	}

	charset := c.GetCharset(document)
	if charset == "" || charset == "UTF-8" {
		// Nothing to convert.
		return document, nil
	}

	// Re-encode and parse again so the document is built from UTF-8 data.
	c.RawHTML = UTF8encode(c.RawHTML, charset)
	document, err = goquery.NewDocumentFromReader(strings.NewReader(c.RawHTML))
	if err != nil {
		return nil, err
	}
	return document, nil
}
开发者ID:datasift,项目名称:GoOse,代码行数:34,代码来源:crawler.go

示例5: TestPostAfterUpdating

func TestPostAfterUpdating(t *testing.T) {

	Convey("the post should not be displayed on frontpage", t, func() {
		var recorder = httptest.NewRecorder()
		request, _ := http.NewRequest("GET", "/", nil)
		server.ServeHTTP(recorder, request)
		So(recorder.Code, ShouldEqual, 200)
		doc, _ := goquery.NewDocumentFromReader(recorder.Body)
		sel := doc.Find("article h1").Text()
		So(sel, ShouldBeEmpty)
	})

	Convey("update should return HTTP 200", t, func() {
		var recorder = httptest.NewRecorder()
		request, _ := http.NewRequest("GET", fmt.Sprintf("/api/post/%s/publish", post.Slug), nil)
		cookie := &http.Cookie{Name: "id", Value: sessioncookie}
		request.AddCookie(cookie)
		server.ServeHTTP(recorder, request)
		So(recorder.Body.String(), ShouldEqual, `{"success":"Post published"}`)
		So(recorder.Code, ShouldEqual, 200)
	})

	Convey("after updating, post should be displayed on frontpage", t, func() {
		var recorder = httptest.NewRecorder()
		request, _ := http.NewRequest("GET", "/", nil)
		server.ServeHTTP(recorder, request)
		So(recorder.Code, ShouldEqual, 200)
		doc, _ := goquery.NewDocumentFromReader(recorder.Body)
		sel := doc.Find("article .title").Text()
		So(sel, ShouldEqual, post.Title)
	})

	Convey("the post should not be displayed trough API", t, func() {
		var recorder = httptest.NewRecorder()
		request, _ := http.NewRequest("GET", "/api/posts", nil)
		server.ServeHTTP(recorder, request)
		So(recorder.Code, ShouldEqual, 200)
		var posts []Post
		json.Unmarshal(recorder.Body.Bytes(), &posts)
		for i, p := range posts {
			So(i, ShouldEqual, 0)
			So(post.ID, ShouldEqual, p.ID)
			So(post.Title, ShouldEqual, p.Title)
			So(post.Content, ShouldEqual, p.Content)
			So(post.Markdown, ShouldEqual, p.Markdown)
			So(post.Slug, ShouldEqual, p.Slug)
			So(post.Author, ShouldEqual, p.Author)
			So(post.Created, ShouldBeGreaterThan, int64(1400000000))
			if post.Updated != post.Created {
				So(post.Updated, ShouldAlmostEqual, post.Created, 5)
			}
			So(post.Excerpt, ShouldEqual, p.Excerpt)
		}
	})
}
开发者ID:9uuso,项目名称:vertigo,代码行数:55,代码来源:main_test.go

示例6: Login

// Login authenticates with ShopKeep.
//
// It loads the login page from d.site, extracts the authenticity token with
// authToken (stored in d.authenticity_token), posts the credentials to
// d.site+"/session" and inspects the returned page with loginStatus.
//
// It returns a non-nil error if any request fails, a response cannot be
// parsed, no authenticity token is found, or the credentials are rejected.
// Errors caused by a lower layer include that layer's message.
func (d *Downloader) Login() error {
	// Get the login page.
	lp, err := d.client.Get(d.site)
	if err != nil {
		return errors.New("Could not get: " + d.site + ": " + err.Error())
	}
	defer lp.Body.Close()

	// Pull the login page into a goquery.Document.
	loginPage, err := goquery.NewDocumentFromReader(lp.Body)
	if err != nil {
		return errors.New("Failed to login: Could not read response body: " + err.Error())
	}

	// Determine what the authenticity token is.
	at := authToken(loginPage)
	if at == "" {
		return errors.New("Failed to find authenticity_token.")
	}
	d.authenticity_token = at
	log.Println("Found authenticity_token: " + d.authenticity_token)

	// Get the homepage by posting login credentials.
	hp, err := d.client.PostForm(d.site+"/session",
		url.Values{
			"authenticity_token": {d.authenticity_token},
			"utf8":               {"✓"},
			"login":              {d.username},
			"password":           {d.password},
			"commit":             {"Sign in"},
		})
	if err != nil {
		return errors.New("Failed POSTing login form: " + err.Error())
	}
	defer hp.Body.Close()

	// Pull the homepage response into a goquery.Document.
	homePage, err := goquery.NewDocumentFromReader(hp.Body)
	if err != nil {
		return errors.New("Failed to access homepage: " + err.Error())
	}

	// Check the login status.
	// Can't simply check response status (ShopKeep returns 200 whether login was successful or not).
	// Can't check location header as it is not included in the response.
	if !loginStatus(homePage) {
		return errors.New("Invalid username or password")
	}

	log.Println("Login successful!")

	return nil
}
开发者ID:JFMarket,项目名称:report-cacher,代码行数:55,代码来源:download.go

示例7: Etl

// Etl scrapes each activity page in links and sends every activity that is
// open for sign-up to the package-level activities channel.
//
// One goroutine is started per link, each working on a copy of the user. Etl
// returns immediately and does not wait for the goroutines. For every page the
// goroutine extracts the heading, the participant count ("N人参加") and the
// date ("M月D日"). If the element following the join icon contains "我要报名",
// it fetches the linked page and, when that page contains a <form> with an
// action attribute, emits an Activity whose last field is domain + action.
//
// Failures are printed and end processing of that link only; nothing is
// returned to the caller.
func (user *User) Etl(links []string) {
	countRe := regexp.MustCompile(`(\d+)人参加`)
	dateRe := regexp.MustCompile(`0?(\d+)月0?(\d+)日`)
	for _, link := range links {
		go func(u User, link string) {
			fmt.Println("Etl <-", link)
			response, err := u.RequestWithCookie(link, "GET", nil)
			if err != nil {
				fmt.Println(err)
				return
			}
			defer response.Body.Close()

			page, err := goquery.NewDocumentFromReader(response.Body)
			if err != nil {
				fmt.Printf("error: %s\n", err)
				return
			}

			var memberCount int
			var activityDate time.Time
			body := page.Find("div[class='tn-box-content tn-widget-content tn-corner-all']")
			subject := page.Find("h1[class='tn-helper-reset tn-text-heading']").Text()

			body.Find("span[class='tn-action']").Find("a").Each(func(i int, s *goquery.Selection) {
				m := countRe.FindStringSubmatch(s.Text())
				if len(m) <= 1 {
					return
				}
				cnt, err := strconv.Atoi(m[1])
				if err != nil {
					// Only reachable on overflow; report it instead of
					// panicking, which would take down the whole process.
					fmt.Printf("error: %s\n", err)
					return
				}
				memberCount = cnt
			})

			if dateText := body.Find("span[class='tn-date']").Text(); dateText != "" {
				// FindStringSubmatch returns nil when the text holds no date,
				// so the match must be checked before it is indexed.
				if m := dateRe.FindStringSubmatch(dateText); len(m) == 3 {
					// The captured groups carry no leading zero, so the
					// layout uses the non-padded month/day forms, which accept
					// one or two digits. The year is fixed to 2014, as before.
					// An unparseable date leaves activityDate at its zero value.
					activityDate, _ = time.Parse("2006年1月2日", "2014年"+m[1]+"月"+m[2]+"日")
				}
			}

			joinLink := body.Find("span[class='tn-icon-join tn-icon']").Next()
			joinAddr, _ := joinLink.Attr("href")
			if !strings.Contains(joinLink.Text(), "我要报名") {
				return
			}

			formResponse, err := u.RequestWithCookie(domain+joinAddr, "GET", nil)
			if err != nil {
				fmt.Println(err)
				return
			}
			defer formResponse.Body.Close()

			formPage, err := goquery.NewDocumentFromReader(formResponse.Body)
			if err != nil {
				fmt.Printf("error: %s\n", err)
				return
			}

			if formAddr, formExists := formPage.Find("form").Attr("action"); formExists {
				activitie := Activity{subject, activityDate, activityDate.Weekday(), memberCount, domain + formAddr}
				fmt.Println("Activitys <-", activitie)
				activities <- activitie
			}
		}(*user, link)
	}
}
开发者ID:season-py,项目名称:golang.test,代码行数:48,代码来源:healthclub.go

示例8: parse

// parse extracts one record per data row of the "#table1" table in the HTML
// document s.
//
// Only rows with exactly seven child cells are considered. For each such row
// the result contains "name_dt_b_w_id", where dt, name, b and w are the
// trimmed texts of cells 1 to 4 (underscores removed from name, b and w) and
// id is the part of the link's href in cell 2 that follows the first "=".
// Rows whose cell 2 has no link, or whose href contains no "=", are skipped.
//
// The returned slice is never nil. If s cannot be parsed the process is
// terminated via log.Fatalln.
func parse(s string) []string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(s))
	if err != nil {
		log.Fatalln("pare error", err)
	}

	result := []string{}
	f := func(i int, q *goquery.Selection) {
		q = q.Children()

		if q.Length() != 7 {
			return
		}

		dt := strings.TrimSpace(q.Eq(1).Text())

		name := strings.TrimSpace(q.Eq(2).Text())
		name = strings.Replace(name, "_", "", -1)

		href, _ := q.Eq(2).Find("a").Attr("href")
		parts := strings.Split(strings.TrimSpace(href), "=")
		if len(parts) < 2 {
			// No link or no "=" in it (e.g. a header row): there is no id to
			// report, and indexing parts[1] would panic.
			return
		}
		id := parts[1]

		b := strings.TrimSpace(q.Eq(3).Text())
		b = strings.Replace(b, "_", "", -1)
		w := strings.TrimSpace(q.Eq(4).Text())
		w = strings.Replace(w, "_", "", -1)

		result = append(result, fmt.Sprintf("%v_%v_%v_%v_%v", name, dt, b, w, id))
	}

	doc.Find("#table1 tr").Each(f)
	return result
}
开发者ID:sugeladi,项目名称:goWeb,代码行数:33,代码来源:page.go

示例9: downloadHtml

// downloadHtml downloads the page for req via downloadFile and attaches a
// parsed HTML document to it.
//
// If the download did not succeed the page is returned as is. Otherwise the
// body is parsed with goquery and re-serialised with doc.Html(); on success
// the page receives the serialised body, the document and status
// (false, ""). If parsing or serialising fails, the error is logged and the
// page status is set to (true, <error text>).
func (this *HttpDownloader) downloadHtml(p *page.Page, req *request.Request) *page.Page {
	p, raw := this.downloadFile(p, req)
	if !p.IsSucc() {
		return p
	}

	// fail logs err, records it on the page and hands the page back.
	fail := func(err error) *page.Page {
		mlog.LogInst().LogError(err.Error())
		p.SetStatus(true, err.Error())
		return p
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(raw)))
	if err != nil {
		return fail(err)
	}

	html, err := doc.Html()
	if err != nil {
		return fail(err)
	}

	p.SetBodyStr(html).SetHtmlParser(doc).SetStatus(false, "")
	return p
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:28,代码来源:downloader_http.go

示例10: getGbkDoc

// getGbkDoc downloads url with client, decodes the GBK-encoded response body
// and returns it as a parsed document.
//
// A failed request or a failed parse is retried immediately, up to three
// retries (four attempts in total, shared between both kinds of failure).
// When all attempts fail, the last error is wrapped with me together with the
// name of the step that failed. Each attempt closes its own response body
// before the next attempt starts.
func getGbkDoc(client *http.Client, url string) (*goquery.Document, error) {
	const maxRetries = 3

	// fetch performs a single attempt. On failure it also reports which step
	// failed, so that only the final error needs to be wrapped.
	fetch := func() (*goquery.Document, string, error) {
		resp, err := client.Get(url)
		if err != nil {
			return nil, "get", err
		}
		defer resp.Body.Close()

		r := transform.NewReader(resp.Body, simplifiedchinese.GBK.NewDecoder())
		doc, err := goquery.NewDocumentFromReader(r)
		if err != nil {
			return nil, "new document from response", err
		}
		return doc, "", nil
	}

	var (
		step string
		err  error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		var doc *goquery.Document
		if doc, step, err = fetch(); err == nil {
			return doc, nil
		}
	}
	return nil, me(err, step)
}
开发者ID:reusee,项目名称:jd-analysis,代码行数:25,代码来源:utils.go

示例11: MakeDoubanSpider

// MakeDoubanSpider builds the "douban_img_spider" spider, which starts at
// http://movie.douban.com/ and registers a single parse function under
// spiders.BASE_PARSE_NAME.
//
// The parse function stops (returning nil, nil) once the request depth exceeds
// 10 or when the page has no "#page .n" element. Otherwise it takes the href
// of the last such element, prefixes it with "http://www.baidu.com" and
// returns one follow-up GET request for that address. Errors from parsing the
// body or from building the request are returned to the caller.
func MakeDoubanSpider() *spiders.Spider {
	spider := &spiders.Spider{}
	spider.Name = "douban_img_spider"
	spider.StartUrls = []string{"http://movie.douban.com/"}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		if response.Request.Depth > 10 {
			return nil, nil
		}
		doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
		if err != nil {
			return nil, err
		}
		nodes := doc.Find("#page .n").Nodes
		if len(nodes) == 0 {
			// No pagination element: nothing to follow, and not an error.
			return nil, nil
		}

		// The last matching node is treated as the "next page" link.
		var nextPageLink string
		for _, attr := range nodes[len(nodes)-1].Attr {
			if attr.Key == "href" {
				nextPageLink = attr.Val
				break
			}
		}

		nextPage := "http://www.baidu.com" + nextPageLink
		request, err := http.NewRequest("GET", nextPage, spider.Name, spiders.BASE_PARSE_NAME, nil, 0)
		if err != nil {
			// Do not hand back a request that could not be built.
			return nil, err
		}
		return []*http.Request{request}, nil
	}
	return spider
}
开发者ID:ROOT005,项目名称:ants-go,代码行数:34,代码来源:douban_img_spider.go

示例12: firstURLFromHTML

// firstURLFromHTML returns the href of the first <a> element in body.
//
// Only the first anchor of the document is examined. If it has no href, or
// its href contains "mailto:", no link is reported and later anchors are not
// considered. An empty body yields (nil, nil). When a link is found it is
// logged through con.Log and returned as a one-element slice.
func firstURLFromHTML(con *data.Context, body string) ([]string, error) {
	if body == "" {
		return nil, nil
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return nil, err
	}

	// Attr reports exists == false for an empty selection, so a document
	// without anchors falls through to the "no link" result as well.
	href, exists := doc.Find("a").First().Attr("href")
	if !exists || strings.Contains(href, "mailto:") {
		return nil, nil
	}

	con.Log.Infof("HTML found %v", href)
	return []string{href}, nil
}
开发者ID:koffeinsource,项目名称:kaffeeshare,代码行数:32,代码来源:URLsFromText.go

示例13: getPageTitle

// getPageTitle fetches url with the SUFRUserAgent User-Agent header and
// returns the text of the page's <title> element.
//
// If building the request, performing it, or parsing the response fails, the
// url itself is returned together with the error.
func getPageTitle(url string) (string, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return url, err
	}
	req.Header.Set("User-Agent", SUFRUserAgent)

	res, err := (&http.Client{}).Do(req)
	if err != nil {
		return url, err
	}
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return url, err
	}

	return doc.Find("title").Text(), nil
}
开发者ID:kyleterry,项目名称:sufr,代码行数:26,代码来源:app.go

示例14: Robtex

// Robtex looks up ip at robtex.com and returns the task name ("robtex.com"),
// the hostnames found, and any error from fetching or parsing the page.
//
// Hostnames are read from the first column of the "#x_summary" table. Entries
// containing "*", the entry ".", and entries that are plain integers are
// skipped.
func Robtex(ip string) (string, Results, error) {
	task := "robtex.com"
	results := Results{}

	resp, err := http.Get("http://www.robtex.com/ip/" + ip + ".html")
	if err != nil {
		return task, results, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return task, results, err
	}

	cells := doc.Selection.Find("#x_summary td:nth-child(1)")
	cells.Each(func(_ int, cell *goquery.Selection) {
		hostname := cell.Text()
		switch {
		case strings.Contains(hostname, "*"):
			return
		case hostname == ".":
			return
		}
		// A value that converts cleanly to an int is not a hostname.
		if _, convErr := strconv.Atoi(hostname); convErr == nil {
			return
		}
		results = append(results, Result{Source: task, IP: ip, Hostname: hostname})
	})

	return task, results, nil
}
开发者ID:intfrr,项目名称:blacksheepwall,代码行数:28,代码来源:robtex.go

示例15: httpRequest

// httpRequest performs req with the browser's client and makes the response
// the browser's current state.
//
// preSend is called before the request is made. The response body is read
// completely into bow.body and parsed into a document. On success the previous
// state is pushed onto bow.history, bow.state is replaced by a new history
// state built from the request, response and document, and postSend is
// called. Any error from the request, the read or the parse is returned and
// leaves history and state untouched.
func (bow *Browser) httpRequest(req *http.Request) error {
	bow.preSend()

	resp, err := bow.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if bow.body, err = ioutil.ReadAll(resp.Body); err != nil {
		return err
	}

	dom, err := goquery.NewDocumentFromReader(bytes.NewBuffer(bow.body))
	if err != nil {
		return err
	}

	// Only a fully parsed response replaces the current state.
	bow.history.Push(bow.state)
	bow.state = jar.NewHistoryState(req, resp, dom)
	bow.postSend()

	return nil
}
开发者ID:emgfc,项目名称:surf,代码行数:26,代码来源:browser.go


注:本文中的github.com/PuerkitoBio/goquery.NewDocumentFromReader函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。