本文整理汇总了Golang中github.com/PuerkitoBio/goquery.NewDocumentFromReader函数的典型用法代码示例。如果您正苦于以下问题:Golang NewDocumentFromReader函数的具体用法?Golang NewDocumentFromReader怎么用?Golang NewDocumentFromReader使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了NewDocumentFromReader函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: crawl
func crawl(exe_dir string, db *sql.DB) {
res, _ := http.PostForm("http://shirodanuki.cs.shinshu-u.ac.jp/cgi-bin/olts/sys/exercise.cgi",
url.Values{
"name": {"hoge"},
"id": {"hogehoge"},
"email": {""},
"exe_dir": {exe_dir},
"chapter": {""},
"url": {"http://webmizar.cs.shinshu-u.ac.jp/learn/infomath/"},
},
)
defer res.Body.Close()
utf8 := euc2utf8(res.Body)
doc, _ := goquery.NewDocumentFromReader(utf8)
html, _ := doc.Find("blockquote").Html()
question := strings.TrimSpace(html)
tmp, _ := doc.Find("input[name=tmp]").Attr("value")
res, _ = http.PostForm("http://shirodanuki.cs.shinshu-u.ac.jp/cgi-bin/olts/sys/answer.cgi",
url.Values{
"answer": {""},
"subject": {""},
"chapter": {""},
"url": {"http://webmizar.cs.shinshu-u.ac.jp/learn/infomath/"},
"tmp": {tmp},
},
)
defer res.Body.Close()
utf8 = euc2utf8(res.Body)
doc, _ = goquery.NewDocumentFromReader(utf8)
answer := strings.TrimSpace(doc.Find("blockquote tt b").Text())
stmt, _ := db.Prepare("INSERT INTO `cai` (`exe_dir`, `question`, `answer`) VALUES (?, ?, ?)")
stmt.Exec(exe_dir, question, answer)
}
示例2: ExtractNews
// ExtractNews converts RSS items into NewStruct values.
//
// For every item it gathers the Href of each link, parses the item's HTML
// (Content.Text when Content is set, Description otherwise), records the src
// of every <img>, and reduces the markup to plain text. The plain text is
// parsed a second time so that markup which only appears after the first
// text extraction is handled as well. The title and text are then passed
// through analyzeTitleDescrip, and the item's Title is overwritten with the
// result.
func ExtractNews(newitems []*rss.Item) []NewStruct {
	var extracted []NewStruct
	for _, item := range newitems {
		var (
			hrefs  []string
			images []string
			text   string
		)

		// Collect every link target of the item.
		for _, link := range item.Links {
			hrefs = append(hrefs, link.Href)
		}

		// Prefer the full content over the short description.
		markup := item.Description
		if item.Content != nil {
			markup = item.Content.Text
		}

		// harvest parses one HTML fragment, appends its image sources to
		// images and returns the fragment's text; ok is false when the
		// fragment cannot be parsed.
		harvest := func(fragment string) (plain string, ok bool) {
			doc, err := goquery.NewDocumentFromReader(strings.NewReader(fragment))
			if err != nil {
				return "", false
			}
			doc.Find("img").Each(func(_ int, img *goquery.Selection) {
				if src, found := img.Attr("src"); found {
					images = append(images, src)
				}
			})
			return doc.Text(), true
		}

		if first, ok := harvest(markup); ok {
			text = first
			if second, ok := harvest(first); ok {
				text = second
			}
		}

		item.Title, text = analyzeTitleDescrip(item.Title, text)
		extracted = append(extracted, NewStruct{"", images, item.Title, text, item.PubDate, item.Author.Name, "", hrefs})
	}
	return extracted
}
示例3: load
// load returns the parsed document for url.
//
// If the cache file that c.urlToLocal maps the URL to can be opened, the
// document is parsed straight from it. Otherwise the method waits for the
// next tick of c.ticker, downloads the page, writes the raw bytes to the
// cache file (mode 0644) and parses the downloaded bytes.
func (c *webCache) load(url string) (*goquery.Document, error) {
	cachePath := c.urlToLocal(url)

	cached, openErr := os.Open(cachePath)
	if openErr == nil {
		defer cached.Close()
		return goquery.NewDocumentFromReader(cached)
	}

	// Cache miss: block until the ticker allows the next request.
	<-c.ticker.C

	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var payload bytes.Buffer
	_, err = payload.ReadFrom(resp.Body)
	if err != nil {
		return nil, err
	}

	err = ioutil.WriteFile(cachePath, payload.Bytes(), 0644)
	if err != nil {
		return nil, err
	}
	return goquery.NewDocumentFromReader(&payload)
}
示例4: Preprocess
// Preprocess returns the crawler's page as a parsed document.
//
// When c.RawHTML is empty the page is fetched first; if it is still empty
// afterwards the method returns (nil, nil). The markup is run through
// addSpacesBetweenTags and parsed. If GetCharset reports a charset that is
// neither empty nor "UTF-8", the markup is converted with UTF8encode and
// parsed again, because the net/html parser and goquery require UTF-8 input.
func (c *Crawler) Preprocess() (*goquery.Document, error) {
	if c.RawHTML == "" {
		c.RawHTML = c.fetchHTML(c.url, c.config.timeout)
		if c.RawHTML == "" {
			return nil, nil
		}
	}

	c.RawHTML = c.addSpacesBetweenTags(c.RawHTML)
	document, err := goquery.NewDocumentFromReader(strings.NewReader(c.RawHTML))
	if err != nil {
		return nil, err
	}

	cs := c.GetCharset(document)
	if cs == "" || cs == "UTF-8" {
		return document, nil
	}

	// Re-encode and re-parse: the first parse only served to find the charset.
	c.RawHTML = UTF8encode(c.RawHTML, cs)
	document, err = goquery.NewDocumentFromReader(strings.NewReader(c.RawHTML))
	if err != nil {
		return nil, err
	}
	return document, nil
}
示例5: TestPostAfterUpdating
func TestPostAfterUpdating(t *testing.T) {
Convey("the post should not be displayed on frontpage", t, func() {
var recorder = httptest.NewRecorder()
request, _ := http.NewRequest("GET", "/", nil)
server.ServeHTTP(recorder, request)
So(recorder.Code, ShouldEqual, 200)
doc, _ := goquery.NewDocumentFromReader(recorder.Body)
sel := doc.Find("article h1").Text()
So(sel, ShouldBeEmpty)
})
Convey("update should return HTTP 200", t, func() {
var recorder = httptest.NewRecorder()
request, _ := http.NewRequest("GET", fmt.Sprintf("/api/post/%s/publish", post.Slug), nil)
cookie := &http.Cookie{Name: "id", Value: sessioncookie}
request.AddCookie(cookie)
server.ServeHTTP(recorder, request)
So(recorder.Body.String(), ShouldEqual, `{"success":"Post published"}`)
So(recorder.Code, ShouldEqual, 200)
})
Convey("after updating, post should be displayed on frontpage", t, func() {
var recorder = httptest.NewRecorder()
request, _ := http.NewRequest("GET", "/", nil)
server.ServeHTTP(recorder, request)
So(recorder.Code, ShouldEqual, 200)
doc, _ := goquery.NewDocumentFromReader(recorder.Body)
sel := doc.Find("article .title").Text()
So(sel, ShouldEqual, post.Title)
})
Convey("the post should not be displayed trough API", t, func() {
var recorder = httptest.NewRecorder()
request, _ := http.NewRequest("GET", "/api/posts", nil)
server.ServeHTTP(recorder, request)
So(recorder.Code, ShouldEqual, 200)
var posts []Post
json.Unmarshal(recorder.Body.Bytes(), &posts)
for i, p := range posts {
So(i, ShouldEqual, 0)
So(post.ID, ShouldEqual, p.ID)
So(post.Title, ShouldEqual, p.Title)
So(post.Content, ShouldEqual, p.Content)
So(post.Markdown, ShouldEqual, p.Markdown)
So(post.Slug, ShouldEqual, p.Slug)
So(post.Author, ShouldEqual, p.Author)
So(post.Created, ShouldBeGreaterThan, int64(1400000000))
if post.Updated != post.Created {
So(post.Updated, ShouldAlmostEqual, post.Created, 5)
}
So(post.Excerpt, ShouldEqual, p.Excerpt)
}
})
}
示例6: Login
// Login authenticates with ShopKeep.
//
// It fetches d.site, extracts the authenticity token from the login page
// (stored in d.authenticity_token), posts the credentials to d.site+"/session"
// and inspects the returned page with loginStatus.
//
// It returns a non-nil error if any request or parse step fails, if no
// authenticity token is found, or if the credentials are rejected. Errors from
// the HTTP and parsing steps include the underlying cause.
func (d *Downloader) Login() error {
	// Get the login page.
	lp, err := d.client.Get(d.site)
	if err != nil {
		return errors.New("Could not get: " + d.site + ": " + err.Error())
	}
	defer lp.Body.Close()

	// Pull the login page into a goquery.Document.
	loginPage, err := goquery.NewDocumentFromReader(lp.Body)
	if err != nil {
		return errors.New("Failed to login: Could not read response body: " + err.Error())
	}

	// Determine what the authenticity token is.
	at := authToken(loginPage)
	if at == "" {
		return errors.New("Failed to find authenticity_token.")
	}
	d.authenticity_token = at
	log.Println("Found authenticity_token: " + d.authenticity_token)

	// Get the homepage by posting login credentials.
	hp, err := d.client.PostForm(d.site+"/session",
		url.Values{
			"authenticity_token": {d.authenticity_token},
			"utf8":               {"✓"},
			"login":              {d.username},
			"password":           {d.password},
			"commit":             {"Sign in"},
		})
	if err != nil {
		return errors.New("Failed POSTing login form: " + err.Error())
	}
	defer hp.Body.Close()

	// Pull the homepage response into a goquery.Document.
	homePage, err := goquery.NewDocumentFromReader(hp.Body)
	if err != nil {
		return errors.New("Failed to access homepage: " + err.Error())
	}

	// Check the login status.
	// Can't simply check response status (ShopKeep returns 200 whether login was successful or not).
	// Can't check location header as it is not included in the response.
	if !loginStatus(homePage) {
		return errors.New("Invalid username or password")
	}
	log.Println("Login successful!")
	return nil
}
示例7: Etl
// Etl starts one goroutine per link. Each goroutine downloads the activity
// page with the user's cookies, extracts the subject, participant count and
// date, and, when the page offers a sign-up link ("我要报名"), follows it and
// sends an Activity carrying the form's action URL on the activities channel.
//
// Etl returns immediately and does not wait for the goroutines. Failures are
// printed and end the affected goroutine only; they never panic.
//
// NOTE(review): the year is hard-coded to 2014 when parsing the date —
// confirm whether this is still intended.
func (user *User) Etl(links []string) {
	memberCountRe := regexp.MustCompile(`(\d+)人参加`)
	dateRe := regexp.MustCompile(`0?(\d+)月0?(\d+)日`)

	for _, link := range links {
		// The user is passed by value so each goroutine works on its own copy.
		go func(u User, link string) {
			fmt.Println("Etl <-", link)
			response, err := u.RequestWithCookie(link, "GET", nil)
			if err != nil {
				fmt.Println(err)
				return
			}
			defer response.Body.Close()

			page, err := goquery.NewDocumentFromReader(response.Body)
			if err != nil {
				fmt.Printf("error: %s\n", err)
				return
			}

			var mscnt int
			var acdate time.Time
			body := page.Find("div[class='tn-box-content tn-widget-content tn-corner-all']")
			subject := page.Find("h1[class='tn-helper-reset tn-text-heading']").Text()

			body.Find("span[class='tn-action']").Find("a").Each(func(i int, s *goquery.Selection) {
				match := memberCountRe.FindStringSubmatch(s.Text())
				if len(match) < 2 {
					return
				}
				cnt, err := strconv.Atoi(match[1])
				if err != nil {
					// Only reachable when the digit run overflows int; report
					// it instead of crashing the whole program from a goroutine.
					fmt.Printf("error: %s\n", err)
					return
				}
				mscnt = cnt
			})

			if datext := body.Find("span[class='tn-date']").Text(); datext != "" {
				// Guard against text without a "M月D日" date; indexing the nil
				// match used to panic. The "1"/"2" layout verbs accept both
				// zero-padded and non-padded month/day values.
				if match := dateRe.FindStringSubmatch(datext); match != nil {
					if ad, err := time.Parse("2006年1月2日", "2014年"+match[0]); err == nil {
						acdate = ad
					}
				}
			}

			signup := body.Find("span[class='tn-icon-join tn-icon']").Next()
			if !strings.Contains(signup.Text(), "我要报名") {
				return
			}
			signupPath, _ := signup.Attr("href")

			formResponse, err := u.RequestWithCookie(domain+signupPath, "GET", nil)
			if err != nil {
				fmt.Println(err)
				return
			}
			defer formResponse.Body.Close()

			formPage, err := goquery.NewDocumentFromReader(formResponse.Body)
			if err != nil {
				fmt.Printf("error: %s\n", err)
				return
			}
			if formAddr, ok := formPage.Find("form").Attr("action"); ok {
				activity := Activity{subject, acdate, acdate.Weekday(), mscnt, domain + formAddr}
				fmt.Println("Activitys <-", activity)
				activities <- activity
			}
		}(*user, link)
	}
}
示例8: parse
// parse extracts one record per row of the "#table1" table in the HTML
// document s.
//
// Only rows with exactly seven child cells are considered. For each such row
// the text of cells 1..4 is trimmed, underscores are removed from cells 2, 3
// and 4, and the id is taken from the href of the link in cell 2 (the part
// between the first and second "="). Rows whose link is missing or has no
// "=" in its href are skipped; indexing the split result used to panic on
// them. Each record is formatted as "name_dt_b_w_id".
//
// The process is terminated via log.Fatalln if s cannot be parsed.
func parse(s string) []string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(s))
	if err != nil {
		log.Fatalln("pare error", err)
	}

	result := []string{}
	doc.Find("#table1 tr").Each(func(i int, row *goquery.Selection) {
		cells := row.Children()
		if cells.Length() != 7 {
			return
		}

		href, _ := cells.Eq(2).Find("a").Attr("href")
		parts := strings.Split(strings.TrimSpace(href), "=")
		if len(parts) < 2 {
			// No "key=value" in the link: nothing to use as an id.
			return
		}
		id := parts[1]

		dt := strings.TrimSpace(cells.Eq(1).Text())
		name := strings.Replace(strings.TrimSpace(cells.Eq(2).Text()), "_", "", -1)
		b := strings.Replace(strings.TrimSpace(cells.Eq(3).Text()), "_", "", -1)
		w := strings.Replace(strings.TrimSpace(cells.Eq(4).Text()), "_", "", -1)

		result = append(result, fmt.Sprintf("%v_%v_%v_%v_%v", name, dt, b, w, id))
	})
	return result
}
示例9: downloadHtml
// downloadHtml downloads the request body through downloadFile, parses it as
// HTML and stores both the serialized HTML and the parsed document on the
// page. If the download did not succeed the page is returned unchanged; if
// parsing or serialization fails the error is logged and recorded on the page
// through SetStatus(true, ...).
func (d *HttpDownloader) downloadHtml(p *page.Page, req *request.Request) *page.Page {
	p, destbody := d.downloadFile(p, req)
	if !p.IsSucc() {
		return p
	}

	// fail logs the error, marks the page with it and hands the page back.
	fail := func(cause error) *page.Page {
		mlog.LogInst().LogError(cause.Error())
		p.SetStatus(true, cause.Error())
		return p
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(destbody)))
	if err != nil {
		return fail(err)
	}

	body, err := doc.Html()
	if err != nil {
		return fail(err)
	}

	p.SetBodyStr(body).SetHtmlParser(doc).SetStatus(false, "")
	return p
}
示例10: getGbkDoc
// getGbkDoc fetches url with client, decodes the body from GBK and parses it
// into a goquery document.
//
// A failed request or a failed parse is retried; both kinds of failure share
// one budget of three retries (four attempts in total). When the budget is
// exhausted the last error is returned through the me helper, tagged "get"
// or "new document from response".
//
// Each response body is closed as soon as its attempt finishes, so bodies of
// failed attempts are no longer kept open until the function returns.
func getGbkDoc(client *http.Client, url string) (*goquery.Document, error) {
	for retry := 3; ; retry-- {
		resp, err := client.Get(url)
		if err != nil {
			if retry > 0 {
				continue
			}
			return nil, me(err, "get")
		}

		r := transform.NewReader(resp.Body, simplifiedchinese.GBK.NewDecoder())
		doc, err := goquery.NewDocumentFromReader(r)
		// Parsing has consumed what it needs; release the connection now.
		resp.Body.Close()
		if err != nil {
			if retry > 0 {
				continue
			}
			return nil, me(err, "new document from response")
		}
		return doc, nil
	}
}
示例11: MakeDoubanSpider
// MakeDoubanSpider builds the spider named "douban_img_spider", which starts
// at http://movie.douban.com/ and registers a single parse function under
// spiders.BASE_PARSE_NAME.
//
// The parse function stops (nil, nil) once the request depth exceeds 10 or
// when the page has no "#page .n" nodes. Otherwise it takes the href of the
// last such node and returns one follow-up GET request for it. A failure to
// build that request is now returned as an error instead of yielding a list
// that contains a nil request.
//
// NOTE(review): the follow-up URL is prefixed with http://www.baidu.com
// although the spider starts at douban — confirm this is intended.
func MakeDoubanSpider() *spiders.Spider {
	spider := &spiders.Spider{}
	spider.Name = "douban_img_spider"
	spider.StartUrls = []string{"http://movie.douban.com/"}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		if response.Request.Depth > 10 {
			return nil, nil
		}

		doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
		if err != nil {
			return nil, err
		}

		nodes := doc.Find("#page .n").Nodes
		if len(nodes) == 0 {
			return nil, nil
		}

		// The last matching node is treated as the "next page" link.
		var nextPageLink string
		for _, attr := range nodes[len(nodes)-1].Attr {
			if attr.Key == "href" {
				nextPageLink = attr.Val
				break
			}
		}

		nextPage := "http://www.baidu.com" + nextPageLink
		request, err := http.NewRequest("GET", nextPage, spider.Name, spiders.BASE_PARSE_NAME, nil, 0)
		if err != nil {
			return nil, err
		}
		return []*http.Request{request}, nil
	}
	return spider
}
示例12: firstURLFromHTML
// firstURLFromHTML inspects the first <a> element of body and returns its
// href as a one-element slice.
//
// It returns (nil, nil) when body is empty, when there is no anchor, when the
// first anchor has no href, or when that href contains "mailto:". Later
// anchors are never examined. A found link is logged through con.Log.
func firstURLFromHTML(con *data.Context, body string) ([]string, error) {
	if body == "" {
		return nil, nil
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return nil, err
	}

	// Attr reports exists == false for an empty selection, which covers the
	// "no anchor at all" case.
	href, exists := doc.Find("a").First().Attr("href")
	if !exists || strings.Contains(href, "mailto:") {
		return nil, nil
	}

	con.Log.Infof("HTML found %v", href)
	return []string{href}, nil
}
示例13: getPageTitle
// getPageTitle fetches url with the SUFRUserAgent User-Agent header and
// returns the text of the page's <title> element. On any error the url itself
// is returned as the string together with the error.
func getPageTitle(url string) (string, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return url, err
	}
	req.Header.Set("User-Agent", SUFRUserAgent)

	var client http.Client
	res, err := client.Do(req)
	if err != nil {
		return url, err
	}
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return url, err
	}
	return doc.Find("title").Text(), nil
}
示例14: Robtex
// Robtex looks up ip at robtex.com and returns the task name "robtex.com",
// one Result per hostname found in the first column of the "#x_summary"
// table, and any request or parse error.
//
// Cells containing "*", the single cell ".", and purely numeric cells are
// skipped.
func Robtex(ip string) (string, Results, error) {
	const task = "robtex.com"
	results := Results{}

	resp, err := http.Get("http://www.robtex.com/ip/" + ip + ".html")
	if err != nil {
		return task, results, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return task, results, err
	}

	doc.Find("#x_summary td:nth-child(1)").Each(func(_ int, cell *goquery.Selection) {
		hostname := cell.Text()
		switch {
		case strings.Contains(hostname, "*"), hostname == ".":
			return
		}
		if _, convErr := strconv.Atoi(hostname); convErr == nil {
			// A plain number is not a hostname.
			return
		}
		results = append(results, Result{Source: task, IP: ip, Hostname: hostname})
	})
	return task, results, nil
}
示例15: httpRequest
// httpRequest performs req with the browser's client and makes the response
// the current browser state.
//
// It calls preSend, executes the request, reads the whole body into bow.body
// and parses it into a document. On success the previous state is pushed onto
// the history, a new state is built from the request, response and document,
// and postSend is called. Any error is returned before the history or state
// is touched; bow.body is overwritten even when reading the body fails.
func (bow *Browser) httpRequest(req *http.Request) error {
	bow.preSend()

	resp, err := bow.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if bow.body, err = ioutil.ReadAll(resp.Body); err != nil {
		return err
	}

	dom, err := goquery.NewDocumentFromReader(bytes.NewReader(bow.body))
	if err != nil {
		return err
	}

	bow.history.Push(bow.state)
	bow.state = jar.NewHistoryState(req, resp, dom)
	bow.postSend()
	return nil
}