当前位置: 首页>>代码示例>>Golang>>正文


Golang goquery.ParseUrl函数代码示例

本文整理汇总了Golang中github.com/opesun/goquery.ParseUrl函数的典型用法代码示例。如果您正苦于以下问题:Golang ParseUrl函数的具体用法?Golang ParseUrl怎么用?Golang ParseUrl使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了ParseUrl函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: main

// main scrapes three sites in turn (toutiao.io, geek.csdn.net and
// ituring.com.cn), printing each page's <title> followed by the links
// selected from it. Any fetch or parse failure aborts the program with a
// panic.
func main() {
	var url = "http://toutiao.io"

	p, err := goquery.ParseUrl(url)
	if err != nil {
		panic(err)
	}

	// toutiao.io
	title := p.Find("title").Text()
	fmt.Println(title)

	t := p.Find(".title a")
	// The summary selection does not depend on the loop index, so run the
	// query once instead of once per title.
	summaries := p.Find(".summary a")
	for i := 0; i < t.Length(); i++ {
		item := t.Eq(i)
		d := item.Text()
		l := item.Attr("href")
		c := summaries.Eq(i).Text()
		fmt.Println(l, d, "||", c)
	}

	// geek.csdn.net
	p, err = goquery.ParseUrl("http://geek.csdn.net/hotest")
	if err != nil {
		panic(err)
	}

	title = p.Find("title").Text()
	t = p.Find("a.title")

	fmt.Println(title)
	for i := 0; i < t.Length(); i++ {
		item := t.Eq(i)
		l := item.Attr("href")
		c := item.Text()
		fmt.Println(l, c)
	}

	// ituring.com
	p, err = goquery.ParseUrl("http://www.ituring.com.cn/")
	if err != nil {
		panic(err)
	}

	title = p.Find("title").Text()
	t = p.Find(".arc-list").Eq(0)

	// Re-parse the first ".arc-list" fragment on its own so the "dt a" query
	// below only sees links inside that fragment. The parse error used to be
	// discarded; treat it like every other failure in this function.
	x, err := goquery.ParseString(t.Html())
	if err != nil {
		panic(err)
	}

	t = x.Find("dt a")

	fmt.Println(title)

	for i := 0; i < t.Length(); i++ {
		item := t.Eq(i)
		c := item.Text()
		l := item.Attr("href")
		fmt.Printf("http://www.ituring.com.cn/%s %s\n", l, c)
	}
}
开发者ID:click1,项目名称:daily-code,代码行数:56,代码来源:fetch-toutiao.go

示例2: GetDownloadLinks

// GetDownloadLinks fetches the book page at url, prints the book name taken
// from the page's h1 element, and passes the href of every ".download-link"
// element to DownloadBook together with that name.
//
// It panics if the page cannot be fetched or parsed.
func GetDownloadLinks(url string) {
	fmt.Println("\n")
	fmt.Println("书籍地址", url)

	page, err := goquery.ParseUrl(url)
	if err != nil {
		panic(err)
	}

	links := page.Find(".download-link")
	bookName := page.Find("h1").Text()
	fmt.Println("书籍名称:", bookName)

	for idx := 0; idx < links.Length(); idx++ {
		href := links.Eq(idx).Attr("href")
		fmt.Println("下载地址", href)
		DownloadBook(href, bookName)
	}
}
开发者ID:zhuyouzha,项目名称:EbookDownloadTools,代码行数:25,代码来源:EbookDownloadTools.go

示例3: ParsePanMap

// ParsePanMap loads the schedule page for date and, for every row of the
// ".bet_table" table, collects the home/guest names and match date and hands
// them to GetPanValue together with that row's pan URL.
//
// date replaces the "TTT" placeholder in myinit.DateUrl; the row's "fid"
// attribute replaces the same placeholder in myinit.PanUrl. history is
// accepted but not used by this function.
func ParsePanMap(date string, history bool) {
	bet_url := strings.Replace(myinit.DateUrl, "TTT", date, -1)
	fmt.Println("bet_url:", bet_url)

	pan_url := myinit.PanUrl

	html_obj, err := goquery.ParseUrl(bet_url)
	if err != nil {
		// Previously the error was dropped and the function silently did
		// nothing; report why there is no data.
		fmt.Println("parse bet_url failed:", err)
		return
	}

	schedule_trs := html_obj.Find(".bet_table tbody tr")
	for i := range schedule_trs {
		tr := schedule_trs.Eq(i)

		fid_attr := tr.Attr("fid")
		fid, err := strconv.Atoi(fid_attr)
		if err != nil {
			// A row without a numeric fid cannot be mapped to a pan page, so
			// skip it rather than querying id 0.
			// NOTE(review): assumes such rows carry no match data — confirm.
			fmt.Println("skip row with invalid fid:", fid_attr)
			continue
		}

		schedule_string_info := make(map[string]string)
		schedule_string_info["schedule_home"] = common.ConvToGB(tr.Attr("homesxname"))
		schedule_string_info["schedule_guest"] = common.ConvToGB(tr.Attr("awaysxname"))
		schedule_string_info["schedule_date"] = tr.Attr("pdate")

		schedule_pan_url := strings.Replace(pan_url, "TTT", strconv.Itoa(fid), -1)

		GetPanValue(schedule_pan_url, fid, schedule_string_info, date)
	}
}
开发者ID:ryancsq,项目名称:test,代码行数:26,代码来源:schedule.go

示例4: checkPanUseable

// checkPanUseable reports whether the pan page at schedule_pan_url is usable.
//
// It returns false when the page cannot be fetched, when the odds table
// (".table_cont table tbody tr") has no rows, when checkPanTr rejects any row
// that names a company, or when the table has 30 or more rows and
// checkPanUseableFromAjax rejects the follow-up check. Otherwise it returns
// true. schedule_string_info and date are accepted but not used here.
func checkPanUseable(schedule_pan_url string, schedule_fenxi_id int, schedule_string_info map[string]string, date string) (res bool) {
	pan_html_obj, err := goquery.ParseUrl(schedule_pan_url)
	if err != nil {
		// An unreachable page is not usable; say so explicitly instead of
		// relying on Find behaving well on whatever ParseUrl returned.
		return false
	}

	odds_tr := pan_html_obj.Find(".table_cont table tbody tr")
	row_count := odds_tr.Length()
	if row_count == 0 {
		return false
	}

	for i := 0; i < row_count; i++ {
		tr_item := odds_tr.Eq(i)

		// Rows whose second cell has no titled company link are not checked.
		td_of_company := tr_item.Find("td").Eq(1)
		if td_of_company.Find("p a").Attr("title") == "" {
			continue
		}

		if !checkPanTr(tr_item, schedule_fenxi_id) {
			return false
		}
	}

	// NOTE(review): 30 looks like the number of rows served inline, with the
	// remainder loaded via ajax — confirm against the site.
	if row_count >= 30 {
		if !checkPanUseableFromAjax(30, schedule_fenxi_id) {
			return false
		}
	}
	return true
}
开发者ID:ryancsq,项目名称:test,代码行数:28,代码来源:pan.go

示例5: ParseBetUrl

func ParseBetUrl(date string, history bool) {
	bet_url := getBetUrl(date)
	html_obj, _ := goquery.ParseUrl(bet_url)
	schedule_trs := html_obj.Find(".bet_table tbody tr")
	for i, _ := range schedule_trs {
		is_end := schedule_trs.Eq(i).Attr("isend")
		if is_end == "1" && history == false {
			continue
		}

		today := common.GetToday()
		schedule_is_today := today == schedule_trs.Eq(i).Attr("pdate")
		//		fmt.Println("schedule_is_today:===",schedule_is_today)
		if schedule_is_today == false && history == false {
			//			continue
		}

		schedule_int_info, schedule_string_info := pareseScheduleTR(schedule_trs.Eq(i))
		schedule.Add(schedule_int_info, schedule_string_info)
		// end insert schedule

		//parse pan data
		res := ParsePanByScheduleFenxiId(schedule_int_info["schedule_fenxi_id"], date, schedule_string_info)
		if res == false {
			continue
		}
		//计算预测比率
		calcScheduleResult(schedule_int_info, schedule_string_info)
		//				return

	}
}
开发者ID:ryancsq,项目名称:test,代码行数:32,代码来源:schedule.go

示例6: GetZhihuQuestionList

// GetZhihuQuestionList walks the paginated question list that starts at url.
//
// For page numbers 1 through 99 it appends the number to url, fetches the
// page and collects every ".zm-item-title a" link. Each link's href is
// prefixed with the site root, appended to urlList and passed to
// GetSubjectBody. The first page that yields no links ends the walk.
//
// It panics if a page cannot be fetched or parsed.
func GetZhihuQuestionList(url string) {
	urlHeader := "http://www.zhihu.com"

	for page := 1; page < 100; page++ {
		pageURL := url + strconv.Itoa(page)
		fmt.Println(pageURL)

		doc, err := goquery.ParseUrl(pageURL)
		if err != nil {
			panic(err)
		}

		// All question links on this page.
		questions := doc.Find(".zm-item-title a")
		if questions.Length() == 0 {
			// No links means there are no further pages.
			fmt.Print(".........没有文章了.......\n")
			break
		}

		// Turn every relative href into an absolute URL and process it.
		for k := 0; k < questions.Length(); k++ {
			singleUrl := urlHeader + questions.Eq(k).Attr("href")
			urlList = append(urlList, singleUrl)
			GetSubjectBody(singleUrl)
		}
	}
}
开发者ID:huaisha1224,项目名称:ZhihuToNote,代码行数:31,代码来源:ZhihuToNote.go

示例7: ParseBetUrl

func ParseBetUrl(date string, history bool) {
	bet_url := getBetUrl(date)
	html_obj, _ := goquery.ParseUrl(bet_url)
	schedule_trs := html_obj.Find(".bet_table tbody tr")
	for i, _ := range schedule_trs {
		is_end := schedule_trs.Eq(i).Attr("isend")
		if is_end == "1" && history == false {
			continue
		}

		schedule_int_info, schedule_string_info := pareseScheduleTR(schedule_trs.Eq(i), date)
		//if(schedule_int_info["schedule_fenxi_id"]!=556793){
		//	continue
		//}
		//parse pan data
		res := ParsePanByScheduleFenxiId(schedule_int_info["schedule_fenxi_id"], date, schedule_string_info)
		if res == false {
			continue
		}
		schedule.Add(schedule_int_info, schedule_string_info)
		//计算预测比率
		calcScheduleResult(schedule_int_info, schedule_string_info)
		//				return

	}
}
开发者ID:ryancsq,项目名称:test,代码行数:26,代码来源:schedule.go

示例8: GetZi5PageUrl

// GetZi5PageUrl walks the paginated listing of a book category.
//
// For page numbers 1 through 99 it builds the listing URL from the category,
// fetches it and passes the href of every ".thumb-holder a" link to
// GetDownloadLinks. The first page that yields no links is treated as the
// end of the category.
//
// It panics if a page cannot be fetched or parsed.
func GetZi5PageUrl(category string) {
	base := "http://book.zi5.me/archives/book-gentre/"

	for page := 1; page < 100; page++ {
		// URL of this listing page.
		pageURL := base + category + "/page/" + strconv.Itoa(page)
		fmt.Println("\n")
		fmt.Println("分页地址", page, pageURL)

		doc, err := goquery.ParseUrl(pageURL)
		if err != nil {
			panic(err)
		}

		// Anchors inside elements with class "thumb-holder".
		books := doc.Find(".thumb-holder a")
		if books.Length() == 0 {
			// Nothing found: the category has no more books.
			fmt.Print("本分类下已经没有书籍\n")
			break
		}

		// Follow every book link found on this page.
		for x := 0; x < books.Length(); x++ {
			GetDownloadLinks(books.Eq(x).Attr("href"))
		}
	}
}
开发者ID:zhuyouzha,项目名称:EbookDownloadTools,代码行数:34,代码来源:EbookDownloadTools.go

示例9: main

// main demonstrates querying remote pages: it prints the "#eow-title"
// element of a YouTube page, then the "a.detLink" elements of a search result
// page, first via Print and then one HtmlAll entry per line. Sections are
// separated by "---". A fetch or parse failure panics.
func main() {
	video, err := goquery.ParseUrl("http://www.youtube.com/watch?v=ob_nh1WMMzU")
	if err != nil {
		panic(err)
	}
	video.Find("#eow-title").Print()
	fmt.Println("---")

	results, err := goquery.ParseUrl("http://thepiratebay.se/search/one%20day%202011/0/99/0")
	if err != nil {
		panic(err)
	}
	results.Find("a.detLink").Print()
	fmt.Println("---")

	fragments := results.Find("a.detLink").HtmlAll()
	for idx := range fragments {
		fmt.Println(fragments[idx])
	}
}
开发者ID:huaisha1224,项目名称:goquery,代码行数:17,代码来源:remote.go

示例10: main

// main crawls pages 1 through 50 of targetUrl concurrently, one goroutine per
// page. Each worker reads the "li" items under "div.lp_result_list" and
// upserts every item that has a link title into the "foods" collection of the
// "meishi" database, keyed by name. main then waits for every worker to
// report on its channel before returning.
func main() {

	pool := mongo.NewDialPool("localhost:27018", 1000)

	chs := make([]chan bool, 50)
	for i := 1; i <= 50; i++ {
		ch := make(chan bool)

		chs[i-1] = ch

		go func(i int, ch chan bool) {
			conn, err := pool.Get()
			if err != nil {
				// Without a connection nothing can be stored; report the
				// failure and let the other workers continue.
				log.Println("get mongo connection fail ,", err)
				ch <- false
				return
			}
			db := &mongo.Database{conn, "meishi", mongo.DefaultLastErrorCmd}
			coll := db.C("foods")

			data, err := goquery.ParseUrl(targetUrl + strconv.Itoa(i))
			if nil != err {
				// log.Fatalln would exit the whole process here, killing
				// every other worker and making the two lines below
				// unreachable. Log and signal failure instead.
				log.Println("response fail ,", err)
				ch <- false
				return
			}

			// Select the list items once rather than on every iteration.
			items := data.Find("div.lp_result_list").Find("li")
			size := items.Length()
			for idx := 0; idx < size; idx++ {
				item := items.Eq(idx)

				link := item.Find("a")
				href := link.Attr("href")
				name := link.Attr("title")
				img := item.Find("img").Attr("src")
				// Items without a title are skipped.
				if len(name) > 0 {

					err := coll.Upsert(mongo.M{"name": name}, mongo.M{"name": name, "img_url": img, "link": href})

					log.Println("err", err, "name:", name, "link:", link, "href:", href, "img", img)
				}
			}
			ch <- true
			log.Println(i)

		}(i, ch)
	}

	// Channels are unbuffered, so each receive blocks until that worker has
	// finished (successfully or not).
	for i, val := range chs {
		<-val
		log.Println("end:", i)
	}
}
开发者ID:houzhenggang,项目名称:weixin-1,代码行数:55,代码来源:crawler2.go

示例11: ParseResultUrl

// ParseResultUrl loads the result page for date and updates stored results
// from every row of the ".ld_table" table except the first.
//
// date replaces the "DDD" placeholder in myinit.ResultUrl. For each row whose
// result number already exists for that date (per
// schedule.CheckExistsByResultNoAndDate) the schedule, asiapan and asiapanlog
// records are updated. history is accepted but not used by this function.
//
// It returns false when date is empty or the page cannot be fetched, and true
// otherwise.
func ParseResultUrl(date string, history bool) (res bool) {
	if date == "" {
		return false
	}
	result_url := strings.Replace(myinit.ResultUrl, "DDD", date, -1)
	fmt.Println(result_url)

	html_obj, err := goquery.ParseUrl(result_url)
	if err != nil {
		// The error used to be discarded, so a failed fetch was reported as
		// success with nothing updated.
		fmt.Println("parse result_url failed:", err)
		return false
	}

	// These maps are reused across rows; every key is overwritten per row.
	schedule_string_info := make(map[string]string)
	schedule_float_info := make(map[string]float32)

	pan_string_info := make(map[string]string)
	// NOTE(review): pan_float_info is never populated before being passed to
	// asiapanlog.UpdateAsiaPanResult — confirm this is intended.
	pan_float_info := make(map[string]float32)

	// String fields mirrored from the schedule record into the pan record.
	shared_keys := []string{
		"schedule_result_no",
		"schedule_score",
		"schedule_spf_result",
		"schedule_rqspf_result",
		"schedule_zjq_result",
		"schedule_bqc_result",
	}

	schedule_trs := html_obj.Find(".ld_table tbody tr")
	for i := range schedule_trs {
		// The first row is skipped.
		if i == 0 {
			continue
		}
		tds := schedule_trs.Eq(i).Find("td")

		schedule_string_info["schedule_result_no"] = common.ConvToGB(tds.Eq(0).Html())

		schedule_string_info["schedule_score"] = common.ConvToGB(tds.Eq(6).Html())
		schedule_string_info["schedule_spf_result"] = common.ConvToGB(tds.Eq(11).Html())
		schedule_float_info["schedule_spf_odd"] = common.ConvToFloat32(tds.Eq(12).Text())
		schedule_string_info["schedule_rqspf_result"] = common.ConvToGB(tds.Eq(8).Html())
		schedule_float_info["schedule_rqspf_odd"] = common.ConvToFloat32(tds.Eq(9).Text())
		schedule_string_info["schedule_zjq_result"] = common.ConvToGB(tds.Eq(14).Html())
		schedule_float_info["schedule_zjq_odd"] = common.ConvToFloat32(tds.Eq(15).Text())
		schedule_string_info["schedule_bqc_result"] = common.ConvToGB(tds.Eq(17).Html())
		schedule_float_info["schedule_bqc_odd"] = common.ConvToFloat32(tds.Eq(18).Text())

		for _, key := range shared_keys {
			pan_string_info[key] = schedule_string_info[key]
		}

		has := schedule.CheckExistsByResultNoAndDate(schedule_string_info["schedule_result_no"], date)
		fmt.Println("has:", has)
		if !has {
			continue
		}
		schedule.UpdateScheduleResult(date, schedule_float_info, schedule_string_info)
		asiapan.UpdateAsiaPanResult(date, schedule_float_info, pan_string_info)
		asiapanlog.UpdateAsiaPanResult(date, pan_float_info, pan_string_info)
	}
	return true
}
开发者ID:ryancsq,项目名称:test,代码行数:50,代码来源:result.go

示例12: GetSubjectBody

// GetSubjectBody fetches the single question page at url and forwards it to
// GetConf: the page's <title> text is used as the subject and the page's HTML
// as the body. The subject is also printed.
//
// It panics if the page cannot be fetched or parsed.
func GetSubjectBody(url string) {
	page, err := goquery.ParseUrl(url)
	if err != nil {
		panic(err)
	}

	subject := page.Find("title").Text()
	body := page.Html()

	fmt.Print(subject)
	GetConf(subject, body)
}
开发者ID:huaisha1224,项目名称:ZhihuToNote,代码行数:16,代码来源:ZhihuToNote.go

示例13: main

// main crawls pages 1 through 20 of targetUrl concurrently, one goroutine per
// page. Each worker reads the "div.media" items under the element whose id is
// "page-<n>" and inserts every item that has a link title into the "foods"
// collection of the "meishi" database. main then waits for every worker to
// report on its channel before returning.
func main() {

	pool := mongo.NewDialPool("localhost:27018", 1000)

	done := make([]chan bool, 20)
	for i := 1; i <= 20; i++ {
		done[i-1] = make(chan bool)
		go func(i int, ch chan bool) {
			conn, err := pool.Get()
			if err != nil {
				// Without a connection nothing can be stored; report the
				// failure and let the other workers continue.
				log.Println("get mongo connection fail ,", err)
				ch <- false
				return
			}
			db := &mongo.Database{conn, "meishi", mongo.DefaultLastErrorCmd}
			coll := db.C("foods")

			data, err := goquery.ParseUrl(targetUrl + strconv.Itoa(i))
			if nil != err {
				// log.Fatalln would exit the whole process here, killing
				// every other worker and making the two lines below
				// unreachable. Log and signal failure instead.
				log.Println("response fail ,", err)
				ch <- false
				return
			}

			// Select the media items once rather than on every iteration.
			items := data.Find("#page-" + strconv.Itoa(i)).Find("div.media")
			size := items.Length()
			for idx := 0; idx < size; idx++ {
				item := items.Eq(idx)
				link := item.Find("h2").Find("a")
				href := link.Attr("href")
				name := link.Attr("title")

				img := item.Find("img").Attr("src")
				// Items without a title are skipped.
				if len(name) > 0 {
					err := coll.Insert(mongo.M{"name": name, "img_url": img, "link": href})
					log.Println("insert mongo|", err, "|", href)
				}
			}
			ch <- true
			log.Println(i)

		}(i, done[i-1])
	}

	// Channels are unbuffered, so each receive blocks until that worker has
	// finished (successfully or not).
	for i, val := range done {
		<-val
		log.Println("end:", i)
	}
}
开发者ID:houzhenggang,项目名称:weixin-1,代码行数:46,代码来源:crawler.go

示例14: doParsePanUrl

// doParsePanUrl parses the pan page at schedule_pan_url.
//
// It gathers match-level fields (ids and names copied from
// schedule_string_info, plus team names and descriptions read from the
// ".odds_hd_cont" header table) into the pan_* maps, then passes every odds
// row that names a company to doParsePanTr. When the odds table has 30 or
// more rows it also calls ParsePanUrlFromAjax.
//
// It returns false when the page cannot be fetched or when doParsePanTr or
// ParsePanUrlFromAjax returns false, and true otherwise. date is accepted but
// not used by this function.
func doParsePanUrl(schedule_pan_url string, schedule_fenxi_id int, schedule_string_info map[string]string, date string) (res bool) {
	pan_html_obj, err := goquery.ParseUrl(schedule_pan_url)
	if err != nil {
		// The error used to be discarded and parsing carried on with
		// whatever ParseUrl returned; a page that cannot be loaded is a
		// failure.
		return false
	}

	pan_int_info := make(map[string]int)
	pan_float_info := make(map[string]float32)
	pan_string_info := make(map[string]string)

	// Header cells: index 0 is read as the home team, 2 as the centre
	// (league and time) block, 4 as the guest team.
	schedule_item := pan_html_obj.Find(".odds_hd_cont table tbody tr td")
	home_td := schedule_item.Eq(0)
	guest_td := schedule_item.Eq(4)
	center_td := schedule_item.Eq(2)

	pan_int_info["schedule_fenxi_id"] = schedule_fenxi_id
	pan_string_info["schedule_date"] = schedule_string_info["schedule_date"]
	pan_string_info["schedule_no"] = schedule_string_info["schedule_no"]
	pan_string_info["schedule_result_no"] = schedule_string_info["schedule_result_no"]
	pan_string_info["schedule_league"] = schedule_string_info["schedule_league"]

	pan_string_info["schedule_home"] = common.ConvToGB(home_td.Find("ul li a").Text())
	pan_string_info["schedule_guest"] = common.ConvToGB(guest_td.Find("ul li a").Text())
	pan_string_info["schedule_game_desc"] = common.ConvToGB(center_td.Find(".odds_hd_center .odds_hd_ls a").Text())
	pan_string_info["schedule_date_desc"] = common.ConvToGB(center_td.Find(".odds_hd_center .game_time ").Text())

	odds_tr := pan_html_obj.Find(".table_cont table tbody tr")
	row_count := odds_tr.Length()
	for i := 0; i < row_count; i++ {
		tr_item := odds_tr.Eq(i)

		// Rows whose second cell has no titled company link are not parsed.
		td_of_company := tr_item.Find("td").Eq(1)
		if td_of_company.Find("p a").Attr("title") == "" {
			continue
		}

		if !doParsePanTr(tr_item, schedule_fenxi_id, pan_int_info, pan_float_info, pan_string_info, false) {
			return false
		}
	}

	// NOTE(review): 30 looks like the number of rows served inline, with the
	// remainder loaded via ajax — confirm against the site.
	if row_count >= 30 {
		if !ParsePanUrlFromAjax(30, schedule_fenxi_id, pan_string_info) {
			return false
		}
	}

	return true
}
开发者ID:ryancsq,项目名称:test,代码行数:45,代码来源:pan.go

示例15: fetchProductName

// fetchProductName fetches the page at url, prints its <title> text, and then
// prints one "name: price" line per ".product-name" element, pairing each
// with the ".product_price" element at the same index.
//
// It panics if the page cannot be fetched or parsed.
func fetchProductName(url string) {
	page, err := goquery.ParseUrl(url)
	if err != nil {
		panic(err)
	}

	// Text content of the title element.
	fmt.Println(page.Find("title").Text())

	names := page.Find(".product-name")
	prices := page.Find(".product_price")
	for idx := 0; idx < names.Length(); idx++ {
		name := names.Eq(idx).Text()
		cost := prices.Eq(idx).Text()
		fmt.Println(name + ": " + cost)
	}
}
开发者ID:sunyuantao,项目名称:go-lang,代码行数:20,代码来源:webScrapter.go


注:本文中的github.com/opesun/goquery.ParseUrl函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。