當前位置: 首頁>>代碼示例>>Java>>正文


Java Strings.filterWebString方法代碼示例

本文整理匯總了Java中happy.coding.io.Strings.filterWebString方法的典型用法代碼示例。如果您正苦於以下問題:Java Strings.filterWebString方法的具體用法?Java Strings.filterWebString怎麼用?Java Strings.filterWebString使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在happy.coding.io.Strings的用法示例。


在下文中一共展示了Strings.filterWebString方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: crawl_web_pages

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Downloads every page listed in {@code ./src/main/resources/mtime.txt} and stores its HTML on disk.
 *
 * <p>For each URL the page title is taken from the text of the
 * {@code span[property=v:itemreviewed]} elements and passed through
 * {@code Strings.filterWebString(title, '_')} (presumably to make it usable as a file name;
 * confirm against that helper). The HTML is then written to
 * {@code dir + name + "/" + name + ".html"}, where {@code dir} is declared outside this method.
 *
 * @throws Exception whatever the list reader, URL reader or file helpers raise
 */
public void crawl_web_pages() throws Exception
{
	final String urlListFile = "./src/main/resources/mtime.txt";
	List<String> pageUrls = FileIO.readAsList(urlListFile);

	for (String pageUrl : pageUrls)
	{
		String rawHtml = URLReader.read(pageUrl);

		// The reviewed item's title doubles as directory name and file base name.
		String title = Jsoup.parse(rawHtml).select("span[property=v:itemreviewed]").text();
		String safeName = Strings.filterWebString(title, '_');

		String targetDir = dir + safeName + "/";
		FileIO.makeDirectory(targetDir);
		FileIO.writeString(targetDir + safeName + ".html", rawHtml);
	}
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:18,代碼來源:MTimeCrawler.java

示例2: crawl_web_pages

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Downloads the page at {@code url} and stores its HTML in a directory named after the page title.
 *
 * <p>The title is the text of the first {@code div.detail_head_name h1} element, passed through
 * {@code Strings.filterWebString(name, '_')} (presumably to make it usable as a file name;
 * confirm against that helper). The HTML is written to
 * {@code dir + name + "/" + name + ".html"}, where {@code dir} is declared outside this method.
 *
 * @param url address of the page to download
 * @throws IllegalStateException if the page contains no {@code div.detail_head_name h1} element
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void crawl_web_pages(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);

	// Elements.first() yields null when nothing matched; fail with context instead of a bare NPE.
	Element titleElement = doc.select("div.detail_head_name h1").first();
	if (titleElement == null)
	{
		throw new IllegalStateException("No title element (div.detail_head_name h1) found at: " + url);
	}
	String name = Strings.filterWebString(titleElement.text(), '_');

	String dirPath = dir + name + "/";
	FileIO.makeDirectory(dirPath);
	FileIO.writeString(dirPath + name + ".html", html);
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:12,代碼來源:GewaraCrawler.java

示例3: run_web_pages

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Fetches the page at {@code url} and saves its HTML under a directory derived from the page title.
 *
 * <p>The title is the combined text of the {@code span[property=v:itemreviewed]} elements, run
 * through {@code Strings.filterWebString(title, '_')}. The output file is
 * {@code dir + name + "/" + name + ".html"}; {@code dir} is declared outside this method.
 *
 * @param url address of the page to fetch
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void run_web_pages(String url) throws Exception
{
	String pageHtml = read_url(url);

	String title = Jsoup.parse(pageHtml).select("span[property=v:itemreviewed]").text();
	String safeTitle = Strings.filterWebString(title, '_');

	// Same name is used for the directory and for the file inside it.
	String outDir = dir + safeTitle + "/";
	FileIO.makeDirectory(outDir);
	FileIO.writeString(outDir + safeTitle + ".html", pageHtml);
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:12,代碼來源:DoubanCrawler.java

示例4: run_ratings

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Downloads the paged "collections" listing of the item at {@code url} into
 * {@code dir + name + "/ratings/"}.
 *
 * <p>Pages are requested as {@code url + "collections?start=" + offset} with the offset growing
 * by 20 per request, and stored as {@code page_1.html}, {@code page_2.html}, ... The loop ends
 * after the first page that has no {@code div#collections_tab span.next} element. Every page is
 * fetched on each call; nothing is read back from disk.
 *
 * @param url address of the item page; the listing URL is built by plain concatenation
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void run_ratings(String url) throws Exception
{
	String itemHtml = read_url(url);
	String title = Jsoup.parse(itemHtml).select("span[property=v:itemreviewed]").text();
	String safeTitle = Strings.filterWebString(title, '_');

	String ratingsDir = dir + safeTitle + "/ratings/";
	FileIO.makeDirectory(ratingsDir);

	int pageCount = 0;
	boolean hasNext;
	do
	{
		// Offset is computed from the number of pages already saved.
		String pageHtml = read_url(url + "collections?start=" + (pageCount * 20));

		pageCount++;
		FileIO.writeString(ratingsDir + "page_" + pageCount + ".html", pageHtml);
		Logs.debug("Current processing page: " + pageCount);

		// A "next" marker in the pager means there is at least one more page.
		Elements nextMarkers = Jsoup.parse(pageHtml).select("div#collections_tab span.next");
		hasNext = nextMarkers != null && nextMarkers.size() != 0;
	} while (hasNext);

}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:32,代碼來源:DoubanCrawler.java

示例5: crawl_comments

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Downloads the paged comment listing of the item at {@code url} into
 * {@code dir + name + "/comments/"}.
 *
 * <p>The item name comes from the first {@code div.detail_head_name h1} element (passed through
 * {@code Strings.filterWebString(name, '_')}), and the item id is the part of the first
 * {@code #detail_nav li a} link's {@code href} after its last {@code '/'}. Pages are requested
 * from the {@code qryComment.xhtml} endpoint with {@code pageNumber} starting at 0 and stored as
 * {@code page_1.html}, {@code page_2.html}, ...; a page file that already exists is read from
 * disk instead of being downloaded again.
 *
 * <p>The last page index is read once, from the {@code lang} attribute of the second-to-last
 * {@code div#page a} anchor of the first page. If that pager has fewer than two anchors the
 * listing is treated as a single page rather than failing with an index error.
 *
 * @param url address of the item page
 * @throws IllegalStateException if the item page lacks the title or the navigation link
 * @throws NumberFormatException if the pager's {@code lang} attribute is not an integer
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void crawl_comments(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);

	// Elements.first() yields null when nothing matched; fail with context instead of a bare NPE.
	Element titleElement = doc.select("div.detail_head_name h1").first();
	if (titleElement == null)
	{
		throw new IllegalStateException("No title element (div.detail_head_name h1) found at: " + url);
	}
	String name = Strings.filterWebString(titleElement.text(), '_');

	Element navLink = doc.select("#detail_nav li a").first();
	if (navLink == null)
	{
		throw new IllegalStateException("No navigation link (#detail_nav li a) found at: " + url);
	}
	String val = navLink.attr("href");
	String id = val.substring(val.lastIndexOf("/") + 1);

	String dirPath = dir + name + "/comments/";
	FileIO.makeDirectory(dirPath);

	// save comment pages
	int max = 1; // placeholder until the real last page index is read from the first page
	boolean maxSet = false;
	url = url + "/commentlist";

	for (int k = 0; k <= max; k++)
	{
		String page_file = dirPath + "page_" + (k + 1) + ".html";
		Logs.debug(name + " comments with page: " + (k + 1) + "/" + (max + 1));

		String contents = null;
		if (!FileIO.exist(page_file))
		{

			String link = "http://www.gewara.com/ajax/common/qryComment.xhtml?pageNumber="
					+ k
					+ "&relatedid="
					+ id
					+ "&title=&issue=false&hasMarks=true&tag=movie&isPic=true&isVideo=false&pages=true&maxCount=20&userLogo=";

			contents = read_url(link);
			FileIO.writeString(page_file, contents);
		} else
		{
			contents = FileIO.readAsString(page_file);
		}

		// find the maximum page num;
		if (!maxSet)
		{
			Document doc2 = Jsoup.parse(contents);
			Elements es = doc2.select("div#page a");
			if (es.size() >= 2)
			{
				// second-to-last pager anchor carries the last page index in its "lang" attribute
				Element e = es.get(es.size() - 2);
				max = Integer.parseInt(e.attr("lang"));
			} else
			{
				// no usable pager: treat the listing as a single page instead of indexing out of bounds
				Logs.debug(name + " comments: no pager found, assuming a single page");
				max = 0;
			}
			maxSet = true;
		}

	}
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:53,代碼來源:GewaraCrawler.java

示例6: run_reviews

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Saves the paged review listing of the item at {@code url} into
 * {@code dir + name + "/reviews/"}.
 *
 * <p>The listing starts at {@code url.trim() + "reviews"}. Each page is stored as
 * {@code page_N.html} (N starting at 1); if that file already exists it is read from disk
 * instead of being downloaded. The next page's address is the listing URL concatenated with the
 * {@code href} of the first {@code div#paginator a.next} anchor; the loop stops at the first
 * page without such an anchor.
 *
 * @param url address of the item page (surrounding whitespace is trimmed)
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void run_reviews(String url) throws Exception
{
	url = url.trim();
	String itemHtml = read_url(url);
	String title = Jsoup.parse(itemHtml).select("span[property=v:itemreviewed]").text();
	String safeTitle = Strings.filterWebString(title, '_');

	String reviewsDir = dir + safeTitle + "/reviews/";
	FileIO.makeDirectory(reviewsDir);

	url = url + "reviews";
	String nextLink = url;
	for (int pageNo = 1;; pageNo++)
	{
		String pagePath = reviewsDir + "page_" + pageNo + ".html";

		String pageHtml;
		if (FileIO.exist(pagePath))
		{
			// already saved by an earlier run: reuse the stored copy
			pageHtml = FileIO.readAsString(pagePath);
		} else
		{
			pageHtml = read_url(nextLink);
			FileIO.writeString(pagePath, pageHtml);
			Logs.debug(safeTitle + " reviews with page: " + pageNo);
		}

		// follow the paginator's "next" anchor, if any
		Elements nextAnchors = Jsoup.parse(pageHtml).select("div#paginator a.next");
		if (nextAnchors == null || nextAnchors.size() == 0)
		{
			break;
		}
		nextLink = url + nextAnchors.first().attr("href");
	}
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:43,代碼來源:DoubanCrawler.java

示例7: run_comments

import happy.coding.io.Strings; //導入方法依賴的package包/類
/**
 * Saves the paged comment listing of the item at {@code url} into
 * {@code dir + name + "/comments/"}.
 *
 * <p>The listing starts at {@code url.trim() + "comments"}. Pages are stored as
 * {@code page_1.html}, {@code page_2.html}, ...; a page whose file already exists is read from
 * disk rather than downloaded. Each following address is the listing URL concatenated with the
 * {@code href} of the first {@code div#paginator a.next} anchor, and the walk ends at the first
 * page that has no such anchor.
 *
 * @param url address of the item page (surrounding whitespace is trimmed)
 * @throws Exception whatever {@code read_url} or the file helpers raise
 */
public void run_comments(String url) throws Exception
{
	String itemUrl = url.trim();
	Document itemDoc = Jsoup.parse(read_url(itemUrl));
	String safeTitle = Strings.filterWebString(itemDoc.select("span[property=v:itemreviewed]").text(), '_');

	String commentsDir = dir + safeTitle + "/comments/";
	FileIO.makeDirectory(commentsDir);

	String commentsUrl = itemUrl + "comments";
	String pending = commentsUrl; // address of the page to process next; null once there is none
	int pageNo = 0;
	while (pending != null)
	{
		pageNo++;
		String pageFile = commentsDir + "page_" + pageNo + ".html";

		String pageHtml;
		if (FileIO.exist(pageFile))
		{
			// already saved by an earlier run: reuse the stored copy
			pageHtml = FileIO.readAsString(pageFile);
		} else
		{
			pageHtml = read_url(pending);
			FileIO.writeString(pageFile, pageHtml);
			Logs.debug(safeTitle + " comments with page: " + pageNo);
		}

		// the paginator's "next" anchor decides whether another round is needed
		Elements nextAnchors = Jsoup.parse(pageHtml).select("div#paginator a.next");
		boolean hasNext = nextAnchors != null && nextAnchors.size() != 0;
		pending = hasNext ? commentsUrl + nextAnchors.first().attr("href") : null;
	}
}
 
開發者ID:466152112,項目名稱:HappyResearch,代碼行數:44,代碼來源:DoubanCrawler.java


注:本文中的happy.coding.io.Strings.filterWebString方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。