

Java Strings.filterWebString Method Code Examples

This article collects typical usage examples of the Java method happy.coding.io.Strings.filterWebString. If you are wondering how to use Strings.filterWebString, how to call it, or what a usage example looks like, the curated code examples here may help. You can also explore further usage examples of the enclosing class, happy.coding.io.Strings.


A total of 7 code examples of the Strings.filterWebString method are shown below, sorted by popularity by default.
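
Before the full examples, here is a minimal sketch of the call pattern they all share: a title scraped with Jsoup is passed through Strings.filterWebString so that characters unsafe in file or directory names are replaced with an underscore, and the sanitized result is used as a directory name. The class name FilterWebStringSketch, the method savePage, and the happy.coding.io.FileIO import path are illustrative assumptions; the exact set of characters filtered is defined by the happy.coding.io.Strings library.

import happy.coding.io.FileIO; // assumed to live in the same package as Strings
import happy.coding.io.Strings;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class FilterWebStringSketch
{
	// Sanitize a scraped movie title and use it as a directory/file name,
	// mirroring the pattern shared by the examples below.
	public static void savePage(String dir, String html) throws Exception
	{
		Document doc = Jsoup.parse(html);
		String name = doc.select("span[property=v:itemreviewed]").text();

		// Replace characters that are unsafe in file paths with '_';
		// the exact filtering rules are defined by the library.
		name = Strings.filterWebString(name, '_');

		String dirPath = dir + name + "/";
		FileIO.makeDirectory(dirPath);
		FileIO.writeString(dirPath + name + ".html", html);
	}
}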

Example 1: crawl_web_pages

import happy.coding.io.Strings; // import the package/class that the method depends on
public void crawl_web_pages() throws Exception
{
	String filePath = "./src/main/resources/mtime.txt";
	List<String> urls = FileIO.readAsList(filePath);

	for (String url : urls)
	{
		String html = URLReader.read(url);
		Document doc = Jsoup.parse(html);
		String name = doc.select("span[property=v:itemreviewed]").text();
		name = Strings.filterWebString(name, '_');

		String dirPath = dir + name + "/";
		FileIO.makeDirectory(dirPath);
		FileIO.writeString(dirPath + name + ".html", html);
	}
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 18, Source file: MTimeCrawler.java

Example 2: crawl_web_pages

import happy.coding.io.Strings; // import the package/class that the method depends on
public void crawl_web_pages(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("div.detail_head_name h1").first().text();
	name = Strings.filterWebString(name, '_');

	String dirPath = dir + name + "/";
	FileIO.makeDirectory(dirPath);
	FileIO.writeString(dirPath + name + ".html", html);
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 12, Source file: GewaraCrawler.java

Example 3: run_web_pages

import happy.coding.io.Strings; // import the package/class that the method depends on
public void run_web_pages(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("span[property=v:itemreviewed]").text();
	name = Strings.filterWebString(name, '_');

	String dirPath = dir + name + "/";
	FileIO.makeDirectory(dirPath);
	FileIO.writeString(dirPath + name + ".html", html);
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 12, Source file: DoubanCrawler.java

Example 4: run_ratings

import happy.coding.io.Strings; // import the package/class that the method depends on
public void run_ratings(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("span[property=v:itemreviewed]").text();
	name = Strings.filterWebString(name, '_');

	String dirPath = dir + name + "/ratings/";
	FileIO.makeDirectory(dirPath);

	// save rating pages
	int k = 0;
	while (true)
	{
		String link = url + "collections?start=" + (k * 20);
		String page = read_url(link);

		k++;
		FileIO.writeString(dirPath + "page_" + k + ".html", page);
		Logs.debug("Current processing page: " + k);

		// stop when there is no next-page link
		Document doc2 = Jsoup.parse(page);
		Elements es = doc2.select("div#collections_tab span.next");
		if (es == null || es.size() == 0)
		{
			break;
		}
	}

}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 32, Source file: DoubanCrawler.java

Example 5: crawl_comments

import happy.coding.io.Strings; // import the package/class that the method depends on
public void crawl_comments(String url) throws Exception
{
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("div.detail_head_name h1").first().text();
	name = Strings.filterWebString(name, '_');

	String val = doc.select("#detail_nav li a").first().attr("href");
	String id = val.substring(val.lastIndexOf("/") + 1);

	String dirPath = dir + name + "/comments/";
	FileIO.makeDirectory(dirPath);

	// save comment pages
	int max = 1;
	boolean maxSet = false;
	url = url + "/commentlist";

	for (int k = 0; k <= max; k++)
	{
		String page_file = dirPath + "page_" + (k + 1) + ".html";
		Logs.debug(name + " comments with page: " + (k + 1) + "/" + (max + 1));

		String contents = null;
		if (!FileIO.exist(page_file))
		{

			String link = "http://www.gewara.com/ajax/common/qryComment.xhtml?pageNumber="
					+ k
					+ "&relatedid="
					+ id
					+ "&title=&issue=false&hasMarks=true&tag=movie&isPic=true&isVideo=false&pages=true&maxCount=20&userLogo=";

			contents = read_url(link);
			FileIO.writeString(page_file, contents);
		} else
		{
			contents = FileIO.readAsString(page_file);
		}

		// determine the maximum page number from the pager (only once)
		if (!maxSet)
		{
			Document doc2 = Jsoup.parse(contents);
			Elements es = doc2.select("div#page a");
			Element e = es.get(es.size() - 2);
			max = Integer.parseInt(e.attr("lang"));
			maxSet = true;
		}

	}
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 53, Source file: GewaraCrawler.java

Example 6: run_reviews

import happy.coding.io.Strings; // import the package/class that the method depends on
public void run_reviews(String url) throws Exception
{
	url = url.trim();
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("span[property=v:itemreviewed]").text();
	name = Strings.filterWebString(name, '_');

	String dirPath = dir + name + "/reviews/";
	FileIO.makeDirectory(dirPath);

	// save review pages
	int k = 0;
	url = url + "reviews";
	String link = url;
	while (true)
	{
		k++;
		String page = null;
		String path = dirPath + "page_" + k + ".html";
		if (!FileIO.exist(path))
		{
			page = read_url(link);
			FileIO.writeString(path, page);
			Logs.debug(name + " reviews with page: " + k);
		} else
		{
			page = FileIO.readAsString(path);
		}

		// find the next page link;
		Document doc2 = Jsoup.parse(page);
		Elements es = doc2.select("div#paginator a.next");
		if (es == null || es.size() == 0)
		{
			break;
		} else
		{
			link = url + es.first().attr("href");
		}
	}
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 43, Source file: DoubanCrawler.java

Example 7: run_comments

import happy.coding.io.Strings; // import the package/class that the method depends on
public void run_comments(String url) throws Exception
{
	url = url.trim();
	String html = read_url(url);
	Document doc = Jsoup.parse(html);
	String name = doc.select("span[property=v:itemreviewed]").text();
	name = Strings.filterWebString(name, '_');

	String dirPath = dir + name + "/comments/";
	FileIO.makeDirectory(dirPath);

	// save comment pages
	int k = 0;
	url = url + "comments";
	String link = url;
	while (true)
	{
		k++;
		String page_file = dirPath + "page_" + k + ".html";

		String contents = null;
		if (!FileIO.exist(page_file))
		{
			contents = read_url(link);
			FileIO.writeString(page_file, contents);
			Logs.debug(name + " comments with page: " + k);
		} else
		{
			contents = FileIO.readAsString(page_file);
		}

		// find the next page link;
		Document doc2 = Jsoup.parse(contents);
		Elements es = doc2.select("div#paginator a.next");
		if (es == null || es.size() == 0)
		{
			break;
		} else
		{
			link = url + es.first().attr("href");
		}
	}
}
 
Developer ID: 466152112, Project: HappyResearch, Lines of code: 44, Source file: DoubanCrawler.java


Note: The happy.coding.io.Strings.filterWebString examples in this article were compiled by 纯净天空 from open-source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets are drawn from open-source projects contributed by their authors, and copyright remains with the original authors. Please consult each project's license before using or redistributing the code; do not republish without permission.