本文整理汇总了 Java 中 happy.coding.io.Strings.filterWebString 方法的典型用法代码示例。如果您正苦于以下问题：Java Strings.filterWebString 方法的具体用法？Java Strings.filterWebString 怎么用？Java Strings.filterWebString 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 happy.coding.io.Strings 的用法示例。
在下文中一共展示了 Strings.filterWebString 方法的 7 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: crawl_web_pages
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Fetches every URL listed in {@code ./src/main/resources/mtime.txt} and stores the raw HTML
 * of each page as {@code <dir><name>/<name>.html}.
 *
 * <p>{@code name} is the text of the {@code span[property=v:itemreviewed]} selection after it
 * has been passed through {@code Strings.filterWebString(name, '_')} (presumably making it
 * safe to use as a file name - confirm against happy.coding.io.Strings). {@code dir} is
 * declared outside this method.
 *
 * @throws Exception whatever the file, network or parsing helpers raise; nothing is caught here
 */
public void crawl_web_pages() throws Exception
{
    final String urlListPath = "./src/main/resources/mtime.txt";
    List<String> pageUrls = FileIO.readAsList(urlListPath);

    for (String pageUrl : pageUrls)
    {
        String pageSource = URLReader.read(pageUrl);

        // Title of the reviewed item, filtered with '_' as the replacement character.
        String title = Strings.filterWebString(
                Jsoup.parse(pageSource).select("span[property=v:itemreviewed]").text(), '_');

        // One directory per item; the page itself is saved inside it under the same name.
        String targetDir = dir + title + "/";
        FileIO.makeDirectory(targetDir);
        FileIO.writeString(targetDir + title + ".html", pageSource);
    }
}
示例2: crawl_web_pages
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Fetches the page at {@code url} and stores its raw HTML as {@code <dir><name>/<name>.html}.
 *
 * <p>{@code name} is the text of the first {@code div.detail_head_name h1} element after it
 * has been passed through {@code Strings.filterWebString(name, '_')}. {@code dir} and
 * {@code read_url} are declared outside this method.
 *
 * <p>NOTE(review): the result of {@code first()} is dereferenced without a null check, so a
 * page without a matching heading presumably fails here - confirm against the jsoup version
 * in use.
 *
 * @param url address of the page to download
 * @throws Exception whatever the network, parsing or file helpers raise; nothing is caught here
 */
public void crawl_web_pages(String url) throws Exception
{
    String pageSource = read_url(url);

    String heading = Jsoup.parse(pageSource).select("div.detail_head_name h1").first().text();
    String title = Strings.filterWebString(heading, '_');

    // One directory per item; the page itself is saved inside it under the same name.
    String targetDir = dir + title + "/";
    FileIO.makeDirectory(targetDir);
    FileIO.writeString(targetDir + title + ".html", pageSource);
}
示例3: run_web_pages
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Fetches the page at {@code url} and stores its raw HTML as {@code <dir><name>/<name>.html}.
 *
 * <p>{@code name} is the text of the {@code span[property=v:itemreviewed]} selection after it
 * has been passed through {@code Strings.filterWebString(name, '_')}. {@code dir} and
 * {@code read_url} are declared outside this method.
 *
 * @param url address of the page to download
 * @throws Exception whatever the network, parsing or file helpers raise; nothing is caught here
 */
public void run_web_pages(String url) throws Exception
{
    String pageSource = read_url(url);

    String rawTitle = Jsoup.parse(pageSource).select("span[property=v:itemreviewed]").text();
    String title = Strings.filterWebString(rawTitle, '_');

    // One directory per item; the page itself is saved inside it under the same name.
    String targetDir = dir + title + "/";
    FileIO.makeDirectory(targetDir);
    FileIO.writeString(targetDir + title + ".html", pageSource);
}
示例4: run_ratings
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Downloads the paginated rating pages of the item at {@code url} into
 * {@code <dir><name>/ratings/}.
 *
 * <p>Page {@code i} (counting from 0) is requested from
 * {@code url + "collections?start=" + (i * 20)} and written to {@code page_<i + 1>.html}.
 * Every page is fetched and written on each run; there is no check for files that already
 * exist. The loop ends after the first page on which the selector
 * {@code div#collections_tab span.next} matches nothing.
 *
 * <p>{@code name} is the text of the {@code span[property=v:itemreviewed]} selection passed
 * through {@code Strings.filterWebString(name, '_')}. {@code dir} and {@code read_url} are
 * declared outside this method.
 *
 * @param url address of the item page; the suffix is appended directly, so it presumably
 *            has to end with {@code /} - confirm against callers
 * @throws Exception whatever the network, parsing or file helpers raise; nothing is caught here
 */
public void run_ratings(String url) throws Exception
{
    String itemPage = read_url(url);
    String rawTitle = Jsoup.parse(itemPage).select("span[property=v:itemreviewed]").text();
    String title = Strings.filterWebString(rawTitle, '_');

    String ratingsDir = dir + title + "/ratings/";
    FileIO.makeDirectory(ratingsDir);

    // Walk the rating pages until one no longer offers a "next" marker.
    int pageNo = 0;
    boolean hasNext;
    do
    {
        // The start offset uses the zero-based index; the file name uses the one-based one.
        String pageUrl = url + "collections?start=" + (pageNo * 20);
        String pageSource = read_url(pageUrl);
        pageNo++;

        FileIO.writeString(ratingsDir + "page_" + pageNo + ".html", pageSource);
        Logs.debug("Current processing page: " + pageNo);

        Elements nextMarkers = Jsoup.parse(pageSource).select("div#collections_tab span.next");
        hasNext = nextMarkers != null && nextMarkers.size() != 0;
    } while (hasNext);
}
示例5: crawl_comments
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Downloads the comment pages of the movie at {@code url} into {@code <dir><name>/comments/}.
 *
 * <p>{@code name} is the text of the first {@code div.detail_head_name h1} element passed
 * through {@code Strings.filterWebString(name, '_')}. The related id is everything after the
 * last {@code /} in the {@code href} of the first {@code #detail_nav li a} link. {@code dir}
 * and {@code read_url} are declared outside this method.
 *
 * <p>Comment pages are requested from the {@code qryComment.xhtml} endpoint with a zero-based
 * {@code pageNumber} and stored as {@code page_<pageNumber + 1>.html}. A page file that
 * already exists is read back from disk instead of being fetched again.
 *
 * <p>The index of the last page is read once, from the {@code lang} attribute of the
 * second-to-last {@code div#page a} link of the first page processed. When that page has
 * fewer than two such links, it is treated as the only page instead of failing with an
 * index-out-of-bounds error.
 *
 * <p>NOTE(review): both {@code first()} results are dereferenced without a null check, so a
 * detail page lacking the heading or the navigation link presumably fails here - confirm.
 *
 * @param url address of the movie detail page
 * @throws Exception whatever the network, parsing or file helpers raise, including a
 *                   {@code NumberFormatException} for a non-numeric {@code lang} attribute
 */
public void crawl_comments(String url) throws Exception
{
    String html = read_url(url);
    Document doc = Jsoup.parse(html);
    String name = doc.select("div.detail_head_name h1").first().text();
    name = Strings.filterWebString(name, '_');
    String val = doc.select("#detail_nav li a").first().attr("href");
    String id = val.substring(val.lastIndexOf("/") + 1);
    String dirPath = dir + name + "/comments/";
    FileIO.makeDirectory(dirPath);

    // save comment pages; max is only a placeholder until the first page has been inspected
    int max = 1;
    boolean maxSet = false;
    for (int k = 0; k <= max; k++)
    {
        String page_file = dirPath + "page_" + (k + 1) + ".html";
        Logs.debug(name + " comments with page: " + (k + 1) + "/" + (max + 1));

        String contents = null;
        if (!FileIO.exist(page_file))
        {
            String link = "http://www.gewara.com/ajax/common/qryComment.xhtml?pageNumber="
                    + k
                    + "&relatedid="
                    + id
                    + "&title=&issue=false&hasMarks=true&tag=movie&isPic=true&isVideo=false&pages=true&maxCount=20&userLogo=";
            contents = read_url(link);
            FileIO.writeString(page_file, contents);
        } else
        {
            contents = FileIO.readAsString(page_file);
        }

        // find the maximum page num (only once, from the first page processed)
        if (!maxSet)
        {
            Document doc2 = Jsoup.parse(contents);
            Elements es = doc2.select("div#page a");
            if (es.size() >= 2)
            {
                // the second-to-last pager link carries the last page index in "lang"
                Element e = es.get(es.size() - 2);
                max = Integer.parseInt(e.attr("lang"));
            } else
            {
                // fewer than two pager links: stop after the current page
                max = k;
            }
            maxSet = true;
        }
    }
}
示例6: run_reviews
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Downloads the paginated review pages of the item at {@code url} into
 * {@code <dir><name>/reviews/}.
 *
 * <p>The first page is {@code url.trim() + "reviews"}. Each following page is that same
 * address with the {@code href} of the first {@code div#paginator a.next} link appended. Pages
 * are stored as {@code page_<n>.html} with {@code n} starting at 1. A page file that already
 * exists is read back from disk instead of being fetched again. The loop ends on the first
 * page without a {@code div#paginator a.next} link.
 *
 * <p>{@code name} is the text of the {@code span[property=v:itemreviewed]} selection passed
 * through {@code Strings.filterWebString(name, '_')}. {@code dir} and {@code read_url} are
 * declared outside this method.
 *
 * @param url address of the item page; surrounding whitespace is trimmed before use
 * @throws Exception whatever the network, parsing or file helpers raise; nothing is caught here
 */
public void run_reviews(String url) throws Exception
{
    String itemUrl = url.trim();
    String itemPage = read_url(itemUrl);
    String rawTitle = Jsoup.parse(itemPage).select("span[property=v:itemreviewed]").text();
    String name = Strings.filterWebString(rawTitle, '_');

    String reviewsDir = dir + name + "/reviews/";
    FileIO.makeDirectory(reviewsDir);

    // save review pages, following the "next" link until there is none
    String reviewsUrl = itemUrl + "reviews";
    String nextLink = reviewsUrl;
    for (int pageNo = 1; nextLink != null; pageNo++)
    {
        String pageFile = reviewsDir + "page_" + pageNo + ".html";

        String pageSource;
        if (FileIO.exist(pageFile))
        {
            // already downloaded: reuse the stored copy
            pageSource = FileIO.readAsString(pageFile);
        } else
        {
            pageSource = read_url(nextLink);
            FileIO.writeString(pageFile, pageSource);
            Logs.debug(name + " reviews with page: " + pageNo);
        }

        // a null link ends the loop; otherwise the href is appended to the reviews address
        Elements nextAnchors = Jsoup.parse(pageSource).select("div#paginator a.next");
        boolean lastPage = nextAnchors == null || nextAnchors.size() == 0;
        nextLink = lastPage ? null : reviewsUrl + nextAnchors.first().attr("href");
    }
}
示例7: run_comments
import happy.coding.io.Strings; //导入方法依赖的package包/类
/**
 * Downloads the paginated comment pages of the item at {@code url} into
 * {@code <dir><name>/comments/}.
 *
 * <p>The first page is {@code url.trim() + "comments"}. Each following page is that same
 * address with the {@code href} of the first {@code div#paginator a.next} link appended. Pages
 * are stored as {@code page_<n>.html} with {@code n} starting at 1. A page file that already
 * exists is read back from disk instead of being fetched again. The loop ends on the first
 * page without a {@code div#paginator a.next} link.
 *
 * <p>{@code name} is the text of the {@code span[property=v:itemreviewed]} selection passed
 * through {@code Strings.filterWebString(name, '_')}. {@code dir} and {@code read_url} are
 * declared outside this method.
 *
 * @param url address of the item page; surrounding whitespace is trimmed before use
 * @throws Exception whatever the network, parsing or file helpers raise; nothing is caught here
 */
public void run_comments(String url) throws Exception
{
    String itemUrl = url.trim();
    String itemPage = read_url(itemUrl);
    String rawTitle = Jsoup.parse(itemPage).select("span[property=v:itemreviewed]").text();
    String name = Strings.filterWebString(rawTitle, '_');

    String commentsDir = dir + name + "/comments/";
    FileIO.makeDirectory(commentsDir);

    // save comment pages, following the "next" link until there is none
    String commentsUrl = itemUrl + "comments";
    String nextLink = commentsUrl;
    int pageNo = 0;
    while (nextLink != null)
    {
        pageNo++;
        String pageFile = commentsDir + "page_" + pageNo + ".html";

        String pageSource;
        if (FileIO.exist(pageFile))
        {
            // already downloaded: reuse the stored copy
            pageSource = FileIO.readAsString(pageFile);
        } else
        {
            pageSource = read_url(nextLink);
            FileIO.writeString(pageFile, pageSource);
            Logs.debug(name + " comments with page: " + pageNo);
        }

        // a null link ends the loop; otherwise the href is appended to the comments address
        Elements nextAnchors = Jsoup.parse(pageSource).select("div#paginator a.next");
        if (nextAnchors != null && nextAnchors.size() != 0)
        {
            nextLink = commentsUrl + nextAnchors.first().attr("href");
        } else
        {
            nextLink = null;
        }
    }
}