本文整理汇总了Java中us.codecraft.webmagic.Page.setRawText方法的典型用法代码示例。如果您正苦于以下问题：Java Page.setRawText方法的具体用法？Java Page.setRawText怎么用？Java Page.setRawText使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类us.codecraft.webmagic.Page的用法示例。
在下文中一共展示了Page.setRawText方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: download
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Downloads the page for {@code request} using the HTML returned by
 * {@code casperjs.gatherHtml}.
 *
 * <p>When gathering the HTML throws, the request is handed to
 * {@code addToCycleRetry} if a site is available and its cycle-retry count is
 * positive; otherwise the exception is stored on the request under the
 * {@code "EXCEPTION"} extra, {@code onError} is invoked and {@code null} is
 * returned.
 *
 * @param request the request whose URL is fetched
 * @param task    the owning task; may be {@code null}, in which case no site is consulted
 * @return the populated page, the result of {@code addToCycleRetry}, or
 *         {@code null} when the download failed and is not retried
 */
@Override
public Page download(Request request, Task task) {
    Site site = null;
    if (task != null) {
        site = task.getSite();
    }
    String html;
    try {
        html = casperjs.gatherHtml(new com.gs.spider.model.commons.Request(request.getUrl(), true));
    } catch (Exception e) {
        // site is still null when no task was supplied; without this guard the
        // retry check itself would throw a NullPointerException and hide the
        // original failure from onError.
        if (site != null && site.getCycleRetryTimes() > 0) {
            return addToCycleRetry(request, site);
        }
        request.putExtra("EXCEPTION", e);
        onError(request);
        return null;
    }
    Page page = new Page();
    page.setRawText(html);
    page.setUrl(new PlainText(request.getUrl()));
    page.setRequest(request);
    onSuccess(request);
    return page;
}
示例2: handleResponse
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Converts a received HTTP response into a {@code Page}.
 *
 * <p>The response body is always stored as raw bytes. Unless the request is
 * flagged as binary content, the body is additionally decoded to text using
 * {@code charset}, or, when that is {@code null}, the value returned by
 * {@code getHtmlCharset} for the response's content type and body.
 *
 * @param request      the request that produced the response
 * @param charset      the charset to decode with, or {@code null} to detect it
 * @param httpResponse the response to convert
 * @param task         the owning task (not used by this method)
 * @return a page marked as successfully downloaded
 * @throws IOException if the response body cannot be read or decoded
 */
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
    final byte[] body = IOUtils.toByteArray(httpResponse.getEntity().getContent());

    // A missing Content-Type header is treated as an empty content type.
    final String contentType;
    if (httpResponse.getEntity().getContentType() == null) {
        contentType = "";
    } else {
        contentType = httpResponse.getEntity().getContentType().getValue();
    }

    final Page result = new Page();
    result.setBytes(body);

    if (!request.isBinaryContent()) {
        // Only fall back to detection when the caller did not supply a charset.
        final String effectiveCharset = (charset != null) ? charset : getHtmlCharset(contentType, body);
        result.setCharset(effectiveCharset);
        result.setRawText(new String(body, effectiveCharset));
    }

    result.setUrl(new PlainText(request.getUrl()));
    result.setRequest(request);
    result.setStatusCode(httpResponse.getStatusLine().getStatusCode());
    result.setDownloadSuccess(true);

    if (responseHeader) {
        result.setHeaders(HttpClientUtils.convertHeaders(httpResponse.getAllHeaders()));
    }
    return result;
}
示例3: download
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Returns a mock page whose raw text is read from the classpath resource
 * {@code /html/mock-github.html}; the incoming request and task are ignored.
 *
 * <p>If reading the resource fails with an {@link IOException}, the stack trace
 * is printed and the page is returned without raw text.
 *
 * @param request ignored
 * @param task    ignored
 * @return a page pointing at {@code https://github.com/code4craft/webmagic}
 */
@Override
public Page download(Request request, Task task) {
    Page page = new Page();
    InputStream resourceAsStream = this.getClass().getResourceAsStream("/html/mock-github.html");
    try {
        page.setRawText(IOUtils.toString(resourceAsStream));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the resource stream on every path; the original never closed it.
        IOUtils.closeQuietly(resourceAsStream);
    }
    page.setRequest(new Request("https://github.com/code4craft/webmagic"));
    page.setUrl(new PlainText("https://github.com/code4craft/webmagic"));
    return page;
}
示例4: getMockJsonPage
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Builds a mock page whose raw text is the content of the classpath resource
 * {@code json/mock-githubrepo.json}.
 *
 * @return a page pointing at {@code https://api.github.com/repos/code4craft/webmagic}
 * @throws IOException if the resource cannot be read
 */
public Page getMockJsonPage() throws IOException {
    Page page = new Page();
    // Fully qualified so the block does not rely on an import it cannot see.
    java.io.InputStream resource = PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json");
    try {
        page.setRawText(IOUtils.toString(resource));
    } finally {
        // Release the resource stream on every path; the original never closed it.
        IOUtils.closeQuietly(resource);
    }
    page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
    page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
    return page;
}
示例5: getMockPage
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Builds a mock page whose raw text is the content of the classpath resource
 * {@code html/mock-webmagic.html}.
 *
 * @return a page pointing at {@code http://webmagic.io/list/0}
 * @throws IOException if the resource cannot be read
 */
public Page getMockPage() throws IOException {
    Page page = new Page();
    // Fully qualified so the block does not rely on an import it cannot see.
    java.io.InputStream resource = PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html");
    try {
        page.setRawText(IOUtils.toString(resource));
    } finally {
        // Release the resource stream on every path; the original never closed it.
        IOUtils.closeQuietly(resource);
    }
    page.setRequest(new Request("http://webmagic.io/list/0"));
    page.setUrl(new PlainText("http://webmagic.io/list/0"));
    return page;
}
示例6: testMultiModel_should_not_skip_when_match
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Processes a small HTML page with a {@code ModelPageProcessor} created for
 * {@code ModelFoo} and {@code ModelBar}, and asserts that the page's result
 * items are not flagged as skipped afterwards.
 */
@Test
public void testMultiModel_should_not_skip_when_match() throws Exception {
    final String fooUrl = "http://codecraft.us/foo";

    final Page fooPage = new Page();
    fooPage.setRawText("<div foo='foo'></div>");
    fooPage.setRequest(new Request(fooUrl));
    fooPage.setUrl(PlainText.create(fooUrl));

    final ModelPageProcessor processor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
    processor.process(fooPage);

    assertThat(fooPage.getResultItems().isSkip()).isFalse();
}
示例7: download
import us.codecraft.webmagic.Page; //导入方法依赖的package包/类
/**
 * Returns a canned page with status code 200 whose raw text is {@code html},
 * a value defined outside this method; the incoming request and task are ignored.
 *
 * @param request ignored
 * @param task    ignored
 * @return a page pointing at {@code https://github.com/code4craft/webmagic}
 */
@Override
public Page download(Request request, Task task) {
    final String repoUrl = "https://github.com/code4craft/webmagic";

    final Page canned = new Page();
    canned.setRawText(html);
    canned.setStatusCode(200);
    canned.setRequest(new Request(repoUrl));
    canned.setUrl(new PlainText(repoUrl));
    return canned;
}