本文整理汇总了 Java 中 us.codecraft.webmagic.ResultItems.get 方法的典型用法代码示例。如果您正苦于以下问题:Java ResultItems.get 方法的具体用法?Java ResultItems.get 怎么用?Java ResultItems.get 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 us.codecraft.webmagic.ResultItems 的用法示例。
在下文中一共展示了ResultItems.get方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Stores each {@code IndustryInfo} found under the {@code "industryInfos"} key
 * through {@code industryInfoDao}.
 *
 * <p>A failure on one element does not stop the remaining elements from being
 * stored: {@code DataIntegrityViolationException} is silently skipped
 * (NOTE(review): presumably duplicate rows — confirm), and any other exception
 * has its stack trace printed.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    List<IndustryInfo> infos = resultItems.get("industryInfos");
    if (infos == null || infos.isEmpty()) {
        // Nothing was extracted for this page.
        return;
    }
    for (IndustryInfo info : infos) {
        try {
            industryInfoDao.add(info);
        } catch (Exception e) {
            // Integrity violations are deliberately ignored; everything else is reported.
            if (!(e instanceof DataIntegrityViolationException)) {
                e.printStackTrace();
            }
        }
    }
}
示例2: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Indexes the crawled page unless a document with the same URL is already
 * present in the index.
 *
 * <p>The page is first looked up with a match query on its {@code url} field;
 * only when that query returns zero hits is the page indexed, using the MD5
 * hash of its URL as the document id. Indexing failures are logged together
 * with the exception so the stack trace and cause are not lost.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    Webpage webpage = convertResultItems2Webpage(resultItems);
    SearchRequestBuilder searchRequestBuilder = client.prepareSearch(INDEX_NAME)
            .setTypes(TYPE_NAME)
            .setQuery(QueryBuilders.matchQuery("url", webpage.getUrl()));
    SearchResponse response = searchRequestBuilder.execute().actionGet();
    if (response.getHits().totalHits() != 0) {
        // A document with this URL already exists; nothing to do.
        return;
    }
    try {
        client.prepareIndex(INDEX_NAME, TYPE_NAME)
                .setId(Hashing.md5().hashString(webpage.getUrl(), Charset.forName("utf-8")).toString())
                .setSource(gson.toJson(webpage))
                .get();
    } catch (Exception e) {
        // Pass the throwable as well so the logger records the full stack trace.
        LOG.error("索引 Webpage 出错," + e.getLocalizedMessage(), e);
    }
}
示例3: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Indexes the crawled page, using the SHA-256 hash of its URL as the document id.
 *
 * <p>Indexing failures are logged together with the exception so the stack
 * trace and cause are not lost.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    Webpage page = convertToWebpage(resultItems);
    /*
     * Guava 22.0 no longer supports md5();
     * use sha256() for stronger hashing,
     * or goodFastHash() for speed.
     */
    try {
        client.prepareIndex(INDEX_NAME, TYPE_NAME)
                .setId(Hashing.sha256().hashString(page.getUrl(), Charset.forName("utf-8")).toString())
                .setSource(GSON.toJson(page), XContentType.JSON)
                .get();
    } catch (Exception e) {
        // Pass the throwable as well so the logger records the full stack trace.
        LOG.error("索引Webpage出错, 由于 " + e.getLocalizedMessage(), e);
    }
}
示例4: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Hands extracted page-model objects to the pipeline registered for their class.
 *
 * <p>For every entry of {@code pageModelPipelines}, the value stored in
 * {@code resultItems} under the class's canonical name is looked up. When the
 * class carries an {@code ExtractBy} annotation whose {@code multi()} is true,
 * the value is treated as a list and each element is processed separately;
 * otherwise the value is processed as a single object.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task, forwarded to each pipeline
 */
@Override
public void process(ResultItems resultItems, Task task) {
    for (Map.Entry<Class, PageModelPipeline> entry : pageModelPipelines.entrySet()) {
        Class modelClass = entry.getKey();
        PageModelPipeline pipeline = entry.getValue();

        Object extracted = resultItems.get(modelClass.getCanonicalName());
        if (extracted == null) {
            // No result for this model class on the current page.
            continue;
        }

        Annotation extractBy = modelClass.getAnnotation(ExtractBy.class);
        boolean multi = extractBy != null && ((ExtractBy) extractBy).multi();
        if (!multi) {
            pipeline.process(extracted, task);
            continue;
        }

        // Multi-extraction: the stored value is a list of model objects.
        List<Object> items = (List<Object>) extracted;
        for (Object item : items) {
            pipeline.process(item, task);
        }
    }
}
示例5: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Writes all result fields of the page as JSON to a file named after the MD5
 * hex digest of the request URL ({@code path + md5 + ".json"}).
 *
 * <p>Nothing is written when the result items contain no {@code "url"} field.
 * The writer is opened in a try-with-resources statement so it is closed even
 * if serialisation or writing throws; the original closed it only on the
 * success path and could leak the file handle.
 *
 * <p>NOTE(review): {@code FileWriter} uses the platform default charset here —
 * consider an explicit UTF-8 writer.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    if (resultItems.get("url") == null) {
        return;
    }
    try (PrintWriter printWriter = new PrintWriter(new FileWriter(
            this.getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".json")))) {
        printWriter.write(JSON.toJSONString(resultItems.getAll()));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例6: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Stores each {@code Industry} found under the {@code "industries"} key
 * through {@code industryDao}.
 *
 * <p>Does nothing when the key is absent or the list is empty. Exceptions
 * thrown by the DAO are not caught here.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    List<Industry> industries = resultItems.get("industries");
    if (industries == null || industries.isEmpty()) {
        return;
    }
    for (Industry item : industries) {
        industryDao.add(item);
    }
}
示例7: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Indexes the crawled page, using the MD5 hash of its URL as the document id.
 *
 * <p>Indexing failures are logged together with the exception so the stack
 * trace and cause are not lost.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    Webpage webpage = convertResultItems2Webpage(resultItems);
    try {
        client.prepareIndex(INDEX_NAME, TYPE_NAME)
                .setId(Hashing.md5().hashString(webpage.getUrl(), Charset.forName("utf-8")).toString())
                .setSource(gson.toJson(webpage))
                .get();
    } catch (Exception e) {
        // Pass the throwable as well so the logger records the full stack trace.
        LOG.error("索引 Webpage 出错," + e.getLocalizedMessage(), e);
    }
}
示例8: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Saves the {@code Html} object stored under the {@code "html"} key via
 * {@code htmlService} and logs the saved URL.
 *
 * <p>Does nothing when no such object was extracted or when
 * {@code htmlService} is not set.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    Html html = resultItems.get("html");
    if (html == null || htmlService == null) {
        return;
    }
    htmlService.save(html);
    logger.info("### 保存:" + html.getUrl());
}
示例9: process
import us.codecraft.webmagic.ResultItems; //导入方法依赖的package包/类
/**
 * Forwards the extracted object(s) for {@code clazz} to {@code classPipeline}.
 *
 * <p>The value is looked up in {@code resultItems} under the canonical name of
 * {@code clazz}. When {@code clazz} carries an {@code ExtractBy} annotation
 * whose {@code multi()} is true, the value is treated as a list and each
 * element is processed separately; otherwise it is processed as a single
 * object. The method is {@code synchronized} on this pipeline instance.
 *
 * @param resultItems extraction results of the current page
 * @param task        the running crawl task, forwarded to the class pipeline
 */
@Override
public synchronized void process(ResultItems resultItems, Task task) {
    Object extracted = resultItems.get(clazz.getCanonicalName());
    if (extracted == null) {
        // No result for this model class on the current page.
        return;
    }

    Annotation extractBy = clazz.getAnnotation(ExtractBy.class);
    boolean multi = extractBy != null && ((ExtractBy) extractBy).multi();
    if (!multi) {
        classPipeline.process((T) extracted, task);
        return;
    }

    // Multi-extraction: the stored value is a list of model objects.
    List<Object> items = (List<Object>) extracted;
    for (Object item : items) {
        classPipeline.process((T) item, task);
    }
}