本文整理汇总了 Java 中 com.rometools.rome.feed.synd.SyndContent.getValue 方法的典型用法代码示例。如果您正苦于以下问题：Java SyndContent.getValue 方法的具体用法？Java SyndContent.getValue 怎么用？Java SyndContent.getValue 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 com.rometools.rome.feed.synd.SyndContent 的用法示例。
在下文中一共展示了SyndContent.getValue方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: sendMessageForFeedEntry
import com.rometools.rome.feed.synd.SyndContent; //导入方法依赖的package包/类
/**
 * Builds a MOTECH event describing one feed entry and passes it to the event relay.
 *
 * <p>The event parameters are the feed URL, the entry's published and updated dates, the
 * value of the entry's first content element ({@code raw_content}) and the result of
 * {@code extractContent} applied to that value ({@code extracted_content}). When the entry
 * has no content both content parameters are empty strings; {@code extracted_content} is
 * also an empty string when the extraction result is blank.
 *
 * @param url   URL of the feed, stored under the {@code url} key
 * @param entry feed entry to report
 * @param regex pattern forwarded to {@code extractContent}
 */
private void sendMessageForFeedEntry(String url, SyndEntry entry, String regex) {
    String rawContent = "";
    String extractedContent = "";

    if (entry.getContents() == null || entry.getContents().isEmpty()) {
        LOGGER.warn("NULL content for entry {}", entry.getUri());
    } else {
        if (entry.getContents().size() > 1) {
            LOGGER.warn("More than one (actually {}) content element for this entry! Discarding all but the first.", entry.getContents().size());
        }
        // Only the first content element contributes to the event.
        SyndContent firstContent = entry.getContents().get(0);
        rawContent = firstContent.getValue();
        String candidate = extractContent(rawContent, regex);
        // A blank extraction result leaves the empty-string default in place.
        extractedContent = StringUtils.isNotBlank(candidate) ? candidate : extractedContent;
    }

    Map<String, Object> parameters = new HashMap<>();
    parameters.put("url", url);
    parameters.put("published_date", entry.getPublishedDate());
    parameters.put("updated_date", entry.getUpdatedDate());
    parameters.put("raw_content", rawContent);
    parameters.put("extracted_content", extractedContent);

    MotechEvent event = new MotechEvent(Constants.FEED_CHANGE_MESSAGE, parameters);
    LOGGER.debug("sending message {}", event);
    eventRelay.sendEventMessage(event);
}
示例2: nextTuple
import com.rometools.rome.feed.synd.SyndContent; //导入方法依赖的package包/类
/**
 * Polls the subreddit comments feed and emits one tuple per comment that is not yet in
 * {@code history}.
 *
 * <p>The feed is fetched at most once every 30 seconds; calls made sooner return
 * immediately. Any failure while fetching or parsing the feed is rethrown wrapped in a
 * {@link RuntimeException}. Every entry that is not a duplicate is added to
 * {@code history}, whether or not it had content to emit.
 */
public void nextTuple() {
    long curtime = System.currentTimeMillis();

    // Storm calls nextTuple() continuously in a loop. Returning early when the last fetch
    // was less than 30 seconds ago lets Storm do other work, such as acking processed messages.
    boolean fetchedBefore = this.lastFetchTimestamp != 0;
    if (fetchedBefore && curtime - this.lastFetchTimestamp < 30000) {
        return;
    }

    LOG.info("Fetching comments for " + subreddit + " at " + curtime);

    SyndFeedInput feedInput = new SyndFeedInput();
    final SyndFeed feed;
    try {
        feed = feedInput.build(new XmlReader(this.subredditCommentsfeedURL));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.lastFetchTimestamp = System.currentTimeMillis();
    LOG.info("Fetched " + feed.getEntries().size() + " comments for " + subreddit + " at " + this.lastFetchTimestamp);

    history.startBatch();

    for (SyndEntry item : feed.getEntries()) {
        String commentId = item.getUri();

        if (history.contains(commentId)) {
            LOG.info("Skip dupe " + subreddit + ":" + commentId);
            continue;
        }

        List<SyndContent> bodies = item.getContents();
        if (bodies == null || bodies.isEmpty()) {
            // Nothing to emit, but the entry is still recorded as seen.
            history.add(commentId);
            continue;
        }

        // An entry link has the form
        //   /r/[SUBREDDIT]/comments/[STORY-ID]/[STORY-PATH]/[COMMENT-ID]
        // The story URL is the link without the trailing [COMMENT-ID] segment, and the
        // story id is taken from the fifth "/"-separated piece of that URL.
        // NOTE(review): index 4 only matches a link that starts with "/r/..." as described
        // above; confirm the feed never delivers absolute URLs.
        String link = item.getLink();
        String storyURL = link.substring(0, link.lastIndexOf('/'));
        String storyId = storyURL.split("/")[4];

        // An entry title has the form "[AUTHOR] on [STORY TITLE]"; the prefix is cut by length.
        // NOTE(review): assumes the title really starts with "<author> on "; verify.
        String title = item.getTitle();
        String titlePrefix = item.getAuthor() + " on ";
        String storyTitle = title.substring(titlePrefix.length());

        // Strip all HTML, then all punctuation, from the first content element.
        String comment = Jsoup.clean(bodies.get(0).getValue(), Whitelist.none())
                .replaceAll("\\p{Punct}", "");

        LOG.info("Emit {}:{}:{}:{}:{}:[{}]", subreddit, storyId, storyURL, storyTitle, commentId, comment);

        Values tuple = new Values(subreddit, storyId, storyURL, storyTitle, commentId, comment,
                this.lastFetchTimestamp);
        collector.emit(tuple, commentId);

        history.add(commentId);
    }
}
示例3: parseRss
import com.rometools.rome.feed.synd.SyndContent; //导入方法依赖的package包/类
/**
 * Fetches and parses one RSS feed and returns the articles that do not exist yet.
 *
 * <p>Entries for which {@code isExistArticle(link, createdAt)} is true are skipped. For
 * every remaining entry an {@link ArticleEntityBean} is filled from the entry and from
 * {@code rss}. The article's {@code publishedAt} is {@code tomorrow} when the current time
 * is past today's {@code fix_time} hour (read from {@code Conf}), otherwise {@code today}.
 * When the entry has a description, {@code rawHTML} is that description with every line
 * feed replaced by {@code rss.replaceCR} (if configured).
 *
 * @param rss description of the RSS feed to crawl
 * @return the articles of the feed that are not stored yet; never {@code null}
 * @throws FeedException      if the feed cannot be parsed
 * @throws IOException        if an I/O error occurs
 * @throws URISyntaxException if {@code rss.url} is not a valid URI
 * @throws CrawlerException   if no feed could be built for {@code rss}
 */
@SuppressWarnings("deprecation")
public ArrayList<ArticleEntityBean> parseRss(RssEntityBean rss) throws IOException, FeedException,
        URISyntaxException, CrawlerException {
    log.info("start feed parse : " + rss.url);

    SyndFeed feed = buildSyndFeed(new URI(rss.url));
    if (feed == null) {
        // NOTE(review): the message says "url" but logs rss.site; confirm which is intended.
        log.warn("feed parse error url : " + rss.site);
        throw new CrawlerException();
    }

    ArrayList<ArticleEntityBean> result = new ArrayList<ArticleEntityBean>();
    TimeZone tz = TimeZone.getTimeZone("GMT");

    List<SyndEntry> entries = feed.getEntries();
    log.info("feed entries count : " + entries.size());

    for (SyndEntry entry : entries) {
        String link = entry.getLink();
        DateTime createdAt = new DateTime(entry.getPublishedDate(), tz);
        String createdAtStr = Util.formatIsoDate(createdAt);
        String title = org.apache.commons.lang.StringEscapeUtils.unescapeXml(entry.getTitle());
        log.info("article : " + title + " / " + createdAtStr + " / " + link);

        if (isExistArticle(link, createdAt)) {
            log.info("-> article already exists");
            continue;
        }
        log.info("-> article not exists");

        ArticleEntityBean article = new ArticleEntityBean();
        article.link = link;
        article.title = title;
        article.auther = entry.getAuthor();
        article.url = entry.getUri();
        article.createdAt = createdAt;

        // Articles seen after today's cut-off hour are scheduled for tomorrow.
        String fixHour = Conf.getValue("fix_time");
        Date now = new Date();
        Date fixDate = new Date(now.getYear(), now.getMonth(), now.getDate(), Integer.parseInt(fixHour), 0);
        if (now.getTime() > fixDate.getTime()) {
            article.publishedAt = tomorrow;
        } else {
            article.publishedAt = today;
        }

        article.site = rss.site;
        article.type = rss.type;

        article.description = null;
        SyndContent sc = entry.getDescription();
        if (sc != null) {
            article.description = sc.getValue();
            if (rss.replaceCR != null && article.description != null) {
                // Literal replacement: replaceAll() would interpret '$' and '\' inside
                // rss.replaceCR as group references / escapes.
                article.rawHTML = article.description.replace("\n", rss.replaceCR);
            } else {
                article.rawHTML = article.description;
            }
        }

        article.tags = rss.defaultTag;
        result.add(article);
    }
    return result;
}
示例4: doParseXML
import com.rometools.rome.feed.synd.SyndContent; //导入方法依赖的package包/类
/**
 * Parses a feed document with ROME and logs every entry at debug level.
 *
 * <p>NOTE(review): no {@code FeedParseBo} is populated here; the method always returns
 * {@code null}, and {@code feedId} and {@code lastedSyncDate} are not read.
 *
 * @param feedId         identifier of the feed (currently unused)
 * @param lastedSyncDate date of the last synchronisation (currently unused)
 * @param xml            the feed document as text
 * @return always {@code null}
 * @throws Exception any failure raised while parsing; it is logged and rethrown unchanged
 */
@Override
protected FeedParseBo doParseXML(String feedId, Date lastedSyncDate, String xml) throws Exception {
    FeedParseBo feedParseBo = null;
    StringReader sr = null;
    try {
        // The text is handed to the parser as-is: a String is already decoded, and
        // re-encoding it as UTF-8 and decoding with the platform charset can corrupt it.
        sr = new StringReader(xml);
        SyndFeedInput input = new SyndFeedInput();
        // The SyndFeed holds everything contained in the feed document.
        SyndFeed syndFeed = input.build(new InputSource(sr));
        // Walk the feed's entries.
        for (SyndEntry entry : syndFeed.getEntries()) {
            logger.debug("entry:" + StringUtils.toJSONString(entry));
        }
    } catch (Exception e) {
        logger.error("RSSParser.doParseByRome is error.", e);
        throw e;
    } finally {
        if (sr != null) {
            sr.close();
        }
    }
    return feedParseBo;
}