本文整理汇总了Java中com.rometools.rome.feed.synd.SyndEntry.getUri方法的典型用法代码示例。如果您正苦于以下问题:Java SyndEntry.getUri方法的具体用法?Java SyndEntry.getUri怎么用?Java SyndEntry.getUri使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.rometools.rome.feed.synd.SyndEntry的用法示例。
在下文中一共展示了SyndEntry.getUri方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: createRSSItem
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Builds the RSS item for a syndication entry on top of the parent converter's result.
 *
 * <p>The entry's description, its first content element and its URI are copied onto the
 * item, each only when present.
 *
 * @param sEntry the syndication entry to convert
 * @return the item produced by the parent converter, enriched with the fields above
 */
@Override
protected Item createRSSItem(final SyndEntry sEntry) {
    final Item rssItem = super.createRSSItem(sEntry);

    final SyndContent summary = sEntry.getDescription();
    if (summary != null) {
        rssItem.setDescription(createItemDescription(summary));
    }

    // Only the first content element of the entry is carried over.
    final List<SyndContent> bodyParts = sEntry.getContents();
    if (Lists.isNotEmpty(bodyParts)) {
        final SyndContent firstPart = bodyParts.get(0);
        rssItem.setContent(createItemContent(firstPart));
    }

    final String entryUri = sEntry.getUri();
    if (entryUri != null) {
        rssItem.setUri(entryUri);
    }

    return rssItem;
}
示例2: ExtLibOPDSEntry
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Creates an OPDS entry from a syndication entry.
 *
 * <p>The entry URI becomes the id; links that {@code mapLink} maps to {@code null} are
 * dropped; every content element and author of the source entry is converted.
 *
 * @param syndEntry the syndication entry to copy data from
 */
public ExtLibOPDSEntry(SyndEntry syndEntry) {
    this.id = syndEntry.getUri();
    this.title = new Res("opds.first.value", syndEntry.getTitle());

    this.links = syndEntry.getLinks()
            .stream()
            .map(ExtLibOPDSEntry::mapLink)
            .filter(Objects::nonNull)
            .collect(Collectors.toList());

    this.updated = syndEntry.getUpdatedDate();

    this.content = Optional.of(syndEntry.getContents()
            .stream()
            .map(part -> new OPDSContent(part.getType(), part.getValue(), null))
            .collect(Collectors.toList()));

    // Authors are created from the name only; the second argument is always empty.
    List<OPDSAuthor> convertedAuthors = syndEntry.getAuthors()
            .stream()
            .map(author -> new ExtLibAuthor(author.getName(), ""))
            .collect(Collectors.toList());
    this.authors = Optional.of(convertedAuthors);
}
示例3: computeEntryTag
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Computes a change-detection tag for a feed entry from its URI and its effective date.
 *
 * <p>The effective date is the entry's updated date, falling back to its published date,
 * and finally to the current time when the feed provides neither.
 *
 * @param entry the entry to compute the tag for
 * @return the digest of {@code "<uri>:<effective date>"} as produced by {@code computeDigest}
 */
private String computeEntryTag(final SyndEntry entry) {
    // Following hash algorithm suggested by Robert Cooper needs to be
    // evaluated in future.
    // int hash = ( entry.getUri() != null ? entry.getUri().hashCode() :
    // entry.getLink().hashCode() ) ^
    // (entry.getUpdatedDate() != null ? entry.getUpdatedDate().hashCode() :
    // entry.getPublishedDate().hashCode()) ^
    // entry.getTitle().hashCode() ^
    // entry.getDescription().hashCode();
    final String id = entry.getUri();

    Date effectiveDate = entry.getUpdatedDate();
    if (effectiveDate == null) {
        effectiveDate = entry.getPublishedDate();
    }
    if (effectiveDate == null) {
        // For misbehaving feeds that set neither the updated date nor the
        // published date we use the current time, which pretty much assures
        // that the entry will be viewed as changed even when it is not.
        effectiveDate = new Date();
    }

    // Use the resolved date: building the key from entry.getUpdatedDate()
    // would ignore the fallbacks above and yield "<id>:null" for such entries.
    final String key = id + ":" + effectiveDate;
    return computeDigest(key);
}
示例4: parseFeed
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Parses a syndication feed and converts its entries into outlinks.
 *
 * <p>For each entry the link is used as target, falling back to the URI when the link is
 * blank; entries with neither are skipped, as are entries rejected by
 * {@code filterOutlink}. When {@code filterHoursSincePub} is not {@code -1}, entries
 * published more than that many hours ago are skipped as well. Title, published date and
 * description are stored in the outlink metadata when available.
 *
 * @param url            URL of the feed document
 * @param content        raw bytes of the feed document
 * @param parentMetadata metadata passed on to {@code filterOutlink}
 * @return the outlinks built from the accepted entries
 * @throws Exception if the feed cannot be parsed or {@code url} is malformed
 */
private List<Outlink> parseFeed(String url, byte[] content,
        Metadata parentMetadata) throws Exception {
    List<Outlink> outlinks = new ArrayList<>();

    SyndFeed parsedFeed = null;
    try (ByteArrayInputStream stream = new ByteArrayInputStream(content)) {
        SyndFeedInput feedInput = new SyndFeedInput();
        parsedFeed = feedInput.build(new InputSource(stream));
    }

    URL sourceUrl = new URL(url);

    for (SyndEntry entry : parsedFeed.getEntries()) {
        // The link can be missing, e.g. when the feed uses a guid instead,
        // so fall back to the URI before giving up on the entry.
        String target = entry.getLink();
        if (StringUtils.isBlank(target)) {
            target = entry.getUri();
        }
        if (StringUtils.isBlank(target)) {
            continue;
        }

        Outlink outlink = filterOutlink(sourceUrl, target, parentMetadata);
        if (outlink == null) {
            continue;
        }

        String entryTitle = entry.getTitle();
        if (StringUtils.isNotBlank(entryTitle)) {
            outlink.getMetadata().setValue("feed.title", entryTitle.trim());
        }

        Date published = entry.getPublishedDate();
        if (published != null) {
            // Filter based on the published date when a maximum age is configured.
            if (filterHoursSincePub != -1) {
                Calendar cutoff = Calendar.getInstance();
                cutoff.add(Calendar.HOUR, -filterHoursSincePub);
                if (published.before(cutoff.getTime())) {
                    LOG.info(
                            "{} has a published date {} which is more than {} hours old",
                            target, published.toString(),
                            filterHoursSincePub);
                    continue;
                }
            }
            outlink.getMetadata().setValue("feed.publishedDate",
                    published.toString());
        }

        SyndContent summary = entry.getDescription();
        String summaryText = (summary == null) ? null : summary.getValue();
        if (StringUtils.isNotBlank(summaryText)) {
            outlink.getMetadata().setValue("feed.description", summaryText);
        }

        outlinks.add(outlink);
    }

    return outlinks;
}
示例5: nextTuple
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Fetches the subreddit comments feed and emits one tuple per comment not yet in
 * {@code history}.
 *
 * <p>The feed is fetched at most once every 30 seconds; calls in between return
 * immediately. A fetch failure is rethrown as a {@link RuntimeException}.
 */
public void nextTuple() {
    // A Spout's nextTuple() is called continuously in a loop by Storm. If the last
    // fetch was less than 30 secs ago there is nothing to do, so just return and let
    // Storm do other things like acking processed messages.
    final long now = System.currentTimeMillis();
    final boolean fetchedBefore = this.lastFetchTimestamp != 0;
    if (fetchedBefore && now - this.lastFetchTimestamp < 30000) {
        return;
    }

    LOG.info("Fetching comments for " + subreddit + " at " + now);

    SyndFeedInput feedInput = new SyndFeedInput();
    SyndFeed feed;
    try {
        feed = feedInput.build(new XmlReader(this.subredditCommentsfeedURL));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.lastFetchTimestamp = System.currentTimeMillis();
    LOG.info("Fetched " + feed.getEntries().size() + " comments for " + subreddit + " at " + this.lastFetchTimestamp);

    history.startBatch();
    for (SyndEntry entry : feed.getEntries()) {
        final String commentId = entry.getUri();
        if (history.contains(commentId)) {
            LOG.info("Skip dupe " + subreddit + ":" + commentId);
            continue;
        }

        // An entry.link has the syntax:
        //   /r/[SUBREDDIT]/comments/[STORY-ID]/[STORY-PATH]/[COMMENT-ID]
        // The story URL is everything except the trailing [COMMENT-ID]; the story ID
        // is the fifth "/"-separated segment of that URL.
        //
        // The story title is extracted from entry.title, which has the syntax:
        //   [AUTHOR] on [STORY TITLE]
        final List<SyndContent> bodies = entry.getContents();
        if (bodies != null && !bodies.isEmpty()) {
            final String entryLink = entry.getLink();
            final String storyURL = entryLink.substring(0, entryLink.lastIndexOf('/'));
            final String storyId = storyURL.split("/")[4];

            final String fullTitle = entry.getTitle();
            final String authorPrefix = entry.getAuthor() + " on ";
            final String storyTitle = fullTitle.substring(authorPrefix.length());

            // Strip all markup first, then every punctuation character.
            final String rawComment = bodies.get(0).getValue();
            final String comment = Jsoup.clean(rawComment, Whitelist.none())
                    .replaceAll("\\p{Punct}", "");

            LOG.info("Emit {}:{}:{}:{}:{}:[{}]", subreddit, storyId, storyURL, storyTitle, commentId, comment);
            collector.emit(
                    new Values(subreddit, storyId, storyURL, storyTitle, commentId, comment, this.lastFetchTimestamp),
                    commentId);
        }

        // Entries without content are remembered too, so they are not looked at again.
        history.add(commentId);
    }
}
示例6: parseRss
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Parses an RSS feed.
 *
 * <p>Entries for which {@code isExistArticle} reports an existing article are skipped;
 * every other entry is converted into an {@code ArticleEntityBean}.
 *
 * @param rss information about a single RSS source
 * @return the link information obtained from parsing
 * @throws FeedException if parsing the feed fails
 * @throws IOException on I/O failure
 * @throws URISyntaxException if the URI is malformed
 * @throws CrawlerException common crawler exception; thrown here when no feed could be built
 */
@SuppressWarnings("deprecation")
public ArrayList<ArticleEntityBean> parseRss(RssEntityBean rss) throws IOException, FeedException,
        URISyntaxException, CrawlerException {
    log.info("start feed parse : " + rss.url);

    final SyndFeed feed = buildSyndFeed(new URI(rss.url));
    if (feed == null) {
        log.warn("feed parse error url : " + rss.site);
        throw new CrawlerException();
    }

    final ArrayList<ArticleEntityBean> articles = new ArrayList<ArticleEntityBean>();
    final TimeZone gmt = TimeZone.getTimeZone("GMT");

    final List<SyndEntry> feedEntries = feed.getEntries();
    log.info("feed entries count : " + feedEntries.size());

    for (SyndEntry entry : feedEntries) {
        final String link = entry.getLink();
        final DateTime createdAt = new DateTime(entry.getPublishedDate(), gmt);
        final String createdAtStr = Util.formatIsoDate(createdAt);
        final String title = org.apache.commons.lang.StringEscapeUtils.unescapeXml(entry.getTitle());
        log.info("article : " + title + " / " + createdAtStr + " / " + link);

        if (isExistArticle(link, createdAt)) {
            log.info("-> article already exists");
            continue;
        }
        log.info("-> article not exists");

        final ArticleEntityBean article = new ArticleEntityBean();
        article.link = link;
        article.title = title;
        article.auther = entry.getAuthor();
        article.url = entry.getUri();
        article.createdAt = createdAt;

        // Once the configured "fix_time" hour of the current day has passed,
        // the article is assigned to tomorrow instead of today.
        final String fixHour = Conf.getValue("fix_time");
        final Date now = new Date();
        final Date fixDate = new Date(now.getYear(), now.getMonth(), now.getDate(), Integer.parseInt(fixHour), 0);
        if (now.after(fixDate)) {
            article.publishedAt = tomorrow;
        } else {
            article.publishedAt = today;
        }

        article.site = rss.site;
        article.type = rss.type;

        final SyndContent summary = entry.getDescription();
        if (summary == null) {
            article.description = null;
        } else {
            article.description = summary.getValue();
            // The raw HTML is the description, with newlines replaced when
            // the source defines a replacement.
            if (rss.replaceCR == null) {
                article.rawHTML = article.description;
            } else {
                article.rawHTML = article.description.replaceAll("\n", rss.replaceCR);
            }
        }

        article.tags = rss.defaultTag;
        articles.add(article);
    }

    return articles;
}
示例7: createRSSItem
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Creates an RSS item from a syndication entry.
 *
 * <p>Modules are cloned; title, link and source are always copied. Foreign markup is
 * copied only when non-empty, and the URI only when it is not {@code null}.
 *
 * @param sEntry the syndication entry to convert
 * @return the newly created item
 */
protected Item createRSSItem(final SyndEntry sEntry) {
    final Item rssItem = new Item();

    rssItem.setModules(ModuleUtils.cloneModules(sEntry.getModules()));
    rssItem.setTitle(sEntry.getTitle());
    rssItem.setLink(sEntry.getLink());

    final List<Element> extraMarkup = sEntry.getForeignMarkup();
    if (!extraMarkup.isEmpty()) {
        rssItem.setForeignMarkup(extraMarkup);
    }

    rssItem.setSource(createSource(sEntry.getSource()));

    final String entryUri = sEntry.getUri();
    if (entryUri != null) {
        rssItem.setUri(entryUri);
    }

    return rssItem;
}
示例8: parse
import com.rometools.rome.feed.synd.SyndEntry; //导入方法依赖的package包/类
/**
 * Parses raw feed XML into a {@code FetchedFeed}.
 *
 * <p>Each entry's guid is its URI, falling back to its link; entries with neither are
 * skipped. After the entries are collected, the feed's last published date, last entry
 * date and average entry interval are derived from them.
 *
 * @param feedUrl URL the feed was fetched from; stored on the feed and used in error messages
 * @param xml     raw bytes of the feed document
 * @return the populated fetched feed
 * @throws FeedException if anything goes wrong while parsing; the cause is preserved
 */
public FetchedFeed parse(String feedUrl, byte[] xml) throws FeedException {
    FetchedFeed result = new FetchedFeed();
    Feed feed = result.getFeed();
    List<FeedEntry> entries = result.getEntries();

    try {
        Charset charset = FeedUtils.guessEncoding(xml);
        String xmlText = FeedUtils.trimInvalidXmlCharacters(new String(xml, charset));
        if (xmlText == null) {
            // Thrown inside the try on purpose: it is wrapped by the catch below like any other failure.
            throw new FeedException("Input string is null for url " + feedUrl);
        }
        xmlText = FeedUtils.replaceHtmlEntitiesWithNumericEntities(xmlText);

        SyndFeed rss = new SyndFeedInput().build(new InputSource(new StringReader(xmlText)));
        handleForeignMarkup(rss);

        result.setTitle(rss.getTitle());
        feed.setPushHub(findHub(rss));
        feed.setPushTopic(findSelf(rss));
        feed.setUrl(feedUrl);
        feed.setLink(rss.getLink());

        for (SyndEntry item : rss.getEntries()) {
            FeedEntry entry = new FeedEntry();

            String guid = item.getUri();
            if (StringUtils.isBlank(guid)) {
                guid = item.getLink();
                if (StringUtils.isBlank(guid)) {
                    // no guid and no link, skip entry
                    continue;
                }
            }

            entry.setGuid(FeedUtils.truncate(guid, 2048));
            entry.setUpdated(validateDate(getEntryUpdateDate(item), true));

            String absoluteUrl = FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink(), feedUrl);
            entry.setUrl(FeedUtils.truncate(absoluteUrl, 2048));
            // if link is empty but guid is used as url
            boolean urlMissing = StringUtils.isBlank(entry.getUrl());
            if (urlMissing && StringUtils.startsWith(entry.getGuid(), "http")) {
                entry.setUrl(entry.getGuid());
            }

            FeedEntryContent content = new FeedEntryContent();
            content.setContent(getContent(item));
            String categoryNames = item.getCategories().stream()
                    .map(c -> c.getName())
                    .collect(Collectors.joining(", "));
            content.setCategories(FeedUtils.truncate(categoryNames, 4096));
            content.setTitle(getTitle(item));
            content.setAuthor(StringUtils.trimToNull(item.getAuthor()));

            // Only the first enclosure of an entry is kept.
            SyndEnclosure firstEnclosure = Iterables.getFirst(item.getEnclosures(), null);
            if (firstEnclosure != null) {
                content.setEnclosureUrl(FeedUtils.truncate(firstEnclosure.getUrl(), 2048));
                content.setEnclosureType(firstEnclosure.getType());
            }

            entry.setContent(content);
            entries.add(entry);
        }

        Date lastEntryDate = null;
        Date publishedDate = validateDate(rss.getPublishedDate(), false);
        if (!entries.isEmpty()) {
            List<Long> sortedTimestamps = FeedUtils.getSortedTimestamps(entries);
            lastEntryDate = new Date(sortedTimestamps.get(0));
            // The feed's published date is replaced when it is missing or
            // earlier than the date taken from the first sorted timestamp.
            if (publishedDate == null || publishedDate.before(lastEntryDate)) {
                publishedDate = lastEntryDate;
            }
        }

        feed.setLastPublishedDate(publishedDate);
        feed.setAverageEntryInterval(FeedUtils.averageTimeBetweenEntries(entries));
        feed.setLastEntryDate(lastEntryDate);
    } catch (Exception e) {
        throw new FeedException(String.format("Could not parse feed from %s : %s", feedUrl, e.getMessage()), e);
    }

    return result;
}