This article collects typical usage examples of the feedparser.parse method in Python. If you have been wondering what feedparser.parse does, how to call it, or where it fits in real code, the curated examples below may help. You can also explore the feedparser module further for related usage.
The following presents 15 code examples of feedparser.parse, sorted by popularity by default. Each snippet assumes import feedparser (or from feedparser import parse, where the bare parse() call is used) at module level. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
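Before diving into the examples, here is a minimal sketch of what feedparser.parse accepts and returns (the feed URL below is a placeholder): parse() takes a URL, a local file path, or a raw XML string, and returns a FeedParserDict whose entries attribute lists the individual feed items.

import feedparser

# Minimal sketch; the feed URL is a placeholder.
feed = feedparser.parse("https://example.com/rss.xml")
print(feed.feed.get("title"))          # feed-level metadata
for entry in feed.entries:             # one dict-like object per item
    print(entry.get("title"), entry.get("link"))
if feed.bozo:                          # nonzero when the document was malformed
    print("Parse problem:", feed.bozo_exception)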
Example 1: read_rss_and_tweet
def read_rss_and_tweet(url: str):
    """Read RSS and post feed items as a tweet.

    Parameters
    ----------
    url: str
        URL to RSS feed.
    """
    feed = feedparser.parse(url)
    if feed:
        for item in feed["items"]:
            link = item["link"]
            if is_in_logfile(link, Settings.posted_urls_output_file):
                print("Already posted:", link)
            else:
                post_tweet(message=compose_message(item))
                write_to_logfile(link, Settings.posted_urls_output_file)
                print("Posted:", link)
    else:
        print("Nothing found in feed", url)
Example 2: findImage
def findImage(entry):
    if "description" not in entry:
        return
    soup = bs4.BeautifulSoup(entry.description, "html.parser")
    img = soup.find("img")
    if img:
        img = img["src"]
        if len(img) == 0:
            return
        # If the address is relative, prepend the root URL
        if img[0] == "/":
            p = urllib.parse.urlparse(entry.id)
            img = f"{p.scheme}://{p.netloc}" + img
        return img
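A quick, hedged way to exercise findImage is to hand it one entry from a parsed feed (the feed URL is a placeholder; bs4 and urllib.parse must be imported as in the snippet):

feed = feedparser.parse("https://example.com/rss.xml")
if feed.entries:
    print(findImage(feed.entries[0]))  # absolute image URL, or None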
Example 3: run
def run(self):
    for feed in self.feeds:
        parsed_feed = feedparser.parse(feed)
        for entry in parsed_feed.entries:
            if entryMatches(entry):
                self.n_seen += 1
                # If no ID is provided, use the link as the ID
                if "id" not in entry:
                    entry.id = entry.link
                if entry.id not in self.posted:
                    self.sendTweet(entry)
                    # Bail out if we have reached the max number of tweets
                    if self.throttle > 0 and self.n_tweeted >= self.throttle:
                        print(f"Max number of papers met ({self.throttle}), stopping now")
                        return
Example 4: get_rss_title_and_url
def get_rss_title_and_url(self):
    """Fetch each configured feed's title and URL according to the rules in data.json.

    :return:
    """
    try:
        with open(BASE_PATH + "/core/data.json", "r", encoding="utf-8") as f:
            rss_data = json.load(f)
        for item in rss_data:
            rss = feedparser.parse(requests.get(item['rss']).content)['entries']
            push_new_list = {"source": item, "target": []}
            for it in rss:
                datetime_struct = parser.parse(it['published'])
                published = datetime_struct.strftime("%Y-%m-%d")
                today = time.strftime("%Y-%m-%d")
                if today == published:
                    if item['has_content'] in it['title']:
                        push_new_list["target"].append(it)
            self.news_list.append(push_new_list)
    except Exception as e:
        logger.warning("Failed to fetch RSS titles and URLs: " + str(e))
Example 5: get_wet
def get_wet():
    # Get the weather data
    print("Updating weather for", postcode)
    d = feedparser.parse(url)
    entries = int(len(d['entries']))
    val = " " + d['entries'][0]['title']
    val += " " + d['entries'][1]['title']
    val += " " + d['entries'][2]['title']
    # Tidy & shorten the message for the scroll display
    val = val.replace("Maximum", "Max")
    val = val.replace("Minimum", "Min")
    val = val.replace("Temperature: ", "")
    val = val.replace(u"\u00B0", "")
    val = val.replace(",", "")
    val = val.replace("(", "")
    val = val.replace(")", "")
    return val
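The snippet assumes module-level postcode and url variables pointing at a weather RSS feed whose first three entries are day-by-day forecast titles; a hypothetical setup (both values are placeholders) might be:

postcode = "SW1A 1AA"                    # hypothetical location label
url = "https://example.com/weather/rss"  # placeholder weather feed URL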
Example 6: get_headlines
def get_headlines(self):
    try:
        # Remove all children
        for widget in self.headlinesContainer.winfo_children():
            widget.destroy()
        if news_country_code is None:
            headlines_url = "https://news.google.com/news?ned=us&output=rss"
        else:
            headlines_url = "https://news.google.com/news?ned=%s&output=rss" % news_country_code
        feed = feedparser.parse(headlines_url)
        for post in feed.entries[0:5]:
            headline = NewsHeadline(self.headlinesContainer, post.title)
            headline.pack(side=TOP, anchor=W)
    except Exception as e:
        traceback.print_exc()
        print("Error: %s. Cannot get news." % e)
    self.after(600000, self.get_headlines)  # refresh every 10 minutes
Example 7: _parse
def _parse(data, limit):
    output = []
    feed = feedparser.parse(data)  # Atom + RSS
    for entry in feed["entries"]:
        title = entry.get("title")
        link = entry.get("link")
        if title:
            if link:
                output.append('<li><a href="{}">{}</a></li>'.format(
                    link, escape(title)))
            else:
                output.append('<li>{}</li>'.format(escape(title)))
            if limit and len(output) == limit:
                break
    if output:
        return ["<ul>"] + output + ["</ul>"]
Example 8: get_ios_reviews
def get_ios_reviews(app_id, language, limit=100):
    url = 'https://itunes.apple.com/%(language)srss/customerreviews/id=%(app_id)s/sortBy=mostRecent/xml' % {
        'language': '%s/' % language if language else '', 'app_id': app_id}
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
                            timeout=timeout)
    response.encoding = 'utf-8'  # avoid chardet not guessing correctly
    feed = feedparser.parse(response.text)
    reviews = [Review(
        id=entry.id,
        platform='ios',
        title=entry.title,
        rating=int(entry.im_rating),
        summary=entry.summary,
        url=entry.href,
        author=entry.author,  # author url: entry.href
        date=datetime.datetime.fromtimestamp(mktime(entry.updated_parsed)),
        language=language,
        version=entry.im_version
    ) for entry in feed['entries'][1:1 + limit]]  # skip entry 0, which describes the app itself
    return reviews
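Review and timeout are not defined in the snippet. A hypothetical container compatible with the keyword arguments used above could be a NamedTuple (field names copied from the call site; everything else is an assumption):

import datetime
from typing import NamedTuple

class Review(NamedTuple):
    # Hypothetical container matching the fields used in the snippet.
    id: str
    platform: str
    title: str
    rating: int
    summary: str
    url: str
    author: str
    date: datetime.datetime
    language: str
    version: str

timeout = 10  # the snippet also assumes a module-level timeout (seconds)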
Example 9: process_all_rss
def process_all_rss(reprocess=False):
    """Gather all RSS feeds and articles, then process."""
    sources = list()
    logger.debug("Collecting sources")
    monitors = mongo.db[app.config['MONITORS_COLLECTION']]
    for item in monitors.find({'active': True}):
        sources.append(item['metadata'].get('rss_link'))
    contents = [feedparser.parse(x) for x in sources]
    logger.debug("Processing sources")
    for source in contents:
        for idx, item in enumerate(source.get('entries')):
            response = get_article(item, source['href'], reprocess)
            if response['from_store'] or reprocess:
                continue
            clean_link = response['article']['feed_source']
            monitors.update({'metadata.rss_link': clean_link},
                            {'$set': {'checked': now_time()}})
    correct_counts()
Example 10: remove_url
def remove_url(bot, update, args):
    if len(args) >= 1:
        tg_chat_id = str(update.effective_chat.id)
        tg_feed_link = args[0]
        link_processed = parse(tg_feed_link)
        if link_processed.bozo == 0:
            user_data = sql.check_url_availability(tg_chat_id, tg_feed_link)
            if user_data:
                sql.remove_url(tg_chat_id, tg_feed_link)
                update.effective_message.reply_text("Removed URL from subscription")
            else:
                update.effective_message.reply_text("You haven't subscribed to this URL yet")
        else:
            update.effective_message.reply_text("This link is not an RSS Feed link")
    else:
        update.effective_message.reply_text("URL missing")
Example 11: searchGithub
def searchGithub(word, day, level):
    searchlevel = {
        1: ['in:name,description', 'created'],
        2: ['in:name,description,readme', 'created'],
        3: ['in:name,description', 'pushed'],
        4: ['in:name,description,readme', 'pushed']}
    github_url = 'https://api.github.com/search/repositories?q='
    try:
        if word.find(' ') > 0:
            word = word.replace(' ', '" "')  # str.replace returns a new string; reassign it
        word = urllib.parse.quote('"' + word + '"')
        url = github_url + word + '+' + searchlevel[level][0] + '+' + searchlevel[level][1] + ':>' + day + '&s=updated&o=asc'
        headers = {"Accept": "application/vnd.github.mercy-preview+json"}
        result = requests.get(url, timeout=10, headers=headers)
        statuscode = result.status_code
        resultdata = result.json()
        codes = []
        for a in resultdata['items']:
            name = a['full_name']
            if a['size'] > 0:
                codes.append(name)
        return codes, statuscode
    except Exception:
        return [], -1
Example 12: searchGist
def searchGist(word, day):
    if word.find(' ') > 0:
        word = word.replace(' ', '" "')  # reassign; str.replace does not mutate in place
    word = urllib.parse.quote('"' + word + '"')
    url = 'https://gist.github.com/search?utf8=%E2%9C%93&q=' + word + '+created%3A>' + day + '&ref=searchresults'
    try:
        result = requests.get(url, timeout=10)
        statuscode = result.status_code
        root = lxml.html.fromstring(result.text)
        codes = []
        for a in root.xpath('//div/a[@class="link-overlay"]'):
            # name = a.text_content()
            link = a.get('href')
            codes.append(link)
        return codes, statuscode
    except Exception:
        return [], -1
Example 13: searchGitlab
def searchGitlab(word):
    try:
        if word.find(' ') > 0:
            word = word.replace(' ', '" "')  # reassign; str.replace does not mutate in place
        word = urllib.parse.quote('"' + word + '"')
        url = 'https://gitlab.com/explore/projects?utf8=%E2%9C%93&name=' + word + '&sort=latest_activity_desc'
        result = requests.get(url, timeout=10)
        statuscode = result.status_code
        root = lxml.html.fromstring(result.text)
        codes = []
        for a in root.xpath('//div/a[@class="project"]'):
            # name = a.text_content()
            link = a.get('href')
            codes.append(link)
        return codes, statuscode
    except Exception:
        return [], -1
Example 14: googleCustomSearch
def googleCustomSearch(word, engine_id, api_key):
    try:
        if word.find(' ') > 0:
            word = word.replace(' ', '" "')  # reassign; str.replace does not mutate in place
        word = urllib.parse.quote('"' + word + '"')
        headers = {"content-type": "application/json"}
        url = 'https://www.googleapis.com/customsearch/v1?key=' + api_key + '&rsz=filtered_cse&num=10&hl=en&prettyPrint=false&cx=' + engine_id + '&q=' + word + '&sort=date'
        result = requests.get(url, timeout=10, headers=headers)
        statuscode = result.status_code
        codes = {}
        if statuscode == 200:
            jsondata = result.json()
            if 'items' in jsondata.keys():
                for item in jsondata['items']:
                    name = item['title']
                    sub = item['snippet']
                    link = item['link']
                    codes[link] = [name, sub]
        return codes, statuscode
    except Exception:
        return {}, -1
Example 15: getRSSFeeds
def getRSSFeeds(url, lastpost):
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0'}
        response = requests.get(url, timeout=10, headers=headers)
        updateditems = []
        statuscode = response.status_code
        if statuscode == 200:
            rss = feedparser.parse(response.text)
            result = parseRSS(rss['entries'])
            for entry in result:
                if entry['link'] == lastpost['link']:
                    break
                else:
                    if entry['timestamp'] is not None and lastpost['timestamp'] is not None:
                        if datetime.datetime.strptime(entry['timestamp'], '%Y-%m-%d %H:%M:%S') < datetime.datetime.strptime(lastpost['timestamp'], '%Y-%m-%d %H:%M:%S'):
                            break
                    updateditems.append(entry)
        return updateditems, statuscode
    except Exception:
        return [], -1
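getRSSFeeds depends on a parseRSS helper that normalizes feedparser entries into dicts with 'link' and 'timestamp' keys. A minimal hypothetical version, assuming the '%Y-%m-%d %H:%M:%S' timestamp format used above:

import time

def parseRSS(entries):
    # Hypothetical helper: map entries to {'link', 'timestamp'} dicts.
    result = []
    for entry in entries:
        parsed = entry.get('published_parsed') or entry.get('updated_parsed')
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', parsed) if parsed else None
        result.append({'link': entry.get('link'), 'timestamp': timestamp})
    return result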