当前位置: 首页>>代码示例>>Python>>正文


Python feedparser.parse方法代码示例

本文整理汇总了Python中feedparser.parse方法的典型用法代码示例。如果您正苦于以下问题:Python feedparser.parse方法的具体用法?Python feedparser.parse怎么用?Python feedparser.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在feedparser的用法示例。


在下文中一共展示了feedparser.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read_rss_and_tweet

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def read_rss_and_tweet(url: str):
    """Fetch an RSS feed and tweet every item that was not posted before.

    Parameters
    ----------
    url: str
        URL to RSS feed.
    """
    parsed = feedparser.parse(url)
    if not parsed:
        print("Nothing found in feed", url)
        return

    log_path = Settings.posted_urls_output_file
    for entry in parsed["items"]:
        entry_link = entry["link"]
        if is_in_logfile(entry_link, log_path):
            print("Already posted:", entry_link)
            continue
        # New item: tweet it, then record the link so it is never repeated.
        post_tweet(message=compose_message(entry))
        write_to_logfile(entry_link, log_path)
        print("Posted:", entry_link)
开发者ID:peterdalle,项目名称:twitterbot,代码行数:22,代码来源:twitterbot.py

示例2: findImage

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def findImage(entry):
    """Extract the first image URL from a feed entry's HTML description.

    Parameters
    ----------
    entry
        A feedparser entry; ``entry.description`` is parsed as HTML and
        ``entry.id`` is used to resolve relative image addresses.

    Returns
    -------
    str or None
        The absolute image URL, or None when no usable image is found.
    """
    if "description" not in entry:
        return

    soup = bs4.BeautifulSoup(entry.description, "html.parser")
    img = soup.find("img")
    if img:
        # .get avoids a KeyError when the <img> tag has no src attribute
        # (the original subscript access crashed on such tags).
        img = img.get("src", "")
        if len(img) == 0:
            return
        # If address is relative, append root URL
        if img[0] == "/":
            p = urllib.parse.urlparse(entry.id)
            img = f"{p.scheme}://{p.netloc}" + img

    return img


# Convert string from HTML to plain text 
开发者ID:fxcoudert,项目名称:PapersBot,代码行数:21,代码来源:papersbot.py

示例3: run

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def run(self):
        """Walk every configured feed and tweet entries not yet posted.

        Increments ``self.n_seen`` for each matching entry and delegates
        posting (and, presumably, ``self.n_tweeted`` bookkeeping — confirm
        in sendTweet) to ``self.sendTweet``.  Returns early once the
        throttle limit is reached.
        """
        for feed in self.feeds:
            parsed_feed = feedparser.parse(feed)
            for entry in parsed_feed.entries:
                # entryMatches (defined elsewhere in the module) decides
                # whether this entry is of interest at all.
                if entryMatches(entry):
                    self.n_seen += 1
                    # If no ID provided, use the link as ID
                    if "id" not in entry:
                        entry.id = entry.link
                    if entry.id not in self.posted:
                        self.sendTweet(entry)
                        # Bail out if we have reached max number of tweets
                        # (throttle <= 0 means "no limit").
                        if self.throttle > 0 and self.n_tweeted >= self.throttle:
                            print(f"Max number of papers met ({self.throttle}), stopping now")
                            return

    # Print statistics of a given run 
开发者ID:fxcoudert,项目名称:PapersBot,代码行数:19,代码来源:papersbot.py

示例4: get_rss_title_and_url

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def get_rss_title_and_url(self):
        """
        Collect today's matching RSS entries (title and URL) per configured rule.

        Reads feed definitions from core/data.json, fetches each feed, keeps
        only entries published today whose title contains the feed's
        ``has_content`` keyword, and appends one result dict per feed to
        ``self.news_list``.
        :return: None (results accumulate in ``self.news_list``)
        """
        try:
            # NOTE(review): the file handle is never closed explicitly;
            # a with-statement would be safer.  Left unchanged here.
            rss_data = json.load(open(BASE_PATH + "/core/data.json", "r", encoding="utf-8"))
            for item in rss_data:
                # Fetch the raw bytes ourselves so feedparser does no I/O.
                rss = feedparser.parse(requests.get(item['rss']).content)['entries']
                push_new_list = {"source": item, "target": []}

                for it in rss:
                    # 'parser' is presumably dateutil.parser — TODO confirm
                    # against the module's imports.
                    datetime_struct = parser.parse(it['published'])
                    published = datetime_struct.strftime("%Y-%m-%d")

                    today = time.strftime("%Y-%m-%d")

                    # Keep only entries published today whose title contains
                    # the configured keyword.
                    if today == published:
                        if item['has_content'] in it['title']:
                            push_new_list["target"].append(it)
                self.news_list.append(push_new_list)
        except Exception as e:
            # Log message means "exception while fetching RSS title and URL".
            logger.warn("获取RSS标题和URL异常:" + str(e)) 
开发者ID:anbai-inc,项目名称:SecRss,代码行数:25,代码来源:core.py

示例5: get_wet

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def get_wet():
    """Fetch the weather RSS feed and build a scroll-display string.

    Reads the module-level ``url`` (feed address) and ``postcode`` globals.

    Returns
    -------
    str
        The first three entry titles, padded and abbreviated for the
        scrolling display.
    """
    # Get the weather data
    print("Updating weather for", postcode)
    d = feedparser.parse(url)
    # Concatenate the first three headline titles, padded so the text
    # scrolls with a visible gap between items.
    val = "        " + d['entries'][0]['title']
    val += "        " + d['entries'][1]['title']
    val += "        " + d['entries'][2]['title']
    # Tidy & shorten the message for the scroll display.
    # (The original also computed an unused `entries` count; removed.)
    for old, new in (
        ("Maximum", "Max"),
        ("Minimum", "Min"),
        ("Temperature: ", ""),
        (u"\u00B0", ""),
        (",", ""),
        ("(", ""),
        (")", ""),
    ):
        val = val.replace(old, new)
    return val
开发者ID:pimoroni,项目名称:scroll-phat,代码行数:19,代码来源:ukweather.py

示例6: get_headlines

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def get_headlines(self):
        """Refresh the Google News headlines panel, then reschedule itself.

        Rebuilds ``self.headlinesContainer`` with the five most recent
        headlines and re-runs every 10 minutes via Tk's ``after``.
        NOTE(review): this is Python 2 code (print statement below); left
        byte-identical.
        """
        try:
            # remove all children
            for widget in self.headlinesContainer.winfo_children():
                widget.destroy()
            # news_country_code is presumably a module-level setting;
            # fall back to the US edition when it is unset.
            if news_country_code == None:
                headlines_url = "https://news.google.com/news?ned=us&output=rss"
            else:
                headlines_url = "https://news.google.com/news?ned=%s&output=rss" % news_country_code

            feed = feedparser.parse(headlines_url)

            # Show only the five newest posts.
            for post in feed.entries[0:5]:
                headline = NewsHeadline(self.headlinesContainer, post.title)
                headline.pack(side=TOP, anchor=W)
        except Exception as e:
            traceback.print_exc()
            print "Error: %s. Cannot get news." % e

        # Poll again in 10 minutes (600000 ms).
        self.after(600000, self.get_headlines) 
开发者ID:HackerShackOfficial,项目名称:Smart-Mirror,代码行数:22,代码来源:smartmirror.py

示例7: _parse

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def _parse(data, limit):
    """Render feed entries as HTML ``<li>`` items wrapped in a ``<ul>``.

    Parses ``data`` with feedparser (handles both Atom and RSS), keeps
    entries that have a title, and returns a list of HTML strings — or
    None when no titled entry was found.  A truthy ``limit`` caps the
    number of items emitted.
    """
    feed = feedparser.parse(data)  # Atom + RSS
    items = []
    for entry in feed["entries"]:
        title = entry.get("title")
        if not title:
            continue
        link = entry.get("link")
        if link:
            items.append('<li><a href="{}">{}</a></li>'.format(link, escape(title)))
        else:
            items.append('<li>{}</li>'.format(escape(title)))
        if limit and len(items) == limit:
            break
    if items:
        return ["<ul>"] + items + ["</ul>"]
开发者ID:lovexiaov,项目名称:python-in-practice,代码行数:18,代码来源:Feed.py

示例8: get_ios_reviews

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def get_ios_reviews(app_id, language, limit=100):
    """Fetch up to ``limit`` recent App Store customer reviews.

    ``language`` selects the store-front (falsy means the default store).
    Returns a list of ``Review`` records built from the feed entries.
    """
    feed_url = 'https://itunes.apple.com/%(language)srss/customerreviews/id=%(app_id)s/sortBy=mostRecent/xml' % {
        'language': '%s/' % language if language else '', 'app_id': app_id}
    response = requests.get(feed_url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
                            timeout=timeout)
    response.encoding = 'utf-8'  # avoid chardet not guessing correctly
    parsed = feedparser.parse(response.text)
    reviews = []
    # Entry 0 is skipped — presumably app metadata rather than a review;
    # confirm against the iTunes feed format.
    for entry in parsed['entries'][1:1 + limit]:
        reviews.append(Review(
            id=entry.id,
            platform='ios',
            title=entry.title,
            rating=int(entry.im_rating),
            summary=entry.summary,
            url=entry.href,
            author=entry.author,  # author url: entry.href
            date=datetime.datetime.fromtimestamp(mktime(entry.updated_parsed)),
            language=language,
            version=entry.im_version
        ))
    return reviews
开发者ID:coagulant,项目名称:critics,代码行数:22,代码来源:parsers.py

示例9: process_all_rss

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def process_all_rss(reprocess=False):
    """Gather all RSS feeds and articles, then process.

    Collects the rss_link of every active monitor, parses each feed,
    hands each entry to ``get_article``, and stamps the owning monitor's
    'checked' time for newly stored articles.

    Parameters
    ----------
    reprocess : bool
        When True, entries are re-processed even if already stored
        (and the 'checked' bookkeeping below is skipped).
    """
    sources = list()
    logger.debug("Collecting sources")
    monitors = mongo.db[app.config['MONITORS_COLLECTION']]
    for item in monitors.find({'active': True}):
        sources.append(item['metadata'].get('rss_link'))

    contents = [feedparser.parse(x) for x in sources]
    logger.debug("Processing sources")
    for source in contents:
        for idx, item in enumerate(source.get('entries')):
            response = get_article(item, source['href'], reprocess)
            # Skip bookkeeping for items we already had, or when reprocessing.
            if response['from_store'] or reprocess:
                continue
            clean_link = response['article']['feed_source']
            # NOTE(review): Collection.update is deprecated in modern pymongo
            # (update_one/update_many) — confirm the installed version.
            monitors.update({'metadata.rss_link': clean_link},
                            {'$set': {'checked': now_time()}})
    correct_counts() 
开发者ID:9b,项目名称:chirp,代码行数:21,代码来源:__init__.py

示例10: remove_url

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def remove_url(bot, update, args):
    """Telegram command handler: unsubscribe this chat from an RSS URL.

    Expects the feed URL as the first command argument and replies with
    the outcome in the chat.
    """
    if len(args) < 1:
        update.effective_message.reply_text("URL missing")
        return

    chat_id = str(update.effective_chat.id)
    feed_link = args[0]

    # bozo == 0 means feedparser considered the document well-formed.
    if parse(feed_link).bozo != 0:
        update.effective_message.reply_text("This link is not an RSS Feed link")
        return

    if sql.check_url_availability(chat_id, feed_link):
        sql.remove_url(chat_id, feed_link)
        update.effective_message.reply_text("Removed URL from subscription")
    else:
        update.effective_message.reply_text("You haven't subscribed to this URL yet")
开发者ID:skittles9823,项目名称:SkittBot,代码行数:23,代码来源:rss.py

示例11: searchGithub

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def searchGithub(word, day, level):
  """Search GitHub repositories mentioning ``word`` updated after ``day``.

  Parameters
  ----------
  word : str
      Search phrase; quoted so it is matched literally.
  day : str
      Date string; only repositories created/pushed after it match.
  level : int
      1-4, selects which fields to search and whether to filter on
      'created' or 'pushed' (see ``searchlevel``).

  Returns
  -------
  (list, int)
      Full names of matching non-empty repositories and the HTTP status
      code; ([], -1) on any error.
  """
  searchlevel = {
    1: ['in:name,description', 'created'],
    2: ['in:name,description,readme', 'created'],
    3: ['in:name,description', 'pushed'],
    4: ['in:name,description,readme', 'pushed']}
  github_url = 'https://api.github.com/search/repositories?q='
  try:
    # NOTE(review): find(' ') > 0 deliberately(?) misses a leading space.
    if word.find(' ') > 0:
      # BUG FIX: str.replace returns a new string; the original discarded it.
      word = word.replace(' ', '\" \"')
    word = urllib.parse.quote('\"' + word + '\"')
    url = github_url + word + '+' + searchlevel[level][0] + '+' + searchlevel[level][1] + ':>' + day + '&s=updated&o=asc'
    headers = {"Accept": "application/vnd.github.mercy-preview+json"}
    result = requests.get(url, timeout=10, headers=headers)
    statuscode = result.status_code
    resultdata = result.json()
    codes = []
    for a in resultdata['items']:
      name = a['full_name']
      if a['size'] > 0:
        codes.append(name)
    return codes, statuscode
  except Exception:
    # Best-effort: any network/parse failure maps to an empty result.
    return [], -1
开发者ID:blue1616,项目名称:CodeScraper,代码行数:26,代码来源:search_api.py

示例12: searchGist

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def searchGist(word, day):
  """Search public Gists created after ``day`` for the quoted ``word``.

  Returns
  -------
  (list, int)
      Gist links scraped from the results page and the HTTP status code;
      ([], -1) on any error.
  """
  if word.find(' ') > 0:
    # BUG FIX: str.replace returns a new string; the original discarded it.
    word = word.replace(' ', '\" \"')
  word = urllib.parse.quote('\"' + word + '\"')
  url = 'https://gist.github.com/search?utf8=%E2%9C%93&q=' + word + '+created%3A>' + day + '&ref=searchresults'
  try:
    result = requests.get(url, timeout=10)
    statuscode = result.status_code
    root = lxml.html.fromstring(result.text)
    codes = []
    for a in root.xpath('//div/a[@class="link-overlay"]'):
      link = a.get('href')
      codes.append(link)
    return codes, statuscode
  except Exception:
    # Best-effort: any network/parse failure maps to an empty result.
    return [], -1
开发者ID:blue1616,项目名称:CodeScraper,代码行数:19,代码来源:search_api.py

示例13: searchGitlab

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def searchGitlab(word):
  """Search GitLab projects whose name matches the quoted ``word``.

  Returns
  -------
  (list, int)
      Project links scraped from the explore page and the HTTP status
      code; ([], -1) on any error.
  """
  try:
    if word.find(' ') > 0:
      # BUG FIX: str.replace returns a new string; the original discarded it.
      word = word.replace(' ', '\" \"')
    word = urllib.parse.quote('\"' + word + '\"')
    url = 'https://gitlab.com/explore/projects?utf8=%E2%9C%93&name=' + word + '&sort=latest_activity_desc'
    result = requests.get(url, timeout=10)
    statuscode = result.status_code
    root = lxml.html.fromstring(result.text)
    codes = []
    for a in root.xpath('//div/a[@class="project"]'):
      link = a.get('href')
      codes.append(link)
    return codes, statuscode
  except Exception:
    # Best-effort: any network/parse failure maps to an empty result.
    return [], -1
开发者ID:blue1616,项目名称:CodeScraper,代码行数:19,代码来源:search_api.py

示例14: googleCustomSearch

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def googleCustomSearch(word, engine_id, api_key):
  """Query a Google Custom Search engine for the quoted ``word``.

  Parameters
  ----------
  word : str
      Search phrase; quoted so it is matched literally.
  engine_id : str
      Custom Search engine ID (cx).
  api_key : str
      Google API key.

  Returns
  -------
  (dict, int)
      Mapping of result link -> [title, snippet] and the HTTP status
      code; ({}, -1) on any error.
  """
  try:
    if word.find(' ') > 0:
      # BUG FIX: str.replace returns a new string; the original discarded it.
      word = word.replace(' ', '\" \"')
    word = urllib.parse.quote('\"' + word + '\"')
    headers = {"content-type": "application/json"}
    url = 'https://www.googleapis.com/customsearch/v1?key=' + api_key + '&rsz=filtered_cse&num=10&hl=en&prettyPrint=false&cx=' + engine_id + '&q=' + word + '&sort=date'
    result = requests.get(url, timeout=10, headers=headers)
    statuscode = result.status_code
    codes = {}
    if statuscode == 200:
      jsondata = result.json()
      if 'items' in jsondata.keys():
        for item in jsondata['items']:
          name = item['title']
          sub = item['snippet']
          link = item['link']
          codes[link] = [name, sub]
    return codes, statuscode
  except Exception:
    # Best-effort: any network/parse failure maps to an empty result.
    return {}, -1
开发者ID:blue1616,项目名称:CodeScraper,代码行数:23,代码来源:search_api.py

示例15: getRSSFeeds

# 需要导入模块: import feedparser [as 别名]
# 或者: from feedparser import parse [as 别名]
def getRSSFeeds(url, lastpost):
  """Fetch an RSS feed and return entries newer than ``lastpost``.

  Parameters
  ----------
  url : str
      Feed URL.
  lastpost : dict
      The most recently seen entry, with 'link' and 'timestamp'
      ('%Y-%m-%d %H:%M:%S' or None) keys.

  Returns
  -------
  (list, int)
      New entries (in the order produced by parseRSS) and the HTTP
      status code; ([], -1) on any error.
  """
  # Hoisted: the same format string was repeated twice in the comparison.
  timefmt = '%Y-%m-%d %H:%M:%S'
  try:
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0'}
    response = requests.get(url, timeout=10, headers=headers)
    updateditems = []
    statuscode = response.status_code
    if statuscode == 200:
      rss = feedparser.parse(response.text)
      result = parseRSS(rss['entries'])
      for entry in result:
        # Stop at the entry we already reported last time.
        if entry['link'] == lastpost['link']:
          break
        # Also stop once entries are older than the last reported one
        # (only possible when both timestamps are present).
        if entry['timestamp'] is not None and lastpost['timestamp'] is not None:
          if datetime.datetime.strptime(entry['timestamp'], timefmt) < datetime.datetime.strptime(lastpost['timestamp'], timefmt):
            break
        updateditems.append(entry)
    return updateditems, statuscode
  except Exception:
    # Best-effort: any network/parse failure maps to an empty result.
    return [], -1
开发者ID:blue1616,项目名称:CodeScraper,代码行数:22,代码来源:search_api.py


注:本文中的feedparser.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。