本文整理汇总了Python中salts_lib.scraper_utils.pathify_url函数的典型用法代码示例。如果您正苦于以下问题:Python pathify_url函数的具体用法?Python pathify_url怎么用?Python pathify_url使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了pathify_url函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL of the post matching this episode, or None.

    Walks the show's post pages (following the 'nextpostslink' anchor for
    pagination) until a matching post is found or __too_old() says the posts
    have aged out.

    :param show_url: relative URL of the show's post listing
    :param video: video object carrying season/episode/title info
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        # raw string: '\d' in a plain literal is an invalid escape sequence
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        # release-name match (SxxExx / airdate inside the link text)
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            # presumably the episode title follows the </strong> tag -- unverified
                            match = re.search(r'</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
示例2: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL of the post matching this episode, or None.

    Older dom_parser variant: pages through the show's posts via the
    'nextpostslink' anchor until a match is found or posts get too old.

    :param show_url: relative URL of the show's post listing
    :param video: video object carrying season/episode/title info
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        # raw strings: '\d'/'\s' in plain literals are invalid escape sequences
        posts = dom_parser.parse_dom(html, 'div', {'id': r'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search(r'<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        # release-name match (SxxExx / airdate inside the link text)
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search(r'</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
示例3: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site for a show or movie.

    TV shows are matched via the TAGS anchor of each post (de-duplicated by
    URL); movies are matched by parsing the post heading through
    parse_movie_link and comparing normalized titles/years.

    :return: list of {'url', 'title', 'year'} result dicts
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        # raw strings: '\d'/'\s' in plain literals are invalid escape sequences
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')}):
            if CATEGORIES[video_type] not in post: continue
            match = re.search(r'<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue  # de-duplicate shows listed in multiple posts
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall(r'<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; year only compared when both are known
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
示例4: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL for an episode, or None.

    First tries the default SxxExx href pattern; if that fails, scans the
    show page's episode_list for an airdate match, then a title match.

    :param show_url: relative URL of the show page
    :param video: video object carrying season/episode/airdate/title info
    """
    # raw string: '(?!\d)' in a plain literal is an invalid escape sequence
    episode_pattern = r'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    result = self._default_get_episode_url(show_url, video, episode_pattern)
    if result:
        return result
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
    if fragment:
        ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href")
        ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        force_title = scraper_utils.force_title(video)
        if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for ep_url, ep_date in zip(ep_urls, ep_dates):
                log_utils.log(
                    "Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate),
                    log_utils.LOGDEBUG,
                )
                if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(ep_url)
        if force_title or kodi.get_setting("title-fallback") == "true":
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for ep_url, ep_title in zip(ep_urls, ep_titles):
                # strip the leading <span>...</span> (episode number) from the name
                ep_title = re.sub(r"<span>.*?</span>\s*", "", ep_title)
                log_utils.log(
                    "Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title),
                    log_utils.LOGDEBUG,
                )
                if norm_title == scraper_utils.normalize_title(ep_title):
                    return scraper_utils.pathify_url(ep_url)
示例5: search
def search(self, video_type, title, year, season=""):
    """Search the directory listing: movies under /Film/<year>/, shows under /Serial/.

    :return: list of {'url', 'title', 'year'} result dicts
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are only findable when a year folder can be selected
        if year:
            base_url = urlparse.urljoin(self.base_url, "/Film/")
            html = self._http_get(base_url, cache_limit=48)
            for entry in self.__parse_directory(html):
                if year != entry["title"]:
                    continue
                url = urlparse.urljoin(base_url, entry["link"])
                for movie in self.__get_files(url, cache_limit=24):
                    match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie["link"])
                    if movie["directory"]:
                        continue
                    if norm_title not in scraper_utils.normalize_title(match_title):
                        continue
                    if year and match_year and year != match_year:
                        continue
                    results.append({"url": scraper_utils.pathify_url(url), "title": match_title, "year": year})
    else:
        base_url = urlparse.urljoin(self.base_url, "/Serial/")
        html = self._http_get(base_url, cache_limit=48)
        for entry in self.__parse_directory(html):
            if entry["directory"] and norm_title in scraper_utils.normalize_title(entry["title"]):
                url = urlparse.urljoin(base_url, entry["link"])
                results.append({"url": scraper_utils.pathify_url(url), "title": entry["title"], "year": ""})
    return results
示例6: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL for an episode, or None.

    dom_parser2 variant: tries the SxxExx href pattern against the joined
    episode_list fragments, then falls back to airdate and title matching.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    # raw string: '(?!\d)' in a plain literal is an invalid escape sequence
    episode_pattern = r'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result: return result
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            # strip the leading <span>...</span> (episode number) from the name
            ep_title = re.sub(r'<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
示例7: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL of the post matching this episode, or None.

    Pairs <h2> heading anchors with post-NNN divs and pages through the
    show's posts until a release match (or title fallback) is found or the
    posts age out.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        # raw strings: '\s'/'\d' in plain literals are invalid escape sequences
        headings = re.findall(r'<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')})]
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    # release-name match (SxxExx / airdate inside the heading text)
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search(r'<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
示例8: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site for a show or movie.

    TV shows are probed via a slugged /tv-show/<slug>/ URL (a page with any
    post-NNN div counts as a hit); movies go through the site's ?s= search
    and are matched by normalized title and year.

    :return: list of {'url', 'title', 'year'} result dicts
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        # raw strings: '\d' in a plain literal is an invalid escape sequence
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile(r'post-\d+')}):
            match = re.search(r'<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post): continue
                post_title = re.sub('<[^>]*>', '', post_title)  # strip any markup inside the title
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                # substring match in either direction; year only compared when both are known
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
示例9: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's front page for matching titles.

    Scans the 'container seo' link list and every table cell with a
    searchTVname div; matches on normalized-title substring and (when both
    sides have one) year.

    :return: list of {'url', 'title', 'year'} result dicts
    """
    results = []
    if title:
        html = self._http_get(self.base_url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)

        def is_match(cand_title, cand_year):
            # substring title match; year only compared when both are known
            return (norm_title in scraper_utils.normalize_title(cand_title)
                    and (not year or not cand_year or year == cand_year))

        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'container seo'})
        if fragment:
            for attrs, link_title in dom_parser2.parse_dom(fragment[0].content, 'a', {'class': 'link'}, req='href'):
                if is_match(link_title, ''):
                    results.append({'url': scraper_utils.pathify_url(attrs['href']),
                                    'title': scraper_utils.cleanse_title(link_title), 'year': ''})

        for _attrs, table in dom_parser2.parse_dom(html, 'table'):
            for _attrs, cell in dom_parser2.parse_dom(table, 'td'):
                anchors = dom_parser2.parse_dom(cell, 'a', req='href')
                names = dom_parser2.parse_dom(cell, 'div', {'class': 'searchTVname'})
                years = dom_parser2.parse_dom(cell, 'span', {'class': 'right'})
                if not (anchors and names):
                    continue
                cand_url = anchors[0].attrs['href']
                cand_title = names[0].content
                cand_year = years[0].content if years else ''
                if is_match(cand_title, cand_year):
                    results.append({'url': scraper_utils.pathify_url(cand_url),
                                    'title': scraper_utils.cleanse_title(cand_title), 'year': cand_year})
    return results
示例10: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL of the post matching this episode, or None.

    Matches post link titles against a SxxExx pattern or an airdate pattern
    built from video.ep_airdate, paging via the 'nextpostslink' anchor.
    """
    # raw strings: '\.'/'\d' in plain literals are invalid escape sequences
    sxe = r'(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    # ep_airdate may be missing/None; fall back to no airdate matching.
    # 'except Exception' (not bare except) so SystemExit/KeyboardInterrupt propagate
    try: airdate_pattern = video.ep_airdate.strftime(r'(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except Exception: airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': r'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search(r'<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search(r'</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
示例11: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Resolve an episode URL through the site's JSON API, or return None.

    Tries season/episode number matching first, then (optionally) airdate
    matching, then falls back to normalized-title matching.
    """
    query = scraper_utils.parse_query(show_url)
    if 'id' not in query:
        return
    api_url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
    js_data = self._http_get(api_url, cache_limit=.5)
    if 'episodes' not in js_data:
        return

    episodes = js_data['episodes']
    force_title = scraper_utils.force_title(video)
    if force_title:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    else:
        for ep in episodes:
            if int(video.season) == int(ep['season']) and int(video.episode) == int(ep['number']):
                return scraper_utils.pathify_url('?id=%s' % (ep['id']))
        if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
            for ep in episodes:
                if 'airdate' not in ep:
                    continue
                site_airdate = scraper_utils.to_datetime(ep['airdate'], "%Y-%m-%d").date()
                # NOTE(review): site airdates appear to run one day ahead of ours -- confirm
                if video.ep_airdate == (site_airdate - datetime.timedelta(days=1)):
                    return scraper_utils.pathify_url('?id=%s' % (ep['id']))

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep in episodes:
            if 'name' in ep and norm_title in scraper_utils.normalize_title(ep['name']):
                return scraper_utils.pathify_url('?id=%s' % (ep['id']))
示例12: _default_get_episode_url
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Generic episode-URL matcher shared by the scrapers.

    Tries, in order: the SxxExx regex, an airdate regex (with {placeholders}
    filled in from video.ep_airdate), and finally a title regex whose named
    groups 'title'/'url' identify each episode.  Returns a pathified URL or
    None when nothing matches.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    # accept either a raw HTML string or a list of dom_parser2 results
    try: html = html[0].content
    except AttributeError: pass

    force_title = scraper_utils.force_title(video)
    if force_title:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    else:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate = video.ep_airdate
            # placeholders don't overlap, so replacement order is irrelevant
            substitutions = {
                '{year}': str(airdate.year),
                '{month}': str(airdate.month),
                '{p_month}': '%02d' % (airdate.month),
                '{month_name}': MONTHS[airdate.month - 1],
                '{short_month}': SHORT_MONS[airdate.month - 1],
                '{day}': str(airdate.day),
                '{p_day}': '%02d' % (airdate.day),
            }
            for placeholder, value in substitutions.items():
                airdate_pattern = airdate_pattern.replace(placeholder, value)
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
示例13: search
def search(self, video_type, title, year):
    """Search the directory listing: movies under /Film/<year>/, shows under /Serial/.

    :return: list of {'url', 'title', 'year'} result dicts
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are only findable when a year folder can be selected
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for entry in self.__parse_directory(html):
                if year != entry['title']:
                    continue
                url = urlparse.urljoin(base_url, entry['link'])
                for movie in self.__get_files(url, cache_limit=24):
                    match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                    if movie['directory']:
                        continue
                    if norm_title not in scraper_utils.normalize_title(match_title):
                        continue
                    if year and match_year and year != match_year:
                        continue
                    results.append({'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year})
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for entry in self.__parse_directory(html):
            if entry['directory'] and norm_title in scraper_utils.normalize_title(entry['title']):
                url = urlparse.urljoin(base_url, entry['link'])
                results.append({'url': scraper_utils.pathify_url(url), 'title': entry['title'], 'year': ''})
    return results
示例14: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL of the post matching this episode, or None.

    Matches heading titles against a '.SxxExx.' token or a '.YYYY.MM.DD.'
    airdate token, paging via the 'nextpostslink' anchor until posts age out.
    """
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    # ep_airdate may be missing/None; fall back to no airdate matching.
    # 'except Exception' (not bare except) so SystemExit/KeyboardInterrupt propagate
    try: ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except Exception: ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        # raw strings: '\s'/'\d' in plain literals are invalid escape sequences
        headings = re.findall(r'<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': r'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search(r'<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
示例15: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Return the pathified URL for an episode, or None.

    Tries, in order: an SxxExx href match over the whole page, an airdate
    match against each 'el-item' block's date div, and a title match against
    each block's e-name div.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        # raw strings: '\s'/'(?!\d)' in plain literals are invalid escape sequences
        episodes = dom_parser.parse_dom(html, 'div', {'class': r'\s*el-item\s*'})
        if not force_title:
            episode_pattern = r'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # site formats dates as DD-MM-YYYY
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])