本文整理汇总了 Python 中 youtube_dl.utils.sanitize_filename 函数的典型用法代码示例。如果您正苦于以下问题：Python sanitize_filename 函数的具体用法？Python sanitize_filename 怎么用？Python sanitize_filename 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 sanitize_filename 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_sanitize_ids
def test_sanitize_ids(self):
    """Id strings must come back unchanged when ``is_id=True`` is passed."""
    # Samples with a leading underscore, a double underscore and a dash.
    for video_id in ('_n_cd26wFpw', '_BD_eEpuzXw', 'N0Y__7-UOdI'):
        self.assertEqual(
            sanitize_filename(video_id, is_id=True), video_id)
示例2: download
def download(self):
    """Download every video found in the paragraphs of ``self.paragraphs``.

    Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
    tuple.  The paragraph page is fetched through ``self._br``, every
    ``seq_contents`` div is scanned for a video, and each video found is
    handed to ``self._fd`` with the output template
    ``<DIRECTORY><course>/<chapter>/<ii>.<jj>.<k> <paragraph> (<type>).<ext>``.
    A paragraph is skipped when a file matching ``<ii>.<jj>.*`` already
    exists in its chapter directory.
    """
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Build the chapter directory once so that the "already downloaded"
        # glob and the output template always point at the same location
        # (the template previously used '\/' as separator, which never
        # matched the '/' used by the glob pattern).
        chapter_dir = DIRECTORY + sanitize_filename(course_name) + '/' \
            + sanitize_filename(chapter_name) + '/'
        if glob.glob(chapter_dir + '%02i.%02i.*' % (i, j)):
            continue
        par = self._br.open(SITE_URL + url)
        par_soup = BeautifulSoup(par.read())
        contents = par_soup.findAll('div', 'seq_contents')
        k = 0
        for content in contents:
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                video_stream = content_soup.find('div', 'video')['data-streams']
                video_id = video_stream.split(':')[1]
                video_url = youtube_url + video_id
                k += 1
                print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                outtmpl = chapter_dir \
                    + '%02i.%02i.%i ' % (i, j, k) \
                    + sanitize_filename('%s (%s)' % (par_name, video_type)) + '.%(ext)s'
                self._fd.params['outtmpl'] = outtmpl
                self._fd.download([video_url])
            except Exception:
                # Best effort: content blocks without a video, and failed
                # downloads, are skipped.  Unlike a bare ``except`` this
                # still lets KeyboardInterrupt / SystemExit propagate.
                continue
示例3: download
def download(self):
    """Fetch every paragraph in ``self.paragraphs`` and download its videos.

    Files are written below
    ``DIRECTORY/<course>/<chapter>/`` as ``<ii>.<jj>.<kk> <paragraph> (<type>).<ext>``;
    a paragraph is skipped when a file matching ``<ii>.<jj>.*`` already
    exists there.  Errors while handling a single content block are ignored.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory shared by the skip check and the output template.
        chapter_dir = os.path.join(
            DIRECTORY,
            sanitize_filename(course_name, replace_space_with_underscore),
            sanitize_filename(chapter_name, replace_space_with_underscore),
        )
        nametmpl = os.path.join(chapter_dir, "%02i.%02i.*" % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)
        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # running number of the videos found in this paragraph
        for content in page_soup.findAll("div", "seq_contents"):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                stream = content_soup.find("div", "video")["data-streams"]
                video_url = youtube_url + stream.split(":")[1]
                k += 1
                print("[%02i.%02i.%02i] %s (%s)" % (i, j, k, par_name, video_type))
                video_name = sanitize_filename(
                    "%s (%s)" % (par_name, video_type), replace_space_with_underscore
                )
                self._fd.params["outtmpl"] = os.path.join(
                    chapter_dir,
                    "%02i.%02i.%02i " % (i, j, k) + video_name + ".%(ext)s",
                )
                self._fd.download([video_url])
            except Exception:
                # Content blocks that cannot be parsed or downloaded are
                # silently skipped.
                pass
示例4: get_youtube_url
def get_youtube_url(self, youtube_url):
    """Download the media behind ``youtube_url`` and return its public URL.

    The file is stored in ``self.media_folder`` under a name built from the
    video id, uploader id and title.  The download is skipped when a file
    with that name already exists in ``self.media_folder``.

    Returns:
        ``u'<self.media_url>/<file name>'`` on success, or ``''`` when the
        metadata lookup or the download raises.
    """
    # determine the media file name
    filetmpl = u'%(id)s_%(uploader_id)s_%(title)s.%(ext)s'
    ydl = youtube_dl.YoutubeDL({
        'outtmpl': join(self.media_folder, filetmpl),
        'quiet': True,
        'restrictfilenames': True,
        'noplaylist': True,
        'continuedl': True,
        'nooverwrites': True,
        'retries': 3000,
        'fragment_retries': 3000,
        'ignoreerrors': True
    })
    ydl.add_default_info_extractors()
    try:
        result = ydl.extract_info(youtube_url, download=False)
        # Playlist/search results wrap their videos in 'entries'; a result
        # without that key is taken to be the video info itself.
        info = result['entries'][0] if 'entries' in result else result
        media_filename = sanitize_filename(filetmpl % info, restricted=True)
    except Exception:
        # Covers a ``None`` result (possible with 'ignoreerrors'), an empty
        # entry list and missing template fields.
        return ''
    # check if a file with this name already exists in the media folder
    # (the bare file name would be resolved against the working directory)
    if not os.path.isfile(join(self.media_folder, media_filename)):
        try:
            ydl.extract_info(youtube_url, download=True)
        except Exception:
            return ''
    return u'%s/%s' % (self.media_url, split(media_filename)[1])
示例5: download
def download(self):
    """Fetch every paragraph in ``self.paragraphs`` and download its videos.

    Output goes to ``DIRECTORY/<course>/<chapter>/`` with file names of the
    form ``<ii>.<jj>.<k> <paragraph> (<type>).<ext>``.  A paragraph is
    skipped when a file matching ``<ii>.<jj>.*`` already exists.  Errors
    while handling a single content block are ignored.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory shared by the skip check and the output template.
        chapter_dir = os.path.join(DIRECTORY,
                                   sanitize_filename(course_name),
                                   sanitize_filename(chapter_name))
        nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)
        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # running number of the videos found in this paragraph
        for content in page_soup.findAll('div', 'seq_contents'):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                stream = content_soup.find('div', 'video')['data-streams']
                video_url = youtube_url + stream.split(':')[1]
                k += 1
                print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                video_name = sanitize_filename('%s (%s)' % (par_name, video_type))
                self._fd.params['outtmpl'] = os.path.join(
                    chapter_dir,
                    '%02i.%02i.%i ' % (i, j, k) + video_name + '.%(ext)s')
                self._fd.download([video_url])
            except Exception:
                # Content blocks that cannot be parsed or downloaded are
                # silently skipped.
                pass
示例6: test_sanitize_ids
def test_sanitize_ids(self):
    """With ``is_id=True`` the sample ids are expected to pass through as-is."""
    sample_ids = ["_n_cd26wFpw", "_BD_eEpuzXw", "N0Y__7-UOdI"]
    for sample in sample_ids:
        sanitized = sanitize_filename(sample, is_id=True)
        self.assertEqual(sanitized, sample)
示例7: test_sanitize_filename_restricted
def test_sanitize_filename_restricted(self):
    """Exercise ``sanitize_filename`` with ``restricted=True``."""
    # Names that are already safe stay as they are.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(sanitize_filename(plain, restricted=True), plain)

    # Separators and other special characters are replaced or dropped.
    self.assertEqual("abc_de", sanitize_filename("abc/de", restricted=True))
    self.assertFalse("/" in sanitize_filename("abc/de///", restricted=True))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes_no", "yes? no"),
        ("this_-_that", "this: that"),
    ):
        self.assertEqual(expected, sanitize_filename(raw, restricted=True))

    non_ascii = _compat_str("a\xe4b\u4e2d\u56fd\u7684c")
    self.assertEqual(sanitize_filename(non_ascii, restricted=True), "a_b_c")
    # No empty filename
    self.assertTrue(sanitize_filename(_compat_str("\xf6"), restricted=True) != "")

    # None of the forbidden characters may survive in any result.
    forbidden = "\"\0\\/&!: '\t\n()[]{}$;`^,#"
    for fc in forbidden:
        cleaned = sanitize_filename(fc, restricted=True)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)

    # Handle a common case more neatly
    for raw, expected in (
        (_compat_str("\u5927\u58f0\u5e26 - Song"), "Song"),
        (_compat_str("\u603b\u7edf: Speech"), "Speech"),
    ):
        self.assertEqual(sanitize_filename(raw, restricted=True), expected)
    # .. but make sure the file name is never empty
    for raw in ("-", ":"):
        self.assertTrue(sanitize_filename(raw, restricted=True) != "")
示例8: test_sanitize_filename
def test_sanitize_filename(self):
    """Exercise ``sanitize_filename`` in its default (unrestricted) mode."""
    # Names that are already safe stay as they are.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(sanitize_filename(plain), plain)

    self.assertEqual("abc_de", sanitize_filename("abc/de"))
    self.assertFalse("/" in sanitize_filename("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes no", "yes? no"),
        ("this - that", "this: that"),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersands and non-ASCII text are kept in this mode.
    self.assertEqual(sanitize_filename("AT&T"), "AT&T")
    aumlaut = _compat_str("\xe4")
    self.assertEqual(sanitize_filename(aumlaut), aumlaut)
    tests = _compat_str("\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430")
    self.assertEqual(sanitize_filename(tests), tests)

    # None of the forbidden characters may survive in any result.
    forbidden = '"\0\\/'
    for fc in forbidden:
        cleaned = sanitize_filename(fc)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)
示例9: test_sanitize_filename
def test_sanitize_filename(self):
    """Exercise ``sanitize_filename`` in its default (unrestricted) mode."""
    # Names that are already safe stay as they are.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(sanitize_filename(plain), plain)

    self.assertEqual("abc_de", sanitize_filename("abc/de"))
    self.assertFalse("/" in sanitize_filename("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes no", "yes? no"),
        ("this - that", "this: that"),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersands and non-ASCII text are kept in this mode.
    self.assertEqual(sanitize_filename("AT&T"), "AT&T")
    aumlaut = "ä"
    self.assertEqual(sanitize_filename(aumlaut), aumlaut)
    tests = "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430"
    self.assertEqual(sanitize_filename(tests), tests)

    self.assertEqual(sanitize_filename("New World record at 0:12:34"), "New World record at 0_12_34")

    # Leading dashes / dots are altered unless the value is flagged as an id.
    for raw, as_name in (("--gasdgf", "_-gasdgf"), (".gasdgf", "gasdgf")):
        self.assertEqual(sanitize_filename(raw), as_name)
        self.assertEqual(sanitize_filename(raw, is_id=True), raw)

    # None of the forbidden characters may survive in any result.
    forbidden = '"\0\\/'
    for fc in forbidden:
        cleaned = sanitize_filename(fc)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)
示例10: download
def download(self):
    """Fetch every paragraph in ``self.paragraphs`` and download its videos.

    Output goes to ``DIRECTORY/<course>/<chapter>/`` with file names of the
    form ``<ii>.<jj>.<kk> <paragraph> (<type>).<ext>``.  A paragraph is
    skipped when a file matching ``<ii>.<jj>.*`` already exists.

    When ``self._config.pause_mode`` is set the user is asked before every
    video.  When ``self._config.resume_mode`` is set the user is asked
    until the first video that is not declined; resume mode is then
    switched off.  Errors while handling a single content block are
    ignored.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory shared by the skip check and the output template.
        chapter_dir = os.path.join(DIRECTORY,
                                   sanitize_filename(course_name, replace_space_with_underscore),
                                   sanitize_filename(chapter_name, replace_space_with_underscore))
        nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)
        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # running number of the videos found in this paragraph
        for content in page_soup.findAll('div', 'seq_contents'):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                stream = content_soup.find('div', 'video')['data-streams']
                video_url = youtube_url + stream.split(':')[1]
                k += 1
                print('[%02i.%02i.%02i] %s (%s)' % (i, j, k, par_name, video_type))
                video_name = sanitize_filename('%s (%s)' % (par_name, video_type),
                                               replace_space_with_underscore)
                outtmpl = os.path.join(
                    chapter_dir,
                    '%02i.%02i.%02i ' % (i, j, k) + video_name + '.%(ext)s')

                if self._config.pause_mode:
                    answer = raw_input(
                        'Download this video [%s - %s]? (y/n) ' % (chapter_name, outtmpl))
                    if answer.lower() == "n":
                        continue
                if self._config.resume_mode:
                    answer = raw_input(
                        'Download video from this [%s - %s]? (y/n) ' % (chapter_name, outtmpl))
                    if answer.lower() == "n":
                        continue
                    # First accepted video: stop asking from here on.
                    self._config.resume_mode = False

                self._fd.params['outtmpl'] = outtmpl
                self._fd.download([video_url])
            except Exception:
                # Content blocks that cannot be parsed or downloaded are
                # silently skipped.
                pass
示例11: test_sanitize_filename
def test_sanitize_filename(self):
    """Exercise ``sanitize_filename`` in its default (unrestricted) mode."""
    # Names that are already safe stay as they are.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(sanitize_filename(plain), plain)

    self.assertEqual('abc_de', sanitize_filename('abc/de'))
    self.assertFalse('/' in sanitize_filename('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes no', 'yes? no'),
        ('this - that', 'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersands and non-ASCII text are kept in this mode.
    self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
    aumlaut = 'ä'
    self.assertEqual(sanitize_filename(aumlaut), aumlaut)
    tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
    self.assertEqual(sanitize_filename(tests), tests)

    self.assertEqual(
        sanitize_filename('New World record at 0:12:34'),
        'New World record at 0_12_34')

    # Leading dashes / dots are altered unless the value is flagged as an id.
    for raw, as_name in (('--gasdgf', '_-gasdgf'), ('.gasdgf', 'gasdgf')):
        self.assertEqual(sanitize_filename(raw), as_name)
        self.assertEqual(sanitize_filename(raw, is_id=True), raw)

    # None of the forbidden characters may survive in any result.
    forbidden = '"\0\\/'
    for fc in forbidden:
        cleaned = sanitize_filename(fc)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)
示例12: test_sanitize_filename_restricted
def test_sanitize_filename_restricted(self):
    """Exercise ``sanitize_filename`` with ``restricted=True``."""
    # Names that are already safe stay as they are.
    for plain in (u'abc', u'abc_d-e', u'123'):
        self.assertEqual(sanitize_filename(plain, restricted=True), plain)

    self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
    self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
    for expected, raw in (
        (u'abc-de', u'abc/<>\\*|de'),
        (u'xxx', u'xxx/<>\\*|'),
        (u'yes_no', u'yes? no'),
        (u'this_-_that', u'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw, restricted=True))

    # None of the forbidden characters may survive in any result; each
    # input/result pair is printed as a debugging aid.
    forbidden = u'"\0\\/&: \'\t\n'
    for fc in forbidden:
        cleaned = sanitize_filename(fc, restricted=True)
        print('input: ' + fc + ', result: ' + repr(cleaned))
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)
示例13: test_sanitize_filename
def test_sanitize_filename(self):
    """Exercise ``sanitize_filename`` in its default (unrestricted) mode."""
    # Names that are already safe stay as they are.
    for plain in (u'abc', u'abc_d-e', u'123'):
        self.assertEqual(sanitize_filename(plain), plain)

    self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
    self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
    for expected, raw in (
        (u'abc-de', u'abc/<>\\*|de'),
        (u'xxx', u'xxx/<>\\*|'),
        (u'yes no', u'yes? no'),
        (u'this - that', u'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersands and non-ASCII text are kept in this mode.
    for kept in (u'AT&T', u'ä', u'кириллица'):
        self.assertEqual(sanitize_filename(kept), kept)

    # None of the forbidden characters may survive in any result.
    forbidden = u'"\0\\/'
    for fc in forbidden:
        cleaned = sanitize_filename(fc)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)
示例14: main
def main(args):
    """Mirror a playlist into a directory, tracking it in a ``.ordering`` file.

    ``args`` is a ``(playlist_url, output_dirpath)`` pair.  The function

    1. reads ``<output_dirpath>/.ordering`` (if present) to learn which
       files were placed there before,
    2. downloads every playlist entry whose target file is missing and
       which is not flagged ``deleted``,
    3. removes previously listed files that are no longer in the playlist,
    4. rewrites ``.ordering`` with the current playlist order.

    Exits via ``sys.exit`` when ``output_dirpath`` is missing or not a
    directory; raises ``ValueError`` when a file is still missing after its
    download was attempted.
    """
    # Parse arguments
    playlist_url, output_dirpath = args
    if not os.path.exists(output_dirpath):
        sys.exit('directory not found: %s' % output_dirpath)
    if not os.path.isdir(output_dirpath):
        sys.exit('not a directory: %s' % output_dirpath)

    # Default settings
    restrictfilenames = False
    # (TODO: Enable again when it plays nicely with 'extract_audio_for_itunes')
    writeinfojson = False
    extract_audio_for_itunes = True

    # Locate all videos already in the filesystem
    ordering_filepath = os.path.join(output_dirpath, '.ordering')
    filesystem_filenames = []
    if os.path.exists(ordering_filepath):
        with codecs.open(ordering_filepath, 'rt', 'utf-8') as ordering_file:
            listed_filenames = [line.rstrip(u'\r\n') for line in ordering_file]
        # Ensure all referenced files actually exist
        for filename in listed_filenames:
            if os.path.exists(os.path.join(output_dirpath, filename)):
                filesystem_filenames.append(filename)
            else:
                print(('WARNING: Could not locate file "%s" referenced by ' +
                       '".ordering" file. Assuming deleted.') % filename)

    # Prepare downloader
    video_filename_template = u'%(title)s.%(ext)s'
    downloader = youtube_dl.FileDownloader({
        'outtmpl': os.path.join(
            # (Be robust against output_dirpath containing %)
            output_dirpath.replace('%', '%%'),
            video_filename_template),
        'restrictfilenames': restrictfilenames,
        'writeinfojson': writeinfojson,
    })
    if extract_audio_for_itunes:
        final_filename_template = video_filename_template.replace(u'%(ext)s', u'm4a')
        downloader.add_post_processor(FFmpegExtractAudioPP(
            preferredcodec='m4a',    # iTunes compatible.
            preferredquality=None,   # default audio quality
            keepvideo=False))
    else:
        final_filename_template = video_filename_template

    # Locate all videos in the playlist
    video_infos = extract_youtube_playlist_info(playlist_url)
    playlist_filenames = [
        sanitize_filename(final_filename_template % info, restrictfilenames)
        for info in video_infos
    ]

    # Download videos to filesystem that are missing
    for cur_info, cur_filename in zip(video_infos, playlist_filenames):
        target_filepath = os.path.join(output_dirpath, cur_filename)
        if os.path.exists(target_filepath) or cur_info.get('deleted', False):
            continue
        # Download (and optionally extract the audio)
        downloader.process_info(cur_info)
        # Verify downloaded
        if not os.path.exists(target_filepath):
            raise ValueError('Could not locate downloaded video: %s' % cur_filename)

    # Remove filesystem files not in playlist
    playlist_filename_set = set(playlist_filenames)
    for stale_filename in filesystem_filenames:
        if stale_filename in playlist_filename_set:
            continue
        # Remove the video and then its info json (each only if present).
        # TODO: This is not the correct path for the info json file
        #       if 'extract_audio_for_itunes' is True.
        #       (The info json will be preceded by the *video* extension,
        #       instead of the output audio file extension.)
        for stale_filepath in (
                os.path.join(output_dirpath, stale_filename),
                os.path.join(output_dirpath, stale_filename + u'.info.json')):
            if os.path.exists(stale_filepath):
                os.remove(stale_filepath)

    # Rewrite the ordering file
    with codecs.open(ordering_filepath, 'wt', 'utf-8') as ordering_file:
        for cur_filename in playlist_filenames:
            ordering_file.write(cur_filename)
            ordering_file.write(u'\n')
def test_sanitize_filename_restricted(self):
    """Exercise ``sanitize_filename`` with ``restricted=True``."""
    # Names that are already safe stay as they are.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(sanitize_filename(plain, restricted=True), plain)

    # Separators and other special characters are replaced or dropped.
    self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True))
    self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes_no', 'yes? no'),
        ('this_-_that', 'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw, restricted=True))

    tests = 'a\xe4b\u4e2d\u56fd\u7684c'
    self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
    # No empty filename
    self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')

    # None of the forbidden characters may survive in any result.
    forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
    for fc in forbidden:
        cleaned = sanitize_filename(fc, restricted=True)
        for fbc in forbidden:
            self.assertTrue(fbc not in cleaned)

    # Handle a common case more neatly
    for raw, expected in (
        ('\u5927\u58f0\u5e26 - Song', 'Song'),
        ('\u603b\u7edf: Speech', 'Speech'),
    ):
        self.assertEqual(sanitize_filename(raw, restricted=True), expected)
    # .. but make sure the file name is never empty
    for raw in ('-', ':'):
        self.assertTrue(sanitize_filename(raw, restricted=True) != '')