New optoin --restrict-filenames

This commit is contained in:
Philipp Hagemeister 2012-11-26 23:58:46 +01:00
parent 71f36332dd
commit 1c469a9480
7 changed files with 77 additions and 41 deletions

View file

@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe, %(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout. percent. Use - to output to stdout.
--restrict-filenames Avoid some characters such as "&" and spaces in
filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin) -a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files -w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files -c, --continue resume partially downloaded files

View file

@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase):
self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that')) self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä') self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
for forbidden in u'"\0\\/': forbidden = u'"\0\\/'
self.assertTrue(forbidden not in sanitize_filename(forbidden)) for fc in forbidden:
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc))
def test_sanitize_filename_restricted(self):
self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
forbidden = u'"\0\\/&: \'\t\n'
for fc in forbidden:
print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])

View file

@ -59,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.
.PP .PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
some systems) or clone the git repo to see the code. some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file. \f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ youtube-dl\f[]. To recompile the executable, run \f[C]make\ youtube-dl\f[].

View file

@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts local cur prev opts
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

View file

@ -57,6 +57,7 @@ class FileDownloader(object):
format: Video format code. format: Video format code.
format_limit: Highest quality format to try. format_limit: Highest quality format to try.
outtmpl: Template for output names. outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors. ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec. ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files. nooverwrites: Prevent overwriting files.
@ -349,7 +350,7 @@ class FileDownloader(object):
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title']) info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
reason = self._match_entry(info_dict) reason = self._match_entry(info_dict)
if reason is not None: if reason is not None:

View file

@ -272,6 +272,9 @@ def parseOpts():
help='number downloaded files starting from 00000', default=False) help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output', filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Avoid some characters such as "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file', filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites', filesystem.add_option('-w', '--no-overwrites',
@ -485,6 +488,7 @@ def _real_main():
or (opts.useid and u'%(id)s.%(ext)s') or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'), or u'%(id)s.%(ext)s'),
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors, 'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit, 'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites, 'nooverwrites': opts.nooverwrites,

View file

@ -195,17 +195,21 @@ def timeconvert(timestr):
timestamp = email.utils.mktime_tz(timetuple) timestamp = email.utils.mktime_tz(timetuple)
return timestamp return timestamp
def sanitize_filename(s): def sanitize_filename(s, restricted=False):
"""Sanitizes a string so it could be used as part of a filename.""" """Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
"""
def replace_insane(char): def replace_insane(char):
if char == '?' or ord(char) < 32 or ord(char) == 127: if char == '?' or ord(char) < 32 or ord(char) == 127:
return '' return ''
elif char == '"': elif char == '"':
return '\'' return '' if restricted else 'FOO\''
elif char == ':': elif char == ':':
return ' -' return '_-' if restricted else ' -'
elif char in '\\/|*<>': elif char in '\\/|*<>':
return '-' return '-'
if restricted and (char in '&\'' or char.isspace()):
return '_'
return char return char
result = u''.join(map(replace_insane, s)) result = u''.join(map(replace_insane, s))