1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[youtube] Use '_download_xml' for getting the available subtitles

This commit is contained in:
Jaime Marquínez Ferrándiz 2014-12-31 15:44:15 +01:00
parent b8bc7a696b
commit 60e47a2699

View file

@@ -612,24 +612,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
try: try:
sub_list = self._download_webpage( subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False) video_id, note=False)
except ExtractorError as err: except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
return {} return {}
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
sub_lang_list = {} sub_lang_list = {}
for l in lang_list: for track in subs_doc.findall('track'):
lang = l[1] lang = track.attrib['lang_code']
if lang in sub_lang_list: if lang in sub_lang_list:
continue continue
params = compat_urllib_parse.urlencode({ params = compat_urllib_parse.urlencode({
'lang': lang, 'lang': lang,
'v': video_id, 'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat', 'srt'), 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': unescapeHTML(l[0]).encode('utf-8'), 'name': track.attrib['name'].encode('utf-8'),
}) })
url = 'https://www.youtube.com/api/timedtext?' + params url = 'https://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url sub_lang_list[lang] = url