[itv] extract subtitles

This commit is contained in:
Remita Amine 2017-01-28 17:25:15 +01:00
parent 24ee6b9721
commit f592ff9868

View file

@ -20,7 +20,7 @@ from ..utils import (
class ITVIE(InfoExtractor): class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-z]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
_TEST = { _TEST = {
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
'info_dict': { 'info_dict': {
@ -98,7 +98,8 @@ class ITVIE(InfoExtractor):
fault_string = xpath_text(resp_env, './/faultstring') fault_string = xpath_text(resp_env, './/faultstring')
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
title = xpath_text(playlist, 'EpisodeTitle', fatal=True) title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
rtmp_url = media_files.attrib['base'] rtmp_url = media_files.attrib['base']
formats = [] formats = []
@ -170,10 +171,21 @@ class ITVIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
# Collect the closed-caption tracks listed under the video element.
# Every track found is filed under the 'en' language key.
subtitles = {}
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
    # Ignore <URL> elements that carry no text.
    if not caption_url.text:
        continue
    # Fall back to 'ttml' when the URL has no recognisable extension.
    ext = determine_ext(caption_url.text, 'ttml')
    subtitles.setdefault('en', []).append({
        # Store the URL string itself, not the XML element wrapping it.
        'url': caption_url.text,
        # Caption files served with an .xml extension are reported as TTML.
        'ext': 'ttml' if ext == 'xml' else ext,
    })
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'episode_title': title, 'episode_title': title,
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
'series': xpath_text(playlist, 'ProgrammeTitle'), 'series': xpath_text(playlist, 'ProgrammeTitle'),