[condenast] fix extraction and extract subtitles

This commit is contained in:
Remita Amine 2020-11-16 18:57:33 +01:00
parent 3f1748b944
commit 9448a20312

View file

@ -16,6 +16,8 @@ from ..utils import (
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_iso8601, parse_iso8601,
strip_or_none,
try_get,
) )
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
'uploader': 'gq', 'uploader': 'gq',
'upload_date': '20170321', 'upload_date': '20170321',
'timestamp': 1490126427, 'timestamp': 1490126427,
'description': 'How much grimmer would things be if these people were competent?',
}, },
}, { }, {
# JS embed # JS embed
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
'title': '3D printed TSA Travel Sentry keys really do open TSA locks', 'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
'uploader': 'arstechnica', 'uploader': 'arstechnica',
'upload_date': '20150916', 'upload_date': '20150916',
'timestamp': 1442434955, 'timestamp': 1442434920,
} }
}, { }, {
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for t, caption in video_info.get('captions', {}).items():
caption_url = caption.get('src')
if not (t in ('vtt', 'srt', 'tml') and caption_url):
continue
subtitles.setdefault('en', []).append({'url': caption_url})
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
'season': video_info.get('season_title'), 'season': video_info.get('season_title'),
'timestamp': parse_iso8601(video_info.get('premiere_date')), 'timestamp': parse_iso8601(video_info.get('premiere_date')),
'categories': video_info.get('categories'), 'categories': video_info.get('categories'),
'subtitles': subtitles,
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series': if url_type == 'series':
return self._extract_series(url, webpage) return self._extract_series(url, webpage)
else: else:
params = self._extract_video_params(webpage, display_id) video = try_get(self._parse_json(self._search_regex(
info = self._search_json_ld( r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
webpage, display_id, fatal=False) 'preload state', '{}'), display_id),
lambda x: x['transformed']['video'])
if video:
params = {'videoId': video['id']}
info = {'description': strip_or_none(video.get('description'))}
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params)) info.update(self._extract_video(params))
return info return info