[S4C] Add thumbnail extraction, extract series as playlist

Based on https://github.com/yt-dlp/yt-dlp/pull/7776: thx ifan-t, bashonly
This commit is contained in:
dirkf 2023-08-27 19:08:28 +01:00
parent 7d58f0769a
commit 31f50c8194
2 changed files with 59 additions and 8 deletions

View file

@ -1087,7 +1087,10 @@ from .rutube import (
from .rutv import RUTVIE from .rutv import RUTVIE
from .ruutu import RuutuIE from .ruutu import RuutuIE
from .ruv import RuvIE from .ruv import RuvIE
from .s4c import S4CIE from .s4c import (
S4CIE,
S4CSeriesIE,
)
from .safari import ( from .safari import (
SafariIE, SafariIE,
SafariApiIE, SafariApiIE,

View file

@ -2,6 +2,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from functools import partial as partial_f
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
@ -9,6 +11,7 @@ from ..utils import (
T, T,
traverse_obj, traverse_obj,
txt_or_none, txt_or_none,
url_or_none,
) )
@ -21,7 +24,8 @@ class S4CIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Y Swn', 'title': 'Y Swn',
'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
'duration': 5340 'duration': 5340,
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
}, },
}, { }, {
'url': 'https://www.s4c.cymru/clic/programme/856636948', 'url': 'https://www.s4c.cymru/clic/programme/856636948',
@ -31,6 +35,7 @@ class S4CIE(InfoExtractor):
'title': 'Am Dro', 'title': 'Am Dro',
'duration': 2880, 'duration': 2880,
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
}, },
}] }]
@ -43,7 +48,7 @@ class S4CIE(InfoExtractor):
'programme_id': video_id, 'programme_id': video_id,
}, fatal=False) }, fatal=False)
filename = self._download_json( player_config = self._download_json(
'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
'programme_id': video_id, 'programme_id': video_id,
'signed': '0', 'signed': '0',
@ -51,7 +56,8 @@ class S4CIE(InfoExtractor):
'mode': 'od', 'mode': 'od',
'appId': 'clic', 'appId': 'clic',
'streamName': '', 'streamName': '',
}, note='Downloading player config JSON')['filename'] }, note='Downloading player config JSON')
m3u8_url = self._download_json( m3u8_url = self._download_json(
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
'mode': 'od', 'mode': 'od',
@ -59,18 +65,60 @@ class S4CIE(InfoExtractor):
'region': 'WW', 'region': 'WW',
'extra': 'false', 'extra': 'false',
'thirdParty': 'false', 'thirdParty': 'false',
'filename': filename, 'filename': player_config['filename'],
}, note='Downloading streaming urls JSON')['hls'] }, note='Downloading streaming urls JSON')['hls']
# ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {} self._sort_formats(formats)
subtitles = {}
for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
subtitles.setdefault(sub.get('3', 'en'), []).append({
'url': sub['0'],
'name': sub.get('1'),
})
return merge_dicts({ return merge_dicts({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnail': url_or_none(player_config.get('poster')),
}, traverse_obj(details, ('full_prog_details', 0, { }, traverse_obj(details, ('full_prog_details', 0, {
'title': (('programme_title', 'series_title'), T(txt_or_none)), 'title': (('programme_title', 'series_title'), T(txt_or_none)),
'description': ('full_billing', T(txt_or_none)), 'description': ('full_billing', T(txt_or_none)),
'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))), 'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
}), get_all=False), }), get_all=False),
rev=True) rev=True)
class S4CSeriesIE(InfoExtractor):
    """Extract an S4C Clic series page as a playlist of its programmes."""

    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.s4c.cymru/clic/series/864982911',
        'playlist_mincount': 6,
        'info_dict': {
            'id': '864982911',
            'title': 'Iaith ar Daith',
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/series/866852587',
        'playlist_mincount': 8,
        'info_dict': {
            'id': '866852587',
            'title': 'FFIT Cymru',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist result whose entries point at the series' programmes.

        The series id is taken from the URL, the series details JSON is
        downloaded once, and every programme id listed under
        'other_progs_in_series' is turned into a url_result that is
        delegated to S4CIE.
        """
        series_id = self._match_id(url)

        details_query = {
            'lang': 'e',
            'series_id': series_id,
            'show_prog_in_series': 'Y'
        }
        details = self._download_json(
            'https://www.s4c.cymru/df/series_details', series_id,
            query=details_query, note='Downloading series details JSON')

        # Both lookups are resolved up front; only the url_result
        # construction below is deferred until the playlist is iterated.
        programme_ids = traverse_obj(
            details, ('other_progs_in_series', Ellipsis, 'id'))
        series_title = traverse_obj(
            details, ('full_prog_details', 0, 'series_title', T(txt_or_none)))

        entries = (
            self.url_result(
                'https://www.s4c.cymru/clic/programme/' + programme_id,
                S4CIE, programme_id)
            for programme_id in programme_ids)

        return self.playlist_result(
            entries, playlist_id=series_id, playlist_title=series_title)