1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705)

This commit is contained in:
Sergey M․ 2018-10-01 23:29:24 +07:00
parent 3c7da54c92
commit 66d106f270
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -2,31 +2,38 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
float_or_none, try_get,
int_or_none, urljoin,
parse_iso8601,
xpath_text,
) )
class PhilharmonieDeParisIE(InfoExtractor): class PhilharmonieDeParisIE(InfoExtractor):
IE_DESC = 'Philharmonie de Paris' IE_DESC = 'Philharmonie de Paris'
_VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)|
pad\.philharmoniedeparis\.fr/doc/CIMU/
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
'md5': 'a0a4b195f544645073631cbec166a2c2',
'info_dict': {
'id': '1086697',
'ext': 'mp4',
'title': 'Jazz à la Villette : Knower',
},
}, {
'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
'info_dict': { 'info_dict': {
'id': '1032066', 'id': '1032066',
'ext': 'flv', 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
'timestamp': 1428179400,
'upload_date': '20150404',
'duration': 6592.278,
}, },
'params': { 'playlist_mincount': 2,
# rtmp download
'skip_download': True,
}
}, { }, {
'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
'only_matching': True, 'only_matching': True,
@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor):
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
'only_matching': True, 'only_matching': True,
}] }]
_LIVE_URL = 'https://live.philharmoniedeparis.fr'
def _real_extract(self, url):
    """Extract a single video or a playlist of chapters for ``url``.

    The player configuration is fetched as JSON from
    ``<_LIVE_URL>/otoPlayer/config.ashx`` (queried with the video id and
    ``lang=fr-FR``).  When the configuration itself carries a title and
    playable files, a single video info dict is returned.  Otherwise every
    item of ``config['chapters']`` that yields at least one format becomes
    a playlist entry whose id is ``<video_id>-<chapter position>``.
    """
    video_id = self._match_id(url)

    config = self._download_json(
        '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
            'id': video_id,
            'lang': 'fr-FR',
        })

    def extract_entry(source):
        """Build a partial info dict (title + formats) from ``source``.

        Returns None when ``source`` is not a dict, has no title, has no
        ``files`` dict, or no formats could be extracted from it.
        """
        if not isinstance(source, dict):
            return
        title = source.get('title')
        if not title:
            return
        files = source.get('files')
        if not isinstance(files, dict):
            return
        # 'mobile' and 'desktop' may point to the same manifest; remember
        # the URLs already processed so each one is fetched only once.
        format_urls = set()
        formats = []
        for format_id in ('mobile', 'desktop'):
            format_url = try_get(
                files, lambda x: x[format_id]['file'], compat_str)
            if not format_url or format_url in format_urls:
                continue
            format_urls.add(format_url)
            m3u8_url = urljoin(self._LIVE_URL, format_url)
            # fatal=False: presumably a failing manifest contributes no
            # formats instead of aborting -- confirm against the helper.
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        if not formats:
            return
        self._sort_formats(formats)
        return {
            'title': title,
            'formats': formats,
        }

    thumbnail = urljoin(self._LIVE_URL, config.get('image'))

    # Single video: the top-level config is itself a playable entry.
    info = extract_entry(config)
    if info:
        info.update({
            'id': video_id,
            'thumbnail': thumbnail,
        })
        return info

    # Otherwise treat the config as a container of chapters.
    entries = []
    for num, chapter in enumerate(config['chapters'], start=1):
        entry = extract_entry(chapter)
        if not entry:
            # extract_entry() returns None for chapters without a title,
            # files or formats; skip them instead of failing on
            # ``None['id']`` and losing the whole playlist.  ``num`` still
            # reflects the chapter's position in the config.
            continue
        entry['id'] = '%s-%d' % (video_id, num)
        entries.append(entry)

    return self.playlist_result(entries, video_id, config.get('title'))