1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[sportschau] Reimplement in terms of ard extractor

This commit is contained in:
Sergey M․ 2015-07-20 00:01:22 +06:00
parent e37c92ec6d
commit 3bf8c316a6
3 changed files with 44 additions and 49 deletions

View file

@ -22,7 +22,11 @@ from .aparat import AparatIE
from .appleconnect import AppleConnectIE from .appleconnect import AppleConnectIE
from .appletrailers import AppleTrailersIE from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .ard import ARDIE, ARDMediathekIE from .ard import (
ARDIE,
ARDMediathekIE,
SportschauIE,
)
from .arte import ( from .arte import (
ArteTvIE, ArteTvIE,
ArteTVPlus7IE, ArteTVPlus7IE,
@ -553,7 +557,6 @@ from .sportbox import (
SportBoxEmbedIE, SportBoxEmbedIE,
) )
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE
from .srf import SrfIE from .srf import SrfIE
from .srmediathek import SRMediathekIE from .srmediathek import SRMediathekIE
from .ssa import SSAIE from .ssa import SSAIE

View file

@ -8,6 +8,7 @@ from .generic import GenericIE
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
get_element_by_attribute,
qualities, qualities,
int_or_none, int_or_none,
parse_duration, parse_duration,
@ -246,3 +247,41 @@ class ARDIE(InfoExtractor):
'upload_date': upload_date, 'upload_date': upload_date,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
class SportschauIE(ARDMediathekIE):
IE_NAME = 'Sportschau'
_VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
_TEST = {
'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
'info_dict': {
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
'ext': 'mp4',
'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
base_url = mobj.group('baseurl')
webpage = self._download_webpage(url, video_id)
title = get_element_by_attribute('class', 'headline', webpage)
description = self._html_search_meta('description', webpage, 'description')
info = self._extract_media_info(
base_url + '-mc_defaultQuality-h.json', webpage, video_id)
info.update({
'title': title,
'description': description,
})
return info

View file

@ -1,47 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import get_element_by_attribute
class SportschauIE(InfoExtractor):
IE_NAME = 'Sportschau'
_VALID_URL = r'https?://(?:www\.)?sportschau\.de/\w+(?:/\w+)?/video(?P<id>\w+)\.html'
_TEST = {
'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
'info_dict': {
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
'ext': 'mp4',
'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
ext = '-mc_defaultQuality-h.json'
json_url = url[:-5] + ext
json = self._download_json(json_url, video_id)
thumb_url = json['_previewImage']
m3u8_url = json['_mediaArray'][1]['_mediaStreamArray'][0]['_stream'][0]
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, ext="mp4")
webpage = self._download_webpage(url, video_id)
title = get_element_by_attribute('class', 'headline', webpage)
desc = self._html_search_meta('description', webpage)
return {
'id': video_id,
'title': title,
'formats': m3u8_formats,
'description': desc,
'thumbnail': thumb_url,
}