[prosiebensat1] Fix some extraction and update tests

This commit is contained in:
Sergey M․ 2016-01-14 22:45:09 +06:00
parent 0baedd1851
commit 81549898c0

View file

@@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': { 'info_dict': {
'id': '2104602', 'id': '2104602',
'ext': 'mp4', 'ext': 'flv',
'title': 'Episode 18 - Staffel 2', 'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231', 'upload_date': '20131231',
@@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
'info_dict': { 'info_dict': {
'id': '2572814', 'id': '2572814',
'ext': 'mp4', 'ext': 'flv',
'title': 'Andreas Kümmert: Rocket Man', 'title': 'Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'upload_date': '20131017', 'upload_date': '20131017',
'duration': 469.88, 'duration': 469.88,
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
'info_dict': { 'info_dict': {
'id': '2156342', 'id': '2156342',
'ext': 'mp4', 'ext': 'flv',
'title': 'Kurztrips zum Valentinstag', 'title': 'Kurztrips zum Valentinstag',
'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.', 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
'duration': 307.24, 'duration': 307.24,
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@@ -182,7 +180,6 @@ class ProSiebenSat1IE(InfoExtractor):
'upload_date': '20151229', 'upload_date': '20151229',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@@ -192,6 +189,7 @@ class ProSiebenSat1IE(InfoExtractor):
r'"clip_id"\s*:\s+"(\d+)"', r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"', r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)', r'clip[iI]d=(\d+)',
r'clip[iI]d\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
] ]
_TITLE_REGEXES = [ _TITLE_REGEXES = [
@@ -201,6 +199,7 @@ class ProSiebenSat1IE(InfoExtractor):
r'<h1 class="att-name">\s*(.+?)</h1>', r'<h1 class="att-name">\s*(.+?)</h1>',
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
] ]
_DESCRIPTION_REGEXES = [ _DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>', r'<p itemprop="description">\s*(.+?)</p>',
@@ -208,6 +207,7 @@ class ProSiebenSat1IE(InfoExtractor):
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
r'<p class="att-description">\s*(.+?)\s*</p>', r'<p class="att-description">\s*(.+?)\s*</p>',
r'<p class="video-description" itemprop="description">\s*(.+?)</p>', r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
] ]
_UPLOAD_DATE_REGEXES = [ _UPLOAD_DATE_REGEXES = [
r'<meta property="og:published_time" content="(.+?)">', r'<meta property="og:published_time" content="(.+?)">',