mirror of
https://gitlab.com/dstftw/youtube-dl.git
synced 2020-11-16 09:42:26 +00:00
[myspass] fix video URL extraction and improve metadata extraction (closes #22448)
This commit is contained in:
parent
2349255abd
commit
3e49083604
|
@ -1,73 +1,56 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import os.path
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MySpassIE(InfoExtractor):
|
class MySpassIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?myspass\.de/.*'
|
_VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||||
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '11741',
|
'id': '11741',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
||||||
'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
|
'title': '17.02.2013 - Die Highlights, Teil 2',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# video id is the last path element of the URL
|
|
||||||
# usually there is a trailing slash, so also try the second but last
|
|
||||||
url_path = compat_urllib_parse_urlparse(url).path
|
|
||||||
url_parent_path, video_id = os.path.split(url_path)
|
|
||||||
if not video_id:
|
|
||||||
_, video_id = os.path.split(url_parent_path)
|
|
||||||
|
|
||||||
# get metadata
|
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
|
||||||
metadata = self._download_xml(
|
metadata = self._download_xml(
|
||||||
metadata_url, video_id, transform_source=lambda s: s.strip())
|
'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
# extract values from metadata
|
title = xpath_text(metadata, 'title', fatal=True)
|
||||||
url_flv_el = metadata.find('url_flv')
|
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||||
if url_flv_el is None:
|
video_id_int = int(video_id)
|
||||||
raise ExtractorError('Unable to extract download url')
|
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
||||||
video_url = url_flv_el.text
|
group_int = int(group)
|
||||||
title_el = metadata.find('title')
|
if group_int > video_id_int:
|
||||||
if title_el is None:
|
video_url = video_url.replace(
|
||||||
raise ExtractorError('Unable to extract title')
|
group, compat_str(group_int // video_id_int))
|
||||||
title = title_el.text
|
|
||||||
format_id_el = metadata.find('format_id')
|
|
||||||
if format_id_el is None:
|
|
||||||
format = 'mp4'
|
|
||||||
else:
|
|
||||||
format = format_id_el.text
|
|
||||||
description_el = metadata.find('description')
|
|
||||||
if description_el is not None:
|
|
||||||
description = description_el.text
|
|
||||||
else:
|
|
||||||
description = None
|
|
||||||
imagePreview_el = metadata.find('imagePreview')
|
|
||||||
if imagePreview_el is not None:
|
|
||||||
thumbnail = imagePreview_el.text
|
|
||||||
else:
|
|
||||||
thumbnail = None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'format': format,
|
'thumbnail': xpath_text(metadata, 'imagePreview'),
|
||||||
'thumbnail': thumbnail,
|
'description': xpath_text(metadata, 'description'),
|
||||||
'description': description,
|
'duration': parse_duration(xpath_text(metadata, 'duration')),
|
||||||
|
'series': xpath_text(metadata, 'format'),
|
||||||
|
'season_number': int_or_none(xpath_text(metadata, 'season')),
|
||||||
|
'season_id': xpath_text(metadata, 'season_id'),
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(xpath_text(metadata, 'episode')),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue