[voxmedia] add support for recode.net(fixes #14173)

This commit is contained in:
Remita Amine 2017-10-11 15:50:00 +00:00
parent 782195a9d4
commit 9e38dbb19c
2 changed files with 57 additions and 14 deletions

View file

@ -1244,7 +1244,10 @@ from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE from .voicerepublic import VoiceRepublicIE
from .voot import VootIE from .voot import VootIE
from .voxmedia import VoxMediaIE from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
)
from .vporn import VpornIE from .vporn import VpornIE
from .vrt import VRTIE from .vrt import VRTIE
from .vrak import VrakIE from .vrak import VrakIE

View file

@ -2,11 +2,44 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .once import OnceIE
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ExtractorError
class VoxMediaVolumeIE(OnceIE):
_VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = self._parse_json(self._search_regex(
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id)
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
provider_video_id = video_data.get('%s_id' % provider_video_type)
if not provider_video_id:
continue
info = {
'id': video_id,
'title': video_data.get('title_short'),
'description': video_data.get('description_long') or video_data.get('description_short'),
'thumbnail': video_data.get('brightcove_thumbnail')
}
if provider_video_type == 'brightcove':
info['formats'] = self._extract_once_formats(provider_video_id)
self._sort_formats(info['formats'])
else:
info.update({
'_type': 'url_transparent',
'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
'ie_key': provider_video_type.capitalize(),
})
return info
raise ExtractorError('Unable to find provider video id')
class VoxMediaIE(InfoExtractor): class VoxMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)' _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
'info_dict': { 'info_dict': {
@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor):
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
}, },
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
'skip': 'Video Not Found',
}, { }, {
# volume embed # volume embed
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor):
'description': 'md5:e02d56b026d51aa32c010676765a690d', 'description': 'md5:e02d56b026d51aa32c010676765a690d',
}, },
}], }],
}, {
# volume embed, Brightcove Once
'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
'md5': '01571a896281f77dc06e084138987ea2',
'info_dict': {
'id': '1231c973d',
'ext': 'mp4',
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
},
'add_ie': ['VoxMediaVolume'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor):
webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
def create_entry(provider_video_id, provider_video_type, title=None, description=None): def create_entry(provider_video_id, provider_video_type, title=None, description=None):
video_url = {
'youtube': '%s',
'ooyala': 'ooyala:%s',
'volume': 'http://volume.vox-cdn.com/embed/%s',
}[provider_video_type] % provider_video_id
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), 'url': video_url,
'title': title or self._og_search_title(webpage), 'title': title or self._og_search_title(webpage),
'description': description or self._og_search_description(webpage), 'description': description or self._og_search_description(webpage),
} }
@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor):
volume_uuid = self._search_regex( volume_uuid = self._search_regex(
r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
if volume_uuid: if volume_uuid:
volume_webpage = self._download_webpage( entries.append(create_entry(volume_uuid, 'volume'))
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
video_data = self._parse_json(self._search_regex(
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
for provider_video_type in ('ooyala', 'youtube'):
provider_video_id = video_data.get('%s_id' % provider_video_type)
if provider_video_id:
description = video_data.get('description_long') or video_data.get('description_short')
entries.append(create_entry(
provider_video_id, provider_video_type, video_data.get('title_short'), description))
break
if len(entries) == 1: if len(entries) == 1:
return entries[0] return entries[0]