[nbcnews] Support embed widgets

Used in some Vulture videos
This commit is contained in:
Yen Chi Hsuan 2016-06-10 13:31:55 +08:00
parent 3e74b444e7
commit 5de008e8c3
No known key found for this signature in database
GPG key ID: 3FDDD575826C5C30

View file

@@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
'only_matching': True, 'only_matching': True,
}, },
{
# From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
'only_matching': True,
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
info = None info = None
bootstrap_json = self._search_regex( bootstrap_json = self._search_regex(
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None) webpage, 'bootstrap json', default=None)
if bootstrap_json: bootstrap = self._parse_json(
bootstrap = self._parse_json(bootstrap_json, display_id) bootstrap_json, display_id, transform_source=unescapeHTML)
if 'results' in bootstrap:
info = bootstrap['results'][0]['video'] info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
else: else:
player_instance_json = self._search_regex( info = bootstrap
r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
if not player_instance_json:
player_instance_json = self._html_search_regex(
r'data-video="([^"]+)"', webpage, 'video json')
info = self._parse_json(player_instance_json, display_id)
video_id = info['mpxId'] video_id = info['mpxId']
title = info['title'] title = info['title']