[limelight] Improve embeds extraction (closes #12761)

* Move extraction code to extractor
* Add extraction for LimelightEmbeddedPlayerFlash embeds
* Extract multiple video
This commit is contained in:
Sergey M․ 2017-04-17 00:23:16 +07:00
parent 751c89a27d
commit e5d39886ec
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 43 additions and 0 deletions

View file

@ -85,6 +85,7 @@ from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import OpenloadIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2483,6 +2484,11 @@ class GenericIE(InfoExtractor):
return self.url_result(piksel_url, PikselIE.ie_key()) return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds # Look for Limelight embeds
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
if limelight_urls:
return self.playlist_result(
limelight_urls, video_id, video_title, video_description)
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj: if mobj:
lm = { lm = {

View file

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
smuggle_url,
unsmuggle_url, unsmuggle_url,
ExtractorError, ExtractorError,
) )
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod
def _extract_urls(cls, webpage, source_url):
lm = {
'Media': 'media',
'Channel': 'channel',
'ChannelList': 'channel_list',
}
entries = []
for kind, video_id in re.findall(
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
webpage):
print('video_id', video_id)
entries.append(cls.url_result(
smuggle_url(
'limelight:%s:%s' % (lm[kind], video_id),
{'source_url': source_url}),
'Limelight%s' % kind, video_id))
for mobj in re.finditer(
# As per [1] class attribute should be exactly equal to
# LimelightEmbeddedPlayerFlash but numerous examples seen
# that don't exactly match it (e.g. [2]).
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
# 2. http://www.sedona.com/FacilitatorTraining2017
r'''(?sx)
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
<param[^>]+
name=(["\'])flashVars\2[^>]+
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
''', webpage):
entries.append(cls.url_result(
smuggle_url(
'limelight:media:%s' % mobj.group('id'),
{'source_url': source_url}),
'LimelightMedia', mobj.group('id')))
return entries
def _call_playlist_service(self, item_id, method, fatal=True, referer=None): def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
headers = {} headers = {}
if referer: if referer: