[makertv] improve extraction

This commit is contained in:
remitamine 2015-12-21 04:24:58 +01:00
parent 78653a33aa
commit 7cb0952474
3 changed files with 22 additions and 7 deletions

View file

@ -53,6 +53,7 @@ from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1787,6 +1788,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url: if snagfilms_url:
return self.url_result(snagfilms_url) return self.url_result(snagfilms_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds # Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None: if mobj is not None:

View file

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}', r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
@ -42,7 +44,9 @@ class JWPlatformIE(InfoExtractor):
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or '' source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl': if source_type == 'application/vnd.apple.mpegurl':
formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)) m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'): elif source_type.startswith('audio'):
formats.append({ formats.append({
'url': source_url, 'url': source_url,
@ -57,7 +61,7 @@ class JWPlatformIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_data['mediaid'], 'id': video_id,
'title': video_data['title'], 'title': video_data['title'],
'description': video_data.get('description'), 'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')), 'thumbnail': self._proto_relative_url(video_data.get('image')),

View file

@ -5,12 +5,12 @@ from .common import InfoExtractor
class MakerTVIE(InfoExtractor): class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = { _TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': { 'info_dict': {
'id': 'brOEcGut', 'id': 'Fh3QgymL9gsc',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review', 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a', 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id') jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform') return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}