[ooyala] extract more formats and metadata

2020-11-16 09:42:26 +00:00 · 2015-10-15 14:28:56 +01:00 · 2015-10-15 14:28:56 +01:00 · 90bddb6cdd
parent fafc7950e2
commit 90bddb6cdd
1 changed file with 53 additions and 98 deletions
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -1,108 +1,64 @@
 from __future__ import unicode_literals
 import re
 import json
 import base64
 from .common import InfoExtractor
 from ..utils import (
    unescapeHTML,
    ExtractorError,
    determine_ext,
    int_or_none,
    float_or_none,
 )
 class OoyalaBaseIE(InfoExtractor):
-    def _extract_result(self, info, more_info):
+    def _extract(self, player_url, video_id):
-        embedCode = info['embedCode']
+        print(player_url)
-        video_url = info.get('ipad_url') or info['url']
+        content_tree = self._download_json(player_url, video_id)['content_tree']
-
+        metadata = content_tree[list(content_tree)[0]]
-        if determine_ext(video_url) == 'm3u8':
+        embed_code = metadata['embed_code']
-            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
+        pcode = metadata.get('asset_pcode') or embed_code
-        else:
+        video_info = {
-            formats = [{
+            'id': embed_code,
-                'url': video_url,
+            'title': metadata['title'],
-                'ext': 'mp4',
+            'description': metadata.get('description'),
-            }]
+            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
-
+            'duration': int_or_none(metadata.get('duration')),
        return {
            'id': embedCode,
            'title': unescapeHTML(info['title']),
            'formats': formats,
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }
-    def _extract(self, player_url, video_id):
+        formats = []
-        player = self._download_webpage(player_url, video_id)
+        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
        # Looks like some videos are only available for particular devices
        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
        # is only available for ipad)
        # Working around with fetching URLs for all the devices found starting with 'unknown'
        # until we succeed or eventually fail for each device.
        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
        devices.remove('unknown')
        devices.insert(0, 'unknown')
        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), video_id,
                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break
        if not videos_info:
            formats = []
            auth_data = self._download_json(
-                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id),
+                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=%s' % (pcode, embed_code, supported_format),
-                video_id)
+                video_id, 'Downloading %s JSON' % supported_format)
-            cur_auth_data = auth_data['authorization_data'][video_id]
+            cur_auth_data = auth_data['authorization_data'][embed_code]
            for stream in cur_auth_data['streams']:
-                formats.append({
+                url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
-                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
+                delivery_type = stream['delivery_type']
-                    'ext': stream.get('delivery_type'),
+                if delivery_type == 'remote_asset':
-                    'format': stream.get('video_codec'),
+                    video_info['url'] = url
-                    'format_id': stream.get('profile'),
+                    return video_info
-                    'width': int_or_none(stream.get('width')),
+                if delivery_type == 'hls':
-                    'height': int_or_none(stream.get('height')),
+                    formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', 0, m3u8_id='hls', fatal=False))
-                    'abr': int_or_none(stream.get('audio_bitrate')),
+                elif delivery_type == 'hds':
-                    'vbr': int_or_none(stream.get('video_bitrate')),
+                    formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False))
-                })
+                else:
-            if formats:
+                    formats.append({
-                return {
+                        'url': url,
-                    'id': video_id,
+                        'ext': stream.get('delivery_type'),
-                    'formats': formats,
+                        'vcodec': stream.get('video_codec'),
-                    'title': 'Ooyala video',
+                        'format_id': stream.get('profile'),
-                }
+                        'width': int_or_none(stream.get('width')),
                        'height': int_or_none(stream.get('height')),
                        'abr': int_or_none(stream.get('audio_bitrate')),
                        'vbr': int_or_none(stream.get('video_bitrate')),
                        'fps': float_or_none(stream.get('framerate')),
                    })
        self._sort_formats(formats)
-            if not cur_auth_data['authorized']:
+        video_info['formats'] = formats
-                raise ExtractorError(cur_auth_data['message'], expected=True)
+        return video_info
        if not videos_info:
            raise ExtractorError('Unable to extract info')
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info = json.loads(videos_more_info)
        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
            return {
                '_type': 'playlist',
                'id': video_id,
                'title': unescapeHTML(videos_more_info['title']),
                'entries': videos,
            }
        else:
            return self._extract_result(videos_info[0], videos_more_info)
 class OoyalaIE(OoyalaBaseIE):
@@ -117,6 +73,7 @@ class OoyalaIE(OoyalaBaseIE):
                'ext': 'mp4',
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
                'duration': 853386,
            },
        }, {
            # Only available for ipad
@@ -125,7 +82,7 @@ class OoyalaIE(OoyalaBaseIE):
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
-                'description': '',
+                'duration': 194948,
            },
        },
        {
@@ -136,7 +93,8 @@ class OoyalaIE(OoyalaBaseIE):
            'info_dict': {
                'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
                'ext': 'mp4',
-                'title': 'Ooyala video',
+                'title': 'Divide Tool Path.mp4',
                'duration': 204405,
            }
        }
    ]
@@ -152,8 +110,8 @@ class OoyalaIE(OoyalaBaseIE):
    def _real_extract(self, url):
        embed_code = self._match_id(url)
-        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
-        return self._extract(player_url, embed_code)
+        return self._extract(content_tree_url, embed_code)
 class OoyalaExternalIE(OoyalaBaseIE):
@@ -170,7 +128,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
                        .*?&pcode=
                    )
                    (?P<pcode>.+?)
-                    (&|$)
+                    (?:&|$)
                    '''
    _TEST = {
@@ -179,7 +137,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
            'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
-            'description': '',
+            'duration': 1302000,
        },
        'params': {
            # m3u8 download
@@ -188,9 +146,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
-        partner_id = mobj.group('partner_id')
+        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
-        video_id = mobj.group('id')
+        return self._extract(content_tree_url, video_id)
        pcode = mobj.group('pcode')
        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
        return self._extract(player_url, video_id)