Merge remote-tracking branch 'Dineshs91/f4m-2.0'

2024-01-07 17:16:08 +00:00 · 2015-01-10 17:51:52 +01:00 · 2015-01-10 17:51:52 +01:00 · 121c09c7be
parent 76bfaf6daf 0551a02b82
commit 121c09c7be
3 changed files with 65 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -340,6 +340,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rte import RteIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rtp import RTPIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -742,8 +742,14 @@ class InfoExtractor(object):
            'Unable to download f4m manifest')
        formats = []
        manifest_version = '1.0'
        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
        if not media_nodes:
            manifest_version = '2.0'
            media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
        for i, media_el in enumerate(media_nodes):
            if manifest_version == '2.0':
                manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
            tbr = int_or_none(media_el.attrib.get('bitrate'))
            format_id = 'f4m-%d' % (i if tbr is None else tbr)
            formats.append({
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -0,0 +1,58 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    float_or_none,
 )
 class RteIE(InfoExtractor):
    """Extractor for show pages of the RTE Player (rte.ie/player).

    The show page carries the metadata in <meta> tags and points to a JSON
    feed; that feed names the server and relative path of the f4m manifest
    from which the formats are built.
    """

    # 'https?' (not 'http?'): the 's' is the optional part, so both plain
    # and TLS URLs are accepted while 'htt://' is no longer matched.
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/in/show/(?P<id>[0-9]+)/'
    _TEST = {
        'url': 'http://www.rte.ie/player/in/show/10336191/',
        'info_dict': {
            'id': '10336191',
            'ext': 'mp4',
            'title': 'Nine News',
            # Raw string so that '\.' is not an invalid string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'The One O\'Clock News followed by Weather.',
            'duration': 1622.963,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, ...) for the show at url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)

        # Description and thumbnail are optional metadata: a missing tag
        # must not abort the extraction, hence fatal=False.
        description = self._search_regex(
            r'<meta name="description" content="(.*?)" />',
            webpage, 'description', fatal=False)
        # float_or_none divides by the scale argument (1000) and yields None
        # when the meta tag is absent.
        duration = float_or_none(
            self._html_search_meta('duration', webpage, 'duration'), 1000)

        thumbnail_id = self._search_regex(
            r'<meta name="thumbnail" content="uri:irus:(.*?)" />',
            webpage, 'thumbnail', fatal=False)
        thumbnail = None
        if thumbnail_id:
            thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg'

        feeds_url = self._html_search_meta("feeds-prefix", webpage, 'feeds url') + video_id
        feed = self._download_json(feeds_url, video_id)

        # The manifest URL is the server prefix joined with the relative
        # media path, both taken from the first media group of the first show.
        media_group = feed['shows'][0]['media:group'][0]
        f4m_url = media_group['rte:server'] + media_group['url']

        # Keep only the fields needed downstream and force the mp4 extension.
        # width/height are read with .get() so that a manifest entry lacking
        # those attributes yields None instead of raising KeyError.
        formats = [{
            'format_id': f['format_id'],
            'url': f['url'],
            'ext': 'mp4',
            'width': f.get('width'),
            'height': f.get('height'),
        } for f in self._extract_f4m_formats(f4m_url, video_id)]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
        }