[tunein] Add new extractor (Closes #4097)

2020-11-16 09:42:26 +00:00 · 2014-11-24 23:15:33 +02:00 · 2014-11-24 23:15:33 +02:00 · 2c25a2bd29
parent 00e9d396ab
commit 2c25a2bd29
2 changed files with 102 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -405,6 +405,7 @@ from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dl/extractor/tunein.py
@@ -0,0 +1,101 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class TuneInIE(InfoExtractor):
    """Information extractor for TuneIn radio station pages.

    Accepts station URLs on tunein.com (``radio/...-s<id>`` and
    ``station/...StationId=<id>``) and ``tun.in/<code>`` short links; short
    links are resolved by following the redirect before extraction.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:
        tunein\.com/
        (?:
            radio/.*?-s|
            station/.*?StationId\=
        )(?P<id>[0-9]+)
        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
    )
    '''

    # Expected result shared by both test cases below (direct URL and
    # short-link redirect point at the same station).
    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'AAC',
        # Raw string: '\.' is not a valid escape in a normal string literal
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }

    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Extract station metadata and audio formats for a TuneIn URL.

        Returns an info dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``location`` and ``is_live``.

        Raises ExtractorError if a short link redirects to a URL that is not
        a station URL matched by ``_VALID_URL``, or if the station data
        carries no stream URL.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
            # The redirect may land on a page this extractor cannot handle
            # (no match at all, or a match without a numeric station id);
            # fail with a clear message instead of an AttributeError.
            if mobj is None or not mobj.group('id'):
                raise ExtractorError(
                    'Redirect target is not a supported station URL: %s'
                    % url)
        station_id = mobj.group('id')

        webpage = self._download_webpage(
            url, station_id, note='Downloading station webpage')

        # The station data is embedded in the page as an object assigned to
        # TuneIn.payload; capture it from the opening brace to a line end.
        payload = self._html_search_regex(
            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
        json_data = json.loads(payload)
        station_info = json_data['Station']['broadcast']

        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)

        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        # The response wraps the JSON document in "(...);" - strip the wrapper
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # A format entry without a URL cannot be downloaded
                continue
            media_type = stream.get('MediaType')
            formats.append({
                'abr': stream.get('Bandwidth'),
                'ext': media_type,
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                # Sometimes streams with the highest quality do not exist
                'preference': stream.get('Reliability'),
            })
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }