[thesixtyone] Add new extractor (closes #3781)

2020-11-16 09:42:26 +00:00 · 2014-10-04 22:40:36 +03:00 · 2014-10-04 22:40:36 +03:00 · 5e69192ef7
parent e9be9a6acd
commit 5e69192ef7
2 changed files with 101 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -371,6 +371,7 @@ from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
--- a/youtube_dl/extractor/thesixtyone.py
+++ b/youtube_dl/extractor/thesixtyone.py
@@ -0,0 +1,100 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import unified_strdate
 class TheSixtyOneIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
        (?:.*?/)*
        (?:
            s|
            song/comments/list|
            song
        )/(?P<id>[A-Za-z0-9]+)/?$'''
    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
    _TESTS = [
        {
            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
            'info_dict': {
                'id': 'SrE3zD7s1jt',
                'ext': 'mp3',
                'title': 'CASIO - Unicorn War Mixtape',
                'thumbnail': 're:^https?://.*_desktop$',
                'upload_date': '20071217',
                'duration': 3208,
            }
        },
        {
            'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
            'only_matching': True,
        },
    ]
    _DECODE_MAP = {
        "x": "a",
        "m": "b",
        "w": "c",
        "q": "d",
        "n": "e",
        "p": "f",
        "a": "0",
        "h": "1",
        "e": "2",
        "u": "3",
        "s": "4",
        "i": "5",
        "o": "6",
        "y": "7",
        "r": "8",
        "c": "9"
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group('id')
        webpage = self._download_webpage(
            self._SONG_URL_TEMPLATE.format(song_id), song_id)
        song_data = json.loads(self._search_regex(
            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
        keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
        url = self._SONG_FILE_URL_TEMPLATE.format(
            "".join(reversed(keys)), **song_data)
        formats = [{
            'format_id': 'sd',
            'url': url,
            'ext': 'mp3',
        }]
        return {
            'id': song_id,
            'title': '{artist:} - {name:}'.format(**song_data),
            'formats': formats,
            'comment_count': song_data.get('comments_count'),
            'duration': song_data.get('play_time'),
            'like_count': song_data.get('score'),
            'thumbnail': self._THUMBNAIL_URL_TEMPLATE.format(**song_data),
            'upload_date': unified_strdate(song_data.get('publish_date')),
        }