From 46fd0dd5a5a8d98b76830636a3dcd241ac7bcc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 17 Oct 2014 22:58:18 +0700 Subject: [PATCH] [twitch] Rename extractor and support channel videos --- youtube_dl/extractor/__init__.py | 2 +- .../extractor/{justintv.py => twitch.py} | 102 +++++------------- 2 files changed, 28 insertions(+), 76 deletions(-) rename youtube_dl/extractor/{justintv.py => twitch.py} (69%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e38d2663..c29c29c10 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jukebox import JukeboxIE -from .justintv import JustinTVIE from .jpopsukitv import JpopsukiIE from .kankan import KankanIE from .keezmovies import KeezMoviesIE @@ -395,6 +394,7 @@ from .tutv import TutvIE from .tvigle import TvigleIE from .tvp import TvpIE from .tvplay import TVPlayIE +from .twitch import TwitchIE from .ubu import UbuIE from .udemy import ( UdemyIE, diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/twitch.py similarity index 69% rename from youtube_dl/extractor/justintv.py rename to youtube_dl/extractor/twitch.py index 411167d7d..42d38b224 100644 --- a/youtube_dl/extractor/justintv.py +++ b/youtube_dl/extractor/twitch.py @@ -1,26 +1,20 @@ from __future__ import unicode_literals import itertools -import json -import os import re from .common import InfoExtractor from ..utils import ( - compat_str, ExtractorError, - formatSeconds, parse_iso8601, ) -class JustinTVIE(InfoExtractor): - """Information extractor for justin.tv and twitch.tv""" +class TwitchIE(InfoExtractor): # TODO: One broadcast may be split into multiple videos. The key # 'broadcast_id' is the same for all parts, and 'broadcast_part' # starts at 1 and increases. Can we treat all parts as one video? - - _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ + _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/ (?: (?P[^/]+)| (?:(?:[^/]+)/b/(?P[^/]+))| @@ -28,9 +22,8 @@ class JustinTVIE(InfoExtractor): ) /?(?:\#.*)?$ """ - _JUSTIN_PAGE_LIMIT = 100 - IE_NAME = 'justin.tv' - IE_DESC = 'justin.tv and twitch.tv' + _PAGE_LIMIT = 100 + _API_BASE = 'https://api.twitch.tv' _TEST = { 'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', 'md5': 'ecaa8a790c22a40770901460af191c9a', @@ -44,39 +37,6 @@ class JustinTVIE(InfoExtractor): } } - _API_BASE = 'https://api.twitch.tv' - - # Return count of items, list of *valid* items - def _parse_page(self, url, video_id, counter): - info_json = self._download_webpage( - url, video_id, - 'Downloading video info JSON on page %d' % counter, - 'Unable to download video info JSON %d' % counter) - - response = json.loads(info_json) - if type(response) != list: - error_text = response.get('error', 'unknown error') - raise ExtractorError('Justin.tv API: %s' % error_text) - info = [] - for clip in response: - video_url = clip['video_file_url'] - if video_url: - video_extension = os.path.splitext(video_url)[1][1:] - video_date = re.sub('-', '', clip['start_time'][:10]) - video_uploader_id = clip.get('user_id', clip.get('channel_id')) - video_id = clip['id'] - video_title = clip.get('title', video_id) - info.append({ - 'id': compat_str(video_id), - 'url': video_url, - 'title': video_title, - 'uploader': clip.get('channel_name', video_uploader_id), - 'uploader_id': video_uploader_id, - 'upload_date': video_date, - 'ext': video_extension, - }) - return (len(response), info) - def _handle_error(self, response): if not isinstance(response, dict): return @@ -87,25 +47,21 @@ class JustinTVIE(InfoExtractor): expected=True) def _download_json(self, url, video_id, note='Downloading JSON metadata'): - response = super(JustinTVIE, self)._download_json(url, video_id, note) + response = super(TwitchIE, self)._download_json(url, video_id, note) self._handle_error(response) return response def _extract_media(self, item, item_id): - ITEMS = { 'a': 'video', 'c': 'chapter', } - info = self._extract_info(self._download_json( '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, 'Downloading %s info JSON' % ITEMS[item])) - response = self._download_json( '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id, 'Downloading %s playlist JSON' % ITEMS[item]) - entries = [] chunks = response['chunks'] qualities = list(chunks.keys()) @@ -144,14 +100,7 @@ class JustinTVIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - - api_base = 'http://api.twitch.tv' - paged = False - if mobj.group('channelid'): - paged = True - video_id = mobj.group('channelid') - api = api_base + '/channel/archives/%s.json' % video_id - elif mobj.group('chapterid'): + if mobj.group('chapterid'): return self._extract_media('c', mobj.group('chapterid')) """ @@ -203,22 +152,25 @@ class JustinTVIE(InfoExtractor): } return info """ - else: + elif mobj.group('videoid'): return self._extract_media('a', mobj.group('videoid')) - - entries = [] - offset = 0 - limit = self._JUSTIN_PAGE_LIMIT - for counter in itertools.count(1): - page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) - page_count, page_info = self._parse_page( - page_url, video_id, counter) - entries.extend(page_info) - if not paged or page_count != limit: - break - offset += limit - return { - '_type': 'playlist', - 'id': video_id, - 'entries': entries, - } + elif mobj.group('channelid'): + channel_id = mobj.group('channelid') + info = self._download_json( + '%s/kraken/channels/%s' % (self._API_BASE, channel_id), + channel_id, 'Downloading channel info JSON') + channel_name = info.get('display_name') or info.get('name') + entries = [] + offset = 0 + limit = self._PAGE_LIMIT + for counter in itertools.count(1): + response = self._download_json( + '%s/kraken/channels/%s/videos/?offset=%d&limit=%d' + % (self._API_BASE, channel_id, offset, limit), + channel_id, 'Downloading channel videos JSON page %d' % counter) + videos = response['videos'] + if not videos: + break + entries.extend([self.url_result(video['url'], 'Twitch') for video in videos]) + offset += limit + return self.playlist_result(entries, channel_id, channel_name)