1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[americastestkitchen] Improve (closes #13996)

This commit is contained in:
Sergey M․ 2017-09-23 06:28:46 +07:00
parent 13de91c9e9
commit 4bb58fa118
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -1,85 +1,85 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
)
class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
'md5': 'b861c3e365ac38ad319cfd509c30577f', 'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': { 'info_dict': {
'id': '1_5g5zua6e', 'id': '1_5g5zua6e',
'title': 'atk_s17_e24.mp4', 'title': 'Summer Dinner Party',
'ext': 'mp4', 'ext': 'mp4',
'description': '<p>Host Julia Collin Davison goes into the test kitchen with test cook Dan Souza to learn how to make the ultimate Grill-Roasted Beef Tenderloin. Next, equipment expert Adam Ried reviews gas grills in the Equipment Corner. Then, gadget guru Lisa McManus uncovers the best quirky gadgets. Finally, test cook Erin McMurrer shows host Bridget Lancaster how to make an elegant Pear-Walnut Upside-Down Cake.</p>', 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1497285541, 'timestamp': 1497285541,
'upload_date': '20170612', 'upload_date': '20170612',
'uploader_id': 'roger.metcalf@americastestkitchen.com', 'uploader_id': 'roger.metcalf@americastestkitchen.com',
'release_date': '2017-06-17', 'release_date': '20170617',
'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg', 'series': "America's Test Kitchen",
'episode_number': 24, 'season_number': 17,
'episode': 'Summer Dinner Party', 'episode': 'Summer Dinner Party',
'episode_id': '548-summer-dinner-party', 'episode_number': 24,
'season_number': 17
}, },
'params': { 'params': {
# m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'https://www.americastestkitchen.com/episode/546-a-spanish-affair', 'only_matching': True,
'only_matching':
True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex( partner_id = self._search_regex(
r'partner_id/(?P<partner_id>\d+)', r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, webpage, 'kaltura partner id')
'partner_id',
group='partner_id')
video_data = self._parse_json( video_data = self._parse_json(
self._search_regex( self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*</script>', r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'), webpage, 'initial context'),
video_id) video_id)
episode_data = video_data['episodeDetail']['content']['data'] ep_data = try_get(
episode_content_meta = episode_data['full_video'] video_data,
external_id = episode_content_meta['external_id'] (lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
external_id = ep_data.get('external_id') or ep_meta['external_id']
# photo data title = ep_data.get('title') or ep_meta.get('title')
photo_data = episode_content_meta.get('photo') description = clean_html(ep_meta.get('episode_description') or ep_data.get(
thumbnail = photo_data.get('image_url') if photo_data else None 'description') or ep_meta.get('description'))
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
release_date = unified_strdate(ep_data.get('aired_at'))
# meta season_number = int_or_none(ep_meta.get('season_number'))
release_date = episode_data.get('aired_at') episode = ep_meta.get('title')
description = episode_content_meta.get('description') episode_number = int_or_none(ep_meta.get('episode_number'))
episode_number = int(episode_content_meta.get('episode_number'))
episode = episode_content_meta.get('title')
episode_id = episode_content_meta.get('episode_slug')
season_number = int(episode_content_meta.get('season_number'))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, external_id), 'url': 'kaltura:%s:%s' % (partner_id, external_id),
'ie_key': 'Kaltura', 'ie_key': 'Kaltura',
'id': video_id, 'title': title,
'release_date': release_date,
'thumbnail': thumbnail,
'description': description, 'description': description,
'episode_number': episode_number, 'thumbnail': thumbnail,
'release_date': release_date,
'series': "America's Test Kitchen",
'season_number': season_number,
'episode': episode, 'episode': episode,
'episode_id': episode_id, 'episode_number': episode_number,
'season_number': season_number
} }