[americastestkitchen] improve season extraction

This commit is contained in:
Remita Amine 2021-01-21 16:47:55 +01:00
parent 657221c81d
commit cff72b4cc0

View file

@@ -99,7 +99,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
# ATK Season # ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1', 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
'info_dict': { 'info_dict': {
'id': 'season-1', 'id': 'season_1',
'title': 'Season 1', 'title': 'Season 1',
}, },
'playlist_count': 13, 'playlist_count': 13,
@@ -107,53 +107,53 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
# Cooks Country Season # Cooks Country Season
'url': 'https://www.cookscountry.com/episodes/browse/season_12', 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
'info_dict': { 'info_dict': {
'id': 'season-12', 'id': 'season_12',
'title': 'Season 12', 'title': 'Season 12',
}, },
'playlist_count': 13, 'playlist_count': 13,
}, {
# Multi-digit season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_20',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_name, season = re.match(self._VALID_URL, url).groups() show_name, season_number = re.match(self._VALID_URL, url).groups()
season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco' slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
filters = [ season = 'Season %d' % season_number
'search_season_list:Season %s' % season,
'search_document_klass:episode',
'search_show_slug:%s' % slug,
]
season_search = self._download_json( season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_atk_season_desc_production', 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={ season, headers={
'Origin': 'https://www.%s.com' % show_name, 'Origin': 'https://www.%s.com' % show_name,
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30', 'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={ }, query={
'facetFilters': json.dumps(filters), 'facetFilters': json.dumps([
'attributesToRetrieve': 'search_url', 'search_season_list:' + season,
'search_document_klass:episode',
'search_show_slug:' + slug,
]),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
'attributesToHighlight': '', 'attributesToHighlight': '',
# ATK and CCO generally have less than 26 episodes per season 'hitsPerPage': 1000,
'hitsPerPage': '100',
}) })
entries = [ def entries():
self.url_result( for episode in (season_search.get('hits') or []):
'https://www.%s.com%s' % (show_name, episode['search_url']), search_url = episode.get('search_url')
'AmericasTestKitchen', if not search_url:
try_get(episode, lambda e: e['objectID'].split('_')[-1])) continue
for episode in season_search['hits'] yield {
if 'search_url' in episode and episode['search_url'] '_type': 'url',
] 'url': 'https://www.%s.com%s' % (show_name, search_url),
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'title': episode.get('title'),
'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')),
'season_number': season_number,
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
'ie_key': AmericasTestKitchenIE.ie_key(),
}
return { return self.playlist_result(
'_type': 'playlist', entries(), 'season_%d' % season_number, season)
'id': 'season-%s' % season,
'title': 'Season %s' % season,
'entries': sorted(entries, key=lambda e: e.get('id')),
}