[youtube] Fix ytsearch* when cookies are provided

Closes #11924

The search API that takes a `page` parameter is no longer used by browsers,
and YouTube always returns {'reload': 'now'} when cookies are provided.

See http://youtube.github.io/spfjs/documentation/start/ for how SPF
works. Basically, appending a `spf` parameter to a static link yields
the corresponding dynamic link.
This commit is contained in:
Yen Chi Hsuan 2017-02-03 01:28:24 +08:00
parent c54c01f82d
commit a22b2fd19b
No known key found for this signature in database
GPG key ID: 7F902A182457CA23
2 changed files with 15 additions and 8 deletions

View file

@ -1,6 +1,7 @@
version <unreleased> version <unreleased>
Extractors Extractors
* [youtube] Fix ytsearch when cookies are provided (#11924)
+ [bilibili] Support new Bangumi URLs (#11845) + [bilibili] Support new Bangumi URLs (#11845)
version 2017.02.01 version 2017.02.01

View file

@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos = [] videos = []
limit = n limit = n
for pagenum in itertools.count(1):
url_query = { url_query = {
'search_query': query.encode('utf-8'), 'search_query': query.encode('utf-8'),
'page': pagenum,
'spf': 'navigate',
} }
url_query.update(self._EXTRA_QUERY_ARGS) url_query.update(self._EXTRA_QUERY_ARGS)
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
for pagenum in itertools.count(1):
data = self._download_json( data = self._download_json(
result_url, video_id='query "%s"' % query, result_url, video_id='query "%s"' % query,
note='Downloading page %s' % pagenum, note='Downloading page %s' % pagenum,
errnote='Unable to download API page') errnote='Unable to download API page',
query={'spf': 'navigate'})
html_content = data[1]['body']['content'] html_content = data[1]['body']['content']
if 'class="search-message' in html_content: if 'class="search-message' in html_content:
@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos += new_videos videos += new_videos
if not new_videos or len(videos) > limit: if not new_videos or len(videos) > limit:
break break
next_link = self._html_search_regex(
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
html_content, 'next link', default=None)
if next_link is None:
break
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
if len(videos) > n: if len(videos) > n:
videos = videos[:n] videos = videos[:n]