[cbslocal] Support newyork.cbslocal.com

Closes #11285
This commit is contained in:
Yen Chi Hsuan 2016-11-24 20:32:17 +08:00
parent c867adc68c
commit 44444f0d3b
No known key found for this signature in database
GPG key ID: 7F902A182457CA23
2 changed files with 41 additions and 4 deletions

View file

@ -1,3 +1,9 @@
version <unreleased>
Extractors
+ [cbslocal] Recognize New York site (#11285)
version 2016.11.22 version 2016.11.22
Extractors Extractors

View file

@ -4,11 +4,14 @@ from __future__ import unicode_literals
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import unified_timestamp from ..utils import (
parse_iso8601,
unified_timestamp,
)
class CBSLocalIE(AnvatoIE): class CBSLocalIE(AnvatoIE):
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
_TESTS = [{ _TESTS = [{
# Anvato backend # Anvato backend
@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
'info_dict': {
'id': '3580809',
'ext': 'mp4',
'title': 'A Very Blue Anniversary',
'description': 'CBS2s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
'timestamp': 1479962220,
'upload_date': '20161124',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\WCBSTV',
'Syndication\\AOL',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\Yahoo',
'Content\\News',
'Content\\News\\Local News',
],
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
info_dict = self._extract_anvato_videos(webpage, display_id) info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex( time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
if time_str:
timestamp = unified_timestamp(time_str) timestamp = unified_timestamp(time_str)
else:
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
info_dict.update({ info_dict.update({
'display_id': display_id, 'display_id': display_id,