[tv2:article] Add extractor (Closes #5724)

This commit is contained in:
Sergey M․ 2015-05-17 03:32:53 +06:00
parent bc0f937b55
commit 588b82bbf8
2 changed files with 37 additions and 1 deletions

View file

@@ -572,7 +572,10 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE from .tunein import TuneInIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE from .tutv import TutvIE
from .tv2 import TV2IE from .tv2 import (
TV2IE,
TV2ArticleIE,
)
from .tv4 import TV4IE from .tv4 import TV4IE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import TvpIE, TvpSeriesIE

View file

@@ -1,12 +1,15 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
remove_end,
) )
@@ -91,3 +94,33 @@ class TV2IE(InfoExtractor):
'categories': categories, 'categories': categories,
'formats': formats, 'formats': formats,
} }
class TV2ArticleIE(InfoExtractor):
    """Playlist extractor for tv2.no article pages.

    Every ``data-assetid`` found in the article markup becomes one playlist
    entry pointing at ``http://www.tv2.no/v/<id>``, with ``'TV2'`` passed to
    ``url_result`` alongside the URL.
    """

    # Raw string: ``\.`` and ``\d`` must reach the regex engine untouched (in a
    # plain literal they are invalid string escapes).  ``https?`` accepts both
    # schemes; the original pattern only matched plain ``http``.
    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
        'info_dict': {
            'id': '6930542',
            'title': 'Russen hetses etter pingvintyveri innrømmer å ha åpnet luken på buret',
            'description': 'md5:339573779d3eea3542ffe12006190954',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.tv2.no/a/6930542',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the video assets embedded in an article.

        The numeric id captured by ``_VALID_URL`` is used as the playlist id.
        Title and description come from the page's Open Graph data, with a
        trailing ``' - TV2.no'`` removed.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Keep only the first occurrence of each asset id (page order is
        # preserved) so an asset referenced twice in the markup does not
        # yield two identical playlist entries.
        asset_ids = []
        for asset_id in re.findall(r'data-assetid="(\d+)"', webpage):
            if asset_id not in asset_ids:
                asset_ids.append(asset_id)

        entries = [
            self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
            for asset_id in asset_ids]

        title = remove_end(self._og_search_title(webpage), ' - TV2.no')

        # NOTE(review): the og:description lookup presumably returns None when
        # the tag is absent -- confirm against the base class.  Guard so that
        # remove_end is never handed None.
        description = self._og_search_description(webpage)
        if description is not None:
            description = remove_end(description, ' - TV2.no')

        return self.playlist_result(entries, playlist_id, title, description)