[20min] Improve (Closes #8110)

2020-11-16 09:42:26 +00:00 · 2016-01-04 02:33:08 +06:00 · 2016-01-04 02:33:08 +06:00 · 133b1886fc
parent 66295fa4a6
commit 133b1886fc
3 changed files with 74 additions and 57 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -364,7 +364,6 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .min20 import Min20IE
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .miomio import MioMioIE
@ -747,6 +746,7 @@ from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
 from .twentytwotracks import (
    TwentyTwoTracksIE,
    TwentyTwoTracksGenreIE
--- a/youtube_dl/extractor/min20.py
+++ b/youtube_dl/extractor/min20.py
@ -1,56 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class Min20IE(InfoExtractor):
    """Extractor for videos published on www.20min.ch.

    ``_VALID_URL`` accepts two URL shapes: video portal links of the form
    ``videotv/?vid=<digits>`` and any other path that ends in ``-<digits>``
    (news articles). The media URL is not read from the page directly; it is
    assembled from ``_BASE_URL``, the numeric ``file1`` flash variable found
    in the page, and the ``m.flv`` suffix.
    """
    _VALID_URL = r'http://www\.20min\.ch/(videotv/\?vid=(?P<video_id>[0-9]+)|.+?-(?P<page_id>[0-9]+)$)'
    _TESTS = [{
        'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
        'md5': 'cd4cbb99b94130cff423e967cd275e5e',
        'info_dict': {
            'id': '22050469',
            'ext': 'flv',
            'title': '«Wir müssen mutig nach vorne schauen»',
            'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
            'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
        }
    }, {
        'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
        'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
        'info_dict': {
            'id': '469148',
            'ext': 'flv',
            'title': '85 000 Franken für 15 perfekte Minuten',
            'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
            'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
        }
    }]
    # location of the flv videos, can't be extracted from the web page
    _BASE_URL = "http://flv-rr.20min-tv.ch/videos/"

    def _real_extract(self, url):
        """Build the info dict for the 20min.ch page at *url*.

        The reported ``id`` is the article number for article URLs and the
        ``vid`` query value for video portal URLs. Title and flash id are
        mandatory (their searches raise when nothing matches); description
        and thumbnail are optional and come back as ``None`` when the page
        lacks the corresponding meta tag.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('page_id')
        if video_id is None:
            # URL from the videoportal
            video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>.*<span>(.+?)</span></h1>', webpage, 'title')
        flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id')
        # Optional metadata must not abort extraction when the tag is absent.
        description = self._html_search_regex(
            r'<meta name="description" content="(.+?)" />', webpage, 'description',
            fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />', webpage, 'thumbnail',
            fatal=False)
        # Keep the page URL parameter intact; the media URL gets its own name.
        video_url = self._BASE_URL + flash_id + "m.flv"
        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail
        }
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@ -0,0 +1,73 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import remove_end
 class TwentyMinutenIE(InfoExtractor):
    """Information extractor for 20min.ch video portal pages and articles.

    ``_VALID_URL`` captures the numeric ``id`` from a ``vid=`` query
    parameter on ``videotv`` URLs; for every other path it captures the last
    path segment as ``display_id``. When the URL carries no ``id``, the
    numeric id is looked up in the downloaded page instead.
    """
    IE_NAME = '20min'
    _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
    _TESTS = [{
        # regular video
        'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
        'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
        'info_dict': {
            'id': '469148',
            'ext': 'flv',
            'title': '85 000 Franken für 15 perfekte Minuten',
            'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
            'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
        }
    }, {
        # news article with video
        'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
        'md5': 'cd4cbb99b94130cff423e967cd275e5e',
        'info_dict': {
            'id': '469408',
            'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
            'ext': 'flv',
            'title': '«Wir müssen mutig nach vorne schauen»',
            'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
            'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
        }
    }, {
        'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
        'only_matching': True,
    }, {
        'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*."""
        video_id, display_id = re.match(self._VALID_URL, url).group('id', 'display_id')
        if not display_id:
            # Video portal URLs have no slug; the numeric id doubles as one.
            display_id = video_id

        page = self._download_webpage(url, display_id)

        # Prefer the on-page headline; fall back to the Open Graph title.
        title = self._html_search_regex(
            r'<h1>.*?<span>(.+?)</span></h1>', page, 'title', default=None)
        if not title:
            og_title = self._og_search_title(page)
            without_prefix = re.sub(r'^20 [Mm]inuten.*? -', '', og_title)
            title = remove_end(without_prefix, ' - News')

        if not video_id:
            # Article URLs do not expose the numeric id; read it from the page.
            video_id = self._search_regex(
                r'"file\d?"\s*,\s*\"(\d+)', page, 'video id')

        description = self._html_search_meta('description', page, 'description')
        thumbnail = self._og_search_thumbnail(page)

        info = {
            'id': video_id,
            'display_id': display_id,
            'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
            'title': title,
        }
        info['description'] = description
        info['thumbnail'] = thumbnail
        return info