Merge branch 'dhm' of https://github.com/ossi96/youtube-dl into ossi96-dhm

2024-01-07 17:16:08 +00:00 · 2015-03-28 22:09:05 +06:00 · 2015-03-28 22:09:05 +06:00 · 79fd11ab8e
parent cb88671e37 ff79552f13
commit 79fd11ab8e
2 changed files with 54 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -106,6 +106,7 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .dotsub import DotsubIE
 from .douyutv import DouyuTVIE
 from .dreisat import DreiSatIE
--- a/youtube_dl/extractor/dhm.py
+++ b/youtube_dl/extractor/dhm.py
@ -0,0 +1,53 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 import urllib2
 import xml.etree.ElementTree as ET
 import re
 class DHMIE(InfoExtractor):
     """Extractor for film pages of the Deutsches Historisches Museum archive."""
     IE_DESC = 'Deutsches Historisches Museum'
     # Capture the last path segment (the film slug) as the id. A lazy
     # ``.*?`` group at the very end of the pattern would always match the
     # empty string and therefore never yield a usable id.
     _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)*(?P<id>[^/?#]+)'
     _TEST = {
         'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
         'md5': '11c475f670209bf6acca0b2b7ef51827',
         'info_dict': {
             'id': 'marshallwg',
             'ext': 'flv',
             'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
             'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg',
         }
     }
     def _real_extract(self, url):
         """Return the info dict (id, title, url, thumbnail) for a film page.
         The page is scraped for its title and for the playlist URL given in
         the player's ``file`` setting; the media URL and thumbnail are then
         read from the first track of that XML playlist.
         """
         # Slug taken from the page URL: used for progress/error messages and
         # as a fallback id when the media URL has an unexpected shape.
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
         title = self._html_search_regex(
             r'dc:title=\"(.*?)\"', webpage, 'title')
         playlist_url = self._html_search_regex(
             r'file: \'(.*?)\'', webpage, 'playlist URL')
         # Fetch and parse through the extractor's own downloader rather than
         # urllib2, which is Python 2 only and bypasses the downloader's
         # settings and error reporting.
         playlist = self._download_xml(playlist_url, page_id)
         # Positional lookup: first child of the root, then its first child,
         # is treated as the track (presumably an XSPF-style
         # playlist/trackList/track layout -- verify against a live playlist).
         track = playlist[0][0]
         # The media URL is the first child of the track.
         video_url = track[0].text
         # The thumbnail is the third child; it is optional metadata, so a
         # shorter track must not abort the extraction.
         thumbnail = track[2].text if len(track) > 2 else None
         # The id is the file name of the media URL; the dot is escaped so
         # that only a real ".flv" suffix terminates it.
         mobj = re.search(r'video/(.+?)\.flv', video_url)
         video_id = mobj.group(1) if mobj else page_id
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
             'thumbnail': thumbnail,
         }