Merge branch 'peugeot-hellporno'

2024-01-07 17:16:08 +00:00 · 2014-12-29 21:33:57 +06:00 · 2014-12-29 21:33:57 +06:00 · 479514d015
parent 3e2bcf530b 355e41466d
commit 479514d015
2 changed files with 72 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -169,6 +169,7 @@ from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hornbunny import HornBunnyIE
--- a/youtube_dl/extractor/hellporno.py
+++ b/youtube_dl/extractor/hellporno.py
@@ -0,0 +1,71 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    js_to_json,
    remove_end,
 )
 class HellPornoIE(InfoExtractor):
    """Extractor for video pages matching ``_VALID_URL`` on hellporno.com.

    The page embeds a JavaScript ``flashvars`` object; the video id,
    thumbnail, file extension and the available stream URLs are all read
    from it.
    """

    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
        'md5': '1fee339c610d2049699ef2aa699439f1',
        'info_dict': {
            'id': '149116',
            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
            'ext': 'mp4',
            'title': 'Dixie is posing with naked ass very erotic',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``thumbnail``, ``categories``, ``age_limit`` and ``formats``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        title = remove_end(self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')

        flashvars = self._parse_json(self._search_regex(
            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            display_id, transform_source=js_to_json)

        video_id = flashvars.get('video_id')
        thumbnail = flashvars.get('preview_url')
        # The first character of 'postfix' is dropped (the '.mp4' default
        # implies a leading dot). ``or`` also covers a present-but-null
        # value, which ``.get(key, default)`` would let through.
        ext = (flashvars.get('postfix') or '.mp4')[1:]

        formats = []
        for video_url_key in ('video_url', 'video_alt_url'):
            video_url = flashvars.get(video_url_key)
            if not video_url:
                continue
            # Label stored next to each URL under '<key>_text'.
            video_text = flashvars.get('%s_text' % video_url_key)
            fmt = {
                'url': video_url,
                'ext': ext,
                'format_id': video_text,
            }
            # The label may be missing; only parse a height when it is there,
            # otherwise re.search() would raise TypeError on None.
            if video_text:
                m = re.search(r'^(?P<height>\d+)[pP]', video_text)
                if m:
                    fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        # Trim each entry and drop empty ones so that a missing or empty
        # keywords meta tag yields [] rather than [''].
        categories = [
            category.strip()
            for category in keywords.split(',')
            if category.strip()
        ]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
            'formats': formats,
        }