from __future__ import unicode_literals import re from .common import InfoExtractor from ..compat import ( compat_urllib_request, ) from ..utils import ( parse_duration, parse_iso8601, str_to_int, ) class FourTubeIE(InfoExtractor): IE_NAME = '4tube' _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P\d+)' _TEST = { 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', 'md5': '6516c8ac63b03de06bc8eac14362db4f', 'info_dict': { 'id': '209733', 'ext': 'mp4', 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', 'uploader': 'WCP Club', 'uploader_id': 'wcp-club', 'upload_date': '20131031', 'timestamp': 1383263892, 'duration': 583, 'view_count': int, 'like_count': int, 'categories': list, 'age_limit': 18, } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_meta('name', webpage) timestamp = parse_iso8601(self._html_search_meta( 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( r'', webpage, 'uploader id') uploader = self._html_search_regex( r'', webpage, 'uploader') categories_html = self._search_regex( r'(?s)>\s*Categories / Tags\s*.*?', webpage, 'categories', fatal=False) categories = None if categories_html: categories = [ c.strip() for c in re.findall( r'(?s)
  • (.*?)', categories_html)] view_count = str_to_int(self._search_regex( r'', webpage, 'view count', fatal=False)) like_count = str_to_int(self._search_regex( r'', webpage, 'like count', fatal=False)) duration = parse_duration(self._html_search_meta('duration', webpage)) player_url = self._search_regex(r'