1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[xhamster] Move into own file

This commit is contained in:
Philipp Hagemeister 2013-06-23 22:32:44 +02:00
parent cd8b830292
commit cb10cded2a
2 changed files with 62 additions and 51 deletions

View file

@ -66,6 +66,7 @@ from .extractor.vimeo import VimeoIE
from .extractor.vine import VineIE from .extractor.vine import VineIE
from .extractor.worldstarhiphop import WorldStarHipHopIE from .extractor.worldstarhiphop import WorldStarHipHopIE
from .extractor.xnxx import XNXXIE from .extractor.xnxx import XNXXIE
from .extractor.xhamster import XHamsterIE
from .extractor.xvideos import XVideosIE from .extractor.xvideos import XVideosIE
from .extractor.yahoo import YahooIE, YahooSearchIE from .extractor.yahoo import YahooIE, YahooSearchIE
from .extractor.youjizz import YouJizzIE from .extractor.youjizz import YouJizzIE
@ -113,57 +114,6 @@ from .extractor.zdf import ZDFIE
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
_VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
if len(mobj.group('server')) == 0:
video_url = compat_urllib_parse.unquote(mobj.group('file'))
else:
video_url = mobj.group('server')+'/key='+mobj.group('file')
video_extension = video_url.split('.')[-1]
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
webpage, u'title')
# Can't see the description anywhere in the UI
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
# webpage, u'description', fatal=False)
# if video_description: video_description = unescapeHTML(video_description)
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
if mobj:
video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_upload_date = None
self._downloader.report_warning(u'Unable to extract upload date')
video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, u'uploader id', default=u'anonymous')
video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
webpage, u'thumbnail', fatal=False)
return [{
'id': video_id,
'url': video_url,
'ext': video_extension,
'title': video_title,
# 'description': video_description,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail
}]

View file

@ -0,0 +1,61 @@
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
)
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
_VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
if len(mobj.group('server')) == 0:
video_url = compat_urllib_parse.unquote(mobj.group('file'))
else:
video_url = mobj.group('server')+'/key='+mobj.group('file')
video_extension = video_url.split('.')[-1]
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
webpage, u'title')
# Can't see the description anywhere in the UI
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
# webpage, u'description', fatal=False)
# if video_description: video_description = unescapeHTML(video_description)
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
if mobj:
video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_upload_date = None
self._downloader.report_warning(u'Unable to extract upload date')
video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, u'uploader id', default=u'anonymous')
video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
webpage, u'thumbnail', fatal=False)
return [{
'id': video_id,
'url': video_url,
'ext': video_extension,
'title': video_title,
# 'description': video_description,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail
}]