[extractor/generic] Extract wistia embed code into separate method

This commit is contained in:
Sergey M․ 2017-05-13 21:51:58 +07:00
parent 7ad4362357
commit 58bb440283
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 27 additions and 25 deletions

View file

@ -88,6 +88,7 @@ from .rutube import RutubeIE
from .limelight import LimelightBaseIE from .limelight import LimelightBaseIE
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2111,36 +2112,15 @@ class GenericIE(InfoExtractor):
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
# Look for embedded Wistia player # Look for embedded Wistia player
match = re.search( wistia_url = WistiaIE._extract_url(webpage)
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) if wistia_url:
if match:
embed_url = self._proto_relative_url(
unescapeHTML(match.group('url')))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': embed_url, 'url': self._proto_relative_url(wistia_url),
'ie_key': 'Wistia', 'ie_key': WistiaIE.ie_key(),
'uploader': video_uploader, 'uploader': video_uploader,
} }
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
'url': 'wistia:%s' % match.group('id'),
'ie_key': 'Wistia',
'uploader': video_uploader,
}
match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
''', webpage)
if match:
return self.url_result(self._proto_relative_url(
'wistia:%s' % match.group('id')), 'Wistia')
# Look for SVT player # Look for SVT player
svt_url = SVTIE._extract_url(webpage) svt_url = SVTIE._extract_url(webpage)
if svt_url: if svt_url:

View file

@ -1,10 +1,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
unescapeHTML,
) )
@ -34,6 +37,25 @@ class WistiaIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
match = re.search(
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
return unescapeHTML(match.group('url'))
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return 'wistia:%s' % match.group('id')
match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
''', webpage)
if match:
return 'wistia:%s' % match.group('id')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)