mirror of
https://gitlab.com/dstftw/youtube-dl.git
synced 2020-11-16 09:42:26 +00:00
[youtube] Separate methods for embeds extraction
This commit is contained in:
parent
c5c9bf0c12
commit
66c9fa36c1
|
@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
|
|||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
data-video-url=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*|
|
||||
<object[^>]+data=|
|
||||
new\s+SWFObject\(
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v|p)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
# Look for YouTube embeds
|
||||
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||
if youtube_urls:
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
||||
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
||||
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
|
|
|
@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
playback_url, video_id, 'Marking watched',
|
||||
'Unable to mark watched', fatal=False)
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Embedded YouTube player
|
||||
entries = [
|
||||
unescapeHTML(mobj.group('url'))
|
||||
for mobj in re.finditer(r'''(?x)
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
data-video-url=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*|
|
||||
<object[^>]+data=|
|
||||
new\s+SWFObject\(
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v|p)/.+?)
|
||||
\1''', webpage)]
|
||||
|
||||
# lazyYT YouTube embed
|
||||
entries.extend(list(map(
|
||||
unescapeHTML,
|
||||
re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
|
||||
|
||||
# Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
entries.extend(m[-1] for m in matches)
|
||||
|
||||
return entries
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = YoutubeIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@classmethod
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
|
|
Loading…
Reference in a new issue