[kaltura] Improve iframe extraction (#28969)

Co-authored-by: Sergey M. <dstftw@gmail.com>
This commit is contained in:
Ben Rog-Wilhelm 2021-05-04 14:14:35 -05:00 committed by GitHub
parent 0204838163
commit fe05191b8c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 1 deletions

View file

@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor):
'format': 'mp4-408', 'format': 'mp4-408',
}, },
}, },
{
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
'url': 'https://www.gdcvault.com/play/1025699',
'info_dict': {
'id': '0_zagynv0a',
'ext': 'mp4',
'title': 'Tech Toolbox',
'upload_date': '20190408',
'uploader_id': 'joe@blazestreaming.com',
'timestamp': 1554764629,
},
'params': {
'skip_download': True,
},
},
] ]
def _login(self, webpage_url, display_id): def _login(self, webpage_url, display_id):

View file

@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor):
''', webpage)) ''', webpage))
or list(re.finditer( or list(re.finditer(
r'''(?xs) r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)* (?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)