Additional tweaks: allow any subdomain of ndr.de in _VALID_URL, simplify the quote match in the embed src regex

This commit is contained in:
dirkf 2022-01-19 13:24:33 +00:00
parent 39a98b09a2
commit 01824d275b

View file

@ -28,7 +28,7 @@ class NDRBaseIE(InfoExtractor):
class NDRIE(NDRBaseIE): class NDRIE(NDRBaseIE):
IE_NAME = 'ndr' IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk' IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
_VALID_URL = r'https?://(?:\w+\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{ _TESTS = [{
# httpVideo, same content id # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
@ -202,7 +202,7 @@ class NJoyIE(NDRBaseIE):
def _extract_embed(self, webpage, display_id, url=None): def _extract_embed(self, webpage, display_id, url=None):
# find tell-tale URL with the actual ID, or ... # find tell-tale URL with the actual ID, or ...
video_id = self._search_regex( video_id = self._search_regex(
(r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''', (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
r'<iframe[^>]+id="pp_([\da-z]+)"', ), r'<iframe[^>]+id="pp_([\da-z]+)"', ),
webpage, 'NDR id', default=None) webpage, 'NDR id', default=None)
@ -322,7 +322,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE): class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed' IE_NAME = 'ndr:embed'
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html' _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',