NJoy: improve extraction of NDR id, description, etc with current page formats

This commit is contained in:
dirkf 2022-01-17 03:22:32 +00:00
parent 4186e81777
commit f0a05a55c2

View file

@ -196,18 +196,25 @@ class NJoyIE(NDRBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_embed(self, webpage, display_id): def _extract_embed(self, webpage, display_id, url=None):
# find tell-tale URL with the actual ID, or ...
video_id = self._search_regex( video_id = self._search_regex(
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') (r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
description = self._search_regex( r'<iframe[^>]+id="pp_([\da-z]+)"', ),
webpage, 'NDR id', default=None)
description = (
self._html_search_meta('description', webpage)
or self._search_regex(
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
webpage, 'description', fatal=False) webpage, 'description', fatal=False))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'NDREmbedBase', 'ie_key': 'NDREmbedBase',
'url': 'ndr:%s' % video_id, 'url': 'ndr:%s' % video_id,
'display_id': display_id, 'display_id': display_id,
'description': description, 'description': description,
'title': display_id.replace('-', ' ').strip(),
} }