mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-01-07 17:16:08 +00:00
[udn] Add new extractor
This commit is contained in:
parent
de5c545648
commit
418c5cc3fc
|
@ -53,6 +53,7 @@ from youtube_dl.utils import (
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
url_basename,
|
url_basename,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
url_infer_protocol,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
@ -296,6 +297,10 @@ class TestUtil(unittest.TestCase):
|
||||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||||
'trailer.mp4')
|
'trailer.mp4')
|
||||||
|
|
||||||
|
def test_url_infer_protocol(self):
    # Each case pairs a target URL with the URL expected back when the
    # reference page is served over plain http.
    reference = 'http://foo.com/'
    cases = (
        # Protocol-relative target: the reference scheme is applied.
        ('//bar.com/', 'http://bar.com/'),
        # Target with its own scheme: returned as-is.
        ('https://bar.com/', 'https://bar.com/'),
    )
    for target, expected in cases:
        self.assertEqual(url_infer_protocol(reference, target), expected)
|
||||||
|
|
||||||
def test_parse_duration(self):
|
def test_parse_duration(self):
|
||||||
self.assertEqual(parse_duration(None), None)
|
self.assertEqual(parse_duration(None), None)
|
||||||
self.assertEqual(parse_duration(False), None)
|
self.assertEqual(parse_duration(False), None)
|
||||||
|
|
|
@ -557,6 +557,7 @@ from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
|
from .udn import UDNEmbedIE
|
||||||
from .ultimedia import UltimediaIE
|
from .ultimedia import UltimediaIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
|
|
|
@ -26,6 +26,7 @@ from ..utils import (
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
url_infer_protocol,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
|
@ -34,6 +35,7 @@ from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .smotri import SmotriIE
|
from .smotri import SmotriIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
|
from .udn import UDNEmbedIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -650,6 +652,17 @@ class GenericIE(InfoExtractor):
|
||||||
'title': "PFT Live: New leader in the 'new-look' defense",
|
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||||
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
# UDN embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||||
|
'md5': 'de06b4c90b042c128395a88f0384817e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '300040',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '生物老師男變女 全校挺"做自己"',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1268,6 +1281,13 @@ class GenericIE(InfoExtractor):
|
||||||
if nbc_sports_url:
|
if nbc_sports_url:
|
||||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
# Look for UDN embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(
|
||||||
|
url_infer_protocol(url, mobj.group('url')), 'UDNEmbed')
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
|
66
youtube_dl/extractor/udn.py
Normal file
66
youtube_dl/extractor/udn.py
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
url_infer_protocol,
|
||||||
|
js_to_json
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UDNEmbedIE(InfoExtractor):
    """Extractor for the embedded player pages at video.udn.com/embed/news/<id>."""

    # The scheme is optional so protocol-relative iframe sources
    # ("//video.udn.com/...") are matched as well.
    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video.udn.com/embed/news/300040',
        'md5': 'de06b4c90b042c128395a88f0384817e',
        'info_dict': {
            'id': '300040',
            'ext': 'mp4',
            'title': '生物老師男變女 全校挺"做自己"',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': '//video.udn.com/embed/news/300040',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for an embed page.

        The page is expected to define a JavaScript ``options`` object with
        a ``video`` mapping, a ``title`` and optionally a ``gallery`` list.
        When ``video`` holds a non-empty ``youtube`` entry the result is
        delegated to the Youtube extractor; otherwise every remaining entry
        is treated as ``format name -> API URL`` and the body returned by
        that API URL is used as the media URL.
        """
        video_id = self._match_id(url)

        page = self._download_webpage(url, video_id)

        options = json.loads(js_to_json(self._html_search_regex(
            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))

        video_urls = options['video']

        # Take the youtube entry out of the mapping in any case, so it is
        # never handled as a regular API URL below.
        youtube_url = video_urls.pop('youtube', None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        formats = []
        for video_type, api_url in video_urls.items():
            # An empty/null entry has nothing to request; skip it rather
            # than issuing a request for a meaningless URL.
            if not api_url:
                continue
            formats.append({
                'url': self._download_webpage(
                    url_infer_protocol(url, api_url), video_id,
                    'retrieve url for %s video' % video_type),
                'format_id': video_type,
                'preference': 0 if video_type == 'mp4' else -1,
            })

        self._sort_formats(formats)

        # The first gallery item, when there is one, supplies the thumbnail.
        gallery = options.get('gallery')
        thumbnail = gallery[0].get('original') if gallery else None

        return {
            'id': video_id,
            'formats': formats,
            'title': options['title'],
            'thumbnail': thumbnail,
        }
|
|
@ -1711,6 +1711,17 @@ def determine_protocol(info_dict):
|
||||||
return compat_urllib_parse_urlparse(url).scheme
|
return compat_urllib_parse_urlparse(url).scheme
|
||||||
|
|
||||||
|
|
||||||
|
def url_infer_protocol(ref_url, target_url):
    """ Infer protocol for protocol independent target urls

    ref_url is the URL whose scheme is borrowed; target_url is the URL that
    may be lacking one.

    target_url is returned unchanged when it already has a scheme, when it
    has no network location (it is then a relative path or empty, not a
    protocol-relative "//host/..." URL), or when ref_url has no scheme to
    borrow. Otherwise target_url is returned with ref_url's scheme.
    """
    parsed_target_url = list(compat_urllib_parse_urlparse(target_url))
    # Index 0 is the scheme, index 1 the network location. Forcing a scheme
    # onto a host-less target would yield a malformed URL.
    if parsed_target_url[0] or not parsed_target_url[1]:
        return target_url

    ref_scheme = compat_urllib_parse_urlparse(ref_url).scheme
    if not ref_scheme:
        return target_url

    parsed_target_url[0] = ref_scheme

    return compat_urlparse.urlunparse(parsed_target_url)
|
||||||
|
|
||||||
|
|
||||||
def render_table(header_row, data):
|
def render_table(header_row, data):
|
||||||
""" Render a list of rows, each as a list of values """
|
""" Render a list of rows, each as a list of values """
|
||||||
table = [header_row] + data
|
table = [header_row] + data
|
||||||
|
|
Loading…
Reference in a new issue