mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-01-07 17:16:08 +00:00
Merge remote-tracking branch 'upstream/master'
Conflicts: youtube_dl/extractor/jeuxvideo.py
This commit is contained in:
commit
8d212e604a
|
@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||||
|
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
|
|
|
@ -79,9 +79,13 @@ class FileDownloader(object):
|
||||||
rate = float(current) / dif
|
rate = float(current) / dif
|
||||||
eta = int((float(total) - float(current)) / rate)
|
eta = int((float(total) - float(current)) / rate)
|
||||||
(eta_mins, eta_secs) = divmod(eta, 60)
|
(eta_mins, eta_secs) = divmod(eta, 60)
|
||||||
if eta_mins > 99:
|
(eta_hours, eta_mins) = divmod(eta_mins, 60)
|
||||||
return '--:--'
|
if eta_hours > 99:
|
||||||
|
return '--:--:--'
|
||||||
|
if eta_hours == 0:
|
||||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||||
|
else:
|
||||||
|
return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def calc_speed(start, now, bytes):
|
def calc_speed(start, now, bytes):
|
||||||
|
|
|
@ -51,13 +51,16 @@ from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
|
from .rtlnow import RTLnowIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
|
from .slashdot import SlashdotIE
|
||||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
|
|
|
@ -4,6 +4,7 @@ import xml.etree.ElementTree
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
determine_ext,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
@ -12,7 +13,7 @@ from ..utils import (
|
||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||||
u'file': u'6902724.mp4',
|
u'file': u'6902724.mp4',
|
||||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||||
|
@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
|
||||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||||
|
u'file': u'3505939.mp4',
|
||||||
|
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Font Conference',
|
||||||
|
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -49,11 +59,12 @@ class CollegeHumorIE(InfoExtractor):
|
||||||
info['description'] = videoNode.findall('./description')[0].text
|
info['description'] = videoNode.findall('./description')[0].text
|
||||||
info['title'] = videoNode.findall('./caption')[0].text
|
info['title'] = videoNode.findall('./caption')[0].text
|
||||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||||
manifest_url = videoNode.findall('./file')[0].text
|
next_url = videoNode.findall('./file')[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise ExtractorError(u'Invalid metadata XML file')
|
raise ExtractorError(u'Invalid metadata XML file')
|
||||||
|
|
||||||
manifest_url += '?hdcore=2.10.3'
|
if next_url.endswith(u'manifest.f4m'):
|
||||||
|
manifest_url = next_url + '?hdcore=2.10.3'
|
||||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||||
u'Downloading XML manifest',
|
u'Downloading XML manifest',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
@ -65,9 +76,12 @@ class CollegeHumorIE(InfoExtractor):
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError as err:
|
except IndexError as err:
|
||||||
raise ExtractorError(u'Invalid manifest file')
|
raise ExtractorError(u'Invalid manifest file')
|
||||||
|
|
||||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||||
|
|
||||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||||
info['ext'] = 'mp4'
|
info['ext'] = 'mp4'
|
||||||
return [info]
|
else:
|
||||||
|
# Old-style direct links
|
||||||
|
info['url'] = next_url
|
||||||
|
info['ext'] = determine_ext(info['url'])
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
|
@ -77,7 +77,13 @@ class InfoExtractor(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
return re.match(cls._VALID_URL, url) is not None
|
|
||||||
|
# This does not use has/getattr intentionally - we want to know whether
|
||||||
|
# we have cached the regexp for *this* class, whereas getattr would also
|
||||||
|
# match the superclass
|
||||||
|
if '_VALID_URL_RE' not in cls.__dict__:
|
||||||
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
|
return cls._VALID_URL_RE.match(url) is not None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def working(cls):
|
def working(cls):
|
||||||
|
|
|
@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
|
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
|
||||||
webpage, u'video URL', flags=re.DOTALL)
|
webpage, u'video URL', flags=re.DOTALL)
|
||||||
|
|
||||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
|
||||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': title,
|
'title': self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|
|
@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
|
||||||
return new_url
|
return new_url
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
try:
|
||||||
new_url = self._test_redirect(url)
|
new_url = self._test_redirect(url)
|
||||||
if new_url: return [self.url_result(new_url)]
|
if new_url:
|
||||||
|
return [self.url_result(new_url)]
|
||||||
|
except compat_urllib_error.HTTPError:
|
||||||
|
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||||
|
pass
|
||||||
|
|
||||||
video_id = url.split('/')[-1]
|
video_id = url.split('/')[-1]
|
||||||
try:
|
try:
|
||||||
|
@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
|
||||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||||
if m_video_type is not None:
|
if m_video_type is not None:
|
||||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||||
|
if mobj is None:
|
||||||
|
# HTML5 video
|
||||||
|
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,24 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
class JeuxVideoIE(InfoExtractor):
|
class JeuxVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
|
u'file': u'5182.mp4',
|
||||||
|
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
|
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = re.match(self._VALID_URL, url).group(1)
|
title = re.match(self._VALID_URL, url).group(1)
|
||||||
|
@ -18,16 +31,18 @@ class JeuxVideoIE(InfoExtractor):
|
||||||
|
|
||||||
xml_config = self._download_webpage(xml_link, title,
|
xml_config = self._download_webpage(xml_link, title,
|
||||||
'Downloading XML config')
|
'Downloading XML config')
|
||||||
|
|
||||||
|
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||||
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
||||||
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
||||||
info = json.loads(info)['versions'][0]
|
info = json.loads(info)['versions'][0]
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||||
|
|
||||||
track_info = {'id':id,
|
return {'id': id,
|
||||||
'title' : title,
|
'title' : config.find('titre_video').text,
|
||||||
'ext' : 'mp4',
|
'ext' : 'mp4',
|
||||||
'url' : video_url
|
'url' : video_url,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': config.find('image').text,
|
||||||
}
|
}
|
||||||
|
|
||||||
return [track_info]
|
|
||||||
|
|
34
youtube_dl/extractor/pbs.py
Normal file
34
youtube_dl/extractor/pbs.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PBSIE(InfoExtractor):
    """Information extractor for video.pbs.org video pages."""

    # Dots in the host name are escaped so that only the literal host
    # "video.pbs.org" is accepted (an unescaped "." matches any character).
    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'

    _TEST = {
        u'url': u'http://video.pbs.org/video/2365006249/',
        u'file': u'2365006249.mp4',
        u'md5': 'ce1888486f0908d555a8093cac9a7362',
        u'info_dict': {
            u'title': u'A More Perfect Union',
            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
            u'duration': 3190,
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for a single PBS video.

        The numeric id is taken from ``url`` and used to request the
        ``videoInfo`` JSON document, from which all metadata is read.

        Raises KeyError if the JSON lacks ``title``, ``program`` or
        ``alternate_encoding``/``url``; the optional fields (description,
        thumbnail, duration) fall back to None.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info_page = self._download_webpage(info_url, video_id)
        info = json.loads(info_page)

        return {
            'id': video_id,
            'title': info['title'],
            'url': info['alternate_encoding']['url'],
            # The extension is hard-coded, not derived from the media URL.
            'ext': 'mp4',
            'description': info['program'].get('description'),
            'thumbnail': info.get('image_url'),
            'duration': info.get('duration'),
        }
|
113
youtube_dl/extractor/rtlnow.py
Normal file
113
youtube_dl/extractor/rtlnow.py
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""

    # Named groups used by _real_extract:
    #   url       - everything after the optional "http://" prefix
    #   base_url  - the host part including the trailing slash
    #   video_id  - numeric value of the container_id/film_id parameter
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'

    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for ``url``.

        Two documents are fetched: the video web page and the player data
        document whose address is embedded in that page.

        Raises ExtractorError when the page shows a notice block in front
        of the player teaser (the notice text becomes the error message)
        or when no rtmpe media URL is found in the player data.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group(u'video_id')
        video_page_url = u'http://' + url_match.group('base_url')
        # The scheme is optional in _VALID_URL, so it is always re-added.
        webpage_url = u'http://' + url_match.group('url')

        webpage = self._download_webpage(webpage_url, video_id)

        # A styled notice <div> directly before the player teaser is treated
        # as an error message from the site.
        notice = re.search(r'''(?sx)
            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
            <div[ ]id="playerteaser">''', webpage)
        if notice:
            raise ExtractorError(clean_html(notice.group(1)))

        video_title = self._html_search_regex(
            r'<title>(?P<title>[^<]+)</title>',
            webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)

        # The player data title carries both the description and the
        # broadcast date ("... - [Sendung ]vom DD.MM.YY[YY] HH:MM Uhr").
        title_m = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if title_m is None:
            video_description = None
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            video_description = title_m.group(u'description')
            # Two-digit years are expanded with a "20" century prefix.
            year = title_m.group('upload_date_Y') or (u'20' + title_m.group('upload_date_y'))
            video_upload_date = (
                year + title_m.group('upload_date_m') + title_m.group('upload_date_d'))

        # Thumbnail: not every video has a thumbnail
        thumb_m = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        video_thumbnail = thumb_m.group(u'thumbnail') if thumb_m else None

        media_m = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if media_m is None:
            raise ExtractorError(u'Unable to extract media URL')

        return [{
            'id': video_id,
            'url': media_m.group(u'url'),
            'play_path': u'mp4:' + media_m.group(u'play_path'),
            'page_url': video_page_url,
            'player_url': video_page_url + u'includes/vodplayer.swf',
            'ext': 'flv',
            'title': video_title,
            'description': video_description,
            'upload_date': video_upload_date,
            'thumbnail': video_thumbnail,
        }]
|
23
youtube_dl/extractor/slashdot.py
Normal file
23
youtube_dl/extractor/slashdot.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class SlashdotIE(InfoExtractor):
    """Information extractor for tv.slashdot.org embed pages.

    The page itself is not parsed for media; extraction is handed over to
    the 'Ooyala' extractor via the first script URL found on the page.
    """

    # Dots in the host name are escaped so that only the literal host
    # "tv.slashdot.org" is accepted (an unescaped "." matches any character).
    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
    }

    def _real_extract(self, url):
        """Return a URL result that delegates to the 'Ooyala' extractor.

        The embed id from ``url`` is used only as the video id passed to
        the page download; the ``src`` of the first ``<script src="...">``
        tag in the page is the URL that gets delegated.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # NOTE(review): assumes the first <script src> on the page is the
        # Ooyala player script -- confirm against the live page markup.
        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
        return self.url_result(ooyala_url, 'Ooyala')
|
|
@ -4,6 +4,7 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):
|
||||||
_VALID_URL = r'''^(?:https?://)?
|
_VALID_URL = r'''^(?:https?://)?
|
||||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||||
|
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
IE_NAME = u'soundcloud'
|
IE_NAME = u'soundcloud'
|
||||||
|
@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):
|
||||||
if track_id is not None:
|
if track_id is not None:
|
||||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||||
full_title = track_id
|
full_title = track_id
|
||||||
|
elif mobj.group('widget'):
|
||||||
|
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||||
else:
|
else:
|
||||||
# extract uploader (which is in the url)
|
# extract uploader (which is in the url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group(1)
|
||||||
|
|
|
@ -5,13 +5,13 @@ from .common import InfoExtractor
|
||||||
class StatigramIE(InfoExtractor):
|
class StatigramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://statigr.am/p/484091715184808010_284179915',
|
u'url': u'http://statigr.am/p/522207370455279102_24101272',
|
||||||
u'file': u'484091715184808010_284179915.mp4',
|
u'file': u'522207370455279102_24101272.mp4',
|
||||||
u'md5': u'deda4ff333abe2e118740321e992605b',
|
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"uploader_id": u"videoseconds",
|
u'uploader_id': u'aguynamedpatrick',
|
||||||
u"title": u"Instagram photo by @videoseconds"
|
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -11,7 +11,7 @@ class VevoIE(InfoExtractor):
|
||||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||||
(currently used by MTVIE)
|
(currently used by MTVIE)
|
||||||
"""
|
"""
|
||||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
|
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
u'file': u'GB1101300280.mp4',
|
||||||
|
|
|
@ -20,7 +20,8 @@ class VimeoIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||||
_NETRC_MACHINE = 'vimeo'
|
_NETRC_MACHINE = 'vimeo'
|
||||||
IE_NAME = u'vimeo'
|
IE_NAME = u'vimeo'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
u'url': u'http://vimeo.com/56015672',
|
u'url': u'http://vimeo.com/56015672',
|
||||||
u'file': u'56015672.mp4',
|
u'file': u'56015672.mp4',
|
||||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||||
|
@ -29,9 +30,21 @@ class VimeoIE(InfoExtractor):
|
||||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||||
u"uploader_id": u"user7108434",
|
u"uploader_id": u"user7108434",
|
||||||
u"uploader": u"Filippo Valsorda",
|
u"uploader": u"Filippo Valsorda",
|
||||||
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
|
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
|
||||||
|
u'file': u'68093876.mp4',
|
||||||
|
u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
|
||||||
|
u'note': u'Vimeo Pro video (#1197)',
|
||||||
|
u'info_dict': {
|
||||||
|
u'uploader_id': u'openstreetmapus',
|
||||||
|
u'uploader': u'OpenStreetMap US',
|
||||||
|
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
|
@ -83,7 +96,9 @@ class VimeoIE(InfoExtractor):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
if not mobj.group('proto'):
|
if not mobj.group('proto'):
|
||||||
url = 'https://' + url
|
url = 'https://' + url
|
||||||
if mobj.group('direct_link') or mobj.group('pro'):
|
elif mobj.group('pro'):
|
||||||
|
url = 'http://player.vimeo.com/video/' + video_id
|
||||||
|
elif mobj.group('direct_link'):
|
||||||
url = 'https://vimeo.com/' + video_id
|
url = 'https://vimeo.com/' + video_id
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
|
|
|
@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||||
|(?: # or the v= param in all its forms
|
|(?: # or the v= param in all its forms
|
||||||
(?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||||
v=
|
v=
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
|
|
||||||
__version__ = '2013.08.17'
|
__version__ = '2013.08.21'
|
||||||
|
|
Loading…
Reference in a new issue