1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Pierre Rudloff 2013-08-27 10:47:47 +02:00
commit c5b921b597
9 changed files with 159 additions and 8 deletions

View file

@ -6,7 +6,9 @@ from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
@ -29,6 +31,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE from .generic import GenericIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
@ -72,18 +75,18 @@ from .ted import TEDIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .tudou import TudouIE from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tutv import TutvIE from .tutv import TutvIE
from .ustream import UstreamIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .ustream import UstreamIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veoh import VeohIE from .veoh import VeohIE
from .vevo import VevoIE from .vevo import VevoIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE from .wat import WatIE
from .weibo import WeiboIE from .weibo import WeiboIE
from .wimp import WimpIE from .wimp import WimpIE

View file

@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
_TEST ={ _TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
u'file': u'93440716.mp4', u'file': u'93440716.flv',
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', u'md5': u'e59995ac63d0457783ea05f93f12a866',
u'info_dict': { u'info_dict': {
u'title': u'网事知多少 第32期车怒', u'title': u'网事知多少 第32期车怒',
}, },

View file

@ -0,0 +1,35 @@
# coding: utf-8
import re
from .common import InfoExtractor
class Canalc2IE(InfoExtractor):
_IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
_TEST = {
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
u'file': u'12163.mp4',
u'md5': u'060158428b650f896c542dfbb3d6487f',
u'info_dict': {
u'title': u'Terrasses du Numérique'
}
}
def _real_extract(self, url):
video_id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, video_id)
file_name = self._search_regex(
r"so\.addVariable\('file','(.*?)'\);",
webpage, 'file name')
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
title = self._html_search_regex(
r'class="evenement8">(.*?)</a>', webpage, u'title')
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': title,
}

View file

@ -5,7 +5,7 @@ from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
class CanalplusIE(InfoExtractor): class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)' _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr' IE_NAME = u'canalplus.fr'

View file

@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4', u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb', u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': { u'info_dict': {
u"uploader": u"Alex and Van .", u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
} }
} }

View file

@ -126,7 +126,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
self.report_extraction(video_id) self.report_extraction(video_id)
# Look for BrigthCove: # Look for BrightCove:
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None: if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.') self.to_screen(u'Brightcove video detected.')

View file

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class HarkIE(InfoExtractor):
_VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
_TEST = {
u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
u'file': u'mmbzyhkgny.mp3',
u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
u'info_dict': {
u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
u'duration': 11,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
json_url = "http://www.hark.com/clips/%s.json" %(video_id)
info_json = self._download_webpage(json_url, video_id)
info = json.loads(info_json)
final_url = info['url']
return {'id': video_id,
'url' : final_url,
'title': info['name'],
'ext': determine_ext(final_url),
'description': info['description'],
'thumbnail': info['image_original'],
'duration': info['duration'],
}

View file

@ -0,0 +1,76 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class TriluliluIE(InfoExtractor):
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
u'file': u"big-buck-bunny-1.mp4",
u'info_dict': {
u"title": u"Big Buck Bunny",
u"description": u":) pentru copilul din noi",
},
# Server ignores Range headers (--test)
u"params": {
u"skip_download": True
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
log_str = self._search_regex(
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
log = json.loads(log_str)
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log)
format_str = self._download_webpage(
format_url, video_id,
note=u'Downloading formats',
errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
u'&source=site&hash=%(hash)s&username=%(userid)s&'
u'key=ministhebest&format=%%s&sig=&exp=' %
log)
formats = [
{
'format': fnode.text,
'url': video_url_template % fnode.text,
}
for fnode in format_doc.findall('./formats/format')
]
info = {
'_type': 'video',
'id': video_id,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['format'].partition('-')[0]
return info

View file

@ -1,2 +1,2 @@
__version__ = '2013.08.23' __version__ = '2013.08.27'