mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-01-07 17:16:08 +00:00
Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport
This commit is contained in:
commit
2c28da8e05
|
@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE
|
||||||
from .bet import BetIE
|
from .bet import BetIE
|
||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import BiliBiliIE
|
from .bilibili import BiliBiliIE
|
||||||
|
from .bleacherreport import (
|
||||||
|
BleacherReportIE,
|
||||||
|
BleacherReportCMSIE,
|
||||||
|
)
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
|
|
84
youtube_dl/extractor/amp.py
Normal file
84
youtube_dl/extractor/amp.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AMPIE(InfoExtractor):
    """Shared base for extractors whose metadata comes from an Akamai
    Adaptive Media Player (AMP) JSON feed."""

    def _extract_feed_info(self, url):
        """Download the AMP feed at *url* and convert its item to an info dict.

        The feed item is read from ``channel`` -> ``item`` of the JSON
        document. The returned dict carries ``id`` (the item's ``guid``),
        ``title``, ``description``, ``thumbnails``, ``timestamp``,
        ``duration``, ``subtitles`` and ``formats``.

        Raises ``KeyError`` if the feed lacks ``channel``/``item``/``guid``;
        download failures are reported by ``_download_json``.
        """
        item = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')['channel']['item']

        video_id = item['guid']

        def get_media_node(name, default=None):
            # Look for "media-<name>" inside the media group first, then on
            # the item itself, and finally fall back to the bare name.
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

        def as_list(node):
            # The code this replaces wrapped a lone dict into a list, so a
            # node may be a single dict or a list of dicts; a missing node
            # becomes an empty list so callers can always iterate.
            if not node:
                return []
            if isinstance(node, dict):
                return [node]
            return node

        thumbnails = []
        for thumbnail_data in as_list(get_media_node('thumbnail')):
            thumbnail = thumbnail_data.get('@attributes') or {}
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                # thumbnails are optional metadata: skip broken entries
                # instead of failing the whole extraction
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail_url, 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in as_list(get_media_node('subTitle')):
            subtitle = subtitle_data.get('@attributes') or {}
            subtitle_url = subtitle.get('href')
            if not subtitle_url:
                continue
            lang = subtitle.get('lang') or 'en'
            # append rather than assign so several tracks sharing one
            # language code do not overwrite each other
            subtitles.setdefault(lang, []).append({'url': subtitle_url})

        formats = []
        media_content = as_list(get_media_node('content'))
        for media_data in media_content:
            media = media_data.get('@attributes') or {}
            media_url = media.get('url')
            if not media_url:
                continue
            media_type = media.get('type')
            if media_type == 'video/f4m':
                f4m_formats = self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                category = media_data.get('media-category') or {}
                formats.append({
                    # the label is only a display name; None when absent
                    'format_id': (category.get('@attributes') or {}).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        # NOTE(review): with no usable media entries `formats` is empty here;
        # presumably _sort_formats reports that case - confirm in common.py.
        self._sort_formats(formats)

        # Duration is taken from the first media entry, when there is one.
        duration = None
        if media_content:
            duration = int_or_none(
                (media_content[0].get('@attributes') or {}).get('duration'))

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': duration,
            # previously collected but never returned
            'subtitles': subtitles,
            'formats': formats,
        }
|
106
youtube_dl/extractor/bleacherreport.py
Normal file
106
youtube_dl/extractor/bleacherreport.py
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .amp import AMPIE
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BleacherReportIE(InfoExtractor):
    """Extractor for Bleacher Report article pages that carry a video."""

    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
        'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
        'info_dict': {
            'id': '2496438',
            'ext': 'mp4',
            'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
            'uploader_id': 3992341,
            'description': 'CFB, ACC, Florida State',
            'timestamp': 1434380212,
            'upload_date': '20150615',
            'uploader': 'Team Stream Now ',
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
        'info_dict': {
            'id': '2586817',
            'ext': 'mp4',
            'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
            'timestamp': 1446839961,
            'uploader': 'Sean Fay',
            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
            'uploader_id': 6466954,
            'upload_date': '20151011',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result from the article API record.

        Article-level metadata (title, author, counts, primary photo) is
        read from the JSON API; the ``url`` entry is derived from the
        article's ``video`` node according to its ``type``. Raises an
        expected ``ExtractorError`` when the article has no video.
        """
        article_id = self._match_id(url)

        api_url = 'http://api.bleacherreport.com/api/v1/articles/%s' % article_id
        article = self._download_json(api_url, article_id)['article']

        # The primary photo, when present, is the only thumbnail offered.
        photo = article.get('primaryPhoto')
        thumbnails = [{
            'url': photo['url'],
            'width': photo.get('width'),
            'height': photo.get('height'),
        }] if photo else []

        author = article.get('author', {})
        info = {
            '_type': 'url_transparent',
            'id': article_id,
            'title': article['title'],
            'uploader': author.get('name'),
            'uploader_id': article.get('authorId'),
            'timestamp': parse_iso8601(article.get('createdAt')),
            'thumbnails': thumbnails,
            'comment_count': int_or_none(article.get('commentsCount')),
            'view_count': int_or_none(article.get('hitCount')),
        }

        video = article.get('video')
        if not video:
            raise ExtractorError('no video in the article', expected=True)

        # Map the hosting service named by the video type onto a URL.
        video_type = video['type']
        if video_type == 'cms.bleacherreport.com':
            target = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
        elif video_type == 'ooyala.com':
            target = 'ooyala:%s' % video['id']
        elif video_type == 'youtube.com':
            target = video['id']
        elif video_type == 'vine.co':
            target = 'https://vine.co/v/%s' % video['id']
        else:
            # unknown host: the type and id are simply concatenated
            target = video_type + video['id']

        info['url'] = target
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class BleacherReportCMSIE(AMPIE):
    """Extractor for Bleacher Report ``video_embed`` pages, served from the
    site's CMS through an Akamai AMP feed."""

    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
        'md5': 'f0ca220af012d4df857b54f792c586bb',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
            'ext': 'flv',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
        },
    }]

    def _real_extract(self, url):
        """Fetch the CMS AMP feed for the embed id and return its info dict.

        The ``id`` reported by the feed is replaced with the id taken from
        the embed URL.
        """
        embed_id = self._match_id(url)
        feed_url = 'http://cms.bleacherreport.com/media/items/%s/akamai.json' % embed_id
        result = self._extract_feed_info(feed_url)
        result['id'] = embed_id
        return result
|
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .amp import AMPIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
@ -12,14 +12,11 @@ from ..compat import (
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DramaFeverBaseIE(InfoExtractor):
|
class DramaFeverBaseIE(AMPIE):
|
||||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||||
_NETRC_MACHINE = 'dramafever'
|
_NETRC_MACHINE = 'dramafever'
|
||||||
|
|
||||||
|
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||||
'timestamp': 1404336058,
|
'timestamp': 1404336058,
|
||||||
'upload_date': '20140702',
|
'upload_date': '20140702',
|
||||||
'duration': 343,
|
'duration': 343,
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).replace('/', '.')
|
video_id = self._match_id(url).replace('/', '.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
feed = self._download_json(
|
info = self._extract_feed_info(
|
||||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
|
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||||
video_id, 'Downloading episode JSON')['channel']['item']
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError):
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Currently unavailable in your country.', expected=True)
|
'Currently unavailable in your country.', expected=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
media_group = feed.get('media-group', {})
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for media_content in media_group['media-content']:
|
|
||||||
src = media_content.get('@attributes', {}).get('url')
|
|
||||||
if not src:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(src)
|
|
||||||
if ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
src, video_id, f4m_id='hds'))
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
src, video_id, 'mp4', m3u8_id='hls'))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': src,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = media_group.get('media-title')
|
|
||||||
description = media_group.get('media-description')
|
|
||||||
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
|
|
||||||
thumbnail = self._proto_relative_url(
|
|
||||||
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
|
|
||||||
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for media_subtitle in media_group.get('media-subTitle', []):
|
|
||||||
lang = media_subtitle.get('@attributes', {}).get('lang')
|
|
||||||
href = media_subtitle.get('@attributes', {}).get('href')
|
|
||||||
if not lang or not href:
|
|
||||||
continue
|
|
||||||
subtitles[lang] = [{
|
|
||||||
'ext': 'ttml',
|
|
||||||
'url': href,
|
|
||||||
}]
|
|
||||||
|
|
||||||
series_id, episode_number = video_id.split('.')
|
series_id, episode_number = video_id.split('.')
|
||||||
episode_info = self._download_json(
|
episode_info = self._download_json(
|
||||||
# We only need a single episode info, so restricting page size to one episode
|
# We only need a single episode info, so restricting page size to one episode
|
||||||
|
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||||
if value:
|
if value:
|
||||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||||
subtitles.setdefault('English', []).append({
|
info['subtitiles'].setdefault('English', []).append({
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'url': subfile,
|
'url': subfile,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return info
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||||
|
|
|
@ -2,14 +2,10 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .amp import AMPIE
|
||||||
from ..utils import (
|
|
||||||
parse_iso8601,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FoxNewsIE(InfoExtractor):
|
class FoxNewsIE(AMPIE):
|
||||||
IE_DESC = 'Fox News and Fox Business Video'
|
IE_DESC = 'Fox News and Fox Business Video'
|
||||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
|
||||||
'id': '3937480',
|
'id': '3937480',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Frozen in Time',
|
'title': 'Frozen in Time',
|
||||||
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
|
'description': '16-year-old girl is size of toddler',
|
||||||
'duration': 265,
|
'duration': 265,
|
||||||
'timestamp': 1304411491,
|
# 'timestamp': 1304411491,
|
||||||
'upload_date': '20110503',
|
# 'upload_date': '20110503',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
|
||||||
'id': '3922535568001',
|
'id': '3922535568001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
||||||
'description': "Congressman discusses the president's executive action",
|
'description': "Congressman discusses president's plan",
|
||||||
'duration': 292,
|
'duration': 292,
|
||||||
'timestamp': 1417662047,
|
# 'timestamp': 1417662047,
|
||||||
'upload_date': '20141204',
|
# 'upload_date': '20141204',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id = mobj.group('id')
|
|
||||||
host = mobj.group('host')
|
|
||||||
|
|
||||||
video = self._download_json(
|
info = self._extract_feed_info(
|
||||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
|
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
|
||||||
|
info['id'] = video_id
|
||||||
item = video['channel']['item']
|
return info
|
||||||
title = item['title']
|
|
||||||
description = item['description']
|
|
||||||
timestamp = parse_iso8601(item['dc-date'])
|
|
||||||
|
|
||||||
media_group = item['media-group']
|
|
||||||
duration = None
|
|
||||||
formats = []
|
|
||||||
for media in media_group['media-content']:
|
|
||||||
attributes = media['@attributes']
|
|
||||||
video_url = attributes['url']
|
|
||||||
if video_url.endswith('.f4m'):
|
|
||||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
|
|
||||||
elif video_url.endswith('.m3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
|
|
||||||
elif not video_url.endswith('.smil'):
|
|
||||||
duration = int_or_none(attributes.get('duration'))
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': media['media-category']['@attributes']['label'],
|
|
||||||
'preference': 1,
|
|
||||||
'vbr': int_or_none(attributes.get('bitrate')),
|
|
||||||
'filesize': int_or_none(attributes.get('fileSize'))
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
media_thumbnail = media_group['media-thumbnail']['@attributes']
|
|
||||||
thumbnails = [{
|
|
||||||
'url': media_thumbnail['url'],
|
|
||||||
'width': int_or_none(media_thumbnail.get('width')),
|
|
||||||
'height': int_or_none(media_thumbnail.get('height')),
|
|
||||||
}] if media_thumbnail else []
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue