mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-01-07 17:16:08 +00:00
[amara] improve extraction
This commit is contained in:
parent
cf1a8668e8
commit
2cf8003638
|
@ -1,76 +1,103 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AmaraIE(InfoExtractor):
|
class AmaraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
# Youtube
|
||||||
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||||
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'h6ZuVdvYnfE',
|
'id': 'h6ZuVdvYnfE',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Why jury trials are becoming less common',
|
'title': 'Why jury trials are becoming less common',
|
||||||
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'subtitles': dict,
|
'subtitles': dict,
|
||||||
'upload_date': '20160813',
|
'upload_date': '20160813',
|
||||||
'uploader': 'PBS NewsHour',
|
'uploader': 'PBS NewsHour',
|
||||||
'uploader_id': 'PBSNewsHour'
|
'uploader_id': 'PBSNewsHour',
|
||||||
}
|
'timestamp': 1549639570,
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
|
||||||
'md5': '99392c75fa05d432a8f11df03612195e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '18622084',
|
|
||||||
'ext': 'mov',
|
|
||||||
'title': 'Vimeo at CES 2011!',
|
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'subtitles': dict,
|
|
||||||
'timestamp': 1294649110,
|
|
||||||
'upload_date': '20110110',
|
|
||||||
'uploader': 'Sam Morrill',
|
|
||||||
'uploader_id': 'sammorrill'
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
|
||||||
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ChimamandaAdichie_2009G-transcript',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The danger of a single story',
|
|
||||||
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'subtitles': dict,
|
|
||||||
'upload_date': '20131206'
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
}, {
|
||||||
|
# Vimeo
|
||||||
def get_subtitles_for_language(self, language):
|
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||||
return [{
|
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||||
'ext': type,
|
'info_dict': {
|
||||||
'url': language['subtitles_uri'].replace('format=json', 'format=' + type)
|
'id': '18622084',
|
||||||
} for type in ['vtt', 'srt', 'json']]
|
'ext': 'mov',
|
||||||
|
'title': 'Vimeo at CES 2011!',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'timestamp': 1294763658,
|
||||||
|
'upload_date': '20110111',
|
||||||
|
'uploader': 'Sam Morrill',
|
||||||
|
'uploader_id': 'sammorrill'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Direct Link
|
||||||
|
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||||
|
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's8KL7I3jLmh6',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The danger of a single story',
|
||||||
|
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20091007',
|
||||||
|
'timestamp': 1254942511,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
meta = self._download_json('https://amara.org/api/videos/%s/' % video_id, video_id, query={'format': 'json'})
|
meta = self._download_json(
|
||||||
|
'https://amara.org/api/videos/%s/' % video_id,
|
||||||
|
video_id, query={'format': 'json'})
|
||||||
|
title = meta['title']
|
||||||
|
video_url = meta['all_urls'][0]
|
||||||
|
|
||||||
video_url = meta.get('all_urls')[0]
|
subtitles = {}
|
||||||
subtitles = dict([(language['code'], self.get_subtitles_for_language(language)) for language in meta.get('languages', []) if language['published']])
|
for language in (meta.get('languages') or []):
|
||||||
|
subtitles_uri = language.get('subtitles_uri')
|
||||||
|
if not (subtitles_uri and language.get('published')):
|
||||||
|
continue
|
||||||
|
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||||
|
for f in ('json', 'srt', 'vtt'):
|
||||||
|
subtitle.append({
|
||||||
|
'ext': f,
|
||||||
|
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
info = {
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'title': meta['title'],
|
'title': title,
|
||||||
'description': meta.get('description'),
|
'description': meta.get('description'),
|
||||||
'thumbnail': meta.get('thumbnail')
|
'thumbnail': meta.get('thumbnail'),
|
||||||
|
'duration': int_or_none(meta.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(meta.get('created')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for ie in (YoutubeIE, VimeoIE):
|
||||||
|
if ie.suitable(video_url):
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
Loading…
Reference in a new issue