mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-01-07 17:16:08 +00:00
Refactor fragments interface and dash segments downloader
- Eliminate segment_urls and initialization_url
+ Introduce manifest_url (a manifest may contain unfragmented data; in that case url is used for the direct media URL and manifest_url for the manifest itself)
* Rewrite the dashsegments downloader to use fragments data
* Improve generic MPD extraction
This commit is contained in:
parent
21d21b0c72
commit
86f4d14f81
|
@ -1,7 +1,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
|
@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD):
|
||||||
FD_NAME = 'dashsegments'
|
FD_NAME = 'dashsegments'
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
base_url = info_dict['url']
|
segments = info_dict['fragments'][:1] if self.params.get(
|
||||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
'test', False) else info_dict['fragments']
|
||||||
initialization_url = info_dict.get('initialization_url')
|
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
'total_frags': len(segments),
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_and_start_frag_download(ctx)
|
self._prepare_and_start_frag_download(ctx)
|
||||||
|
|
||||||
def combine_url(base_url, target_url):
|
|
||||||
if re.match(r'^https?://', target_url):
|
|
||||||
return target_url
|
|
||||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
|
||||||
|
|
||||||
segments_filenames = []
|
segments_filenames = []
|
||||||
|
|
||||||
fragment_retries = self.params.get('fragment_retries', 0)
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
def process_segment(segment, tmp_filename, fatal):
|
def process_segment(segment, tmp_filename, num):
|
||||||
target_url, segment_name = segment
|
segment_url = segment['url']
|
||||||
|
segment_name = 'Frag%d' % num
|
||||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||||
|
# In DASH, the first segment contains necessary headers to
|
||||||
|
# generate a valid MP4 file, so always abort for the first segment
|
||||||
|
fatal = num == 0 or not skip_unavailable_fragments
|
||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||||
|
@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
|
for i, segment in enumerate(segments):
|
||||||
segments_to_download.extend([
|
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||||
(segment_url, 'Seg%d' % i)
|
|
||||||
for i, segment_url in enumerate(segment_urls)])
|
|
||||||
|
|
||||||
for i, segment in enumerate(segments_to_download):
|
|
||||||
# In DASH, the first segment contains necessary headers to
|
|
||||||
# generate a valid MP4 file, so always abort for the first segment
|
|
||||||
fatal = i == 0 or not skip_unavailable_fragments
|
|
||||||
if not process_segment(segment, ctx['tmpfilename'], fatal):
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
|
@ -86,9 +86,10 @@ class InfoExtractor(object):
|
||||||
from worst to best quality.
|
from worst to best quality.
|
||||||
|
|
||||||
Potential fields:
|
Potential fields:
|
||||||
* url Mandatory. The URL of the video file or URL of
|
* url Mandatory. The URL of the video file
|
||||||
the manifest file in case of fragmented media
|
* manifest_url
|
||||||
(DASH, hls, hds).
|
The URL of the manifest file in case of
|
||||||
|
fragmented media (DASH, hls, hds)
|
||||||
* ext Will be calculated from URL if missing
|
* ext Will be calculated from URL if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
|
@ -1528,9 +1529,10 @@ class InfoExtractor(object):
|
||||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
return self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||||
|
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||||
"""
|
"""
|
||||||
Parse formats from MPD manifest.
|
Parse formats from MPD manifest.
|
||||||
References:
|
References:
|
||||||
|
@ -1654,6 +1656,7 @@ class InfoExtractor(object):
|
||||||
f = {
|
f = {
|
||||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'url': base_url,
|
'url': base_url,
|
||||||
|
'manifest_url': mpd_url,
|
||||||
'ext': mimetype2ext(mime_type),
|
'ext': mimetype2ext(mime_type),
|
||||||
'width': int_or_none(representation_attrib.get('width')),
|
'width': int_or_none(representation_attrib.get('width')),
|
||||||
'height': int_or_none(representation_attrib.get('height')),
|
'height': int_or_none(representation_attrib.get('height')),
|
||||||
|
@ -1682,14 +1685,6 @@ class InfoExtractor(object):
|
||||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||||
representation_ms_info['segment_urls'] = [
|
|
||||||
media_template % {
|
|
||||||
'Number': segment_number,
|
|
||||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
|
||||||
}
|
|
||||||
for segment_number in range(
|
|
||||||
representation_ms_info['start_number'],
|
|
||||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
|
||||||
representation_ms_info['fragments'] = [{
|
representation_ms_info['fragments'] = [{
|
||||||
'url': media_template % {
|
'url': media_template % {
|
||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
|
@ -1703,7 +1698,6 @@ class InfoExtractor(object):
|
||||||
# $Number*$ or $Time$ in media template with S list available
|
# $Number*$ or $Time$ in media template with S list available
|
||||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||||
representation_ms_info['segment_urls'] = []
|
|
||||||
representation_ms_info['fragments'] = []
|
representation_ms_info['fragments'] = []
|
||||||
segment_time = 0
|
segment_time = 0
|
||||||
segment_d = None
|
segment_d = None
|
||||||
|
@ -1715,7 +1709,6 @@ class InfoExtractor(object):
|
||||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
}
|
}
|
||||||
representation_ms_info['segment_urls'].append(segment_url)
|
|
||||||
representation_ms_info['fragments'].append({
|
representation_ms_info['fragments'].append({
|
||||||
'url': segment_url,
|
'url': segment_url,
|
||||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||||
|
@ -1745,17 +1738,15 @@ class InfoExtractor(object):
|
||||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||||
})
|
})
|
||||||
representation_ms_info['fragments'] = fragments
|
representation_ms_info['fragments'] = fragments
|
||||||
if 'segment_urls' in representation_ms_info:
|
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||||
|
# No fragments key is present in this case.
|
||||||
|
if 'fragments' in representation_ms_info:
|
||||||
f.update({
|
f.update({
|
||||||
'segment_urls': representation_ms_info['segment_urls'],
|
|
||||||
'fragments': [],
|
'fragments': [],
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
})
|
})
|
||||||
if 'initialization_url' in representation_ms_info:
|
if 'initialization_url' in representation_ms_info:
|
||||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||||
f.update({
|
|
||||||
'initialization_url': initialization_url,
|
|
||||||
})
|
|
||||||
if not f.get('url'):
|
if not f.get('url'):
|
||||||
f['url'] = initialization_url
|
f['url'] = initialization_url
|
||||||
f['fragments'].append({'url': initialization_url})
|
f['fragments'].append({'url': initialization_url})
|
||||||
|
|
|
@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor):
|
||||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
info_dict['formats'] = self._parse_mpd_formats(
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
doc, video_id,
|
||||||
|
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||||
|
mpd_url=url)
|
||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
return info_dict
|
return info_dict
|
||||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||||
|
|
Loading…
Reference in a new issue