Barbagus
eac65aaa1c
Due to faulty syntax the `provides_accessibility` field was None/True instead of False/True
342 lines
11 KiB
Python
342 lines
11 KiB
Python
# License: GNU AGPL v3: http://www.gnu.org/licenses/
|
|
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
|
|
|
|
"""Provide HLS protocol utilities."""
|
|
|
|
# For terminology, from HLS protocol RFC8216
|
|
|
|
# 2. Overview
|
|
#
|
|
# A multimedia presentation is specified by a Uniform Resource
|
|
# Identifier (URI) [RFC3986] to a Playlist.
|
|
#
|
|
# A Playlist is either a Media Playlist or a Master Playlist. Both are
|
|
# UTF-8 text files containing URIs and descriptive tags.
|
|
#
|
|
# A Media Playlist contains a list of Media Segments, which, when
|
|
# played sequentially, will play the multimedia presentation.
|
|
#
|
|
# Here is an example of a Media Playlist:
|
|
#
|
|
# #EXTM3U
|
|
# #EXT-X-TARGETDURATION:10
|
|
#
|
|
# #EXTINF:9.009,
|
|
# http://media.example.com/first.ts
|
|
# #EXTINF:9.009,
|
|
# http://media.example.com/second.ts
|
|
# #EXTINF:3.003,
|
|
# http://media.example.com/third.ts
|
|
#
|
|
# The first line is the format identifier tag #EXTM3U. The line
|
|
# containing #EXT-X-TARGETDURATION says that all Media Segments will be
|
|
# 10 seconds long or less. Then, three Media Segments are declared.
|
|
# The first and second are 9.009 seconds long; the third is 3.003
|
|
# seconds.
|
|
#
|
|
# To play this Playlist, the client first downloads it and then
|
|
# downloads and plays each Media Segment declared within it. The
|
|
# client reloads the Playlist as described in this document to discover
|
|
# any added segments. Data SHOULD be carried over HTTP [RFC7230], but,
|
|
# in general, a URI can specify any protocol that can reliably transfer
|
|
# the specified resource on demand.
|
|
#
|
|
# A more complex presentation can be described by a Master Playlist. A
|
|
# Master Playlist provides a set of Variant Streams, each of which
|
|
# describes a different version of the same content.
|
|
#
|
|
# A Variant Stream includes a Media Playlist that specifies media
|
|
# encoded at a particular bit rate, in a particular format, and at a
|
|
# particular resolution for media containing video.
|
|
#
|
|
# A Variant Stream can also specify a set of Renditions. Renditions
|
|
# are alternate versions of the content, such as audio produced in
|
|
# different languages or video recorded from different camera angles.
|
|
#
|
|
# Clients should switch between different Variant Streams to adapt to
|
|
# network conditions. Clients should choose Renditions based on user
|
|
# preferences.
|
|
|
|
import contextlib
|
|
import os
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
import m3u8
|
|
|
|
from . import error, model, subtitles
|
|
|
|
#
|
|
# WARNING !
|
|
#
|
|
# This module does not aim for a full implementation of HLS, only the
|
|
# subset useful for the actual observed usage of ArteTV.
|
|
#
|
|
# - URIs are relative file paths
|
|
# - Master playlists have at least one variant
|
|
# - Every variant is of different resolution
|
|
# - Every variant has exactly one audio medium
|
|
# - Every variant has at most one subtitles medium
|
|
# - Audio and video media playlists segments are incremental ranges of
|
|
# the same file
|
|
# - Subtitles media playlists have only one segment
|
|
|
|
|
|
class UnexpectedResponse(error.UnexpectedError):
    """Unexpected response from ArteTV.

    Raised by this module with an error code string as first argument
    (e.g. "NO_AUDIO_MEDIA"), followed by context values such as the URL of
    the offending playlist.
    """
|
|
|
|
|
|
def _fetch_playlist(http_session, url):
    # Download the M3U8 playlist found at `url` and return it parsed.
    # An HTTP error status is raised by `raise_for_status()`.
    response = http_session.get(url)
    response.raise_for_status()

    # The playlist's own URL is handed to the parser along with its text.
    playlist_text = response.text
    return m3u8.loads(playlist_text, url)
|
|
|
|
|
|
def fetch_program_sources(http_session, metadata, master_playlist_url):
    """Fetch the given master playlist and yield available sources.

    This is a generator: the playlist is only fetched and validated once
    iteration starts. One `model.Source` is yielded per variant stream, all
    of them sharing the same audio (and optional subtitles) rendition.

    Raises `UnexpectedResponse` when the playlist does not have the expected
    shape: no audio medium or more than one, more than one subtitles medium,
    a variant referring to another audio/subtitles group, or two variants
    with identical resolution and frame rate.
    """
    master_playlist = _fetch_playlist(http_session, master_playlist_url)

    def is_accessible(medium):
        # True/False (never None): does the medium advertise accessibility ?
        return medium.characteristics is not None and (
            "public.accessibility" in medium.characteristics
        )

    # Pick the single audio medium and the optional single subtitles medium.
    audio_medium = None
    subtitles_medium = None

    for medium in master_playlist.media:
        kind = medium.type
        if kind == "AUDIO":
            if audio_medium:
                raise UnexpectedResponse(
                    "MULTIPLE_AUDIO_MEDIA", master_playlist_url
                )
            audio_medium = medium
        elif kind == "SUBTITLES":
            if subtitles_medium:
                raise UnexpectedResponse(
                    "MULTIPLE_SUBTITLES_MEDIA", master_playlist_url
                )
            subtitles_medium = medium

    if not audio_medium:
        raise UnexpectedResponse("NO_AUDIO_MEDIA", master_playlist_url)

    audio_rendition = model.RenditionAudio(
        audio_medium.language,
        audio_medium.name.startswith("VO"),
        is_accessible(audio_medium),
    )

    if subtitles_medium:
        subtitles_rendition = model.RenditionSubtitles(
            subtitles_medium.language,
            is_accessible(subtitles_medium),
        )
    else:
        subtitles_rendition = None

    rendition = model.Rendition(audio_rendition, subtitles_rendition)

    # Variants already yielded, used to detect duplicates.
    seen_variants = set()

    for video_playlist in master_playlist.playlists:
        info = video_playlist.stream_info

        if info.audio != audio_medium.group_id:
            raise UnexpectedResponse(
                "INVALID_VARIANT_AUDIO_MEDIA", master_playlist_url, info.audio
            )

        # With a subtitles medium the variant must point at its group,
        # without one the variant must not refer to any subtitles at all.
        if subtitles_medium:
            subtitles_mismatch = info.subtitles != subtitles_medium.group_id
        else:
            subtitles_mismatch = bool(info.subtitles)

        if subtitles_mismatch:
            raise UnexpectedResponse(
                "INVALID_VARIANT_SUBTITLES_MEDIA",
                master_playlist_url,
                info.subtitles,
            )

        resolution = info.resolution
        variant = model.Variant(resolution[0], resolution[1], info.frame_rate)

        if variant in seen_variants:
            raise UnexpectedResponse(
                "DUPLICATE_VARIANT", master_playlist_url, variant
            )
        seen_variants.add(variant)

        yield model.Source(
            metadata,
            rendition,
            variant,
            video_playlist.absolute_uri,
            audio_medium.absolute_uri,
            subtitles_medium.absolute_uri if subtitles_medium else None,
        )
|
|
|
|
|
|
def _convert_byterange(obj):
    # Translate the M3U8 `byterange` attribute of `obj` into an HTTP range.
    #   input : "count@offset"  (number of bytes, position of the first one)
    #   output: (start, end)    (positions of first and last byte, inclusive)
    length, first_byte = map(int, obj.byterange.split("@"))
    last_byte = first_byte + length - 1
    return first_byte, last_byte
|
|
|
|
|
|
def _fetch_av_media_playlist(http_session, url):
    # Fetch an audio or video media playlist and check that it describes one
    # single file cut into contiguous byte ranges.
    # Return a tuple:
    # - the media file url
    # - the list of (start, end) ranges of that file, in playlist order
    playlist = _fetch_playlist(http_session, url)

    # The first entry of the segment map gives the file and its first range.
    init_section = playlist.segment_map[0]
    media_file = init_section.uri

    first_range = _convert_byterange(init_section)
    if first_range[0] != 0:
        raise UnexpectedResponse("INVALID_AV_MEDIA_FRAGMENT_START", url)

    ranges = [first_range]

    for segment in playlist.segments:
        if segment.uri != media_file:
            raise UnexpectedResponse("MULTIPLE_AV_MEDIA_FILES", url)

        segment_range = _convert_byterange(segment)

        # Every range has to begin right after the end of the previous one.
        if segment_range[0] != ranges[-1][1] + 1:
            raise UnexpectedResponse("DISCONTINUOUS_AV_MEDIA_FRAGMENT", url)

        ranges.append(segment_range)

    return init_section.absolute_uri, ranges
|
|
|
|
|
|
def _fetch_subtitles_media_playlist(http_session, url):
    # Fetch a subtitles media playlist, which must hold exactly one segment.
    # Return the url of that single subtitles file.
    playlist = _fetch_playlist(http_session, url)
    file_urls = [segment.absolute_uri for segment in playlist.segments]

    if len(file_urls) == 1:
        return file_urls[0]

    # Anything but one file is unexpected: tell "none" from "too many".
    error_code = (
        "SUBTITLES_MEDIA_MULTIPLE_FILES"
        if file_urls
        else "SUBTITLES_MEDIA_NO_FILES"
    )
    raise UnexpectedResponse(error_code, url)
|
|
|
|
|
|
def _download_av_media(http_session, media_playlist_url, progress):
    # Download an audio or video stream to temporary file.
    # Return the temporary file name.
    #
    # The media file is fetched range by range, in playlist order, and
    # `progress(range_end, total)` is called after each range is written.
    #
    # Raise `UnexpectedResponse` if a range request is not answered with
    # `206 Partial Content` or if the received length is not the requested
    # one. HTTP error statuses are raised by `raise_for_status()`.
    #
    # On any failure the temporary file is removed before the exception
    # propagates: its name is never handed to the caller in that case, so
    # nobody else could clean it up.
    url, ranges = _fetch_av_media_playlist(http_session, media_playlist_url)
    total = ranges[-1][1]

    with NamedTemporaryFile(
        mode="w+b", delete=False, prefix="delarte.", suffix=".mp4"
    ) as f:
        try:
            for range_start, range_end in ranges:
                r = http_session.get(
                    url,
                    headers={
                        "Range": f"bytes={range_start}-{range_end}",
                    },
                    timeout=5,
                )

                r.raise_for_status()

                if r.status_code != 206:
                    raise UnexpectedResponse(
                        "UNEXPECTED_AV_MEDIA_HTTP_STATUS",
                        media_playlist_url,
                        r.request.headers,
                        # The response attribute is `status_code`; the former
                        # `r.status` raised AttributeError on this very path.
                        r.status_code,
                    )

                if len(r.content) != range_end - range_start + 1:
                    raise UnexpectedResponse(
                        "INVALID_AV_MEDIA_FRAGMENT_LENGTH", media_playlist_url
                    )
                f.write(r.content)

                progress(range_end, total)
        except BaseException:
            # Close first: an open file cannot be deleted on every platform.
            f.close()
            # Best effort, the original error is the one worth reporting.
            with contextlib.suppress(OSError):
                os.unlink(f.name)
            raise

    return f.name
|
|
|
|
|
|
def _download_subtitles_media(http_session, media_playlist_url, progress):
    # Download a subtitle file (converted from VTT to SRT format) into a
    # temporary file.
    # Return the temporary file name.
    #
    # `progress(i, 2)` is called before the download (0), after it (1) and
    # once the converted file is written (2).
    #
    # If the conversion (or the last progress callback) fails, the temporary
    # file is removed before the exception propagates: its name is never
    # handed to the caller in that case, so nobody else could clean it up.
    url = _fetch_subtitles_media_playlist(http_session, media_playlist_url)

    progress(0, 2)
    r = http_session.get(url)
    r.raise_for_status()
    # Force the decoding used by `r.text` below.
    r.encoding = "utf-8"
    progress(1, 2)

    with NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    ) as f:
        try:
            subtitles.convert(r.text, f)
            progress(2, 2)
        except BaseException:
            # Close first: an open file cannot be deleted on every platform.
            f.close()
            # Best effort, the original error is the one worth reporting.
            with contextlib.suppress(OSError):
                os.unlink(f.name)
            raise

    return f.name
|
|
|
|
|
|
@contextlib.contextmanager
def download_source(http_session, source, progress):
    """Download source inputs into temporary files.

    Returns a context manager that will delete the temporary files on exit.
    The context expression is a local version of the given source, i.e. a
    `model.Source` whose video, audio and subtitles refer to the temporary
    files instead of the remote playlists.

    Subtitles (if the source has any) are downloaded first, then video, then
    audio. `progress` is called as `progress(kind, i, n)` where `kind` is one
    of "subtitles", "video" or "audio".
    """

    def report_as(kind):
        # Build the callback given to a download, tagging its calls with `kind`.
        return lambda i, n: progress(kind, i, n)

    subtitles_filename = None
    video_filename = None
    audio_filename = None

    try:
        if source.subtitles:
            subtitles_filename = _download_subtitles_media(
                http_session, source.subtitles, report_as("subtitles")
            )

        video_filename = _download_av_media(
            http_session, source.video, report_as("video")
        )

        audio_filename = _download_av_media(
            http_session, source.audio, report_as("audio")
        )

        yield model.Source(
            source.metadata,
            source.rendition,
            source.variant,
            video_filename,
            audio_filename,
            subtitles_filename,
        )

    finally:
        # Remove whatever was downloaded, whether or not everything succeeded.
        for filename in (video_filename, audio_filename, subtitles_filename):
            if filename and os.path.isfile(filename):
                os.unlink(filename)
|