delarte/src/delarte/hls.py
Barbagus db0a954497 Refactor code to use the model types
- Rename variables and function to reflect model names.
- Convert infrastructure data (JSON, M3U8) to model types.
- Change algorithms to produce/consume `Source` model, in particular
  using generator functions to build a list of `Source`s rather than the
  opaque `rendition => variant => urls` mapping (this will make #7 very
  straightforward).
- Download all master playlists after API call before selecting
  rendition/variants.

Motivation for the last point:

We used to offer rendition choosing right after the API call, before
downloading the appropriate master playlist to figure out the available
variants.

The problem with that is that ArteTV's codes for the renditions (given
by the API) do not necessarily include complete language information
(if it is not French or German); for instance an original audio track in
Portuguese would show as `VOEU-` (as in "EUropean"). The actual mention
of Portuguese would only show up in the master playlist.

So, the new implementation actually downloads all master playlists
straight after the API call. This is a bit wasteful, but I figured it
was necessary to provide quality interaction with the user.

Bonus? Now when we first prompt the user for rendition choice, we
actually already know the available variants; maybe we can make
use of that fact in the future...
2022-12-29 08:43:20 +01:00

352 lines
11 KiB
Python

# License: GNU AGPL v3: http://www.gnu.org/licenses/
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
"""Provide HLS protocol utilities."""
# For terminology, from HLS protocol RFC8216
# 2. Overview
#
# A multimedia presentation is specified by a Uniform Resource
# Identifier (URI) [RFC3986] to a Playlist.
#
# A Playlist is either a Media Playlist or a Master Playlist. Both are
# UTF-8 text files containing URIs and descriptive tags.
#
# A Media Playlist contains a list of Media Segments, which, when
# played sequentially, will play the multimedia presentation.
#
# Here is an example of a Media Playlist:
#
# #EXTM3U
# #EXT-X-TARGETDURATION:10
#
# #EXTINF:9.009,
# http://media.example.com/first.ts
# #EXTINF:9.009,
# http://media.example.com/second.ts
# #EXTINF:3.003,
# http://media.example.com/third.ts
#
# The first line is the format identifier tag #EXTM3U. The line
# containing #EXT-X-TARGETDURATION says that all Media Segments will be
# 10 seconds long or less. Then, three Media Segments are declared.
# The first and second are 9.009 seconds long; the third is 3.003
# seconds.
#
# To play this Playlist, the client first downloads it and then
# downloads and plays each Media Segment declared within it. The
# client reloads the Playlist as described in this document to discover
# any added segments. Data SHOULD be carried over HTTP [RFC7230], but,
# in general, a URI can specify any protocol that can reliably transfer
# the specified resource on demand.
#
# A more complex presentation can be described by a Master Playlist. A
# Master Playlist provides a set of Variant Streams, each of which
# describes a different version of the same content.
#
# A Variant Stream includes a Media Playlist that specifies media
# encoded at a particular bit rate, in a particular format, and at a
# particular resolution for media containing video.
#
# A Variant Stream can also specify a set of Renditions. Renditions
# are alternate versions of the content, such as audio produced in
# different languages or video recorded from different camera angles.
#
# Clients should switch between different Variant Streams to adapt to
# network conditions. Clients should choose Renditions based on user
# preferences.
import contextlib
import io
import os
import re
from tempfile import NamedTemporaryFile
import m3u8
import webvtt
from . import common, model
#
# WARNING !
#
# This module does not aim for a full implementation of HLS, only the
# subset useful for the actual observed usage of ArteTV.
#
# - URIs are relative file paths
# - Master playlists have at least one variant
# - Every variant is of different resolution
# - Every variant has exactly one audio medium
# - Every variant has at most one subtitles medium
# - Audio and video media playlists segments are incremental ranges of
# the same file
# - Subtitles media playlists have only one segment
class UnexpectedResponse(common.UnexpectedError):
    """Signal a response from ArteTV that this module cannot handle.

    Raised throughout this module with an upper-case error code as first
    argument, followed by context values such as the offending playlist URL.
    """
def _fetch_playlist(http_session, url):
    # Download the M3U8 playlist found at `url` and return it parsed.
    # An HTTP error status is reported through `raise_for_status()`.
    # The URL is handed to the parser together with the playlist text.
    response = http_session.get(url)
    response.raise_for_status()
    playlist = m3u8.loads(response.text, uri=url)
    return playlist
def fetch_program_sources(http_session, metadata, master_playlist_url):
    """Fetch the given master playlist and yield available sources.

    This is a generator: nothing is fetched until it is first iterated.
    One `model.Source` is yielded per variant of the master playlist, all
    of them sharing the same `metadata` and the same rendition.

    Raises `UnexpectedResponse` when the playlist has more than one audio or
    subtitles medium, has no audio medium, has a variant referring to another
    audio/subtitles group than the declared one, or lists the same variant
    twice.
    """
    playlist = _fetch_playlist(http_session, master_playlist_url)

    # Locate the single audio medium and the optional subtitles medium.
    audio = None
    subtitles = None
    for medium in playlist.media:
        kind = medium.type
        if kind == "AUDIO":
            if audio:
                raise UnexpectedResponse(
                    "MULTIPLE_AUDIO_MEDIA", master_playlist_url
                )
            audio = medium
        elif kind == "SUBTITLES":
            if subtitles:
                raise UnexpectedResponse(
                    "MULTIPLE_SUBTITLES_MEDIA", master_playlist_url
                )
            subtitles = medium

    if not audio:
        raise UnexpectedResponse("NO_AUDIO_MEDIA", master_playlist_url)

    # Build the rendition shared by every source of this playlist.
    rendition_audio = model.RenditionAudio(
        audio.language,
        audio.name.startswith("VO"),
        audio.characteristics
        and ("public.accessibility" in audio.characteristics),
    )
    if subtitles:
        rendition_subtitles = model.RenditionSubtitles(
            subtitles.language,
            subtitles.characteristics
            and ("public.accessibility" in subtitles.characteristics),
        )
    else:
        rendition_subtitles = None
    rendition = model.Rendition(rendition_audio, rendition_subtitles)

    # Variants already yielded, used to detect duplicates.
    seen_variants = set()

    for entry in playlist.playlists:
        info = entry.stream_info

        if info.audio != audio.group_id:
            raise UnexpectedResponse(
                "INVALID_VARIANT_AUDIO_MEDIA", master_playlist_url, info.audio
            )

        # A variant must refer to the declared subtitles group, or to no
        # subtitles at all when the playlist declares none.
        if subtitles:
            subtitles_mismatch = info.subtitles != subtitles.group_id
        else:
            subtitles_mismatch = bool(info.subtitles)
        if subtitles_mismatch:
            raise UnexpectedResponse(
                "INVALID_VARIANT_SUBTITLES_MEDIA",
                master_playlist_url,
                info.subtitles,
            )

        variant = model.Variant(
            info.resolution[0],
            info.resolution[1],
            info.frame_rate,
        )
        if variant in seen_variants:
            raise UnexpectedResponse(
                "DUPLICATE_VARIANT", master_playlist_url, variant
            )
        seen_variants.add(variant)

        yield model.Source(
            metadata,
            rendition,
            variant,
            entry.absolute_uri,
            audio.absolute_uri,
            subtitles.absolute_uri if subtitles else None,
        )
def _convert_byterange(obj):
    # Translate the M3U8 `byterange` attribute of `obj`, a "count@offset"
    # string, into an inclusive HTTP range `(first_byte, last_byte)`.
    length, first_byte = map(int, obj.byterange.split("@"))
    last_byte = first_byte + length - 1
    return first_byte, last_byte
def _fetch_av_media_playlist(http_session, url):
    # Fetch an audio or video media playlist.
    # Return a tuple:
    # - the absolute url of the media file
    # - the list of (start, end) byte ranges making up that file, beginning
    #   with the range of the first `segment_map` entry
    #
    # Raise `UnexpectedResponse` if the first range does not start at 0, if
    # a segment refers to another file, or if the ranges are not contiguous.
    playlist = _fetch_playlist(http_session, url)

    head = playlist.segment_map[0]
    media_file = head.uri

    first, last = _convert_byterange(head)
    if first != 0:
        raise UnexpectedResponse("INVALID_AV_MEDIA_FRAGMENT_START", url)
    fragments = [(first, last)]

    for fragment in playlist.segments:
        if fragment.uri != media_file:
            raise UnexpectedResponse("MULTIPLE_AV_MEDIA_FILES", url)

        first, last = _convert_byterange(fragment)
        # Each fragment must begin right after the previous one ended.
        if first != fragments[-1][1] + 1:
            raise UnexpectedResponse("DISCONTINUOUS_AV_MEDIA_FRAGMENT", url)
        fragments.append((first, last))

    return head.absolute_uri, fragments
def _fetch_subtitles_media_playlist(http_session, url):
# Fetch subtitles media playlist.
# Return the subtitle file url.
subtitles_index = _fetch_playlist(http_session, url)
urls = [s.absolute_uri for s in subtitles_index.segments]
if not urls:
raise UnexpectedResponse("SUBTITLES_MEDIA_NO_FILES", url)
if len(urls) > 1:
raise UnexpectedResponse("SUBTITLES_MEDIA_MULTIPLE_FILES", url)
return urls[0]
def _download_av_media(http_session, media_playlist_url, progress):
    # Download an audio or video stream to temporary file.
    # Return the temporary file name.
    #
    # The file is fetched range by range, as listed by the media playlist,
    # and `progress(last_byte_received, last_byte_of_file)` is called after
    # each range has been written.
    #
    # Raise `UnexpectedResponse` if a range request is not answered with a
    # `206` status or with the expected number of bytes.
    #
    # The temporary file is created with `delete=False` so that it survives
    # this function; in return it is removed here whenever the download
    # fails, since the caller never gets to know its name in that case.
    url, ranges = _fetch_av_media_playlist(http_session, media_playlist_url)
    total = ranges[-1][1]

    f = NamedTemporaryFile(
        mode="w+b", delete=False, prefix="delarte.", suffix=".mp4"
    )
    try:
        with f:
            for range_start, range_end in ranges:
                r = http_session.get(
                    url,
                    headers={
                        "Range": f"bytes={range_start}-{range_end}",
                    },
                    timeout=5,
                )
                r.raise_for_status()

                if r.status_code != 206:
                    raise UnexpectedResponse(
                        "UNEXPECTED_AV_MEDIA_HTTP_STATUS",
                        media_playlist_url,
                        r.request.headers,
                        r.status_code,
                    )

                if len(r.content) != range_end - range_start + 1:
                    raise UnexpectedResponse(
                        "INVALID_AV_MEDIA_FRAGMENT_LENGTH", media_playlist_url
                    )
                f.write(r.content)
                progress(range_end, total)
    except BaseException:
        # The file is closed at this point (the `with` block has exited).
        # `BaseException` so that a user interruption cleans up as well.
        with contextlib.suppress(OSError):
            os.unlink(f.name)
        raise

    return f.name
def _download_subtitles_media(http_session, media_playlist_url, progress):
    # Download a subtitle file (converted from VTT to SRT format) into a
    # temporary file.
    # Return the temporary file name.
    #
    # `progress(step, 2)` is called with step 0 before the download, 1 once
    # the VTT text is received and 2 once the SRT file is written.
    #
    # The temporary file is created with `delete=False` so that it survives
    # this function; in return it is removed here whenever the conversion
    # fails, since the caller never gets to know its name in that case.
    url = _fetch_subtitles_media_playlist(http_session, media_playlist_url)

    progress(0, 2)
    r = http_session.get(url)
    r.raise_for_status()
    buffer = io.StringIO(r.text)
    progress(1, 2)

    f = NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    )
    try:
        with f:
            # SRT captions are numbered from 1.
            for i, caption in enumerate(webvtt.read_buffer(buffer), start=1):
                # The decimal separator of the timestamps is written as a
                # comma instead of a dot.
                start = caption.start.replace(".", ",")
                end = caption.end.replace(".", ",")
                print(i, file=f)
                print(f"{start} --> {end}", file=f)
                # The extra newline leaves a blank line between captions.
                print(caption.text + "\n", file=f)
            progress(2, 2)
    except BaseException:
        # The file is closed at this point (the `with` block has exited).
        with contextlib.suppress(OSError):
            os.unlink(f.name)
        raise

    return f.name
@contextlib.contextmanager
def download_source(http_session, source, progress):
    """Download source inputs into temporary files.

    Returns a context manager that will delete the temporary files on exit.
    The context expression is a local version of the given source: same
    metadata, rendition and variant, with the video, audio and subtitles
    fields replaced by temporary file names (subtitles stays `None` when
    the given source has none).

    `progress` is called as `progress(kind, i, n)` where `kind` is one of
    "video", "audio" or "subtitles".
    """

    def reporter(kind):
        # Build the two-argument callback expected by the download helpers.
        return lambda i, n: progress(kind, i, n)

    # File names are recorded as soon as each download completes so that
    # the clean-up below also covers a failure half-way through.
    local_files = {"video": None, "audio": None, "subtitles": None}

    try:
        local_files["video"] = _download_av_media(
            http_session, source.video, reporter("video")
        )
        local_files["audio"] = _download_av_media(
            http_session, source.audio, reporter("audio")
        )
        if source.subtitles:
            local_files["subtitles"] = _download_subtitles_media(
                http_session, source.subtitles, reporter("subtitles")
            )

        yield model.Source(
            source.metadata,
            source.rendition,
            source.variant,
            local_files["video"],
            local_files["audio"],
            local_files["subtitles"],
        )
    finally:
        # Removed in download order: video, audio, subtitles.
        for filename in local_files.values():
            if filename and os.path.isfile(filename):
                os.unlink(filename)