Barbagus
db0a954497
- Rename variables and functions to reflect model names. - Convert infrastructure data (JSON, M3U8) to model types. - Change algorithms to produce/consume the `Source` model, in particular using generator functions to build a list of `Source`s rather than the opaque `rendition => variant => urls` mapping (this will make #7 very straightforward). - Download all master playlists after the API call, before selecting rendition/variants. Motivation for the last point: We used to offer rendition choosing right after the API call, before downloading the appropriate master playlist to figure out the available variants. The problem with that is that ArteTV's codes for the renditions (given by the API) do not necessarily include complete language information (if it is not French or German); for instance, an original audio track in Portuguese would show as `VOEU-` (as in "EUropean"). The actual mention of Portuguese would only show up in the master playlist. So, the new implementation actually downloads all master playlists straight after the API call. This is a bit wasteful, but I figured it was necessary to provide quality interaction with the user. Bonus? Now, when we first prompt the user for a rendition choice, we already know the available variants; maybe we can make use of that fact in the future...
352 lines
11 KiB
Python
352 lines
11 KiB
Python
# License: GNU AGPL v3: http://www.gnu.org/licenses/
|
|
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
|
|
|
|
"""Provide HLS protocol utilities."""
|
|
|
|
# For terminology, from HLS protocol RFC8216
|
|
|
|
# 2. Overview
|
|
#
|
|
# A multimedia presentation is specified by a Uniform Resource
|
|
# Identifier (URI) [RFC3986] to a Playlist.
|
|
#
|
|
# A Playlist is either a Media Playlist or a Master Playlist. Both are
|
|
# UTF-8 text files containing URIs and descriptive tags.
|
|
#
|
|
# A Media Playlist contains a list of Media Segments, which, when
|
|
# played sequentially, will play the multimedia presentation.
|
|
#
|
|
# Here is an example of a Media Playlist:
|
|
#
|
|
# #EXTM3U
|
|
# #EXT-X-TARGETDURATION:10
|
|
#
|
|
# #EXTINF:9.009,
|
|
# http://media.example.com/first.ts
|
|
# #EXTINF:9.009,
|
|
# http://media.example.com/second.ts
|
|
# #EXTINF:3.003,
|
|
# http://media.example.com/third.ts
|
|
#
|
|
# The first line is the format identifier tag #EXTM3U. The line
|
|
# containing #EXT-X-TARGETDURATION says that all Media Segments will be
|
|
# 10 seconds long or less. Then, three Media Segments are declared.
|
|
# The first and second are 9.009 seconds long; the third is 3.003
|
|
# seconds.
|
|
#
|
|
# To play this Playlist, the client first downloads it and then
|
|
# downloads and plays each Media Segment declared within it. The
|
|
# client reloads the Playlist as described in this document to discover
|
|
# any added segments. Data SHOULD be carried over HTTP [RFC7230], but,
|
|
# in general, a URI can specify any protocol that can reliably transfer
|
|
# the specified resource on demand.
|
|
#
|
|
# A more complex presentation can be described by a Master Playlist. A
|
|
# Master Playlist provides a set of Variant Streams, each of which
|
|
# describes a different version of the same content.
|
|
#
|
|
# A Variant Stream includes a Media Playlist that specifies media
|
|
# encoded at a particular bit rate, in a particular format, and at a
|
|
# particular resolution for media containing video.
|
|
#
|
|
# A Variant Stream can also specify a set of Renditions. Renditions
|
|
# are alternate versions of the content, such as audio produced in
|
|
# different languages or video recorded from different camera angles.
|
|
#
|
|
# Clients should switch between different Variant Streams to adapt to
|
|
# network conditions. Clients should choose Renditions based on user
|
|
# preferences.
|
|
|
|
import contextlib
|
|
import io
|
|
import os
|
|
import re
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
import m3u8
|
|
import webvtt
|
|
|
|
from . import common, model
|
|
|
|
#
|
|
# WARNING !
|
|
#
|
|
# This module does not aim for a full implementation of HLS, only the
|
|
# subset useful for the actual observed usage of ArteTV.
|
|
#
|
|
# - URIs are relative file paths
|
|
# - Master playlists have at least one variant
|
|
# - Every variant is of different resolution
|
|
# - Every variant has exactly one audio medium
|
|
# - Every variant has at most one subtitles medium
|
|
# - Audio and video media playlists segments are incremental ranges of
|
|
# the same file
|
|
# - Subtitles media playlists have only one segment
|
|
|
|
|
|
class UnexpectedResponse(common.UnexpectedError):
    """Unexpected response from ArteTV.

    Raised by this module when a playlist or a media download does not match
    the subset of HLS the module supports. Raise sites pass an upper-case
    error code string (e.g. ``"NO_AUDIO_MEDIA"``) followed by context values
    such as the URL of the offending playlist.
    """
|
|
|
|
|
|
def _fetch_playlist(http_session, url):
    # Download the M3U8 playlist located at `url` and return it parsed.
    # HTTP error statuses are propagated through `raise_for_status()`.
    response = http_session.get(url)
    response.raise_for_status()

    # `url` is handed to the parser as the playlist's own location.
    playlist_text = response.text
    return m3u8.loads(playlist_text, url)
|
|
|
|
|
|
def fetch_program_sources(http_session, metadata, master_playlist_url):
    """Download a master playlist and generate one source per video variant.

    The playlist must declare exactly one audio medium and at most one
    subtitles medium; every variant stream must reference those media and
    no two variants may be identical. Any deviation raises
    `UnexpectedResponse`.

    Each yielded `model.Source` carries the given `metadata`, the rendition
    (shared by all variants), the variant itself and the absolute URLs of
    the video, audio and (optional) subtitles media playlists.
    """
    master_playlist = _fetch_playlist(http_session, master_playlist_url)

    def is_accessible(media):
        # Keeps the short-circuit result: a falsy `characteristics` value is
        # returned as-is, otherwise the outcome of the membership test.
        traits = media.characteristics
        return traits and ("public.accessibility" in traits)

    # Pick up the single audio medium and the optional subtitles medium.
    audio_media = subtitles_media = None
    for media in master_playlist.media:
        media_type = media.type
        if media_type == "AUDIO":
            if audio_media:
                raise UnexpectedResponse(
                    "MULTIPLE_AUDIO_MEDIA", master_playlist_url
                )
            audio_media = media
        elif media_type == "SUBTITLES":
            if subtitles_media:
                raise UnexpectedResponse(
                    "MULTIPLE_SUBTITLES_MEDIA", master_playlist_url
                )
            subtitles_media = media

    if not audio_media:
        raise UnexpectedResponse("NO_AUDIO_MEDIA", master_playlist_url)

    rendition_audio = model.RenditionAudio(
        audio_media.language,
        audio_media.name.startswith("VO"),
        is_accessible(audio_media),
    )
    if subtitles_media:
        rendition_subtitles = model.RenditionSubtitles(
            subtitles_media.language,
            is_accessible(subtitles_media),
        )
        subtitles_url = subtitles_media.absolute_uri
    else:
        rendition_subtitles = None
        subtitles_url = None
    rendition = model.Rendition(rendition_audio, rendition_subtitles)

    # Variants already yielded, used to reject duplicates.
    seen_variants = set()

    for video_media in master_playlist.playlists:
        info = video_media.stream_info

        if info.audio != audio_media.group_id:
            raise UnexpectedResponse(
                "INVALID_VARIANT_AUDIO_MEDIA", master_playlist_url, info.audio
            )

        # With a subtitles medium, the variant must reference its group;
        # without one, the variant must not reference any subtitles group.
        if subtitles_media:
            subtitles_mismatch = info.subtitles != subtitles_media.group_id
        else:
            subtitles_mismatch = info.subtitles
        if subtitles_mismatch:
            raise UnexpectedResponse(
                "INVALID_VARIANT_SUBTITLES_MEDIA",
                master_playlist_url,
                info.subtitles,
            )

        variant = model.Variant(
            info.resolution[0],
            info.resolution[1],
            info.frame_rate,
        )
        if variant in seen_variants:
            raise UnexpectedResponse("DUPLICATE_VARIANT", master_playlist_url, variant)
        seen_variants.add(variant)

        yield model.Source(
            metadata,
            rendition,
            variant,
            video_media.absolute_uri,
            audio_media.absolute_uri,
            subtitles_url,
        )
|
|
|
|
|
|
def _convert_byterange(obj):
    # Translate the M3U8 `byterange` attribute of `obj`, written as
    # "count@offset", into an inclusive HTTP range `(first_byte, last_byte)`.
    length, first_byte = map(int, obj.byterange.split("@"))
    last_byte = first_byte + length - 1
    return first_byte, last_byte
|
|
|
|
|
|
def _fetch_av_media_playlist(http_session, url):
    # Fetch an audio or video media playlist and check that it describes
    # one single file cut into contiguous byte ranges starting at offset 0.
    # Return a tuple:
    # - the absolute url of the media file
    # - the list of `(start, end)` ranges, initialization section first
    playlist = _fetch_playlist(http_session, url)

    # The initialization section names the media file and must open it.
    init_section = playlist.segment_map[0]
    media_file = init_section.uri

    first, last = _convert_byterange(init_section)
    if first != 0:
        raise UnexpectedResponse("INVALID_AV_MEDIA_FRAGMENT_START", url)
    ranges = [(first, last)]

    for segment in playlist.segments:
        if segment.uri != media_file:
            raise UnexpectedResponse("MULTIPLE_AV_MEDIA_FILES", url)

        first, last = _convert_byterange(segment)
        # Each segment has to begin right after the end of the previous range.
        if first != ranges[-1][1] + 1:
            raise UnexpectedResponse("DISCONTINUOUS_AV_MEDIA_FRAGMENT", url)

        ranges.append((first, last))

    return init_section.absolute_uri, ranges
|
|
|
|
|
|
def _fetch_subtitles_media_playlist(http_session, url):
|
|
# Fetch subtitles media playlist.
|
|
# Return the subtitle file url.
|
|
subtitles_index = _fetch_playlist(http_session, url)
|
|
urls = [s.absolute_uri for s in subtitles_index.segments]
|
|
|
|
if not urls:
|
|
raise UnexpectedResponse("SUBTITLES_MEDIA_NO_FILES", url)
|
|
|
|
if len(urls) > 1:
|
|
raise UnexpectedResponse("SUBTITLES_MEDIA_MULTIPLE_FILES", url)
|
|
|
|
return urls[0]
|
|
|
|
|
|
def _download_av_media(http_session, media_playlist_url, progress):
    # Download an audio or video stream to temporary file.
    # Return the temporary file name.
    #
    # The media file is requested range by range, as listed by its media
    # playlist, and `progress(done, total)` is called after each range with
    # the end offset of that range and the end offset of the last range.
    #
    # Raises `UnexpectedResponse` when the server does not answer a range
    # request with a 206 status or returns a body of the wrong length. The
    # temporary file is removed when the download does not complete, because
    # the caller never learns its name in that case.
    url, ranges = _fetch_av_media_playlist(http_session, media_playlist_url)
    total = ranges[-1][1]

    f = NamedTemporaryFile(
        mode="w+b", delete=False, prefix="delarte.", suffix=".mp4"
    )
    try:
        with f:
            for range_start, range_end in ranges:
                r = http_session.get(
                    url,
                    headers={
                        "Range": f"bytes={range_start}-{range_end}",
                    },
                    timeout=5,
                )

                r.raise_for_status()

                # Any success status other than "206 Partial Content" means
                # the server ignored the requested range.
                if r.status_code != 206:
                    raise UnexpectedResponse(
                        "UNEXPECTED_AV_MEDIA_HTTP_STATUS",
                        media_playlist_url,
                        r.request.headers,
                        r.status_code,
                    )

                if len(r.content) != range_end - range_start + 1:
                    raise UnexpectedResponse(
                        "INVALID_AV_MEDIA_FRAGMENT_LENGTH", media_playlist_url
                    )
                f.write(r.content)

                progress(range_end, total)
    except BaseException:
        # The file is already closed here (the `with` block has exited), so
        # it can be deleted on every platform before re-raising.
        with contextlib.suppress(OSError):
            os.unlink(f.name)
        raise

    return f.name
|
|
|
|
|
|
def _download_subtitles_media(http_session, media_playlist_url, progress):
    # Download a subtitle file (converted from VTT to SRT format) into a temporary file.
    # Return the temporary file name.
    #
    # `progress(step, 2)` is called before the download, after the download
    # and once the conversion is written. The temporary file is removed when
    # the conversion does not complete, because the caller never learns its
    # name in that case.
    url = _fetch_subtitles_media_playlist(http_session, media_playlist_url)

    progress(0, 2)
    r = http_session.get(url)
    r.raise_for_status()

    # WebVTT files are UTF-8 by specification. Without this, the HTTP client
    # may fall back to another charset when the response declares none
    # (presumably `http_session` is a requests session - confirm).
    r.encoding = "utf-8"
    buffer = io.StringIO(r.text)
    progress(1, 2)

    f = NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    )
    try:
        with f:
            # SRT cues are numbered from 1 and use a comma as the decimal
            # separator in timestamps, where VTT uses a dot.
            for i, caption in enumerate(webvtt.read_buffer(buffer), start=1):
                print(i, file=f)
                print(
                    caption.start.replace(".", ",")
                    + " --> "
                    + caption.end.replace(".", ","),
                    file=f,
                )
                print(caption.text + "\n", file=f)
        progress(2, 2)
    except BaseException:
        # The file is already closed here (the `with` block has exited), so
        # it can be deleted on every platform before re-raising.
        with contextlib.suppress(OSError):
            os.unlink(f.name)
        raise

    return f.name
|
|
|
|
|
|
@contextlib.contextmanager
def download_source(http_session, source, progress):
    """Download the inputs of a source into temporary files.

    Use as a context manager: the value bound by `with` is a copy of the
    given source whose video, audio and subtitles entries are local file
    names instead of playlist URLs (subtitles stay `None` when the source
    has none). `progress` is called as `progress(kind, i, n)` where `kind`
    is one of "video", "audio" or "subtitles".

    The temporary files are deleted when the context exits.
    """
    # Names of the temporary files created so far, in creation order.
    created = []

    def report_as(kind):
        # Build the per-input progress callback expected by the downloaders.
        return lambda i, n: progress(kind, i, n)

    try:
        video_filename = _download_av_media(
            http_session, source.video, report_as("video")
        )
        created.append(video_filename)

        audio_filename = _download_av_media(
            http_session, source.audio, report_as("audio")
        )
        created.append(audio_filename)

        if source.subtitles:
            subtitles_filename = _download_subtitles_media(
                http_session, source.subtitles, report_as("subtitles")
            )
            created.append(subtitles_filename)
        else:
            subtitles_filename = None

        yield model.Source(
            source.metadata,
            source.rendition,
            source.variant,
            video_filename,
            audio_filename,
            subtitles_filename,
        )

    finally:
        for filename in created:
            if filename and os.path.isfile(filename):
                os.unlink(filename)
|