diff --git a/README.md b/README.md index 5b2af39..fa0e2b5 100644 --- a/README.md +++ b/README.md @@ -98,25 +98,25 @@ The response is a JSON object, a sample of which can be found [here](https://git Information about the program is detailed in `$.data.attributes.metadata` and a list of available audio/subtitles combinations in `$.data.attributes.streams`. In our code such a combination is referred to as a _rendition_ (or _version_ in the CLI). -Every such _rendition_ has a reference to a _master playlist_ file in `.streams[i].url` +Every such _rendition_ has a reference to a _program index_ file in `.streams[i].url` -### The _master playlist_ file +### The _program index_ file -As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/master-105612-000-A_VOF-STMF_XQ.m3u8) or [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/master-105612-000-A_VA-STA_XQ.m3u8)). This file show the a list of video _variants_ URIs (one per video resolution). Each of them has -- exactly one video _media playlist_ reference -- exactly one audio _media playlist_ reference -- at most one subtitles _media playlist_ reference +As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/program-105612-000-A_VOF-STMF_XQ.m3u8) or [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/program-105612-000-A_VA-STA_XQ.m3u8)). This file shows a list of video _variants_ URIs (one per video resolution). 
Each of them has +- exactly one video _track index_ reference +- exactly one audio _track index_ reference +- at most one subtitles _track index_ reference Audio and subtitles tracks reference also include: - a two-letter `language` code attribute (`mul` is used for audio multiple language) - a free form `name` attribute that is used to detect an audio _original version_ - a coded `characteristics` that is used to detect accessibility tracks (audio or textual description) -### The video and audio _media playlist_ file +### The video and audio _track index_ file As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (a sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/audio-105612-000-A_aud_VA.m3u8) or [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/video-105612-000-A_v1080.m3u8)). This file is basically a list of _segments_ (http ranges) the client is supposed to download in sequence. -### The subtitles _media playlist_ file +### The subtitles _track index_ file As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (a sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/subtitles-105612-000-A_st_VA-ALL.m3u8)). This file references the actual file containing the subtitles [VTT](https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) data. @@ -124,20 +124,20 @@ As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (a s 1. Figure out available _sources_ by: - fetching the _config_ API object for the _program identifier_ - - fetching all referenced _master playlist_. + - fetching all referenced _program index_. 2. Select the desired _source_ based on _renditions_ and _variants_ codes. 3. Figure out the _output filename_ from _source_ details. -4. Download video, audio and subtitles media content. +4. Download video, audio and subtitles tracks content. 
- convert `VTT` subtitles to `SRT` -5. Feed the all the media to `ffmpeg` for multiplexing (or _muxing_) +5. Feed all the tracks to `ffmpeg` for multiplexing (or _muxing_) ## 📽️ FFMPEG The multiplexing (_muxing_) the video file is handled by [ffmpeg](https://ffmpeg.org/). The script expects [ffmpeg](https://ffmpeg.org/) to be installed in the environnement and will call it as a subprocess. -### Why not use FFMPEG directly with the HLS _master playlist_ URL ? +### Why not use FFMPEG directly with the HLS _program index_ URL ? So we can be more granular about _renditions_ and _variants_ that we want. @@ -145,14 +145,14 @@ So we can be more granular about _renditions_ and _variants_ that we want. Because FFMPEG do not support styles in WebVTT 😒. -### Why not use FFMPEG directly with the _media playlist_ URLs and let it do the download ? +### Why not use FFMPEG directly with the _track index_ URLs and let it do the download ? Because some programs would randomly fail 😒. Probably due to invalid _segmentation_ on the server. ## 📌 Dependencies -- [m3u8](https://pypi.org/project/m3u8/) to parse playlists. +- [m3u8](https://pypi.org/project/m3u8/) to parse indexes. - [requests](https://pypi.org/project/requests/) to handle HTTP traffic. - [docopt-ng](https://pypi.org/project/docopt-ng/) to parse command line. 
diff --git a/samples/hls/master-105612-000-A_VA-STA_XQ.m3u8 b/samples/hls/program-105612-000-A_VA-STA_XQ.m3u8 similarity index 100% rename from samples/hls/master-105612-000-A_VA-STA_XQ.m3u8 rename to samples/hls/program-105612-000-A_VA-STA_XQ.m3u8 diff --git a/samples/hls/master-105612-000-A_VOF-STMF_XQ.m3u8 b/samples/hls/program-105612-000-A_VOF-STMF_XQ.m3u8 similarity index 100% rename from samples/hls/master-105612-000-A_VOF-STMF_XQ.m3u8 rename to samples/hls/program-105612-000-A_VOF-STMF_XQ.m3u8 diff --git a/src/delarte/__init__.py b/src/delarte/__init__.py index 70a4570..40bc8f7 100644 --- a/src/delarte/__init__.py +++ b/src/delarte/__init__.py @@ -19,10 +19,10 @@ def fetch_sources(http_session, url): return [ source - for metadata, master_playlist_url in fetch_program_info( + for metadata, program_index_url in fetch_program_info( http_session, site, target_id ) - for source in fetch_program_sources(http_session, metadata, master_playlist_url) + for source in fetch_program_sources(http_session, metadata, program_index_url) ] diff --git a/src/delarte/api.py b/src/delarte/api.py index b65dc4f..beeafd6 100644 --- a/src/delarte/api.py +++ b/src/delarte/api.py @@ -45,14 +45,14 @@ def fetch_program_info(http_session, site, target_id): if (_ := s["protocol"]) != "HLS_NG": raise UnsupportedHLSProtocol(site, target_id, _) - if (master_playlist_url := s["url"]) in cache: + if (program_index_url := s["url"]) in cache: raise UnexpectedAPIResponse( - "DUPLICATE_MASTER_PLAYLIST_URL", + "DUPLICATE_PROGRAM_INDEX_URL", site, target_id, - master_playlist_url, + program_index_url, ) - cache.add(master_playlist_url) + cache.add(program_index_url) - yield (metadata, master_playlist_url) + yield (metadata, program_index_url) diff --git a/src/delarte/hls.py b/src/delarte/hls.py index 274dae8..1d915a7 100644 --- a/src/delarte/hls.py +++ b/src/delarte/hls.py @@ -3,59 +3,6 @@ """Provide HLS protocol utilities.""" -# For terminology, from HLS protocol RFC8216 - -# 2. 
Overview -# -# A multimedia presentation is specified by a Uniform Resource -# Identifier (URI) [RFC3986] to a Playlist. -# -# A Playlist is either a Media Playlist or a Master Playlist. Both are -# UTF-8 text files containing URIs and descriptive tags. -# -# A Media Playlist contains a list of Media Segments, which, when -# played sequentially, will play the multimedia presentation. -# -# Here is an example of a Media Playlist: -# -# #EXTM3U -# #EXT-X-TARGETDURATION:10 -# -# #EXTINF:9.009, -# http://media.example.com/first.ts -# #EXTINF:9.009, -# http://media.example.com/second.ts -# #EXTINF:3.003, -# http://media.example.com/third.ts -# -# The first line is the format identifier tag #EXTM3U. The line -# containing #EXT-X-TARGETDURATION says that all Media Segments will be -# 10 seconds long or less. Then, three Media Segments are declared. -# The first and second are 9.009 seconds long; the third is 3.003 -# seconds. -# -# To play this Playlist, the client first downloads it and then -# downloads and plays each Media Segment declared within it. The -# client reloads the Playlist as described in this document to discover -# any added segments. Data SHOULD be carried over HTTP [RFC7230], but, -# in general, a URI can specify any protocol that can reliably transfer -# the specified resource on demand. -# -# A more complex presentation can be described by a Master Playlist. A -# Master Playlist provides a set of Variant Streams, each of which -# describes a different version of the same content. -# -# A Variant Stream includes a Media Playlist that specifies media -# encoded at a particular bit rate, in a particular format, and at a -# particular resolution for media containing video. -# -# A Variant Stream can also specify a set of Renditions. Renditions -# are alternate versions of the content, such as audio produced in -# different languages or video recorded from different camera angles. 
-# -# Clients should switch between different Variant Streams to adapt to -# network conditions. Clients should choose Renditions based on user -# preferences. import contextlib import os @@ -74,46 +21,46 @@ from .model import Rendition, RenditionAudio, RenditionSubtitles, Source, Varian # subset useful for the actual observed usage of ArteTV. # # - URIs are relative file paths -# - Master playlists have at least one variant +# - Program indexes have at least one variant # - Every variant is of different resolution # - Every variant has exactly one audio medium # - Every variant has at most one subtitles medium -# - Audio and video media playlists segments are incremental ranges of +# - Audio and video indexes segments are incremental ranges of # the same file -# - Subtitles media playlists have only one segment +# - Subtitles indexes have only one segment -def _fetch_playlist(http_session, url): +def _fetch_index(http_session, url): # Fetch a M3U8 playlist r = http_session.get(url) r.raise_for_status() return m3u8.loads(r.text, url) -def fetch_program_sources(http_session, metadata, master_playlist_url): - """Fetch the given master playlist and yield available sources.""" - master_playlist = _fetch_playlist(http_session, master_playlist_url) +def fetch_program_sources(http_session, metadata, program_index_url): + """Fetch the given index and yield available sources.""" + program_index = _fetch_index(http_session, program_index_url) audio_media = None subtitles_media = None - for media in master_playlist.media: + for media in program_index.media: match media.type: case "AUDIO": if audio_media: raise UnexpectedHLSResponse( - "MULTIPLE_AUDIO_MEDIA", master_playlist_url + "MULTIPLE_AUDIO_MEDIA", program_index_url ) audio_media = media case "SUBTITLES": if subtitles_media: raise UnexpectedHLSResponse( - "MULTIPLE_SUBTITLES_MEDIA", master_playlist_url + "MULTIPLE_SUBTITLES_MEDIA", program_index_url ) subtitles_media = media if not audio_media: - raise 
UnexpectedHLSResponse("NO_AUDIO_MEDIA", master_playlist_url) + raise UnexpectedHLSResponse("NO_AUDIO_MEDIA", program_index_url) rendition = Rendition( RenditionAudio( @@ -137,24 +84,24 @@ def fetch_program_sources(http_session, metadata, master_playlist_url): cache = set() - for video_media in master_playlist.playlists: + for video_media in program_index.playlists: stream_info = video_media.stream_info if stream_info.audio != audio_media.group_id: raise UnexpectedHLSResponse( - "INVALID_VARIANT_AUDIO_MEDIA", master_playlist_url, stream_info.audio + "INVALID_VARIANT_AUDIO_MEDIA", program_index_url, stream_info.audio ) if subtitles_media: if stream_info.subtitles != subtitles_media.group_id: raise UnexpectedHLSResponse( "INVALID_VARIANT_SUBTITLES_MEDIA", - master_playlist_url, + program_index_url, stream_info.subtitles, ) elif stream_info.subtitles: raise UnexpectedHLSResponse( "INVALID_VARIANT_SUBTITLES_MEDIA", - master_playlist_url, + program_index_url, stream_info.subtitles, ) @@ -165,9 +112,7 @@ def fetch_program_sources(http_session, metadata, master_playlist_url): ) if variant in cache: - raise UnexpectedHLSResponse( - "DUPLICATE_VARIANT", master_playlist_url, variant - ) + raise UnexpectedHLSResponse("DUPLICATE_VARIANT", program_index_url, variant) cache.add(variant) yield Source( @@ -188,53 +133,55 @@ def _convert_byterange(obj): return offset, offset + count - 1 -def _fetch_av_media_playlist(http_session, url): - # Fetch an audio or video media playlist. +def _fetch_av_track_index(http_session, track_index_url): + # Fetch an audio or video index. 
# Return a tuple: # - the media file url # - the media file's ranges - media_playlist = _fetch_playlist(http_session, url) + track_index = _fetch_index(http_session, track_index_url) - file_name = media_playlist.segment_map[0].uri - start, end = _convert_byterange(media_playlist.segment_map[0]) + file_name = track_index.segment_map[0].uri + start, end = _convert_byterange(track_index.segment_map[0]) if start != 0: - raise UnexpectedHLSResponse("INVALID_AV_MEDIA_FRAGMENT_START", url) + raise UnexpectedHLSResponse("INVALID_AV_INDEX_FRAGMENT_START", track_index_url) ranges = [(start, end)] next_start = end + 1 - for segment in media_playlist.segments: + for segment in track_index.segments: if segment.uri != file_name: - raise UnexpectedHLSResponse("MULTIPLE_AV_MEDIA_FILES", url) + raise UnexpectedHLSResponse("MULTIPLE_AV_INDEX_FILES", track_index_url) start, end = _convert_byterange(segment) if start != next_start: - raise UnexpectedHLSResponse("DISCONTINUOUS_AV_MEDIA_FRAGMENT", url) + raise UnexpectedHLSResponse( + "DISCONTINUOUS_AV_INDEX_FRAGMENT", track_index_url + ) ranges.append((start, end)) next_start = end + 1 - return media_playlist.segment_map[0].absolute_uri, ranges + return track_index.segment_map[0].absolute_uri, ranges -def _fetch_subtitles_media_playlist(http_session, url): - # Fetch subtitles media playlist. +def _fetch_subtitles_track_index(http_session, track_index_url): + # Fetch subtitles index. # Return the subtitle file url. 
- subtitles_index = _fetch_playlist(http_session, url) - urls = [s.absolute_uri for s in subtitles_index.segments] + track_index = _fetch_index(http_session, track_index_url) + urls = [s.absolute_uri for s in track_index.segments] if not urls: - raise UnexpectedHLSResponse("SUBTITLES_MEDIA_NO_FILES", url) + raise UnexpectedHLSResponse("SUBTITLES_INDEX_NO_FILES", track_index_url) if len(urls) > 1: - raise UnexpectedHLSResponse("SUBTITLES_MEDIA_MULTIPLE_FILES", url) + raise UnexpectedHLSResponse("SUBTITLES_INDEX_MULTIPLE_FILES", track_index_url) return urls[0] -def _download_av_media(http_session, media_playlist_url, progress): +def _download_av_track(http_session, track_index_url, progress): # Download an audio or video stream to temporary file. # Return the temporary file name. - url, ranges = _fetch_av_media_playlist(http_session, media_playlist_url) + url, ranges = _fetch_av_track_index(http_session, track_index_url) total = ranges[-1][1] with ( @@ -255,15 +202,15 @@ def _download_av_media(http_session, media_playlist_url, progress): if r.status_code != 206: raise UnexpectedHLSResponse( - "UNEXPECTED_AV_MEDIA_HTTP_STATUS", - media_playlist_url, + "UNEXPECTED_AV_TRACK_HTTP_STATUS", + track_index_url, r.request.headers, r.status, ) if len(r.content) != range_end - range_start + 1: raise UnexpectedHLSResponse( - "INVALID_AV_MEDIA_FRAGMENT_LENGTH", media_playlist_url + "INVALID_AV_TRACK_FRAGMENT_LENGTH", track_index_url ) f.write(r.content) @@ -272,10 +219,10 @@ def _download_av_media(http_session, media_playlist_url, progress): return f.name -def _download_subtitles_media(http_session, media_playlist_url, progress): +def _download_subtitles_track(http_session, track_index_url, progress): # Download a subtitle file (converted from VTT to SRT format) into a temporary file. # Return the temporary file name. 
- url = _fetch_subtitles_media_playlist(http_session, media_playlist_url) + url = _fetch_subtitles_track_index(http_session, track_index_url) progress(0, 2) r = http_session.get(url) @@ -304,7 +251,7 @@ def download_source(http_session, source, progress): try: subtitles_filename = ( - _download_subtitles_media( + _download_subtitles_track( http_session, source.subtitles, lambda i, n: progress("subtitles", i, n), @@ -313,11 +260,11 @@ def download_source(http_session, source, progress): else None ) - video_filename = _download_av_media( + video_filename = _download_av_track( http_session, source.video, lambda i, n: progress("video", i, n) ) - audio_filename = _download_av_media( + audio_filename = _download_av_track( http_session, source.audio, lambda i, n: progress("audio", i, n) ) diff --git a/src/delarte/muxing.py b/src/delarte/muxing.py index d1c4042..312de5e 100644 --- a/src/delarte/muxing.py +++ b/src/delarte/muxing.py @@ -1,7 +1,7 @@ # License: GNU AGPL v3: http://www.gnu.org/licenses/ # This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git) -"""Provide media muxing utilities.""" +"""Provide tracks muxing utilities.""" import subprocess