Merge pull request 'refactoring' (#21) from barbadev2 into stable

Reviewed-on: #21
2022-12-29 07:58:48 +00:00 · 2022-12-29 07:58:48 +00:00 · 3ec2961a85
parent 458d4cbb6d e4cba27bdd
commit 3ec2961a85
11 changed files with 607 additions and 471 deletions
--- a/README.md
+++ b/README.md
@ -7,9 +7,9 @@
 💡 What is it ?
 ---------------

-This is a toy/research project whose only goal is to familiarize with some of the technologies involved in multi-lingual video streaming. Using this program may violate usage policy of ArteTV website and we do not recommend using it for other purpose then studying the code.
+This is a toy/research project whose primary goal is to familiarize oneself with some of the technologies involved in multi-lingual video streaming. Using this program may violate the usage policy of the ArteTV website and we do not recommend using it for any purpose other than studying the code.

-ArteTV is a is a European public service channel dedicated to culture. Available programmes are usually available with multiple audio and subtitles languages.
+ArteTV is a European public service channel dedicated to culture. Programmes are usually available with multiple audio and subtitles languages.

 🚀 Quick start
 ---------------
@ -59,7 +59,7 @@ usage: delarte [-h|--help]                            - print this message
 🔧 How it works
 ----------------

-### 🏗️ The streaming infrastructure
+## 🏗️ The streaming infrastructure

 Every video program has a _program identifier_ visible in its web page URL:

@ -71,7 +71,7 @@ https://www.arte.tv/en/videos/104001-000-A/clint-eastwood/

 That _program identifier_ enables us to query an API for the program's information.

-##### The _config_ API
+### The _config_ API

 For the last example the API call is as such:

@ -79,216 +79,68 @@ For the last example the API call is as such:
 https://api.arte.tv/api/player/v2/config/en/104001-000-A
 ```

-The response is a JSON object:
+The response is a JSON object, a sample of which can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/api/config-105612-000-A.json):

-```json
-{
-  "data": {
-    "id": "104001-000-A_en",
-    "type": "ConfigPlayer",
-    "attributes": {
-      "metadata": {
-        "providerId": "104001-000-A",
-        "language": "en",
-        "title": "Clint Eastwood",
-        "subtitle": "The Last Legend",
-        "description": "70 years of career in front of and behind the camera and still active at 90, Clint Eastwood is a Hollywood legend. A look back at his unique career through a portrait that explores the complexity of the Eastwood myth.",
-        "duration": { "seconds": 4652 },
-        ...
-      },
-      "streams": [
-        {
-          "url": "https://.../104001-000-A_VOF-STE%5BANG%5D_XQ.m3u8",
-          "versions": [
-            {
-              "label": "English (Subtitles)",
-              "shortLabel": "OGsub-ANG",
-              "eStat": {
-                "ml5": "VOF-STE[ANG]"
-              }
-            }
-          ],
-          ...
-        },
-        {
-          "url": "https://.../104001-000-A_VOF-STF_XQ.m3u8",
-          "versions": [
-            {
-              "label": "French (Original)",
-              "shortLabel": "FR",
-              "eStat": {
-                "ml5": "VOF-STF"
-              }
-            }
-          ],
-          ...
-        },
-        {
-          "url": "https://.../104001-000-A_VOF-STMF_XQ.m3u8",
-          "versions": [
-            {
-              "label": "Original french version - closed captioning (FR)",
-              "shortLabel": "ccFR",
-              "eStat": {
-                "ml5": "VOF-STMF"
-              }
-            }
-          ],
-          ...
-        },
-        {
-          "url": "https://.../104001-000-A_VA-STA_XQ.m3u8",
-          "versions": [
-            {
-              "label": "German (Dubbed)",
-              "shortLabel": "DE",
-              "eStat": {
-                "ml5": "VA-STA"
-              }
-            }
-          ],
-          ...
-        },
-        {
-          "url": "https://.../104001-000-A_VA-STMA_XQ.m3u8",
-          "versions": [
-            {
-              "label": "German closed captioning ",
-              "shortLabel": "ccDE",
-              "eStat": {
-                "ml5": "VA-STMA"
-              }
-            }
-          ],
-          ...
-        }
-      ],
-      ...
-    }
-  }
-}
-```
-Information about the program is detailed in `data.attributes.metadata` and a list of available audio/subtitles combinations in `data.attributes.streams`. In our code such a combination is referred to as a _rendition_ (or _version_ in the CLI).
+Information about the program is detailed in `$.data.attributes.metadata` and a list of available audio/subtitles combinations in `$.data.attributes.streams`. In our code such a combination is referred to as a _rendition_ (or _version_ in the CLI).

-Every such _rendition_ has a reference to a _master playlist_ file in `.streams[i].url` and description of the audio/subtitle combination in `.streams[i].versions[0]`.
+Every such _rendition_ has a reference to a _master playlist_ file in `.streams[i].url`.

-We are using `.streams[i].versions[0].eStat.ml5` as our _rendition_ key:
+### The _master playlist_ file

- `VOF-STE[ANG]` English (Subtitles)
- `VOF-STF` French (Original)
- `VOF-STMF` Original french version - closed captioning (FR)
- `VA-STA` German (Dubbed)
- `VA-STMA` German closed captioning
- ...
-
-#### The _master playlist_
-
-As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216), for example:
-
-```
-#EXTM3U
-...
-#EXT-X-STREAM-INF:BANDWIDTH=2335200,AVERAGE-BANDWIDTH=1123304,VIDEO-RANGE=SDR,CODECS="avc1.4d401e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=25.000,AUDIO="program_audio_0",SUBTITLES="subs"
-medias/104001-000-A_v432.m3u8
-#EXT-X-STREAM-INF:BANDWIDTH=4534432,AVERAGE-BANDWIDTH=2124680,VIDEO-RANGE=SDR,CODECS="avc1.4d0028,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=25.000,AUDIO="program_audio_0",SUBTITLES="subs"
-medias/104001-000-A_v1080.m3u8
-#EXT-X-STREAM-INF:BANDWIDTH=4153392,AVERAGE-BANDWIDTH=1917840,VIDEO-RANGE=SDR,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=25.000,AUDIO="program_audio_0",SUBTITLES="subs"
-medias/104001-000-A_v720.m3u8
-#EXT-X-STREAM-INF:BANDWIDTH=1445432,AVERAGE-BANDWIDTH=726160,VIDEO-RANGE=SDR,CODECS="avc1.4d401e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=25.000,AUDIO="program_audio_0",SUBTITLES="subs"
-medias/104001-000-A_v360.m3u8
-#EXT-X-STREAM-INF:BANDWIDTH=815120,AVERAGE-BANDWIDTH=429104,VIDEO-RANGE=SDR,CODECS="avc1.42e00d,mp4a.40.2",RESOLUTION=384x216,FRAME-RATE=25.000,AUDIO="program_audio_0",SUBTITLES="subs"
-medias/104001-000-A_v216.m3u8
-...
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="program_audio_0",LANGUAGE="fr",NAME="VOF",AUTOSELECT=YES,DEFAULT=YES,URI="medias/104001-000-A_aud_VOF.m3u8"
-#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",URI="medias/104001-000-A_st_VO-ANG.m3u8"
-...
-```
-
-This file show the a list of video _variants_ URIs (one per video resolution). Each of them has
+As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (sample files can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/master-105612-000-A_VOF-STMF_XQ.m3u8) or [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/master-105612-000-A_VA-STA_XQ.m3u8)). This file shows a list of video _variant_ URIs (one per video resolution). Each of them has
 - exactly one video _media playlist_ reference
 - exactly one audio _media playlist_ reference
 - at most one subtitles _media playlist_ reference

-##### The video and audio _media playlist_
+Audio and subtitles track references also include:
+- a two-letter `language` code attribute (`mul` is used for multi-language audio)
+- a free form `name` attribute that is used to detect an audio _original version_
+- a coded `characteristics` attribute that is used to detect accessibility tracks (audio or textual description)

-As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216), for example:
+### The video and audio _media playlist_ file

-```
-#EXTM3U
-#EXT-X-TARGETDURATION:6
-#EXT-X-VERSION:7
-#EXT-X-MEDIA-SEQUENCE:1
-#EXT-X-INDEPENDENT-SEGMENTS
-#EXT-X-PLAYLIST-TYPE:VOD
-#EXT-X-MAP:URI="104001-000-A_v1080.mp4",BYTERANGE="28792@0"
-#EXTINF:6.000,
-#EXT-X-BYTERANGE:1734621@28792
-104001-000-A_v1080.mp4
-#EXTINF:6.000,
-#EXT-X-BYTERANGE:1575303@1763413
-104001-000-A_v1080.mp4
-#EXTINF:6.000,
-#EXT-X-BYTERANGE:1603739@3338716
-104001-000-A_v1080.mp4
-#EXTINF:6.000,
-#EXT-X-BYTERANGE:1333835@4942455
-104001-000-A_v1080.mp4
-...
-```
+As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (a sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/audio-105612-000-A_aud_VA.m3u8) or [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/video-105612-000-A_v1080.m3u8)). This file is basically a list of _segments_ (http ranges) the client is supposed to download in sequence.

-This file shows the list of _segments_ the server expect to serve.
+### The subtitles _media playlist_ file

+As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216) (a sample file can be found [here](https://git.afpy.org/fcode/delarte/src/branch/stable/samples/hls/subtitles-105612-000-A_st_VA-ALL.m3u8)). This file references the actual file containing the subtitles [VTT](https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) data.

-##### The subtitles _media playlist_
+## ⚙️ The process

-As defined in [HTTP Live Streaming](https://www.rfc-editor.org/rfc/rfc8216), for example:
+1. Figure out available _sources_ by:
+   - fetching the _config_ API object for the _program identifier_
+   - fetching all referenced _master playlist_ files.
+2. Select the desired _source_ based on _renditions_ and _variants_ codes.
+3. Figure out the _output filename_ from _source_ details.

-```
-#EXTM3U
-#EXT-X-VERSION:7
-#EXT-X-TARGETDURATION:4650
-#EXT-X-MEDIA-SEQUENCE:1
-#EXT-X-PLAYLIST-TYPE:VOD
-#EXTINF:4650,
-104001-000-A_st_VO-ANG.vtt
-#EXT-X-ENDLIST
-```
+4. Download video, audio and subtitles media content.
+   - convert `VTT` subtitles to `SRT`

-This file shows the file containing the subtitles data.
+5. Feed all the media to `ffmpeg` for multiplexing (or _muxing_)

-### ⚙️The process
-
-1. Get the _config_ API object for the _program identifier_.
-    - Select a _rendition_.
-2. Get the _master playlist_.
-    - Select a _variant_.
-3. Download audio, video and subtitles media content.
-    - convert `VTT` subtitles to `SRT`
-4. Figure out the _output filename_ from _metadata_.
-5. Feed the all the media to `ffmpeg` for _muxing_
-
-### 📽️ FFMPEG
+## 📽️ FFMPEG

 The multiplexing (_muxing_) of the video file is handled by [ffmpeg](https://ffmpeg.org/). The script expects [ffmpeg](https://ffmpeg.org/) to be installed in the environment and will call it as a subprocess.

-#### Why not use FFMPEG direcly with the HLS _master playlist_ URL ?
+### Why not use FFMPEG directly with the HLS _master playlist_ URL ?

 So we can be more granular about _renditions_ and _variants_ that we want.

-#### Why not use `VTT` subtitles direcly ?
+### Why not use `VTT` subtitles directly ?

 Because it fails 😒.

-#### Why not use FFMPEG direcly with the _media playalist_ URLs and let it do the download ?
+### Why not use FFMPEG directly with the _media playlist_ URLs and let it do the download ?

 Because some programs would randomly fail 😒. Probably due to invalid _segmentation_ on the server.


-### 📌 Dependences
+## 📌 Dependencies

 - [m3u8](https://pypi.org/project/m3u8/) to parse playlists.
 - [webvtt-py](https://pypi.org/project/webvtt-py/) to load `vtt` subtitles files.
+- [requests](https://pypi.org/project/requests/) to handle HTTP traffic.

-### 🤝 Help
+## 🤝 Help

 For sure ! The more the merrier.
--- a/pyproject.toml
+++ b/pyproject.toml
@ -12,6 +12,7 @@ dynamic = ["version", "description"]
 dependencies = [
    "m3u8",
    "webvtt-py",
+    "requests"
 ]

 [project.urls]
--- a/src/delarte/init.py
+++ b/src/delarte/init.py
@ -4,3 +4,33 @@
 """delarte - ArteTV downloader."""

 __version__ = "0.1"
+
+from . import api, hls, muxing
+from .model import (
+    Metadata,
+    Rendition,
+    RenditionAudio,
+    RenditionSubtitles,
+    Source,
+    Variant,
+)
+from .www import parse_url as parse_web_url
+
+
+def fetch_sources(http_session, target_id, www_lang):
+    """Fetch a target's sources."""
+    return [
+        source
+        for metadata, master_playlist_url in api.fetch_program_info(
+            http_session, target_id, www_lang
+        )
+        for source in hls.fetch_program_sources(
+            http_session, metadata, master_playlist_url
+        )
+    ]
+
+
+def download_source(http_session, source, file_name, progress):
+    """Download the given source into given file."""
+    with hls.download_source(http_session, source, progress) as local_source:
+        muxing.mux_source(local_source, file_name, progress)
--- a/src/delarte/main.py
+++ b/src/delarte/main.py
@ -12,29 +12,98 @@ usage: delarte [-h|--help]                         - print this message
 import sys
 import time

-from . import api
-from . import hls
-from . import muxing
-from . import naming
-from . import www
-from . import cli
+import requests
+
+from . import cli, download_source, error, fetch_sources, naming, www


-def _fail(message, code=1):
-    print(message, file=sys.stderr)
-    return code
+class Abort(error.ModuleError):
+    """Aborted."""


-def _print_available_renditions(config, f):
-    print(f"Available versions:", file=f)
-    for code, label in api.iter_renditions(config):
-        print(f"\t{code} - {label}", file=f)
+class Fail(error.UnexpectedError):
+    """Unexpected error."""


-def _print_available_variants(version_index, f):
-    print(f"Available resolutions:", file=f)
-    for code, label in hls.iter_variants(version_index):
-        print(f"\t{code} - {label}", file=f)
+def _lookup_language_name(code):
+    # TODO: actually implement this
+    return f"[{code}]"
+
+
+def _build_rendition_label(rendition):
+    # Build a human readable description for a given rendition
+    label = ""
+    if rendition.audio.is_original:
+        label += "original "
+    elif not rendition.audio.is_accessible:
+        label += "dubbed "
+
+    label += _lookup_language_name(rendition.audio.lang)
+
+    if rendition.audio.is_accessible:
+        label += " audio description"
+
+    if rendition.subtitles:
+
+        if rendition.subtitles.is_accessible:
+            if rendition.subtitles.lang != rendition.audio.lang:
+                label += f" with {_lookup_language_name(rendition.subtitles.lang)} text description"
+            else:
+                label += " with text description"
+        elif rendition.subtitles.lang != rendition.audio.lang:
+            label += (
+                f" with {_lookup_language_name(rendition.subtitles.lang)} subtitles"
+            )
+
+    return label
+
+
+def _print_renditions(sources):
+    items = [
+        (r.code, _build_rendition_label(r)) for r in set(s.rendition for s in sources)
+    ]
+
+    items.sort(key=lambda t: t[1])
+
+    for code, label in items:
+        print(f"\t{code:>6} - {label}")
+
+
+def _print_variants(sources):
+    items = [(v.code, v.height) for v in set(s.variant for s in sources)]
+
+    items.sort(key=lambda t: t[1], reverse=True)
+
+    for code, _ in items:
+        print(f"\t{code}")
+
+
+def _select_rendition(sources, rendition_code):
+    filtered = [s for s in sources if s.rendition.code == rendition_code]
+
+    if not filtered:
+        print(f"{rendition_code!r} is not a valid version, possible values are:")
+        _print_renditions(sources)
+        raise Abort()
+
+    if len(_ := set(s.rendition for s in filtered)) > 1:
+        raise Fail("DUPLICATE_RENDITION_CODE", _)
+
+    return filtered
+
+
+def _select_variant(sources, variant_code):
+    filtered = [s for s in sources if s.variant.code == variant_code]
+
+    if not filtered:
+        print(f"{variant_code!r} is not a valid resolution, possible values are:")
+        _print_variants(sources)
+        raise Abort()
+
+    if len(_ := set(s.variant for s in filtered)) > 1:
+        raise Fail("DUPLICATE_VARIANT_CODE", _)
+
+    return filtered


 def create_progress():
@ -74,43 +143,61 @@ def main():
        return 0

    try:
-        www_lang, program_id = www.parse_url(args.pop(0))
-    except ValueError as e:
-        return _fail(f"Invalid url: {e}")
+        target_id, www_lang = www.parse_url(args.pop(0))

-    try:
-        config = api.load_config(www_lang, program_id)
-    except ValueError:
-        return _fail("Invalid program")
+        http_session = requests.sessions.Session()

-    if not args:
-        _print_available_renditions(config, sys.stdout)
-        return 0
+        sources = fetch_sources(http_session, target_id, www_lang)

-    master_playlist_url = api.select_rendition(config, args.pop(0))
-    if master_playlist_url is None:
-        _fail("Invalid version")
-        _print_available_renditions(config, sys.stderr)
+        if not args:
+            print(f"Available versions:")
+            _print_renditions(sources)
+            return 0
+
+        sources = _select_rendition(sources, args.pop(0))
+
+        if not args:
+            print(f"Available resolutions:")
+            _print_variants(sources)
+            return 0
+
+        sources = _select_variant(sources, args.pop(0))
+
+        file_names = [
+            naming.build_file_name(s, i, len(sources)) for i, s in enumerate(sources, 1)
+        ]
+
+        progress = create_progress()
+
+        for source, file_name in zip(sources, file_names):
+            download_source(http_session, source, file_name, progress)
+
+    except error.UnexpectedError as e:
+        print(str(e))
+        print()
+        print(
+            "This program is the result of browser/server traffic analysis and involves\n"
+            "some level of trying and guessing. This error might mean that we did not try\n"
+            "enough or that we guessed poorly."
+        )
+        print("")
+        print("Please consider submitting the issue to us so we may fix it.")
+        print("")
+        print("Issue tracker: https://git.afpy.org/fcode/delarte/issues")
+        print(f"Title: {e.args[0]}")
+        print("Body:")
+        print(f"    {repr(e)}")
        return 1

-    master_playlist = hls.load_master_playlist(master_playlist_url)
+    except error.ModuleError as e:
+        print(str(e))
+        # print(repr(e))
+        return 1

-    if not args:
-        _print_available_variants(master_playlist, sys.stdout)
-        return 0
-
-    remote_inputs = hls.select_variant(master_playlist, args.pop(0))
-    if remote_inputs is None:
-        _fail("Invalid resolution")
-        _print_available_variants(master_playlist, sys.stderr)
-        return 0
-
-    file_base_name = naming.build_file_base_name(config)
-
-    progress = create_progress()
-
-    with hls.download_inputs(remote_inputs, progress) as temp_inputs:
-        muxing.mux(temp_inputs, file_base_name, progress)
+    except requests.HTTPError as e:
+        print("Network error.")
+        # print(str(e))
+        return 1


 if __name__ == "__main__":
--- a/src/delarte/api.py
+++ b/src/delarte/api.py
@ -3,56 +3,72 @@

 """Provide ArteTV JSON API utilities."""

-import json
-from http import HTTPStatus
-from urllib.request import urlopen
+from . import error, model
+
+MIME_TYPE = "application/vnd.api+json; charset=utf-8"


-def load_api_data(url):
-    """Retrieve the root node (infamous "data") of an API call response."""
-    http_response = urlopen(url)
-
-    if http_response.status != HTTPStatus.OK:
-        raise RuntimeError("API request failed")
-
-    if (
-        http_response.getheader("Content-Type")
-        != "application/vnd.api+json; charset=utf-8"
-    ):
-        raise ValueError("API response not supported")
-
-    return json.load(http_response)["data"]
+class UnexpectedResponse(error.UnexpectedError):
+    """Unexpected response from ArteTV."""


-def load_config(lang, program_id):
-    """Retrieve a program config from API."""
-    url = f"https://api.arte.tv/api/player/v2/config/{lang}/{program_id}"
-    config = load_api_data(url)
-
-    if config["type"] != "ConfigPlayer":
-        raise ValueError("Invalid API response")
-
-    if config["attributes"]["metadata"]["providerId"] != program_id:
-        raise ValueError("Invalid API response")
-
-    return config
+class NotFound(error.ModuleError):
+    """Program not found on ArteTV."""


-def iter_renditions(config):
-    """Return a rendition (code, label) iterator."""
-    for stream in config["attributes"]["streams"]:
-        yield (
-            # rendition code
-            stream["versions"][0]["eStat"]["ml5"],
-            # rendition full name
-            stream["versions"][0]["label"],
-        )
+class UnsupportedProtocol(error.ModuleError):
+    """Program type not supported."""


-def select_rendition(config, rendition_code):
-    """Return the master playlist index url for the given rendition code."""
-    for stream in config["attributes"]["streams"]:
-        if stream["versions"][0]["eStat"]["ml5"] == rendition_code:
-            return stream["url"]
+def _fetch_api_data(http_session, path, object_type):
+    # Fetch an API object.
+    url = "https://api.arte.tv/api/player/v2/" + path

-    return None
+    r = http_session.get(url)
+    if r.status_code == 404:
+        raise NotFound(url)
+
+    r.raise_for_status()
+
+    if (_ := r.headers["content-type"]) != MIME_TYPE:
+        raise UnexpectedResponse("MIME_TYPE", path, MIME_TYPE, _)
+
+    obj = r.json()["data"]
+
+    if (_ := obj["type"]) != object_type:
+        raise UnexpectedResponse("OBJECT_TYPE", path, object_type, _)
+
+    return obj["attributes"]
+
+
+def fetch_program_info(http_session, target_id, www_lang):
+    """Fetch the given target's associated program information."""
+    obj = _fetch_api_data(
+        http_session, f"config/{www_lang}/{target_id}", "ConfigPlayer"
+    )
+
+    metadata = model.Metadata(
+        obj["metadata"]["providerId"],
+        obj["metadata"]["title"],
+        obj["metadata"]["subtitle"],
+        obj["metadata"]["description"],
+        obj["metadata"]["duration"]["seconds"],
+    )
+
+    cache = set()
+
+    for s in obj["streams"]:
+        if (_ := s["protocol"]) != "HLS_NG":
+            raise UnsupportedProtocol(target_id, www_lang, _)
+
+        if (master_playlist_url := s["url"]) in cache:
+            raise UnexpectedResponse(
+                "DUPLICATE_MASTER_PLAYLIST_URL",
+                target_id,
+                www_lang,
+                master_playlist_url,
+            )
+
+        cache.add(master_playlist_url)
+
+        yield (metadata, master_playlist_url)
--- a/src/delarte/error.py
+++ b/src/delarte/error.py
@ -0,0 +1,20 @@
+# License: GNU AGPL v3: http://www.gnu.org/licenses/
+# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
+
+"""Provide common utilities."""
+
+
+class ModuleError(Exception):
+    """Module error."""
+
+    def __str__(self):
+        """Use the class definition docstring as a string representation."""
+        return self.__doc__
+
+    def __repr__(self):
+        """Use the class qualified name and constructor arguments."""
+        return f"{self.__class__.__qualname__}{self.args!r}"
+
+
+class UnexpectedError(ModuleError):
+    """An error to report to developers."""
--- a/src/delarte/hls.py
+++ b/src/delarte/hls.py
@ -61,15 +61,13 @@ import contextlib
 import io
 import os
 import re
-from http import HTTPStatus
-from http.client import HTTPConnection, HTTPSConnection
 from tempfile import NamedTemporaryFile
-from urllib.parse import urlparse
-from urllib.request import urlopen

 import m3u8
 import webvtt

+from . import error, model
+
 #
 # WARNING !
 #
@ -81,200 +79,208 @@ import webvtt
 #   - Every variant is of different resolution
 #   - Every variant has exactly one audio medium
 #   - Every variant has at most one subtitles medium
-# - Audio and video media playlists segments are incremental ranges of the same file
+# - Audio and video media playlists segments are incremental ranges of
+#   the same file
 # - Subtitles media playlists have only one segment


-def _make_resolution_code(variant):
-    # resolution code (1080p, 720p, ...)
-    return f"{variant.stream_info.resolution[1]}p"
+class UnexpectedResponse(error.UnexpectedError):
+    """Unexpected response from ArteTV."""


-def _is_relative_file_path(uri):
-    try:
-        url = urlparse(uri)
-        return url.path == uri and not uri.startswith("/")
-    except ValueError:
-        return False
+def _fetch_playlist(http_session, url):
+    # Fetch a M3U8 playlist
+    r = http_session.get(url)
+    r.raise_for_status()
+    return m3u8.loads(r.text, url)


-def load_master_playlist(url):
-    """Download and return a master playlist."""
-    master_playlist = m3u8.load(url)
+def fetch_program_sources(http_session, metadata, master_playlist_url):
+    """Fetch the given master playlist and yield available sources."""
+    master_playlist = _fetch_playlist(http_session, master_playlist_url)

-    if not master_playlist.playlists:
-        raise ValueError("Unexpected missing playlists")
+    audio_media = None
+    subtitles_media = None

-    resolution_codes = set()
-
-    for variant in master_playlist.playlists:
-        resolution_code = _make_resolution_code(variant)
-
-        if resolution_code in resolution_codes:
-            raise ValueError("Unexpected duplicate resolution")
-        resolution_codes.add(resolution_code)
-
-        audio_media = False
-        subtitles_media = False
-
-        for m in variant.media:
-            if not _is_relative_file_path(m.uri):
-                raise ValueError("Invalid relative file name")
-
-            if m.type == "AUDIO":
+    for media in master_playlist.media:
+        match media.type:
+            case "AUDIO":
                if audio_media:
-                    raise ValueError("Unexpected multiple audio tracks")
-                audio_media = True
-
-            elif m.type == "SUBTITLES":
+                    raise UnexpectedResponse(
+                        "MULTIPLE_AUDIO_MEDIA", master_playlist_url
+                    )
+                audio_media = media
+            case "SUBTITLES":
                if subtitles_media:
-                    raise ValueError("Unexpected multiple subtitles tracks")
-                subtitles_media = True
+                    raise UnexpectedResponse(
+                        "MULTIPLE_SUBTITLES_MEDIA", master_playlist_url
+                    )
+                subtitles_media = media

-        if not audio_media:
-            raise ValueError("Unexpected missing audio track")
+    if not audio_media:
+        raise UnexpectedResponse("NO_AUDIO_MEDIA", master_playlist_url)

-    return master_playlist
+    rendition = model.Rendition(
+        model.RenditionAudio(
+            audio_media.language,
+            audio_media.name.startswith("VO"),
+            audio_media.characteristics
+            and ("public.accessibility" in audio_media.characteristics),
+        ),
+        model.RenditionSubtitles(
+            subtitles_media.language,
+            subtitles_media.characteristics
+            and ("public.accessibility" in subtitles_media.characteristics),
+        )
+        if subtitles_media
+        else None,
+    )

+    cache = set()

-def iter_variants(master_playlist):
-    """Iterate over variants."""
-    for variant in sorted(
-        master_playlist.playlists,
-        key=lambda v: v.stream_info.resolution[1],
-        reverse=True,
-    ):
-        yield (
-            _make_resolution_code(variant),
-            f"{variant.stream_info.resolution[0]} x {variant.stream_info.resolution[1]}",
+    for video_media in master_playlist.playlists:
+        stream_info = video_media.stream_info
+        if stream_info.audio != audio_media.group_id:
+            raise UnexpectedResponse(
+                "INVALID_VARIANT_AUDIO_MEDIA", master_playlist_url, stream_info.audio
+            )
+
+        if subtitles_media:
+            if stream_info.subtitles != subtitles_media.group_id:
+                raise UnexpectedResponse(
+                    "INVALID_VARIANT_SUBTITLES_MEDIA",
+                    master_playlist_url,
+                    stream_info.subtitles,
+                )
+        elif stream_info.subtitles:
+            raise UnexpectedResponse(
+                "INVALID_VARIANT_SUBTITLES_MEDIA",
+                master_playlist_url,
+                stream_info.subtitles,
+            )
+
+        variant = model.Variant(
+            stream_info.resolution[0],
+            stream_info.resolution[1],
+            stream_info.frame_rate,
+        )
+
+        if variant in cache:
+            raise UnexpectedResponse("DUPLICATE_VARIANT", master_playlist_url, variant)
+        cache.add(variant)
+
+        yield model.Source(
+            metadata,
+            rendition,
+            variant,
+            video_media.absolute_uri,
+            audio_media.absolute_uri,
+            subtitles_media.absolute_uri if subtitles_media else None,
        )


-def select_variant(master_playlist, resolution_code):
-    """Return the stream information for a given resolution code."""
-    for variant in master_playlist.playlists:
-        code = _make_resolution_code(variant)
-        if code != resolution_code:
-            continue
-
-        audio_track = None
-        for m in variant.media:
-            if m.type == "AUDIO":
-                audio_track = (m.language, variant.base_uri + m.uri)
-                break
-
-        subtitles_track = None
-        for m in variant.media:
-            if m.type == "SUBTITLES":
-                subtitles_track = (m.language, variant.base_uri + m.uri)
-                break
-
-        return (
-            variant.base_uri + variant.uri,
-            audio_track,
-            subtitles_track,
-        )
-
-    return None
-
-
-def _parse_byterange(obj):
-    # Parse a M3U8 `byterange` (count@offset) into http range (range_start, rang_end)
+def _convert_byterange(obj):
+    # Convert a M3U8 `byterange` (1) to an `http range` (2).
+    #  1. "count@offset"
+    #  2. (start, end)
    count, offset = [int(v) for v in obj.byterange.split("@")]
    return offset, offset + count - 1


-def _load_av_segments(media_playlist_url):
-    media_playlist = m3u8.load(media_playlist_url)
+def _fetch_av_media_playlist(http_session, url):
+    # Fetch an audio or video media playlist.
+    # Return a tuple:
+    #  - the media file url
+    #  - the media file's ranges: a list of (start, end) byte ranges, `end` inclusive
+    # Raise UnexpectedResponse if the ranges do not start at byte 0, if they are
+    # not contiguous, or if the playlist references more than one file.
+    media_playlist = _fetch_playlist(http_session, url)

    file_name = media_playlist.segment_map[0].uri
-    range_start, range_end = _parse_byterange(media_playlist.segment_map[0])
-    if range_start != 0:
-        raise ValueError("Invalid a/v index: does not start at 0")
-    chunks = [(range_start, range_end)]
-    total = range_end + 1
+    # The range of the first `segment_map` entry must start at byte 0.
+    start, end = _convert_byterange(media_playlist.segment_map[0])
+    if start != 0:
+        raise UnexpectedResponse("INVALID_AV_MEDIA_FRAGMENT_START", url)
+    ranges = [(start, end)]
+    # Offset at which the next range is expected to begin.
+    next_start = end + 1

    for segment in media_playlist.segments:
        if segment.uri != file_name:
-            raise ValueError("Invalid a/v index: multiple file names")
+            raise UnexpectedResponse("MULTIPLE_AV_MEDIA_FILES", url)

-        range_start, range_end = _parse_byterange(segment)
-        if range_start != total:
-            raise ValueError(
-                f"Invalid a/v index: discontinuous ranges ({range_start} != {total})"
-            )
+        # Every segment must begin right after the previous one ended.
+        start, end = _convert_byterange(segment)
+        if start != next_start:
+            raise UnexpectedResponse("DISCONTINUOUS_AV_MEDIA_FRAGMENT", url)

-        chunks.append((range_start, range_end))
-        total = range_end + 1
+        ranges.append((start, end))
+        next_start = end + 1

-    return urlparse(media_playlist.segment_map[0].absolute_uri), chunks
+    return media_playlist.segment_map[0].absolute_uri, ranges


-def _download_av_stream(media_playlist_url, progress):
-    # Download an audio or video stream to temporary directory
-    url, ranges = _load_av_segments(media_playlist_url)
+def _fetch_subtitles_media_playlist(http_session, url):
+    # Fetch a subtitles media playlist.
+    # Return the url of the single subtitles file it references.
+    # Raise UnexpectedResponse when the playlist lists no file or several files.
+    playlist = _fetch_playlist(http_session, url)
+    file_urls = [segment.absolute_uri for segment in playlist.segments]
+
+    if len(file_urls) == 0:
+        raise UnexpectedResponse("SUBTITLES_MEDIA_NO_FILES", url)
+
+    if len(file_urls) != 1:
+        raise UnexpectedResponse("SUBTITLES_MEDIA_MULTIPLE_FILES", url)
+
+    (file_url,) = file_urls
+    return file_url
+
+
+def _download_av_media(http_session, media_playlist_url, progress):
+    # Download an audio or video stream to temporary file.
+    # Return the temporary file name.
+    #
+    # The media file is fetched range by range, using the byte ranges listed
+    # in the media playlist; `progress(range_end, total)` is called after each
+    # range has been written, `total` being the end of the last range.
+    #
+    # Raise UnexpectedResponse if a range request is not answered with a
+    # 206 status or with the expected number of bytes.
+    #
+    # NOTE(review): the temporary file is created with `delete=False`; when an
+    # error is raised here its name is never returned, so nothing visible in
+    # this function removes it.
+    url, ranges = _fetch_av_media_playlist(http_session, media_playlist_url)
    total = ranges[-1][1]

-    Connector = HTTPSConnection if url.scheme == "https" else HTTPConnection
-    connection = Connector(url.hostname)
-    connection.connect()
-
    with (
        NamedTemporaryFile(
            mode="w+b", delete=False, prefix="delarte.", suffix=".mp4"
-        ) as f,
-        contextlib.closing(connection) as c,
+        ) as f
    ):
        for range_start, range_end in ranges:
-            c.request(
-                "GET",
-                url.path,
+            r = http_session.get(
+                url,
                headers={
-                    "Accept": "*/*",
-                    "Accept-Language": "fr,en;q=0.7,en-US;q=0.3",
-                    "Accept-Encoding": "gzip, deflate, br, identity",
                    "Range": f"bytes={range_start}-{range_end}",
-                    "Origin": "https://www.arte.tv",
-                    "Connection": "keep-alive",
-                    "Referer": "https://www.arte.tv/",
-                    "Sec-Fetch-Dest": "empty",
-                    "Sec-Fetch-Mode": "cors",
-                    "Sec-Fetch-Site": "cross-site",
-                    "Sec-GPC": "1",
-                    "DNT": "1",
                },
+                timeout=5,
            )
-            r = c.getresponse()
-            if r.status != 206:
-                raise ValueError(f"Invalid response status {r.status}")

-            content = r.read()
-            if len(content) != range_end - range_start + 1:
-                raise ValueError("Invalid range length")
-            f.write(content)
+            r.raise_for_status()
+
+            # A ranged request must be answered with 206 (Partial Content);
+            # any other successful status means the range was not honored.
+            if r.status_code != 206:
+                raise UnexpectedResponse(
+                    "UNEXPECTED_AV_MEDIA_HTTP_STATUS",
+                    media_playlist_url,
+                    r.request.headers,
+                    r.status_code,
+                )
+
+            # Ranges are inclusive, hence the `+ 1`.
+            if len(r.content) != range_end - range_start + 1:
+                raise UnexpectedResponse(
+                    "INVALID_AV_MEDIA_FRAGMENT_LENGTH", media_playlist_url
+                )
+            f.write(r.content)

            progress(range_end, total)

        return f.name


-def _download_subtitles_input(index_url, progress):
-    # Return a temporary file name where VTT subtitle has been downloaded/converted to SRT
-    subtitles_index = m3u8.load(index_url)
-    urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files]
-
-    if not urls:
-        raise ValueError("No subtitle files")
-
-    if len(urls) > 1:
-        raise ValueError("Multiple subtitle files")
+def _download_subtitles_media(http_session, media_playlist_url, progress):
+    # Download a subtitle file (converted from VTT to SRT format) into a temporary file.
+    # Return the temporary file name.
+    url = _fetch_subtitles_media_playlist(http_session, media_playlist_url)

    progress(0, 2)
-    http_response = urlopen(urls[0])
-    if http_response.status != HTTPStatus.OK:
-        raise RuntimeError("Subtitle request failed")
+    r = http_session.get(url)
+    r.raise_for_status()

-    buffer = io.StringIO(http_response.read().decode("utf8"))
+    buffer = io.StringIO(r.text)
    progress(1, 2)

    with NamedTemporaryFile(
@ -296,43 +302,50 @@ def _download_subtitles_input(index_url, progress):


@contextlib.contextmanager
-def download_inputs(remote_inputs, progress):
-    """Download inputs in temporary files."""
-    # It is implemented as a context manager that will delete temporary files on exit.
-
-    video_index_url, audio_track, subtitles_track = remote_inputs
+def download_source(http_session, source, progress):
+    """Download source inputs into temporary files.

+    Returns a context manager that will delete the temporary files on exit.
+    The value bound by the `with` statement is a local version of the given
+    source: a `model.Source` with the same metadata, rendition and variant,
+    whose video, audio and subtitles locators are the temporary file names
+    (subtitles is `None` when the given source has no subtitles).
+
+    `progress` is called as `progress(kind, i, n)`, `kind` being one of
+    "video", "audio" or "subtitles".
+    """
    video_filename = None
    audio_filename = None
    subtitles_filename = None

    try:
-        video_filename = _download_av_stream(
-            video_index_url, lambda i, n: progress("video", i, n)
+        video_filename = _download_av_media(
+            http_session, source.video, lambda i, n: progress("video", i, n)
        )

-        (audio_lang, audio_index_url) = audio_track
-        audio_filename = _download_av_stream(
-            audio_index_url, lambda i, n: progress("audio", i, n)
+        audio_filename = _download_av_media(
+            http_session, source.audio, lambda i, n: progress("audio", i, n)
        )

-        if subtitles_track:
-            (subtitles_lang, subtitles_index_url) = subtitles_track
-            subtitles_filename = _download_subtitles_input(
-                subtitles_index_url, lambda i, n: progress("subtitles", i, n)
+        subtitles_filename = (
+            _download_subtitles_media(
+                http_session,
+                source.subtitles,
+                lambda i, n: progress("subtitles", i, n),
            )
+            if source.subtitles
+            else None
+        )
+
+        yield model.Source(
+            source.metadata,
+            source.rendition,
+            source.variant,
+            video_filename,
+            audio_filename,
+            subtitles_filename,
+        )

-            yield (
-                video_filename,
-                (audio_lang, audio_filename),
-                (subtitles_lang, subtitles_filename),
-            )
-        else:
-            yield (video_filename, (audio_lang, audio_filename), None)
    finally:
        if video_filename and os.path.isfile(video_filename):
            os.unlink(video_filename)
+
        if audio_filename and os.path.isfile(audio_filename):
            os.unlink(audio_filename)
+
        if subtitles_filename and os.path.isfile(subtitles_filename):
            os.unlink(subtitles_filename)
--- a/src/delarte/model.py
+++ b/src/delarte/model.py
@ -0,0 +1,119 @@
+# License: GNU AGPL v3: http://www.gnu.org/licenses/
+# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
+
+"""Provide data model types."""
+
+
+from typing import NamedTuple, Optional
+
+
+class Metadata(NamedTuple):
+    """A program's metadata.
+
+    Groups the descriptive information of a program: its ID, titles,
+    description and duration.
+    """
+
+    id: str
+    """The ID string."""
+
+    title: str
+    """The title."""
+
+    subtitle: str
+    """The subtitle or secondary title."""
+
+    description: str
+    """The description."""
+
+    duration: int
+    """The duration in seconds."""
+
+
+class RenditionAudio(NamedTuple):
+    """A rendition's audio part.
+
+    Describes the language of the audio track, whether it is the original
+    track and whether it carries an audio description.
+    """
+
+    # NOTE(review): "mul" is a three-letter code (ISO 639-2 "multiple
+    # languages"), so `lang` is not always a two-letter ISO 639-1 code.
+    lang: str
+    """ISO 639-1 two-letter language codes, or "mul" for multiple languages."""
+
+    is_original: bool
+    """Whether audio track is original (no audio description or dubbing)."""
+
+    provides_accessibility: bool
+    """Whether provides an audio description."""
+
+
+class RenditionSubtitles(NamedTuple):
+    """A rendition's subtitles part.
+
+    Describes the language of the subtitles track and whether it carries a
+    text description for accessibility.
+    """
+
+    lang: str
+    """ISO 639-1 two-letter language code."""
+
+    provides_accessibility: bool
+    """Whether it provides a readable description."""
+
+
+class Rendition(NamedTuple):
+    """A program's content version.
+
+    A rendition is made of an audio part and an optional subtitles part.
+    """
+
+    audio: RenditionAudio
+    subtitles: Optional[RenditionSubtitles]
+
+    @property
+    def code(self):
+        """Return a code string representation.
+
+        The code is the audio language, followed by "-AD" when the audio
+        provides accessibility, "-TD" when the subtitles provide
+        accessibility, and "-<subtitles language>" when the subtitles
+        language differs from the audio language.
+        """
+        # This code string MUST be built in a way that every possible rendition
+        # object has its own code string.
+        # Also, it should be as concise as possible because it will be typed
+        # by the user in the command line.
+        #
+        # NOTE(review): as written, the code is not unique per rendition:
+        #  - `audio.is_original` is never taken into account;
+        #  - a rendition without subtitles and a rendition whose subtitles are
+        #    in the audio language (without accessibility) both yield the bare
+        #    audio language.
+        code = self.audio.lang
+
+        if self.audio.provides_accessibility:
+            # audio accessibility (audio description)
+            code += "-AD"
+
+        if self.subtitles:
+            if self.subtitles.provides_accessibility:
+                # visual accessibility (text description)
+                code += "-TD"
+
+            if self.subtitles.lang != self.audio.lang:
+                # specifies subtitles language only if different from audio language
+                return code + "-" + self.subtitles.lang
+
+        return code
+
+
+class Variant(NamedTuple):
+    """A program's quality version."""
+
+    width: int
+    """Horizontal part of the resolution."""
+    height: int
+    """Vertical part of the resolution."""
+    frame_rate: int
+    """Frame rate, in frames per second."""
+
+    @property
+    def code(self):
+        """Return a code string representation.
+
+        The code is the vertical resolution followed by "p" (e.g. "1080p").
+        """
+        # This code string MUST be built in a way that every possible variant
+        # object has its own code string.
+        # Also, it should be as concise as possible because it will be typed
+        # by the user in the command line.
+        #
+        # So far, it seems variants differ on resolution only, which is why
+        # neither `width` nor `frame_rate` is part of the code.
+        return f"{self.height}p"
+
+
+class Source(NamedTuple):
+    """A program source.
+
+    Associates a program's metadata, a rendition and a variant with the
+    locators of the corresponding video, audio and (optional) subtitles
+    tracks.
+    """
+
+    metadata: Metadata
+    rendition: Rendition
+    variant: Variant
+
+    video: str
+    """Video track locator."""
+
+    audio: str
+    """Audio track locator."""
+
+    subtitles: Optional[str]
+    """Subtitles track locator."""
--- a/src/delarte/muxing.py
+++ b/src/delarte/muxing.py
@ -6,30 +6,24 @@
 import subprocess


-def mux(inputs, file_base_name, progress):
+def mux_source(source, file_base_name, _progress):
    """Build FFMPEG args."""
-    video_input, audio_track, subtitles_track = inputs
-
-    audio_lang, audio_input = audio_track
-    if subtitles_track:
-        subtitles_lang, subtitles_input = subtitles_track
-
    cmd = ["ffmpeg", "-hide_banner"]
-    cmd.extend(["-i", video_input])
-    cmd.extend(["-i", audio_input])
-    if subtitles_track:
-        cmd.extend(["-i", subtitles_input])
+    cmd.extend(["-i", source.video])
+    cmd.extend(["-i", source.audio])
+    if source.subtitles:
+        cmd.extend(["-i", source.subtitles])

    cmd.extend(["-c:v", "copy"])
    cmd.extend(["-c:a", "copy"])
-    if subtitles_track:
+    if source.subtitles:
        cmd.extend(["-c:s", "copy"])

    cmd.extend(["-bsf:a", "aac_adtstoasc"])
-    cmd.extend(["-metadata:s:a:0", f"language={audio_lang}"])
+    cmd.extend(["-metadata:s:a:0", f"language={source.rendition.audio.lang}"])

-    if subtitles_track:
-        cmd.extend(["-metadata:s:s:0", f"language={subtitles_lang}"])
+    if source.rendition.subtitles:
+        cmd.extend(["-metadata:s:s:0", f"language={source.rendition.subtitles.lang}"])
        cmd.extend(["-disposition:s:0", "default"])

    cmd.append(f"{file_base_name}.mkv")
--- a/src/delarte/naming.py
+++ b/src/delarte/naming.py
@ -4,6 +4,6 @@
 """Provide contextualized based file naming utility."""


-def build_file_base_name(config):
-    """Create a base file name from config metadata."""
-    return config["attributes"]["metadata"]["title"].replace("/", "-")
+def build_file_name(source, _index, _total):
+    """Build the base file name of a source from its title.
+
+    Slashes in the title are replaced with dashes; `_index` and `_total`
+    are not used.
+    """
+    title = source.metadata.title
+    return title.replace("/", "-")
--- a/src/delarte/www.py
+++ b/src/delarte/www.py
@ -3,27 +3,31 @@

 """Provide ArteTV website utilities."""

-from urllib.parse import urlparse
+from . import error

+BASE = "https://www.arte.tv/"
 LANGUAGES = ["fr", "de", "en", "es", "pl", "it"]


-def parse_url(program_page_url):
-    """Parse ArteTV web URL into UI language and program ID."""
-    url = urlparse(program_page_url)
-    if url.hostname != "www.arte.tv":
-        raise ValueError("not an ArteTV url")
+class InvalidUrl(error.ModuleError):
+    """Invalid ArteTV URL."""

-    program_page_path = url.path.split("/")[1:]

-    lang = program_page_path.pop(0)
+def parse_url(url):
+    """Parse ArteTV web URL into target ID and web UI language.
+
+    The URL is expected to look like `<BASE><www_lang>/videos/<target_id>/...`.
+
+    Returns a `(target_id, www_lang)` tuple.
+
+    Raises `InvalidUrl` when the URL does not start with `BASE`, when its
+    language is not one of `LANGUAGES`, or when its path is not of the
+    expected form (including when it is too short).
+    """
+    if not url.startswith(BASE):
+        raise InvalidUrl("BASE", url)

-    if lang not in LANGUAGES:
-        raise ValueError(f"invalid url language code: {lang}")
+    # `str.split` always returns at least one item, so the first pop is safe.
+    path = url[len(BASE) :].split("/")

-    if program_page_path.pop(0) != "videos":
-        raise ValueError("invalid ArteTV url")
+    www_lang = path.pop(0)

-    program_id = program_page_path.pop(0)
+    if www_lang not in LANGUAGES:
+        raise InvalidUrl("WWW_LANG", url, www_lang)

-    return lang, program_id
+    # The path may stop right after the language (e.g. "<BASE>fr"): report
+    # it as an invalid URL rather than letting `pop` raise an IndexError.
+    section = path.pop(0) if path else ""
+    if section != "videos":
+        raise InvalidUrl("PATH", url, section)
+
+    # Same for a path that stops right after "videos".
+    if not path:
+        raise InvalidUrl("PATH", url, "")
+
+    target_id = path.pop(0)
+
+    return target_id, www_lang