#!/usr/bin/env python3
# coding: utf8

"""delarte.

Retrieve video stream in a local file, including sub-titles

Licence: GNU AGPL v3: http://www.gnu.org/licenses/

This file is part of [`delarte`](https://git.afpy.org/fcode/delarte)
"""

import io
import json
import os
import re
import subprocess
import sys
import tempfile
from http import HTTPStatus
from os import environ
from typing import NamedTuple, cast
from urllib.parse import urlparse
from urllib.request import urlopen

import m3u8
import webvtt

# Executable used to assemble the final file. PATH_FFMPEG overrides it;
# otherwise "ffmpeg" is resolved through the regular executable search path.
FFMPEG = environ.get("PATH_FFMPEG", "ffmpeg")


def api_root(url: str):
    """Retrieve the root node (infamous "data") of an API call response.

    Args:
        url: Address of the API endpoint to query.

    Returns:
        The value stored under the "data" key of the JSON response.

    Raises:
        RuntimeError: The HTTP status is not 200 OK.
        ValueError: The response Content-Type is not the expected JSON:API
            media type.
    """
    # The context manager closes the connection on every path, including the
    # error ones.
    with urlopen(url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")

        if (
            http_response.getheader("Content-Type")
            != "application/vnd.api+json; charset=utf-8"
        ):
            raise ValueError("API response not supported")

        return json.load(http_response)["data"]


class Config(NamedTuple):
    """A structure representing a config API object.

    Attributes:
        provider_id: Identifier of the program the config was requested for.
        title: Title taken from the config metadata.
        subtitle: Subtitle taken from the config metadata.
        versions: Maps a version code to a ``(stream url, label)`` pair.
    """

    provider_id: str
    title: str
    subtitle: str
    versions: dict[str, tuple[str, str]]


def api_config(lang: str, provider_id: str) -> Config:
    """Retrieve a stream config from API.

    Args:
        lang: Language code inserted in the API URL.
        provider_id: Identifier of the program to look up.

    Returns:
        The `Config` built from the response attributes.

    Raises:
        ValueError: The response is not a "ConfigPlayer" object or describes
            another provider id than the requested one.
    """
    url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
    root = api_root(url)

    if root["type"] != "ConfigPlayer":
        raise ValueError("API response not supported")

    attrs = root["attributes"]
    metadata = attrs["metadata"]

    if metadata["providerId"] != provider_id:
        raise ValueError("API response not supported")

    return Config(
        provider_id,
        metadata["title"],
        metadata["subtitle"],
        {
            # Only the first entry of each stream's "versions" list is used,
            # both for the code (key) and for the label.
            s["versions"][0]["eStat"]["ml5"]: (s["url"], s["versions"][0]["label"])
            for s in attrs["streams"]
        },
    )


def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Not implemented yet. The endpoint prepared for it is
    ``https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}``.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError


def write_subtitles(lang, m3u8_uri, file_base_name):
    """Convert distant vtt subtitles to local srt.

    Args:
        lang: Language tag inserted in the output file name.
        m3u8_uri: Location of the subtitles playlist.
        file_base_name: Output path without extension; the file written is
            ``{file_base_name}.{lang}.srt``.

    Returns:
        The name of the srt file written.

    Raises:
        ValueError: The playlist references no file, or more than one.
        RuntimeError: The HTTP status of the subtitle download is not 200 OK.
    """
    sub_m3u8 = m3u8.load(m3u8_uri)
    sub_urls = [cast(str, sub_m3u8.base_uri) + "/" + f for f in sub_m3u8.files]

    if not sub_urls:
        raise ValueError("No subtitle files")

    if len(sub_urls) > 1:
        raise ValueError("Multiple subtitle files")

    # Read the whole document before leaving the block so the connection can
    # be closed right away.
    with urlopen(sub_urls[0]) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("Subtitle request failed")
        buffer = io.StringIO(http_response.read().decode("utf8"))

    with open(f"{file_base_name}.{lang}.srt", "w", encoding="utf8") as f:
        # Each cue is written as: 1-based index, timing line, text, blank line.
        for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
            print(i, file=f)
            # The timing line is the vtt one with every "." turned into ",".
            print(
                re.sub(r"\.", ",", caption.start)
                + " --> "
                + re.sub(r"\.", ",", caption.end),
                file=f,
            )
            print(caption.text + "\n", file=f)

    return f.name


def download_stream(m3u8_url: str, file_base_name: str):
    """Download and writes the video and subtitles files.

    The variant with the largest resolution is selected. Its subtitle medias
    are written as srt files, the other medias are kept, and the resulting
    playlist is handed to ffmpeg which writes ``{file_base_name}.mp4``.

    Args:
        m3u8_url: Location of the master playlist.
        file_base_name: Output path without extension.

    Raises:
        ValueError: The master playlist lists no variant.
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    dst = m3u8.M3U8()
    src = m3u8.load(m3u8_url)

    if not src.playlists:
        raise ValueError("No video stream found")

    # Pick the variant with the biggest resolution (first one on ties).
    best = max(src.playlists, key=lambda pl: pl.stream_info.resolution)
    best.uri = src.base_uri + best.uri
    dst.add_playlist(best)

    for media in best.media:
        media.uri = src.base_uri + media.uri
        if media.type == "SUBTITLES":
            write_subtitles(media.language, media.uri, file_base_name)
        else:
            dst.add_media(media)

    # ffmpeg reads the playlist from disk, so the file must outlive the
    # `with` block: delete=False, then explicit removal below.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, encoding="utf8", prefix="delarte.", suffix=".m3u8"
    ) as f:
        f.write(dst.dumps())
        dst_path = f.name

    try:
        subprocess.run(
            [
                FFMPEG,
                "-protocol_whitelist",
                "https,file,tls,tcp",
                "-i",
                dst_path,
                "-c",
                "copy",
                "-bsf:a",
                "aac_adtstoasc",
                f"{file_base_name}.mp4",
            ],
            # Surface ffmpeg failures instead of silently ignoring them.
            check=True,
        )
    finally:
        # Remove the temporary playlist even when ffmpeg is missing or fails.
        os.unlink(dst_path)


def main():
    """CLI function, options passed as arguments.

    The first argument is the program page URL, whose path must look like
    ``/<lang>/videos/<id>/<slug>``. The remaining arguments, joined with
    spaces, form the version code. When that code is not one of the available
    versions, the available ones are listed and the process exits with
    status 1.

    Raises:
        ValueError: The URL path does not have the expected shape.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} URL [VERSION]", file=sys.stderr)
        sys.exit(1)

    # strip("/") accepts URLs both with and without a trailing slash.
    parts = urlparse(sys.argv[1]).path.strip("/").split("/")
    if len(parts) != 4:
        raise ValueError("Invalid URL")
    ui_lang, section, stream_id, _slug = parts

    version = " ".join(sys.argv[2:])

    if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or section != "videos":
        raise ValueError("Invalid URL")

    config = api_config(ui_lang, stream_id)

    # "/" would be read as a directory separator in the output file name.
    file_base_name = config.title.replace("/", "-")

    if version not in config.versions:
        print(f"{config.title} - {config.subtitle}")
        for version_code, (_, version_label) in config.versions.items():
            print(f"\t{version_code} : {version_label}")
        sys.exit(1)

    m3u8_url, _version_name = config.versions[version]
    download_stream(m3u8_url, file_base_name)


if __name__ == "__main__":
    sys.exit(main())