diff --git a/README.md b/README.md index 7315af5..e83de4d 100644 --- a/README.md +++ b/README.md @@ -22,21 +22,26 @@ mkdir ~/.venvs && python3 -m venv ~/.venvs/delarte source ~/.venvs/delarte/bin/activate pip install -r requirements.txt export PATH_FFMPEG=$(which ffmpeg) -./delarte.py https://www.arte.tv/fr/videos/093644-001-A/meaningless_strings_but_mandatory/ -L'incroyable périple de Magellan (1/4) - VF : Français - VO-STF : Version originale - ST français - VF-STMF : Français (sourds et malentendants) - VFAUD : Français (audiodescription) - VA-STA : Allemand - VA-STMA : Allemand (sourds et malentendants) - VAAUD : Allemand (audiodescription) -``` - -Rajouter le code sous-titre en paramètre: - -```python -./delarte.py https://www.arte.tv/fr/videos/093644-001-A/meaningless_strings_but_mandatory/ VO-STF +./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ +Available versions: + VF - Français + VO-STF - Version originale - ST français + VF-STMF - Français (sourds et malentendants) + VFAUD - Français (audiodescription) + VA-STA - Allemand + VA-STMA - Allemand (sourds et malentendants) + VAAUD - Allemand (audiodescription) +./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ VO-STF +Available resolutions: + 1080 + 720 + 432 + 360 + 216 +./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ VO-STF 720 +ffmpeg version 4.3.5-0+deb11u1 Copyright (c) 2000-2022 the FFmpeg developers +frame=78910 fps=1204 q=-1.0 Lsize= 738210kB time=00:52:36.45 bitrate=1915.9kbits/s speed=48.2x +video:685949kB audio:50702kB subtitle:9kB other streams:0kB global headers:0kB muxing overhead: 0.210475% ``` 🔧 Tripoter sous le capot diff --git a/delarte.py b/delarte.py index 1adc04f..72f9b2e 100755 --- a/delarte.py +++ b/delarte.py @@ -10,6 +10,7 @@ Licence: GNU AGPL v3: http://www.gnu.org/licenses/ This file is part of [`delarte`](https://git.afpy.org/fcode/delarte) """ 
+from __future__ import annotations import io import json @@ -21,7 +22,7 @@ import tempfile from http import HTTPStatus from os import environ -from typing import NamedTuple, cast +from typing import NamedTuple, Optional, cast from urllib.parse import urlparse from urllib.request import urlopen @@ -55,56 +56,63 @@ class Config(NamedTuple): subtitle: str versions: dict[str, tuple[str, str]] + @classmethod + def load(cls, lang: str, provider_id: str) -> Config: + """Retrieve a stream config from API.""" + url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}" + root = api_root(url) -def api_config(lang: str, provider_id: str) -> Config: - """Retrieve a stream config from API.""" - url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}" - root = api_root(url) + if root["type"] != "ConfigPlayer": + raise ValueError("API response not supported") - if root["type"] != "ConfigPlayer": - raise ValueError("API response not supported") + attrs = root["attributes"] - attrs = root["attributes"] + if attrs["metadata"]["providerId"] != provider_id: + raise ValueError("API response not supported") - if attrs["metadata"]["providerId"] != provider_id: - raise ValueError("API response not supported") + return Config( + provider_id, + attrs["metadata"]["title"], + attrs["metadata"]["subtitle"], + { + s["versions"][0]["eStat"]["ml5"]: (s["versions"][0]["label"], s["url"]) + for s in attrs["streams"] + }, + ) - return Config( - provider_id, - attrs["metadata"]["title"], - attrs["metadata"]["subtitle"], - { - s["versions"][0]["eStat"]["ml5"]: (s["url"], s["versions"][0]["label"]) - for s in attrs["streams"] - }, - ) + def url_for_version(self, version_code: str) -> str: + """Return the m3u8 url for the given version code.""" + if version_code not in self.versions: + print(f"Available versions:") + for code, (label, _) in self.versions.items(): + print(f"\t{code} - {label}") + exit(1) + + return self.versions[version_code][1] -def api_playlist(lang: str, 
def make_srt_tempfile(url):
    """Download a VTT subtitle track and convert it to a temporary SRT file.

    `url` is loaded as an M3U8 playlist which must reference exactly one
    subtitle file. That file is fetched, parsed with `webvtt` and rewritten
    as numbered cues.

    Returns the name of the temporary file. The file is created with
    ``delete=False``, so removing it is the caller's responsibility.

    Raises ValueError when the playlist lists no file or more than one, and
    RuntimeError when the subtitle request does not answer with HTTP 200.
    """
    mpeg = m3u8.load(url)
    urls = [cast(str, mpeg.base_uri) + "/" + f for f in mpeg.files]

    if not urls:
        raise ValueError("No subtitle files")

    if len(urls) > 1:
        raise ValueError("Multiple subtitle files")

    # Release the connection as soon as the payload has been read.
    with urlopen(urls[0]) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("Subtitle request failed")
        buffer = io.StringIO(http_response.read().decode("utf8"))

    with tempfile.NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    ) as f:
        # Cues are numbered from 1.
        for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
            print(i, file=f)
            # Timestamps are written with "," in place of ".".
            # NOTE(review): the middle of this expression is elided between
            # two hunks of the patch; it is reconstructed here as
            # "<start> --> <end>" -- confirm against the full file.
            print(
                re.sub(r"\.", ",", caption.start)
                + " --> "
                + re.sub(r"\.", ",", caption.end),
                file=f,
            )
            print(caption.text + "\n", file=f)

    return f.name


class Version(NamedTuple):
    """A structure representing a version M3U8 object."""

    # Video playlist URL for each resolution, keyed by
    # `stream_info.resolution[1]` converted to a string (e.g. "720").
    videos: dict[str, str]
    # URL of the single audio track.
    audio_url: str
    # (language, URL) of the subtitles track, or None when there is none.
    subtitles: Optional[tuple[str, str]]

    @classmethod
    def load(cls, url: str) -> "Version":
        """Retrieve a version from m3u8 file.

        Raises ValueError unless the playlist declares exactly one audio
        track and at most one subtitles track.
        """
        mpeg = m3u8.load(url)

        videos = {
            str(pl.stream_info.resolution[1]): mpeg.base_uri + pl.uri
            for pl in mpeg.playlists
        }

        audios = [mpeg.base_uri + m.uri for m in mpeg.media if m.type == "AUDIO"]
        if len(audios) != 1:
            raise ValueError("Unexpected missing or multiple audio tracks.")

        subtitles = [
            (m.language, mpeg.base_uri + m.uri)
            for m in mpeg.media
            if m.type == "SUBTITLES"
        ]
        if len(subtitles) > 1:
            raise ValueError("Unexpected multiple subtitles tracks.")

        return cls(videos, audios[0], subtitles[0] if subtitles else None)

    def download(self, resolution_code: str, file_base_name: str):
        """Download a given resolution (video/audio/subtitles) to an MKV file.

        When `resolution_code` is not a key of `self.videos`, the available
        resolutions are printed (highest first) and the process exits with
        status 1. Otherwise ffmpeg is run to copy the streams into
        ``{file_base_name}.mkv``; the subtitles track, when present, is
        converted to SRT first and tagged with its language.
        """
        if resolution_code not in self.videos:
            print("Available resolutions:")
            for code in sorted(map(int, self.videos), reverse=True):
                print(f"\t{code}")
            sys.exit(1)

        video_url = self.videos[resolution_code]
        output = f"{file_base_name}.mkv"
        # Arguments shared by both invocations (with and without subtitles).
        media_inputs = ["-i", video_url, "-i", self.audio_url]
        stream_copy = [
            "-c:v",
            "copy",
            "-c:a",
            "copy",
            "-bsf:a",
            "aac_adtstoasc",
        ]

        if not self.subtitles:
            subprocess.run([FFMPEG, *media_inputs, *stream_copy, output])
            return

        language, subtitles_url = self.subtitles
        srt_tempfile = make_srt_tempfile(subtitles_url)
        try:
            subprocess.run(
                [
                    FFMPEG,
                    "-i",
                    srt_tempfile,
                    *media_inputs,
                    *stream_copy,
                    "-c:s",
                    "copy",
                    "-metadata:s:s:0",
                    f"language={language}",
                    "-disposition:s:0",
                    "default",
                    output,
                ]
            )
        finally:
            # Remove the temporary SRT even if ffmpeg could not be started.
            os.unlink(srt_tempfile)


def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Not implemented yet: always raises NotImplementedError.
    """
    url = f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    raise NotImplementedError(f"Playlist retrieval is not implemented ({url})")


def main():
    """CLI function, options passed as arguments.

    Usage: ``delarte.py URL [VERSION [RESOLUTION]]``.

    The URL path must look like ``/<lang>/videos/<id>/<slug>/``. A missing or
    unknown VERSION or RESOLUTION makes the corresponding step print the
    available choices and exit with status 1.

    Raises ValueError("Invalid URL") when the URL does not have that shape or
    uses an unsupported language.
    """
    if len(sys.argv) < 2:
        print("Usage: delarte.py URL [VERSION [RESOLUTION]]")
        sys.exit(1)

    # strip("/") tolerates a URL given with or without a trailing slash.
    path_parts = urlparse(sys.argv[1]).path.strip("/").split("/")
    if len(path_parts) != 4:
        raise ValueError("Invalid URL")
    ui_lang, section, stream_id, _slug = path_parts

    if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or section != "videos":
        raise ValueError("Invalid URL")

    version_code = sys.argv[2] if len(sys.argv) > 2 else ""
    resolution_code = sys.argv[3] if len(sys.argv) > 3 else ""

    config = Config.load(ui_lang, stream_id)
    version_url = config.url_for_version(version_code)
    # "/" is replaced so the title can be used as a file name.
    file_base_name = config.title.replace("/", "-")

    version = Version.load(version_url)
    version.download(resolution_code, file_base_name)


if __name__ == "__main__":
    main()