diff --git a/README.md b/README.md index 6206261..f46e239 100644 --- a/README.md +++ b/README.md @@ -295,14 +295,18 @@ This file shows the file(s) containing the subtitles data. The actual build of the video file is handled by [ffmpeg](https://ffmpeg.org/). The script expects [ffmpeg](https://ffmpeg.org/) to be installed in the environement and will call it as a subprocess. -##### Why not use FFMPEG direcly with the _version index_ URL ? +#### Why not use FFMPEG directly with the _version index_ URL ? So we can select the video resolution and not rely on stream mapping arguments in `ffmpeg`. -##### Why not use VTT subtitles direcly ? +#### Why not use VTT subtitles directly ? Because it fails 😒. +#### Why not use FFMPEG directly with the _video_ and _audio_ _index_ URL ? + +Because some programs would randomly fail 😒. Probably due to invalid _chunking_ on the server. + ### 📌 Dependences diff --git a/src/delarte/__init__.py b/src/delarte/__init__.py index 629258a..a0e28e9 100644 --- a/src/delarte/__init__.py +++ b/src/delarte/__init__.py @@ -8,12 +8,16 @@ This file is part of [`delarte`](https://git.afpy.org/fcode/delarte.git) """ __version__ = "0.1" +import contextlib import io import json +import os import re -import tempfile from http import HTTPStatus +from http.client import HTTPSConnection, HTTPConnection +from tempfile import NamedTemporaryFile +from urllib.parse import urlparse from urllib.request import urlopen import m3u8 @@ -75,9 +79,9 @@ def build_file_base_name(config): return config["attributes"]["metadata"]["title"].replace("/", "-") -def make_srt_tempfile(subtitles_index_url): +def download_subtitles_input(index_url, progress): """Return a temporary file name where VTT subtitle has been downloaded/converted to SRT.""" - subtitles_index = m3u8.load(subtitles_index_url) + subtitles_index = m3u8.load(index_url) urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files] if not urls: @@ -86,13 +90,15 @@ def
make_srt_tempfile(subtitles_index_url): if len(urls) > 1: raise ValueError("Multiple subtitle files") + progress(0, 2) http_response = urlopen(urls[0]) if http_response.status != HTTPStatus.OK: raise RuntimeError("Subtitle request failed") buffer = io.StringIO(http_response.read().decode("utf8")) + progress(1, 2) - with tempfile.NamedTemporaryFile( + with NamedTemporaryFile( "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8" ) as f: i = 1 @@ -106,6 +112,7 @@ def make_srt_tempfile(subtitles_index_url): ) print(caption.text + "\n", file=f) i += 1 + progress(2, 2) return f.name @@ -177,17 +184,19 @@ def select_resolution(version_index, resolution_code): return None -def build_ffmpeg_cmd(video_index_url, audio_track, subtitles_track, file_base_name): +def build_ffmpeg_cmd(inputs, file_base_name): """Build FFMPEG args.""" - audio_lang, audio_index_url = audio_track - if subtitles_track: - subtitles_lang, subtitles_file = subtitles_track + video_input, audio_track, subtitles_track = inputs - cmd = ["ffmpeg"] - cmd.extend(["-i", video_index_url]) - cmd.extend(["-i", audio_index_url]) + audio_lang, audio_input = audio_track if subtitles_track: - cmd.extend(["-i", subtitles_file]) + subtitles_lang, subtitles_input = subtitles_track + + cmd = ["ffmpeg", "-hide_banner"] + cmd.extend(["-i", video_input]) + cmd.extend(["-i", audio_input]) + if subtitles_track: + cmd.extend(["-i", subtitles_input]) cmd.extend(["-c:v", "copy"]) cmd.extend(["-c:a", "copy"]) @@ -203,3 +212,127 @@ def build_ffmpeg_cmd(video_index_url, audio_track, subtitles_track, file_base_na cmd.append(f"{file_base_name}.mkv") return cmd + + +def parse_byterange(obj): + """Parse a M3U8 `byterange` (count@offset) into http range (range_start, range_end).""" + count, offset = [int(v) for v in obj.byterange.split("@")] + return offset, offset + count - 1 + + +def load_av_index(index_url): + """Load a M3U8 audio or video index.""" + index = m3u8.load(index_url) + + file_name = index.segment_map[0].uri
+ range_start, range_end = parse_byterange(index.segment_map[0]) + if range_start != 0: + raise ValueError("Invalid a/v index: does not start at 0") + chunks = [(range_start, range_end)] + total = range_end + 1 + + for segment in index.segments: + if segment.uri != file_name: + raise ValueError("Invalid a/v index: multiple file names") + + range_start, range_end = parse_byterange(segment) + if range_start != total: + raise ValueError( + f"Invalid a/v index: discontinuous ranges ({range_start} != {total})" + ) + + chunks.append((range_start, range_end)) + total = range_end + 1 + + return urlparse(index.segment_map[0].absolute_uri), chunks + + +def download_av_input(index_url, progress): + """Download an audio or video stream to a temporary file.""" + url, ranges = load_av_index(index_url) + total = ranges[-1][1] + + Connector = HTTPSConnection if url.scheme == "https" else HTTPConnection + connection = Connector(url.hostname) + connection.connect() + + with ( + NamedTemporaryFile( + mode="w+b", delete=False, prefix="delarte.", suffix=".mp4" + ) as f, + contextlib.closing(connection) as c, + ): + for range_start, range_end in ranges: + c.request( + "GET", + url.path, + headers={ + "Accept": "*/*", + "Accept-Language": "fr,en;q=0.7,en-US;q=0.3", + "Accept-Encoding": "gzip, deflate, br, identity", + "Range": f"bytes={range_start}-{range_end}", + "Origin": "https://www.arte.tv", + "Connection": "keep-alive", + "Referer": "https://www.arte.tv/", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "cross-site", + "Sec-GPC": "1", + "DNT": "1", + }, + ) + r = c.getresponse() + if r.status != 206: + raise ValueError(f"Invalid response status {r.status}") + + content = r.read() + if len(content) != range_end - range_start + 1: + raise ValueError("Invalid range length") + f.write(content) + + progress(range_end, total) + + return f.name + + +@contextlib.contextmanager +def download_inputs(remote_inputs, progress): + """Download inputs in temporary
files.""" + # It is implemented as a context manager that will delete temporary files on exit. + + video_index_url, audio_track, subtitles_track = remote_inputs + + video_filename = None + audio_filename = None + subtitles_filename = None + + try: + video_filename = download_av_input( + video_index_url, lambda i, n: progress("video", i, n) + ) + + (audio_lang, audio_index_url) = audio_track + audio_filename = download_av_input( + audio_index_url, lambda i, n: progress("audio", i, n) + ) + + if subtitles_track: + (subtitles_lang, subtitles_index_url) = subtitles_track + subtitles_filename = download_subtitles_input( + subtitles_index_url, lambda i, n: progress("subtitles", i, n) + ) + + yield ( + video_filename, + (audio_lang, audio_filename), + (subtitles_lang, subtitles_filename), + ) + else: + yield (video_filename, (audio_lang, audio_filename), None) + finally: + if video_filename and os.path.isfile(video_filename): + os.unlink(video_filename) + if audio_filename and os.path.isfile(audio_filename): + os.unlink(audio_filename) + if subtitles_filename and os.path.isfile(subtitles_filename): + os.unlink(subtitles_filename) diff --git a/src/delarte/__main__.py b/src/delarte/__main__.py index 2a54021..fb248c9 100644 --- a/src/delarte/__main__.py +++ b/src/delarte/__main__.py @@ -5,22 +5,22 @@ usage: delarte [-h|--help] - print this message or: delarte program_page_url version - show available resolutions or: delarte program_page_url version resolution - download the given video """ -import os import subprocess import sys +import time from urllib.parse import urlparse from . 
import ( build_ffmpeg_cmd, build_file_base_name, + download_inputs, select_resolution, select_version, iter_resolutions, iter_versions, load_config_api, load_version_index, - make_srt_tempfile, ) @@ -44,6 +44,33 @@ def print_available_resolutions(version_index, f): print(f"\t{code} - {label}", file=f) +def create_progress(): + """Create a progress handler for input downloads.""" + state = { + "last_update_time": 0, + "last_channel": None, + } + + def progress(channel, current, total): + now = time.time() + + if current == total: + print(f"\rDownloading {channel}: 100.0%") + state["last_update_time"] = now + elif channel != state["last_channel"]: + print(f"Downloading {channel}: 0.0%", end="") + state["last_update_time"] = now + state["last_channel"] = channel + elif now - state["last_update_time"] > 1: + print( + f"\rDownloading {channel}: {int(1000.0 * current / total) / 10.0}%", + end="", + ) + state["last_update_time"] = now + + return progress + + def main(): """CLI command.""" args = sys.argv[1:] @@ -92,27 +119,17 @@ def main(): print_available_resolutions(version_index, sys.stdout) return 0 - stream_info = select_resolution(version_index, args.pop(0)) - if stream_info is None: + remote_inputs = select_resolution(version_index, args.pop(0)) + if remote_inputs is None: fail("Invalid resolution") print_available_resolutions(version_index, sys.stderr) return 0 - video_index_url, audio_track, subtitles_track = stream_info - if subtitles_track: - subtitles_lang, subtitles_index_url = subtitles_track - subtitle_file = make_srt_tempfile(subtitles_index_url) - subtitles_track = (subtitles_lang, subtitle_file) - file_base_name = build_file_base_name(config) - args = build_ffmpeg_cmd( - video_index_url, audio_track, subtitles_track, file_base_name - ) - - subprocess.run(args) - if subtitle_file: - os.unlink(subtitle_file) + with download_inputs(remote_inputs, create_progress()) as temp_inputs: + args = build_ffmpeg_cmd(temp_inputs, file_base_name) + subprocess.run(args)
if __name__ == "__main__":