From be4363a339468da63a6f85d7646060dbc6fed154 Mon Sep 17 00:00:00 2001 From: Barbagus Date: Sun, 11 Dec 2022 16:09:11 +0100 Subject: [PATCH 1/2] Fix issue #6 on FFMPEG header error Handle the audio and video channel downloading to temporary files prior to calling ffmpeg. Although it might not be necessary, the download is made by "chunks" as it would be by a client/player. Downloading progress feedback is printed to the terminal. --- src/delarte/__init__.py | 157 +++++++++++++++++++++++++++++++++++++--- src/delarte/__main__.py | 51 ++++++++----- 2 files changed, 179 insertions(+), 29 deletions(-) diff --git a/src/delarte/__init__.py b/src/delarte/__init__.py index 629258a..a0e28e9 100644 --- a/src/delarte/__init__.py +++ b/src/delarte/__init__.py @@ -8,12 +8,16 @@ This file is part of [`delarte`](https://git.afpy.org/fcode/delarte.git) """ __version__ = "0.1" +import contextlib import io import json +import os import re -import tempfile from http import HTTPStatus +from http.client import HTTPSConnection, HTTPConnection +from tempfile import NamedTemporaryFile +from urllib.parse import urlparse from urllib.request import urlopen import m3u8 @@ -75,9 +79,9 @@ def build_file_base_name(config): return config["attributes"]["metadata"]["title"].replace("/", "-") -def make_srt_tempfile(subtitles_index_url): +def download_subtitles_input(index_url, progress): """Return a temporary file name where VTT subtitle has been downloaded/converted to SRT.""" - subtitles_index = m3u8.load(subtitles_index_url) + subtitles_index = m3u8.load(index_url) urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files] if not urls: @@ -86,13 +90,15 @@ def make_srt_tempfile(subtitles_index_url): if len(urls) > 1: raise ValueError("Multiple subtitle files") + progress(0, 2) http_response = urlopen(urls[0]) if http_response.status != HTTPStatus.OK: raise RuntimeError("Subtitle request failed") buffer = io.StringIO(http_response.read().decode("utf8")) + progress(1, 2) - with 
tempfile.NamedTemporaryFile( + with NamedTemporaryFile( "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8" ) as f: i = 1 @@ -106,6 +112,7 @@ def make_srt_tempfile(subtitles_index_url): ) print(caption.text + "\n", file=f) i += 1 + progress(2, 2) return f.name @@ -177,17 +184,19 @@ def select_resolution(version_index, resolution_code): return None -def build_ffmpeg_cmd(video_index_url, audio_track, subtitles_track, file_base_name): +def build_ffmpeg_cmd(inputs, file_base_name): """Build FFMPEG args.""" - audio_lang, audio_index_url = audio_track - if subtitles_track: - subtitles_lang, subtitles_file = subtitles_track + video_input, audio_track, subtitles_track = inputs - cmd = ["ffmpeg"] - cmd.extend(["-i", video_index_url]) - cmd.extend(["-i", audio_index_url]) + audio_lang, audio_input = audio_track if subtitles_track: - cmd.extend(["-i", subtitles_file]) + subtitles_lang, subtitles_input = subtitles_track + + cmd = ["ffmpeg", "-hide_banner"] + cmd.extend(["-i", video_input]) + cmd.extend(["-i", audio_input]) + if subtitles_track: + cmd.extend(["-i", subtitles_input]) cmd.extend(["-c:v", "copy"]) cmd.extend(["-c:a", "copy"]) @@ -203,3 +212,127 @@ def build_ffmpeg_cmd(video_index_url, audio_track, subtitles_track, file_base_na cmd.append(f"{file_base_name}.mkv") return cmd + + +def parse_byterange(obj): + """Parse a M3U8 `byterange` (count@offset) into http range (range_start, rang_end).""" + count, offset = [int(v) for v in obj.byterange.split("@")] + return offset, offset + count - 1 + + +def load_av_index(index_url): + """Load a M3U8 audio or video index.""" + index = m3u8.load(index_url) + + file_name = index.segment_map[0].uri + range_start, range_end = parse_byterange(index.segment_map[0]) + if range_start != 0: + raise ValueError("Invalid a/v index: does not start at 0") + chunks = [(range_start, range_end)] + total = range_end + 1 + + for segment in index.segments: + if segment.uri != file_name: + raise ValueError("Invalid a/v index: 
multiple file names") + + range_start, range_end = parse_byterange(segment) + if range_start != total: + raise ValueError( + f"Invalid a/v index: discontious ranges ({range_start} != {total})" + ) + + chunks.append((range_start, range_end)) + total = range_end + 1 + + return urlparse(index.segment_map[0].absolute_uri), chunks + + +def download_av_input(index_url, progress): + """Download an audio or video stream to temporary directory.""" + url, ranges = load_av_index(index_url) + total = ranges[-1][1] + + Connector = HTTPSConnection if url.scheme == "https" else HTTPConnection + connection = Connector(url.hostname) + connection.connect() + + with ( + NamedTemporaryFile( + mode="w+b", delete=False, prefix="delarte.", suffix=".mp4" + ) as f, + contextlib.closing(connection) as c, + ): + for range_start, range_end in ranges: + c.request( + "GET", + url.path, + headers={ + "Accept": "*/*", + "Accept-Language": "fr,en;q=0.7,en-US;q=0.3", + "Accept-Encoding": "gzip, deflate, br, identity", + "Range": f"bytes={range_start}-{range_end}", + "Origin": "https://www.arte.tv", + "Connection": "keep-alive", + "Referer": "https://www.arte.tv/", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "cross-site", + "Sec-GPC": "1", + "DNT": "1", + }, + ) + r = c.getresponse() + if r.status != 206: + raise ValueError(f"Invalid response status {r.status}") + + content = r.read() + if len(content) != range_end - range_start + 1: + raise ValueError("Invalid range length") + f.write(content) + + progress(range_end, total) + + return f.name + + +@contextlib.contextmanager +def download_inputs(remote_inputs, progress): + """Download inputs in temporary files.""" + # It is implemented as a context manager that will delete temporary files on exit. 
+ + video_index_url, audio_track, subtitles_track = remote_inputs + + video_filename = None + audio_filename = None + subtitles_filename = None + + try: + video_filename = download_av_input( + video_index_url, lambda i, n: progress("video", i, n) + ) + + (audio_lang, audio_index_url) = audio_track + audio_filename = download_av_input( + audio_index_url, lambda i, n: progress("audio", i, n) + ) + + if subtitles_track: + (subtitles_lang, subtitles_index_url) = subtitles_track + subtitles_filename = download_subtitles_input( + subtitles_index_url, lambda i, n: progress("subtitles", i, n) + ) + + yield ( + video_filename, + (audio_lang, audio_filename), + (subtitles_lang, subtitles_filename), + ) + else: + yield (video_filename, (audio_lang, audio_filename), None) + finally: + if video_filename and os.path.isfile(video_filename): + os.unlink(video_filename) + if audio_filename and os.path.isfile(audio_filename): + os.unlink(audio_filename) + if subtitles_filename and os.path.isfile(subtitles_filename): + os.unlink(subtitles_filename) diff --git a/src/delarte/__main__.py b/src/delarte/__main__.py index 2a54021..fb248c9 100644 --- a/src/delarte/__main__.py +++ b/src/delarte/__main__.py @@ -5,22 +5,22 @@ usage: delarte [-h|--help] - print this message or: delarte program_page_url version - show available resolutions or: delarte program_page_url version resolution - download the given video """ -import os import subprocess import sys +import time from urllib.parse import urlparse from . 
import ( build_ffmpeg_cmd, build_file_base_name, + download_inputs, select_resolution, select_version, iter_resolutions, iter_versions, load_config_api, load_version_index, - make_srt_tempfile, ) @@ -44,6 +44,33 @@ def print_available_resolutions(version_index, f): print(f"\t{code} - {label}", file=f) +def create_progress(): + """Create a progress handler for input downloads.""" + state = { + "last_update_time": 0, + "last_channel": None, + } + + def progress(channel, current, total): + now = time.time() + + if current == total: + print(f"\rDownloading {channel}: 100.0%") + state["last_update_time"] = now + elif channel != state["last_channel"]: + print(f"Dowloading {channel}: 0.0%", end="") + state["last_update_time"] = now + state["last_channel"] = channel + elif now - state["last_update_time"] > 1: + print( + f"\rDownloading {channel}: {int(1000.0 * current / total) / 10.0}%", + end="", + ) + state["last_update_time"] = now + + return progress + + def main(): """CLI command.""" args = sys.argv[1:] @@ -92,27 +119,17 @@ def main(): print_available_resolutions(version_index, sys.stdout) return 0 - stream_info = select_resolution(version_index, args.pop(0)) - if stream_info is None: + remote_inputs = select_resolution(version_index, args.pop(0)) + if remote_inputs is None: fail("Invalid resolution") print_available_resolutions(version_index, sys.stderr) return 0 - video_index_url, audio_track, subtitles_track = stream_info - if subtitles_track: - subtitles_lang, subtitles_index_url = subtitles_track - subtitle_file = make_srt_tempfile(subtitles_index_url) - subtitles_track = (subtitles_lang, subtitle_file) - file_base_name = build_file_base_name(config) - args = build_ffmpeg_cmd( - video_index_url, audio_track, subtitles_track, file_base_name - ) - - subprocess.run(args) - if subtitle_file: - os.unlink(subtitle_file) + with download_inputs(remote_inputs, create_progress()) as temp_inputs: + args = build_ffmpeg_cmd(temp_inputs, file_base_name) + subprocess.run(args) 
if __name__ == "__main__": From ddf323cb30b8aa97f274168409dbd3c55013ded5 Mon Sep 17 00:00:00 2001 From: Barbagus Date: Sun, 11 Dec 2022 18:36:46 +0100 Subject: [PATCH 2/2] Update readme according to #6 bug fix --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6206261..f46e239 100644 --- a/README.md +++ b/README.md @@ -295,14 +295,18 @@ This file shows the file(s) containing the subtitles data. The actual build of the video file is handled by [ffmpeg](https://ffmpeg.org/). The script expects [ffmpeg](https://ffmpeg.org/) to be installed in the environement and will call it as a subprocess. -##### Why not use FFMPEG direcly with the _version index_ URL ? +#### Why not use FFMPEG directly with the _version index_ URL ? So we can select the video resolution and not rely on stream mapping arguments in `ffmpeg`. -##### Why not use VTT subtitles direcly ? +#### Why not use VTT subtitles directly ? Because it fails 😒. +#### Why not use FFMPEG directly with the _video_ and _audio_ _index_ URL ? + +Because some programs would randomly fail 😒. Probably due to invalid _chunking_ on the server. + ### 📌 Dependences