From 68e76085d98ab8a68cfc6d719d961ae6a290ad48 Mon Sep 17 00:00:00 2001
From: freezed
Date: Mon, 5 Dec 2022 22:36:14 +0100
Subject: [PATCH] =?UTF-8?q?=E2=9A=97=20WIP:=20POC=20to=20retrieve=20locall?=
 =?UTF-8?q?y=20video=20streams?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md  |  37 ++++++++++++++-
 delarte.py | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100755 delarte.py

diff --git a/README.md b/README.md
index fccdc0f..8cf36db 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,36 @@
-# delarte
+`delarte`
+=========
 
-Du code a mettre au propre
\ No newline at end of file
+Du code à mettre au propre
+
+Notes
+-----
+
+2 dépendances:
+
+* `m3u8`
+* `webvtt`
+
+Editer `PATH to ffmpeg`
+
+```python
+python ./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/
+L'incroyable périple de Magellan (1/4)
+	VF : Français
+	VO-STF : Version originale - ST français
+	VF-STMF : Français (sourds et malentendants)
+	VFAUD : Français (audiodescription)
+	VA-STA : Allemand
+	VA-STMA : Allemand (sourds et malentendants)
+	VAAUD : Allemand (audiodescription)
+```
+
+liste les versions avec les codes
+
+rajouter le code à la ligne de commande:
+
+`python ./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ VO-STF`
+
+et c'est parti
+
+ça crée le fichier .mp4 et le(s) fichier(s) srt dans le directory en cours
diff --git a/delarte.py b/delarte.py
new file mode 100755
index 0000000..98778b4
--- /dev/null
+++ b/delarte.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""Download an ARTE video stream and its subtitles into the current directory.
+
+Usage: delarte.py URL [VERSION]
+
+Without a valid VERSION code, the program title and the available versions
+are listed and the script exits with status 1.
+"""
+
+import json
+import sys
+import re
+import io
+import shutil
+import subprocess
+
+from http import HTTPStatus
+from typing import cast
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import m3u8
+import webvtt
+
+# Use the ffmpeg found on PATH; edit the fallback if it lives elsewhere.
+FFMPEG = shutil.which('ffmpeg') or 'ffmpeg.exe'
+
+UI_LANGUAGES = ('fr', 'de', 'en', 'es', 'pl', 'it')
+
+
+def call_api(api_url):
+    """Return ``(title, versions)`` read from the player config API.
+
+    ``versions`` maps a version code to a ``(stream_url, label)`` tuple.
+    Raises ``RuntimeError`` when the response status is not 200.
+    """
+    # The context manager closes the HTTP connection once the JSON is read.
+    with urlopen(api_url) as http_response:
+        if http_response.status != HTTPStatus.OK:
+            raise RuntimeError("API request failed")
+
+        config = json.load(http_response)["data"]["attributes"]
+
+    title = config["metadata"]["title"]
+
+    versions = {
+        s["versions"][0]["eStat"]["ml5"]: (s["url"], s["versions"][0]["label"])
+        for s in config["streams"]
+    }
+
+    return (title, versions)
+
+
+def write_subtitles(m3u8_url, base_name):
+    """Save every subtitle track of the master playlist as an SRT file.
+
+    One ``{base_name}.{language}.srt`` file is written per track. Returns
+    the name of the last file written, or ``None`` when the stream has no
+    subtitle track. Raises ``ValueError`` when a track does not list
+    exactly one subtitle file, ``RuntimeError`` when its download fails.
+    """
+    master = m3u8.load(m3u8_url)
+
+    sub_m3u8_urls = [
+        (m.base_uri + '/' + m.uri, m.language)
+        for m in master.media
+        if m.type == "SUBTITLES"
+    ]
+
+    last_written = None
+    for sub_m3u8_url, sub_lang in sub_m3u8_urls:
+        sub_m3u8 = m3u8.load(sub_m3u8_url)
+        sub_urls = [cast(str, sub_m3u8.base_uri) + '/' + name for name in sub_m3u8.files]
+
+        if not sub_urls:
+            raise ValueError("No subtitle files")
+
+        if len(sub_urls) > 1:
+            raise ValueError("Multiple subtitle files")
+
+        with urlopen(sub_urls[0]) as http_response:
+            if http_response.status != HTTPStatus.OK:
+                raise RuntimeError("Subtitle request failed")
+            buffer = io.StringIO(http_response.read().decode('utf8'))
+
+        with open(f"{base_name}.{sub_lang}.srt", "w", encoding='utf8') as f:
+            for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
+                # SRT uses a comma as decimal separator where WebVTT uses a dot.
+                start = caption.start.replace(".", ",")
+                end = caption.end.replace(".", ",")
+                print(i, file=f)
+                print(f"{start} --> {end}", file=f)
+                print(caption.text + "\n", file=f)
+        last_written = f.name
+
+    return last_written
+
+
+def main(argv):
+    """Parse the command line, then download the subtitles and the video.
+
+    Returns the process exit status.
+    """
+    if len(argv) < 2:
+        print(f"usage: {argv[0]} URL [VERSION]", file=sys.stderr)
+        return 2
+
+    # Expected URL path: /<ui_lang>/videos/<stream_id>/<slug>[/]
+    parts = urlparse(argv[1]).path.strip('/').split('/')
+    if len(parts) != 4 or parts[0] not in UI_LANGUAGES or parts[1] != "videos":
+        raise ValueError("Invalid URL")
+    ui_lang, _, stream_id, _slug = parts
+    version = " ".join(argv[2:])
+
+    title, versions = call_api(
+        f"https://api.arte.tv/api/player/v2/config/{ui_lang}/{stream_id}"
+    )
+
+    filename = title.replace("/", "-")
+
+    if version not in versions:
+        print(title)
+        for code, (_, label) in versions.items():
+            print(f"\t{code} : {label}")
+        return 1
+
+    m3u8_url, _version_name = versions[version]
+
+    write_subtitles(m3u8_url, filename)
+
+    # Propagate ffmpeg's exit status instead of silently ignoring a failure.
+    return subprocess.run([
+        FFMPEG,
+        '-i', m3u8_url,
+        '-c', 'copy',
+        '-bsf:a', 'aac_adtstoasc',
+        f"{filename}.mp4",
+    ]).returncode
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))