delarte_test/delarte.py
Etienne Zind d908e4f15b - refactor API calls
- define namedtuple for "config" API
2022-12-06 09:15:15 +01:00

159 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3
# coding: utf8
"""delarte.
Retrieve a video stream into a local file, including subtitles.
Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://git.afpy.org/fcode/delarte)
"""
import json
import sys
import re
import io
import subprocess
from http import HTTPStatus
from os import environ
from typing import NamedTuple, cast
from urllib.parse import urlparse
from urllib.request import urlopen
import m3u8
import webvtt
# Path of the ffmpeg executable handed to `subprocess.run`.
# The PATH_FFMPEG environment variable takes precedence; when it is not set,
# fall back to the bare command name so the executable is looked up on the
# system PATH (an error message is not a usable executable path).
FFMPEG = environ.get("PATH_FFMPEG", "ffmpeg")
def api_root(url: str):
    """Retrieve the root node (infamous "data") of an API call response.

    The response is opened as a context manager so the underlying
    connection is released on every path, including the error ones.

    Raises:
        RuntimeError: the response status is not 200 OK.
        ValueError: the response Content-Type is not the expected
            "application/vnd.api+json; charset=utf-8".
    """
    with urlopen(url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")

        content_type = http_response.getheader("Content-Type")
        if content_type != "application/vnd.api+json; charset=utf-8":
            raise ValueError("API response not supported")

        # The payload must be parsed while the response is still open.
        return json.load(http_response)["data"]
class Config(NamedTuple):
    """Stream information extracted from a "config" API response."""

    # Identifier the configuration was requested for.
    provider_id: str
    # "title" entry of the response metadata.
    title: str
    # "subtitle" entry of the response metadata.
    subtitle: str
    # Maps a version code (the "ml5" value of a stream's first version) to a
    # (stream URL, version label) pair.
    versions: dict[str, tuple[str, str]]
def api_config(lang: str, provider_id: str) -> Config:
    """Fetch the player configuration of a program and return it as a `Config`.

    Raises ValueError when the response is not a "ConfigPlayer" document or
    when it describes another provider id than the one requested.
    """
    root = api_root(
        f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
    )

    if root["type"] != "ConfigPlayer":
        raise ValueError("API response not supported")

    attributes = root["attributes"]
    metadata = attributes["metadata"]
    if metadata["providerId"] != provider_id:
        raise ValueError("API response not supported")

    title = metadata["title"]
    subtitle = metadata["subtitle"]

    # Index every stream by the code of its first version.
    versions = {}
    for stream in attributes["streams"]:
        first_version = stream["versions"][0]
        code = first_version["eStat"]["ml5"]
        versions[code] = (stream["url"], first_version["label"])

    return Config(provider_id, title, subtitle, versions)
def api_playlist(lang: str, provider_id: str):
    """Placeholder for the "playlist" API call; always raises NotImplementedError."""
    # Endpoint is built but not queried yet.
    playlist_url = f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    raise NotImplementedError()
def write_subtitles(m3u8_url, base_name):
    """Convert distant vtt subtitles to local srt.

    Every media entry of type "SUBTITLES" declared by the playlist at
    `m3u8_url` is downloaded and written to `{base_name}.{language}.srt`.

    Returns:
        The name of the last file written, or None when the playlist
        declares no subtitles.

    Raises:
        ValueError: a subtitle playlist does not reference exactly one file.
        RuntimeError: a subtitle download is not answered with 200 OK.
    """
    master = m3u8.load(m3u8_url)
    tracks = [
        (m.base_uri + "/" + m.uri, m.language)
        for m in master.media
        if m.type == "SUBTITLES"
    ]

    # Stays None when there is no subtitle track, instead of failing on an
    # unbound name at return time.
    written = None

    for sub_m3u8_url, sub_lang in tracks:
        sub_m3u8 = m3u8.load(sub_m3u8_url)
        sub_urls = [cast(str, sub_m3u8.base_uri) + "/" + f for f in sub_m3u8.files]

        if not sub_urls:
            raise ValueError("No subtitle files")

        if len(sub_urls) > 1:
            raise ValueError("Multiple subtitle files")

        # Release the connection as soon as the payload has been read.
        with urlopen(sub_urls[0]) as http_response:
            if http_response.status != HTTPStatus.OK:
                raise RuntimeError("Subtitle request failed")
            buffer = io.StringIO(http_response.read().decode("utf8"))

        with open(f"{base_name}.{sub_lang}.srt", "w", encoding="utf8") as f:
            for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
                # SRT timestamps use a comma where WebVTT uses a dot.
                start = caption.start.replace(".", ",")
                end = caption.end.replace(".", ",")
                print(i, file=f)
                print(start + " --> " + end, file=f)
                print(caption.text + "\n", file=f)
            written = f.name

    # Returning only after the loop lets every track be written.
    return written
def main():
    """CLI function, options passed as arguments.

    `sys.argv[1]` is the URL of a program page, whose path must look like
    `/<lang>/videos/<stream id>/<slug>` (a trailing slash is optional).
    The remaining arguments, joined with spaces, select the version to
    download.

    When the requested version is not available, the program title and the
    available versions are printed and the process exits with status 1.
    Otherwise the subtitles are written and ffmpeg copies the stream into
    `<title>.mp4`.

    Returns:
        The exit status of ffmpeg, so a failed download is reported to the
        caller instead of being silently turned into a success.

    Raises:
        ValueError: the URL does not have the expected shape.
    """
    # strip("/") copes with URLs given with or without a trailing slash.
    parts = urlparse(sys.argv[1]).path.strip("/").split("/")
    if len(parts) != 4:
        raise ValueError("Invalid URL")

    ui_lang, section, stream_id, _slug = parts
    version = " ".join(sys.argv[2:])

    if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or section != "videos":
        raise ValueError("Invalid URL")

    config = api_config(ui_lang, stream_id)
    title = config.title
    versions = config.versions

    # "/" is not allowed in a file name.
    filename = title.replace("/", "-")

    if version not in versions:
        print(title)
        for code, (_, label) in versions.items():
            print(f"\t{code} : {label}")
        # sys.exit rather than the `exit` helper, which only exists when the
        # `site` module has been loaded.
        sys.exit(1)

    m3u8_url, _version_label = versions[version]

    write_subtitles(m3u8_url, filename)

    completed = subprocess.run(
        [
            FFMPEG,
            "-i",
            m3u8_url,
            "-c",
            "copy",
            "-bsf:a",
            "aac_adtstoasc",
            f"{filename}.mp4",
        ],
        check=False,
    )
    return completed.returncode
# Run the CLI only when executed as a script; the value returned by main()
# is passed to sys.exit().
if __name__ == "__main__":
    sys.exit(main())