delarte_test/delarte.py
Etienne Zind af465ad79e 1) Parse and locally temp/rewrite the main m3u8 file to enable future selection
of resolution. Picks the bigger one for now.
2) Rewrite subtitles selection to limit to selected resolution.
2022-12-06 13:19:57 +01:00

185 lines
5.0 KiB
Python
Executable File

#!/usr/bin/env python3
# coding: utf8
"""delarte.
Retrieve video stream in a local file, including sub-titles
Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://git.afpy.org/fcode/delarte)
"""
import io
import json
import os
import re
import subprocess
import sys
import tempfile
from http import HTTPStatus
from os import environ
from typing import NamedTuple, cast
from urllib.parse import urlparse
from urllib.request import urlopen
import m3u8
import webvtt
# Command used to launch ffmpeg, read from the PATH_FFMPEG environment
# variable. When the variable is unset the fallback is a placeholder
# message string, not a usable executable path.
FFMPEG = os.environ.get("PATH_FFMPEG", "ffmpeg path not found")
def api_root(url: str):
    """Retrieve the root node (infamous "data") of an API call response.

    Args:
        url: Address of the API endpoint to query.

    Returns:
        The value stored under the top-level ``"data"`` key of the decoded
        JSON response body.

    Raises:
        RuntimeError: If the response status is not 200 OK.
        ValueError: If the response ``Content-Type`` header is not exactly
            ``application/vnd.api+json; charset=utf-8``.
    """
    # The context manager closes the response (and its connection) on
    # every exit path, including the two error branches below.
    with urlopen(url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")

        if (
            http_response.getheader("Content-Type")
            != "application/vnd.api+json; charset=utf-8"
        ):
            raise ValueError("API response not supported")

        return json.load(http_response)["data"]
class Config(NamedTuple):
    """A structure representing a config API object.

    Instances are built by ``api_config`` from the attributes of a
    ``ConfigPlayer`` API response.
    """

    # Identifier of the program, as passed to the config API endpoint.
    provider_id: str
    # Value of the "title" field of the response metadata.
    title: str
    # Value of the "subtitle" field of the response metadata.
    subtitle: str
    # Maps a version code (the "eStat"/"ml5" value of a stream's first
    # version) to a (stream URL, version label) pair.
    versions: dict[str, tuple[str, str]]
def api_config(lang: str, provider_id: str) -> Config:
    """Fetch the player configuration of a program from the API.

    Args:
        lang: Language code inserted in the endpoint path.
        provider_id: Identifier of the program to look up.

    Returns:
        A ``Config`` holding the program title, subtitle and, for each
        stream, its URL and label keyed by the version code of the
        stream's first version.

    Raises:
        ValueError: If the response is not a ``ConfigPlayer`` object or
            describes a different provider id than the requested one.
    """
    endpoint = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
    root = api_root(endpoint)
    if root["type"] != "ConfigPlayer":
        raise ValueError("API response not supported")

    attrs = root["attributes"]
    metadata = attrs["metadata"]
    if metadata["providerId"] != provider_id:
        raise ValueError("API response not supported")

    title = metadata["title"]
    subtitle = metadata["subtitle"]

    # Only the first version of each stream is used, both for the key
    # (its "ml5" code) and for the human readable label.
    versions = {}
    for stream in attrs["streams"]:
        first_version = stream["versions"][0]
        code = first_version["eStat"]["ml5"]
        versions[code] = (stream["url"], first_version["label"])

    return Config(provider_id, title, subtitle, versions)
def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Placeholder: the endpoint address is built but no request is made yet.

    Raises:
        NotImplementedError: Always.
    """
    playlist_url = (
        f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    )
    raise NotImplementedError
def write_subtitles(lang, m3u8_uri, file_base_name):
    """Convert distant vtt subtitles to local srt.

    Args:
        lang: Language tag inserted in the output file name.
        m3u8_uri: URI of the subtitle playlist; it must list exactly one
            file.
        file_base_name: Output path without language tag and extension.

    Returns:
        The name of the written file, ``{file_base_name}.{lang}.srt``.

    Raises:
        ValueError: If the playlist lists no file, or more than one.
        RuntimeError: If the subtitle download status is not 200 OK.
    """
    sub_m3u8 = m3u8.load(m3u8_uri)
    sub_urls = [
        cast(str, sub_m3u8.base_uri) + "/" + file_name
        for file_name in sub_m3u8.files
    ]

    if not sub_urls:
        raise ValueError("No subtitle files")

    if len(sub_urls) > 1:
        raise ValueError("Multiple subtitle files")

    # The context manager closes the response on every exit path,
    # including the error branch.
    with urlopen(sub_urls[0]) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("Subtitle request failed")
        vtt_text = http_response.read().decode("utf8")

    srt_path = f"{file_base_name}.{lang}.srt"
    with open(srt_path, "w", encoding="utf8") as srt_file:
        for i, caption in enumerate(
            webvtt.read_buffer(io.StringIO(vtt_text)), 1
        ):
            # One cue: counter line, timing line, then the text followed
            # by an empty line. Dots in the timestamps are replaced by
            # commas.
            start = caption.start.replace(".", ",")
            end = caption.end.replace(".", ",")
            print(i, file=srt_file)
            print(f"{start} --> {end}", file=srt_file)
            print(caption.text + "\n", file=srt_file)

    return srt_path
def download_stream(m3u8_url: str, file_base_name: str):
    """Download and write the video and subtitles files.

    The variant with the highest resolution is selected from the master
    playlist at ``m3u8_url``. Its subtitle tracks are written through
    ``write_subtitles``; the variant and its other media are copied into a
    temporary playlist that ffmpeg reads to produce
    ``{file_base_name}.mp4``.

    Args:
        m3u8_url: URL of the master playlist.
        file_base_name: Output path without extension.

    Note:
        The ffmpeg exit status is not checked.
    """
    dst = m3u8.M3U8()
    src = m3u8.load(m3u8_url)

    # Sort streams by resolution (descending) and pick the bigger one.
    # A variant without resolution information sorts last instead of
    # making the comparison fail on None.
    src.playlists.sort(
        key=lambda pl: pl.stream_info.resolution or (0, 0), reverse=True
    )
    best = src.playlists[0]
    best.uri = src.base_uri + best.uri
    dst.add_playlist(best)

    for media in best.media:
        media.uri = src.base_uri + media.uri
        if media.type == "SUBTITLES":
            write_subtitles(media.language, media.uri, file_base_name)
        else:
            dst.add_media(media)

    # delete=False: the file is closed before ffmpeg is started, so it is
    # removed explicitly once we are done with it.
    tmp = tempfile.NamedTemporaryFile(
        "w", delete=False, encoding="utf8", prefix="delarte.", suffix=".m3u8"
    )
    try:
        with tmp:
            tmp.write(dst.dumps())

        subprocess.run(
            [
                FFMPEG,
                "-protocol_whitelist",
                "https,file,tls,tcp",
                "-i",
                tmp.name,
                "-c",
                "copy",
                "-bsf:a",
                "aac_adtstoasc",
                f"{file_base_name}.mp4",
            ]
        )
    finally:
        # Remove the temporary playlist even when writing it or launching
        # ffmpeg raised.
        os.unlink(tmp.name)
def main():
    """CLI function, options passed as arguments.

    ``sys.argv[1]`` is the program page URL, whose path must have the form
    ``/<lang>/videos/<id>/<slug>/``. The remaining arguments, joined with
    spaces, form the code of the version to download.

    When the requested version is not available for the program, the
    title and the available versions are printed and the process exits
    with status 1.

    Raises:
        ValueError: If the URL is missing or does not have the expected
            form.
    """
    if len(sys.argv) < 2:
        raise ValueError("Invalid URL")

    # [1:-1] drops the leading slash and the last character of the path,
    # which is expected to be a trailing slash.
    path_parts = urlparse(sys.argv[1]).path[1:-1].split("/")
    if len(path_parts) != 4:
        raise ValueError("Invalid URL")
    ui_lang, section, stream_id, _slug = path_parts

    version = " ".join(sys.argv[2:])

    if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or section != "videos":
        raise ValueError("Invalid URL")

    config = api_config(ui_lang, stream_id)

    # "/" would be read as a directory separator in the output file name.
    file_base_name = config.title.replace("/", "-")

    if version not in config.versions:
        print(f"{config.title} - {config.subtitle}")
        for version_code, (_url, version_label) in config.versions.items():
            print(f"\t{version_code} : {version_label}")
        # sys.exit rather than the exit() helper, which is only installed
        # by the site module.
        sys.exit(1)

    m3u8_url, _version_name = config.versions[version]
    download_stream(m3u8_url, file_base_name)
# Run the CLI only when the file is executed as a script; the value
# returned by main() is handed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())