#!/usr/bin/env python3
# coding:utf-8
"""delarte.
Retrieve video stream in a local file, including sub-titles
Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://git.afpy.org/fcode/delarte)
"""
import json
import sys
import re
import io
import subprocess
from http import HTTPStatus
from os import environ
from typing import cast
from urllib.parse import urlparse
from urllib.request import urlopen
import m3u8
import webvtt

# Command used to launch ffmpeg, read from the PATH_FFMPEG environment
# variable. When the variable is unset, the placeholder text below is stored
# instead; it is not a usable executable path.
# NOTE(review): presumably chosen so the resulting launch failure is
# self-explanatory -- confirm before replacing it with a real default.
FFMPEG = environ.get("PATH_FFMPEG", "ffmpeg path not found")


def call_api(api_url):
    """Fetch the player configuration and list the available stream versions.

    Args:
        api_url: URL of the player configuration API for one programme.

    Returns:
        A ``(title, versions)`` tuple. ``title`` is read from the
        configuration metadata. ``versions`` maps the ``eStat.ml5`` code of
        each stream's first version to a ``(stream_url, label)`` pair.

    Raises:
        RuntimeError: If the response status is not ``200 OK``.
    """
    # The context manager closes the HTTP response on every exit path,
    # including the error raised below and JSON decoding failures.
    with urlopen(api_url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")
        config = json.load(http_response)["data"]["attributes"]

    title = config["metadata"]["title"]

    # Only the first entry of each stream's "versions" list is considered.
    versions = {
        s["versions"][0]["eStat"]["ml5"]: (s["url"], s["versions"][0]["label"])
        for s in config["streams"]
    }

    return (title, versions)


def write_subtitles(m3u8_url, base_name):
    """Convert the remote WebVTT subtitle tracks to local SubRip files.

    Every ``SUBTITLES`` media entry of the master playlist is downloaded and
    written to ``<base_name>.<language>.srt``.

    Args:
        m3u8_url: URL of the master m3u8 playlist.
        base_name: Output file name without the language and extension.

    Returns:
        The name of the last ``.srt`` file written, or ``None`` when the
        playlist declares no subtitle track.

    Raises:
        ValueError: If a subtitle playlist lists no file or more than one.
        RuntimeError: If a subtitle download does not answer ``200 OK``.
    """
    main = m3u8.load(m3u8_url)

    sub_m3u8_urls = [
        (m.base_uri + "/" + m.uri, m.language)
        for m in main.media
        if m.type == "SUBTITLES"
    ]

    # NOTE(review): the return statement is placed after the loop so that
    # every track is converted; a playlist without subtitles yields None.
    srt_name = None

    for sub_m3u8_url, sub_lang in sub_m3u8_urls:
        sub_m3u8 = m3u8.load(sub_m3u8_url)

        sub_urls = [
            cast(str, sub_m3u8.base_uri) + "/" + name for name in sub_m3u8.files
        ]

        if not sub_urls:
            raise ValueError("No subtitle files")

        if len(sub_urls) > 1:
            raise ValueError("Multiple subtitle files")

        # Close the HTTP response as soon as its payload has been read.
        with urlopen(sub_urls[0]) as http_response:
            if http_response.status != HTTPStatus.OK:
                raise RuntimeError("Subtitle request failed")
            buffer = io.StringIO(http_response.read().decode("utf8"))

        with open(f"{base_name}.{sub_lang}.srt", "w", encoding="utf8") as srt_file:
            for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
                print(i, file=srt_file)
                # SubRip timestamps use a comma where WebVTT uses a dot.
                print(
                    caption.start.replace(".", ",")
                    + " --> "
                    + caption.end.replace(".", ","),
                    file=srt_file,
                )
                # The extra newline leaves a blank line between cues.
                print(caption.text + "\n", file=srt_file)

        srt_name = srt_file.name

    return srt_name


# command line arguments
if len(sys.argv) < 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    sys.exit("usage: delarte.py <arte.tv video URL> [version ...]")

try:
    # Expected path layout: /<ui_lang>/videos/<stream_id>/<slug>[/]
    # strip("/") accepts the URL with or without a trailing slash.
    (UI_LANG, _, STREAM_ID, SLUG) = urlparse(sys.argv[1]).path.strip("/").split("/")
except ValueError:
    # Wrong number of path segments.
    raise ValueError("Invalid URL") from None

# Remaining arguments are joined so a version code may contain spaces.
VERSION = " ".join(sys.argv[2:])

if UI_LANG not in ("fr", "de", "en", "es", "pl", "it") or _ != "videos":
    raise ValueError("Invalid URL")

TITLE, VERSIONS = call_api(
    f"https://api.arte.tv/api/player/v2/config/{UI_LANG}/{STREAM_ID}"
)

# "/" cannot appear in a file name.
FILENAME = TITLE.replace("/", "-")

if VERSION not in VERSIONS:
    # No (or unknown) version requested: list the choices and stop.
    print(TITLE)
    for version_code, (_stream_url, version_label) in VERSIONS.items():
        print(f"\t{version_code} : {version_label}")
    sys.exit(1)

M3U8_URL, VERSION_NAME = VERSIONS[VERSION]
write_subtitles(M3U8_URL, FILENAME)

# check=True raises CalledProcessError when ffmpeg exits with an error,
# instead of silently reporting success.
subprocess.run(
    [FFMPEG, "-i", M3U8_URL, "-c", "copy", "-bsf:a", "aac_adtstoasc", f"{FILENAME}.mp4"],
    check=True,
)