* refactoring using classmethods

* video resolution selection
* use video/audio URLs directly in FFMPEG command
  (no temporary m3u8 file anymore)
* embed subtitle in final video file
* moved to MKV container to enable auto-on subtitle
This commit is contained in:
Etienne Zind 2022-12-06 21:05:34 +01:00
parent af465ad79e
commit c0feaa820a
2 changed files with 154 additions and 92 deletions

View File

@ -22,21 +22,26 @@ mkdir ~/.venvs && python3 -m venv ~/.venvs/delarte
source ~/.venvs/delarte/bin/activate
pip install -r requirements.txt
export PATH_FFMPEG=$(which ffmpeg)
./delarte.py https://www.arte.tv/fr/videos/093644-001-A/meaningless_strings_but_mandatory/
L'incroyable périple de Magellan (1/4)
VF : Français
VO-STF : Version originale - ST français
VF-STMF : Français (sourds et malentendants)
VFAUD : Français (audiodescription)
VA-STA : Allemand
VA-STMA : Allemand (sourds et malentendants)
VAAUD : Allemand (audiodescription)
```
Rajouter le code sous-titre en paramètre:
```python
./delarte.py https://www.arte.tv/fr/videos/093644-001-A/meaningless_strings_but_mandatory/ VO-STF
$ ./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/
Available versions:
VF - Français
VO-STF - Version originale - ST français
VF-STMF - Français (sourds et malentendants)
VFAUD - Français (audiodescription)
VA-STA - Allemand
VA-STMA - Allemand (sourds et malentendants)
VAAUD - Allemand (audiodescription)
$ ./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ VO-STF
Available resolutions:
1080
720
432
360
216
$ ./delarte.py https://www.arte.tv/fr/videos/093644-001-A/l-incroyable-periple-de-magellan-1-4/ VO-STF 720
ffmpeg version 4.3.5-0+deb11u1 Copyright (c) 2000-2022 the FFmpeg developers
frame=78910 fps=1204 q=-1.0 Lsize= 738210kB time=00:52:36.45 bitrate=1915.9kbits/s speed=48.2x
video:685949kB audio:50702kB subtitle:9kB other streams:0kB global headers:0kB muxing overhead: 0.210475%
```
🔧 Tripoter sous le capot

View File

@ -10,6 +10,7 @@ Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://git.afpy.org/fcode/delarte)
"""
from __future__ import annotations
import io
import json
@ -21,7 +22,7 @@ import tempfile
from http import HTTPStatus
from os import environ
from typing import NamedTuple, cast
from typing import NamedTuple, Optional, cast
from urllib.parse import urlparse
from urllib.request import urlopen
@ -55,56 +56,63 @@ class Config(NamedTuple):
subtitle: str
versions: dict[str, tuple[str, str]]
@classmethod
def load(cls, lang: str, provider_id: str) -> Config:
"""Retrieve a stream config from API."""
url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
root = api_root(url)
def api_config(lang: str, provider_id: str) -> Config:
"""Retrieve a stream config from API."""
url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
root = api_root(url)
if root["type"] != "ConfigPlayer":
raise ValueError("API response not supported")
if root["type"] != "ConfigPlayer":
raise ValueError("API response not supported")
attrs = root["attributes"]
attrs = root["attributes"]
if attrs["metadata"]["providerId"] != provider_id:
raise ValueError("API response not supported")
if attrs["metadata"]["providerId"] != provider_id:
raise ValueError("API response not supported")
return Config(
provider_id,
attrs["metadata"]["title"],
attrs["metadata"]["subtitle"],
{
s["versions"][0]["eStat"]["ml5"]: (s["versions"][0]["label"], s["url"])
for s in attrs["streams"]
},
)
return Config(
provider_id,
attrs["metadata"]["title"],
attrs["metadata"]["subtitle"],
{
s["versions"][0]["eStat"]["ml5"]: (s["url"], s["versions"][0]["label"])
for s in attrs["streams"]
},
)
def url_for_version(self, version_code: str) -> str:
    """Return the m3u8 url for the given version code.

    Each entry of ``self.versions`` is read here as a ``(label, url)``
    pair keyed by version code: the first item is printed as a
    human-readable label, the second is returned as the URL.

    Args:
        version_code: Key to look up in ``self.versions``.

    Returns:
        The second item of the pair stored under ``version_code``.

    Raises:
        SystemExit: With status 1, after listing every available code
            and label on stdout, when ``version_code`` is unknown.
    """
    if version_code not in self.versions:
        print("Available versions:")
        for code, (label, _) in self.versions.items():
            print(f"\t{code} - {label}")
        # The ``exit`` builtin is injected by the ``site`` module for
        # interactive use and may be absent (``python -S``, frozen apps);
        # raising SystemExit directly is equivalent and always available.
        raise SystemExit(1)

    return self.versions[version_code][1]
def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Placeholder: only the endpoint URL is assembled from ``lang`` and
    ``provider_id``; no request is made and the call always raises
    ``NotImplementedError``.
    """
    endpoint = f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    raise NotImplementedError()
def make_srt_tempfile(url):
"""Return a temporary file name where VTT subtitle has been downloaded/converted to SRT."""
mpeg = m3u8.load(url)
urls = [cast(str, mpeg.base_uri) + "/" + f for f in mpeg.files]
def write_subtitles(lang, m3u8_uri, file_base_name):
"""Convert distant vtt subtitles to local srt."""
sub_m3u8 = m3u8.load(m3u8_uri)
sub_urls = [cast(str, sub_m3u8.base_uri) + "/" + f for f in sub_m3u8.files]
if not sub_urls:
if not urls:
raise ValueError("No subtitle files")
if len(sub_urls) > 1:
if len(urls) > 1:
raise ValueError("Multiple subtitle files")
http_response = urlopen(sub_urls[0])
http_response = urlopen(urls[0])
if http_response.status != HTTPStatus.OK:
raise RuntimeError("Subtitle request failed")
buffer = io.StringIO(http_response.read().decode("utf8"))
with open(f"{file_base_name}.{lang}.srt", "w", encoding="utf8") as f:
for i, caption in enumerate(webvtt.read_buffer(buffer), 1):
with tempfile.NamedTemporaryFile(
"w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
) as f:
i = 1
for caption in webvtt.read_buffer(buffer):
print(i, file=f)
print(
re.sub(r"\.", ",", caption.start)
@ -113,71 +121,120 @@ def write_subtitles(lang, m3u8_uri, file_base_name):
file=f,
)
print(caption.text + "\n", file=f)
i += 1
return f.name
def download_stream(m3u8_url: str, file_base_name: str):
"""Download and writes the video and subtitles files."""
dst = m3u8.M3U8()
src = m3u8.load(m3u8_url)
class Version(NamedTuple):
"""A structure representing a version M3U8 object."""
# sort streams by resolution (descending) and pick the bigger one
src.playlists.sort(key=lambda pl: pl.stream_info.resolution, reverse=True)
src.playlists[0].uri = src.base_uri + src.playlists[0].uri
videos: dict[str, str]
audio_url: str
subtitiles: Optional[tuple[str, str]]
dst.add_playlist(src.playlists[0])
for media in src.playlists[0].media:
media.uri = src.base_uri + media.uri
if media.type == "SUBTITLES":
write_subtitles(media.language, media.uri, file_base_name)
else:
dst.add_media(media)
@classmethod
def load(cls, url: str) -> Version:
"""Retrieve a version from m3u8 file."""
mpeg = m3u8.load(url)
with tempfile.NamedTemporaryFile(
"w", delete=False, encoding="utf8", prefix="delarte.", suffix=".m3u8"
) as f:
f.write(dst.dumps())
dst_path = f.name
videos = {
str(pl.stream_info.resolution[1]): mpeg.base_uri + pl.uri
for pl in mpeg.playlists
}
subprocess.run(
[
FFMPEG,
"-protocol_whitelist",
"https,file,tls,tcp",
"-i",
dst_path,
"-c",
"copy",
"-bsf:a",
"aac_adtstoasc",
f"{file_base_name}.mp4",
audios = [mpeg.base_uri + m.uri for m in mpeg.media if m.type == "AUDIO"]
if len(audios) != 1:
raise ValueError("Unexpected missing or multiple audio tracks.")
subtitles = [
(m.language, mpeg.base_uri + m.uri)
for m in mpeg.media
if m.type == "SUBTITLES"
]
)
if len(subtitles) > 1:
raise ValueError("Unexpected multiple subtitles tracks.")
os.unlink(dst_path)
return cls(videos, audios[0], subtitles[0] if subtitles else None)
def download(self, resolution_code: str, file_base_name: str):
    """Download a given resolution (video/audio/subtitles) and write it to an MKV container.

    The video and audio URLs are handed to ffmpeg directly as inputs and
    their streams are copied without re-encoding. When the version carries
    a subtitle track, it is first converted to a temporary SRT file, muxed
    as an additional input, tagged with its language and flagged as the
    default subtitle stream.

    Args:
        resolution_code: Key of ``self.videos`` selecting the video stream.
        file_base_name: Output path without extension; ``.mkv`` is appended.

    Raises:
        SystemExit: With status 1, after listing the available resolutions
            on stdout (highest first), when ``resolution_code`` is unknown.

    Note:
        The ffmpeg exit status is not checked (``subprocess.run`` is called
        without ``check=True``), as in the original implementation.
    """
    if resolution_code not in self.videos:
        print("Available resolutions:")
        for code in sorted(map(int, self.videos), reverse=True):
            print(f"\t{code}")
        # ``exit`` is a ``site`` convenience that may be missing outside
        # the interactive interpreter; raise SystemExit directly instead.
        raise SystemExit(1)

    # Arguments shared by both variants: inputs, then stream-copy options.
    stream_args = [
        "-i",
        self.videos[resolution_code],
        "-i",
        self.audio_url,
        "-c:v",
        "copy",
        "-c:a",
        "copy",
        "-bsf:a",
        "aac_adtstoasc",
    ]
    output_path = f"{file_base_name}.mkv"

    # NOTE: ``subtitiles`` (sic) is the attribute name used on the instance;
    # it is kept as-is here so the method matches the class definition.
    if not self.subtitiles:
        subprocess.run([FFMPEG, *stream_args, output_path])
        return

    language, subtitles_url = self.subtitiles
    srt_tempfile = make_srt_tempfile(subtitles_url)
    try:
        subprocess.run(
            [
                FFMPEG,
                # The SRT file stays the first input, as before.
                "-i",
                srt_tempfile,
                *stream_args,
                "-c:s",
                "copy",
                "-metadata:s:s:0",
                f"language={language}",
                "-disposition:s:0",
                "default",
                output_path,
            ]
        )
    finally:
        # Remove the temporary SRT even if launching ffmpeg raised
        # (e.g. executable not found), so it is never left behind.
        os.unlink(srt_tempfile)
def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Placeholder: only the endpoint URL is assembled from ``lang`` and
    ``provider_id``; no request is made and the call always raises
    ``NotImplementedError``.
    """
    endpoint = f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    raise NotImplementedError()
def main():
"""CLI function, options passed as arguments."""
(ui_lang, _, stream_id, _slug) = urlparse(sys.argv[1]).path[1:-1].split("/")
version = " ".join(sys.argv[2:])
version_code = sys.argv[2] if len(sys.argv) > 2 else ""
resolution_code = sys.argv[3] if len(sys.argv) > 3 else ""
if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or _ != "videos":
raise ValueError("Invalid URL")
config = api_config(ui_lang, stream_id)
config = Config.load(ui_lang, stream_id)
version_url = config.url_for_version(version_code)
file_base_name = config.title.replace("/", "-")
if version not in config.versions:
print(f"{config.title} - {config.subtitle}")
for version_code, (_, version_label) in config.versions.items():
print(f"\t{version_code} : {version_label}")
exit(1)
version = Version.load(version_url)
m3u8_url, _version_name = config.versions[version]
download_stream(m3u8_url, file_base_name)
version.download(resolution_code, file_base_name)
if __name__ == "__main__":