Compare commits

...

5 Commits

Author SHA1 Message Date
a404dd1da4 Get rid of gitlab references 2022-12-09 21:14:57 +01:00
f9c20e2149 Match signature for version & resolution functions 2022-12-09 20:52:55 +01:00
9593619c68 Setup the execution arch 2022-12-09 00:34:15 +01:00
f22fe297c5 Packaging with flit 2022-12-08 22:39:46 +01:00
Etienne Zind
5125f1d6ba reflect readme terminology in code 2022-12-08 01:20:23 +01:00
8 changed files with 388 additions and 259 deletions

4
.gitignore vendored
View File

@ -2,4 +2,6 @@
*.pyc
__pycache__/
.vscode/
*.mkv
*.mkv
dist/
.venv/

View File

@ -14,21 +14,42 @@ ArteTV is a European public service channel dedicated to culture. Available
🚀 Quick start
---------------
_(Linux/Debian distribution)_
```bash
sudo apt install ffmpeg
mkdir ~/.venvs && python3 -m venv ~/.venvs/delarte
source ~/.venvs/delarte/bin/activate
git clone https://gitlab.com/Barbagus/delarte.git && cd delarte
pip install -r requirements.txt
export PATH_FFMPEG=$(which ffmpeg)
Install [FFMPEG](https://ffmpeg.org/download.html) binaries and ensure it is in your `PATH`
```
$ ffmpeg -version
ffmpeg version N-109344-g1bebcd43e1-20221202 Copyright (c) 2000-2022 the FFmpeg developers
built with gcc 12.2.0 (crosstool-NG 1.25.0.90_cf9beb1)
```
```bash
./delarte.py <PROGRAM_PAGE_URL> <VERSION> <RESOLUTION>
Clone this repository
```
$ git clone https://git.afpy.org/fcode/delarte.git
$ cd delarte
```
Optionally create a virtual environment
```
$ python3 -m venv .venv
$ source .venv/bin/activate  # on Windows (Git Bash): source .venv/Scripts/activate
```
Install in edit mode
```
$ pip install -e .[dev]
```
Now you can run the script
```
$ python3 -m delarte --help
or
$ delarte --help
ArteTV downloader.
usage: delarte [-h|--help] - print this message
or: delarte program_page_url - show available versions
or: delarte program_page_url version - show available resolutions
or: delarte program_page_url version resolution - download the given video
```
🔧 How it works
----------------
@ -271,7 +292,7 @@ The actual build of the video file is handled by [ffmpeg](https://ffmpeg.org/).
##### Why not use FFMPEG directly with the _version index_ URL ?
So we can select the video resolution _version_ and not rely on stream mapping arguments in `ffmpeg`.
So we can select the video resolution and not rely on stream mapping arguments in `ffmpeg`.
##### Why not use VTT subtitles directly ?

View File

@ -1,241 +0,0 @@
#!/usr/bin/env python3
# coding: utf8
"""delarte.
ArteTV downloader
Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://gitlab.com/Barbagus/delarte)
"""
from __future__ import annotations
import io
import json
import os
import re
import subprocess
import sys
import tempfile
from http import HTTPStatus
from os import environ
from typing import NamedTuple, Optional, cast
from urllib.parse import urlparse
from urllib.request import urlopen
import m3u8
import webvtt
# Path to the ffmpeg executable, read from the PATH_FFMPEG environment variable.
# NOTE(review): the fallback is a placeholder message rather than a usable
# executable name, so spawning ffmpeg without PATH_FFMPEG set is expected to fail.
FFMPEG = environ.get("PATH_FFMPEG", "ffmpeg path not found")
def api_root(url: str):
    """Retrieve the root node (infamous "data") of an API call response.

    Args:
        url: Full URL of the API endpoint to query.

    Returns:
        The value stored under the ``"data"`` key of the decoded JSON body.

    Raises:
        RuntimeError: If the HTTP status is not 200 OK.
        ValueError: If the ``Content-Type`` header is not exactly the
            JSON:API media type with a UTF-8 charset.
    """
    # The context manager closes the connection on every path, including the
    # early raises below.
    with urlopen(url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")

        content_type = http_response.getheader("Content-Type")
        if content_type != "application/vnd.api+json; charset=utf-8":
            raise ValueError("API response not supported")

        return json.load(http_response)["data"]
class Config(NamedTuple):
    """A structure representing a config API object."""

    # Program identifier, as used in the config API URL.
    provider_id: str
    # Title and subtitle taken from the API "metadata" node.
    title: str
    subtitle: str
    # Maps a version code to its (label, m3u8 index URL) pair.
    versions: dict[str, tuple[str, str]]

    @classmethod
    def load(cls, lang: str, provider_id: str) -> "Config":
        """Retrieve a stream config from API.

        Args:
            lang: Language code inserted in the API URL.
            provider_id: Program identifier inserted in the API URL.

        Returns:
            A new instance populated from the API response.

        Raises:
            ValueError: If the response is not a ``ConfigPlayer`` object or
                describes a different program than the one requested.
        """
        url = f"https://api.arte.tv/api/player/v2/config/{lang}/{provider_id}"
        root = api_root(url)

        if root["type"] != "ConfigPlayer":
            raise ValueError("API response not supported")

        attrs = root["attributes"]
        metadata = attrs["metadata"]
        if metadata["providerId"] != provider_id:
            raise ValueError("API response not supported")

        # Only the first entry of each stream's "versions" list is considered.
        versions = {
            s["versions"][0]["eStat"]["ml5"]: (s["versions"][0]["label"], s["url"])
            for s in attrs["streams"]
        }
        # Use cls so that subclasses get instances of their own type.
        return cls(provider_id, metadata["title"], metadata["subtitle"], versions)

    def url_for_version(self, version_code: str) -> str:
        """Return the m3u8 url for the given version code.

        When the code is unknown, the available versions are printed and the
        process exits with status 1.
        """
        if version_code not in self.versions:
            print("Available versions:")
            for code, (label, _) in self.versions.items():
                print(f"\t{code} - {label}")
            # sys.exit rather than the interactive-only exit() helper.
            sys.exit(1)

        return self.versions[version_code][1]
def make_srt_tempfile(url):
    """Return a temporary file name where VTT subtitle has been downloaded/converted to SRT.

    Args:
        url: URL of the m3u8 index listing the subtitle file.

    Returns:
        The path of a temporary ``.srt`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If the index lists no file or more than one file.
        RuntimeError: If the subtitle download does not answer 200 OK.
    """
    mpeg = m3u8.load(url)
    urls = [cast(str, mpeg.base_uri) + "/" + f for f in mpeg.files]

    if not urls:
        raise ValueError("No subtitle files")

    if len(urls) > 1:
        raise ValueError("Multiple subtitle files")

    # Close the connection as soon as the payload has been read.
    with urlopen(urls[0]) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("Subtitle request failed")
        buffer = io.StringIO(http_response.read().decode("utf8"))

    # Parse before creating the temp file so that malformed VTT content does
    # not leave an orphan file behind.
    captions = webvtt.read_buffer(buffer)

    with tempfile.NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    ) as f:
        for i, caption in enumerate(captions, start=1):
            print(i, file=f)
            # SRT uses a comma as the decimal separator in timestamps.
            print(
                caption.start.replace(".", ",")
                + " --> "
                + caption.end.replace(".", ","),
                file=f,
            )
            print(caption.text + "\n", file=f)

    return f.name
class Version(NamedTuple):
    """A structure representing a version M3U8 object."""

    # Maps a resolution code (frame height as a string) to a video playlist URL.
    videos: dict[str, str]
    # URL of the single audio track.
    audio_url: str
    # (language, subtitles index URL), or None when there are no subtitles.
    # The field name spelling is kept as-is for compatibility.
    subtitiles: Optional[tuple[str, str]]

    @classmethod
    def load(cls, url: str) -> "Version":
        """Retrieve a version from m3u8 file.

        Raises:
            ValueError: If the index does not declare exactly one audio track,
                or declares more than one subtitles track.
        """
        mpeg = m3u8.load(url)

        videos = {
            str(pl.stream_info.resolution[1]): mpeg.base_uri + pl.uri
            for pl in mpeg.playlists
        }

        audios = [mpeg.base_uri + m.uri for m in mpeg.media if m.type == "AUDIO"]
        if len(audios) != 1:
            raise ValueError("Unexpected missing or multiple audio tracks.")

        subtitles = [
            (m.language, mpeg.base_uri + m.uri)
            for m in mpeg.media
            if m.type == "SUBTITLES"
        ]
        if len(subtitles) > 1:
            raise ValueError("Unexpected multiple subtitles tracks.")

        return cls(videos, audios[0], subtitles[0] if subtitles else None)

    def download(self, resolution_code: str, file_base_name: str):
        """Download a given resolution (video/audio/subtitles) and write it to an MKV container.

        When the resolution code is unknown, the available resolutions are
        printed and the process exits with status 1.

        Args:
            resolution_code: Key into ``videos``.
            file_base_name: Output file name without the ``.mkv`` extension.
        """
        if resolution_code not in self.videos:
            print("Available resolutions:")
            for code in sorted(map(int, self.videos.keys()), reverse=True):
                print(f"\t{code}")
            sys.exit(1)

        video_url = self.videos[resolution_code]

        srt_tempfile = None
        if self.subtitiles:
            srt_tempfile = make_srt_tempfile(self.subtitiles[1])

        # A single command is built for both cases; argument order matches the
        # two previously duplicated invocations.
        cmd = [FFMPEG]
        if srt_tempfile is not None:
            cmd += ["-i", srt_tempfile]
        cmd += ["-i", video_url, "-i", self.audio_url]
        cmd += ["-c:v", "copy", "-c:a", "copy", "-bsf:a", "aac_adtstoasc"]
        if srt_tempfile is not None:
            cmd += [
                "-c:s",
                "copy",
                "-metadata:s:s:0",
                f"language={self.subtitiles[0]}",
                "-disposition:s:0",
                "default",
            ]
        cmd.append(f"{file_base_name}.mkv")

        try:
            subprocess.run(cmd)
        finally:
            # Remove the temporary subtitles even if ffmpeg could not be run.
            if srt_tempfile is not None:
                os.unlink(srt_tempfile)
def api_playlist(lang: str, provider_id: str):
    """Retrieve a playlist from API.

    Placeholder: the endpoint URL is composed but no request is made yet.

    Raises:
        NotImplementedError: Always.
    """
    playlist_url = f"https://api.arte.tv/api/player/v2/playlist/{lang}/{provider_id}"
    raise NotImplementedError
def main():
    """CLI function, options passed as arguments.

    ``sys.argv[1]`` is a program page URL whose path has the form
    ``/<lang>/videos/<id>/<slug>``; ``sys.argv[2]`` and ``sys.argv[3]`` are
    the optional version and resolution codes (empty string when omitted).

    Raises:
        ValueError: If the URL path does not have the expected form or the
            language code is not supported.
    """
    # strip("/") accepts the URL with or without a trailing slash, instead of
    # blindly dropping the last character of the path.
    segments = urlparse(sys.argv[1]).path.strip("/").split("/")
    if len(segments) != 4:
        raise ValueError("Invalid URL")

    ui_lang, section, stream_id, _slug = segments
    if ui_lang not in ("fr", "de", "en", "es", "pl", "it") or section != "videos":
        raise ValueError("Invalid URL")

    version_code = sys.argv[2] if len(sys.argv) > 2 else ""
    resolution_code = sys.argv[3] if len(sys.argv) > 3 else ""

    config = Config.load(ui_lang, stream_id)
    version_url = config.url_for_version(version_code)

    # "/" cannot appear in a file name.
    file_base_name = config.title.replace("/", "-")

    version = Version.load(version_url)
    version.download(resolution_code, file_base_name)


if __name__ == "__main__":
    sys.exit(main())

28
pyproject.toml Normal file
View File

@ -0,0 +1,28 @@
[build-system]
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"
[project]
name = "delarte"
authors = [{name = "Barbagus", email = "barbagus@proton.me"}]
readme = "README.md"
license = {file = "LICENSE.md"}
classifiers = ["License :: OSI Approved :: GNU Affero General Public License v3"]
dynamic = ["version", "description"]
dependencies = [
"m3u8",
"webvtt-py",
]
[project.urls]
Home = "https://git.afpy.org/fcode/delarte.git"
[project.optional-dependencies]
dev = [
"black",
"pydocstyle",
"toml"
]
[project.scripts]
delarte = "delarte.__main__:main"

View File

@ -1,3 +0,0 @@
black
pydocstyle
toml

View File

@ -1,2 +0,0 @@
m3u8
webvtt-py

205
src/delarte/__init__.py Normal file
View File

@ -0,0 +1,205 @@
"""delarte.
ArteTV downloader
Licence: GNU AGPL v3: http://www.gnu.org/licenses/
This file is part of [`delarte`](https://git.afpy.org/fcode/delarte.git)
"""
# Package version string.
# NOTE(review): pyproject.toml lists "version" as dynamic, so the build backend
# presumably reads it from this assignment — confirm before moving it.
__version__ = "0.1"
import io
import json
import re
import tempfile
from http import HTTPStatus
from urllib.request import urlopen
import m3u8
import webvtt
def load_api_data(url):
    """Retrieve the root node (infamous "data") of an API call response.

    Args:
        url: Full URL of the API endpoint to query.

    Returns:
        The value stored under the ``"data"`` key of the decoded JSON body.

    Raises:
        RuntimeError: If the HTTP status is not 200 OK.
        ValueError: If the ``Content-Type`` header is not exactly the
            JSON:API media type with a UTF-8 charset.
    """
    # The context manager closes the connection on every path, including the
    # early raises below.
    with urlopen(url) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("API request failed")

        content_type = http_response.getheader("Content-Type")
        if content_type != "application/vnd.api+json; charset=utf-8":
            raise ValueError("API response not supported")

        return json.load(http_response)["data"]
def load_config_api(lang, program_id):
    """Fetch the player config of a program from the API.

    Returns the API "data" node once it has been checked to be a
    ``ConfigPlayer`` object describing the requested program.

    Raises:
        ValueError: If the object type or the program identifier in the
            response does not match.
    """
    data = load_api_data(
        f"https://api.arte.tv/api/player/v2/config/{lang}/{program_id}"
    )

    # Type is checked first; the identifier is only looked up for a config object.
    if (
        data["type"] != "ConfigPlayer"
        or data["attributes"]["metadata"]["providerId"] != program_id
    ):
        raise ValueError("Invalid API response")

    return data
def iter_versions(config):
    """Yield a (code, label) pair for each stream of a program config.

    Only the first entry of every stream's "versions" list is used: its
    ``eStat.ml5`` value is the version code and its ``label`` the full name.
    """
    streams = config["attributes"]["streams"]
    for entry in streams:
        primary = entry["versions"][0]
        yield primary["eStat"]["ml5"], primary["label"]
def select_version(config, version_code):
    """Look up the version index url matching a version code.

    Returns the ``url`` of the first stream whose first version carries the
    given code, or None when no stream matches.
    """
    matching_urls = (
        entry["url"]
        for entry in config["attributes"]["streams"]
        if entry["versions"][0]["eStat"]["ml5"] == version_code
    )
    return next(matching_urls, None)
def build_file_base_name(config):
    """Derive a base file name from the program title in the config metadata.

    Every "/" in the title is replaced by "-".
    """
    metadata = config["attributes"]["metadata"]
    title = metadata["title"]
    return "-".join(title.split("/"))
def make_srt_tempfile(subtitles_index_url):
    """Return a temporary file name where VTT subtitle has been downloaded/converted to SRT.

    Args:
        subtitles_index_url: URL of the m3u8 index listing the subtitle file.

    Returns:
        The path of a temporary ``.srt`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If the index lists no file or more than one file.
        RuntimeError: If the subtitle download does not answer 200 OK.
    """
    subtitles_index = m3u8.load(subtitles_index_url)
    urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files]

    if not urls:
        raise ValueError("No subtitle files")

    if len(urls) > 1:
        raise ValueError("Multiple subtitle files")

    # Close the connection as soon as the payload has been read.
    with urlopen(urls[0]) as http_response:
        if http_response.status != HTTPStatus.OK:
            raise RuntimeError("Subtitle request failed")
        buffer = io.StringIO(http_response.read().decode("utf8"))

    # Parse before creating the temp file so that malformed VTT content does
    # not leave an orphan file behind.
    captions = webvtt.read_buffer(buffer)

    with tempfile.NamedTemporaryFile(
        "w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
    ) as f:
        for i, caption in enumerate(captions, start=1):
            print(i, file=f)
            # SRT uses a comma as the decimal separator in timestamps.
            print(
                caption.start.replace(".", ",")
                + " --> "
                + caption.end.replace(".", ","),
                file=f,
            )
            print(caption.text + "\n", file=f)

    return f.name
def load_version_index(url):
    """Load a version index from an m3u8 file and validate its tracks.

    Returns the loaded index unchanged once every playlist has been checked.

    Raises:
        ValueError: If the index has no playlist, if a playlist does not
            have exactly one audio track, or if a playlist has more than
            one subtitles track.
    """
    index = m3u8.load(url)

    if not index.playlists:
        raise ValueError("Unexpected missing playlists")

    for playlist in index.playlists:
        audio_count = sum(1 for media in playlist.media if media.type == "AUDIO")
        if audio_count != 1:
            raise ValueError("Unexpected missing or multiple audio tracks")

        subtitles_count = sum(
            1 for media in playlist.media if media.type == "SUBTITLES"
        )
        if subtitles_count > 1:
            raise ValueError("Unexpected multiple subtitle tracks")

    return index
def iter_resolutions(version_index):
    """Yield a (code, label) pair per playlist, tallest resolution first.

    The code is the frame height followed by "p" (1080p, 720p, ...) and the
    label is "<width> x <height>".
    """

    def frame_height(playlist):
        return playlist.stream_info.resolution[1]

    ordered = sorted(version_index.playlists, key=frame_height, reverse=True)
    for playlist in ordered:
        resolution = playlist.stream_info.resolution
        width = resolution[0]
        height = resolution[1]
        yield f"{height}p", f"{width} x {height}"
def select_resolution(version_index, resolution_code):
    """Find the stream information matching a resolution code.

    Returns a ``(video_index_url, audio_track, subtitles_track)`` tuple for
    the first playlist whose height code ("<height>p") equals
    ``resolution_code``; each track is a ``(language, url)`` pair or None
    when the playlist has no media of that type. Returns None when no
    playlist matches.
    """

    def first_track(playlist, media_type):
        # First media entry of the requested type, as (language, absolute url).
        for media in playlist.media:
            if media.type == media_type:
                return (media.language, playlist.base_uri + media.uri)
        return None

    for playlist in version_index.playlists:
        if f"{playlist.stream_info.resolution[1]}p" == resolution_code:
            audio = first_track(playlist, "AUDIO")
            subtitles = first_track(playlist, "SUBTITLES")
            video_url = playlist.base_uri + playlist.uri
            return (video_url, audio, subtitles)

    return None
def build_ffmpeg_cmd(video_index_url, audio_track, subtitles_track, file_base_name):
    """Assemble the ffmpeg argument list that muxes the streams into an MKV.

    Args:
        video_index_url: Video input passed to ffmpeg.
        audio_track: ``(language, input)`` pair for the audio stream.
        subtitles_track: ``(language, input)`` pair for the subtitles, or a
            falsy value when there are none.
        file_base_name: Output file name without the ``.mkv`` extension.

    Returns:
        The command as a list of strings, starting with ``"ffmpeg"``.
    """
    audio_lang, audio_index_url = audio_track
    with_subtitles = bool(subtitles_track)

    inputs = ["-i", video_index_url, "-i", audio_index_url]
    codecs = ["-c:v", "copy", "-c:a", "copy"]
    metadata = ["-metadata:s:a:0", f"language={audio_lang}"]

    if with_subtitles:
        subtitles_lang, subtitles_file = subtitles_track
        inputs += ["-i", subtitles_file]
        codecs += ["-c:s", "copy"]
        metadata += [
            "-metadata:s:s:0",
            f"language={subtitles_lang}",
            "-disposition:s:0",
            "default",
        ]

    return [
        "ffmpeg",
        *inputs,
        *codecs,
        "-bsf:a",
        "aac_adtstoasc",
        *metadata,
        f"{file_base_name}.mkv",
    ]

119
src/delarte/__main__.py Normal file
View File

@ -0,0 +1,119 @@
"""ArteTV dowloader.
usage: delarte [-h|--help] - print this message
or: delarte program_page_url - show available versions
or: delarte program_page_url version - show available resolutions
or: delarte program_page_url version resolution - download the given video
"""
import os
import subprocess
import sys
from urllib.parse import urlparse
from . import (
build_ffmpeg_cmd,
build_file_base_name,
select_resolution,
select_version,
iter_resolutions,
iter_versions,
load_config_api,
load_version_index,
make_srt_tempfile,
)
def fail(message, code=1):
    """Write a message line to STDERR and hand back the given exit code."""
    sys.stderr.write(f"{message}\n")
    return code
def print_available_versions(config, f):
    """Write the list of available program versions to the file object ``f``.

    A header line is followed by one tab-indented "code - label" line per
    version.
    """
    print("Available versions:", file=f)
    for version_code, version_label in iter_versions(config):
        entry = f"\t{version_code} - {version_label}"
        print(entry, file=f)
def print_available_resolutions(version_index, f):
    """Write the list of available resolutions to the file object ``f``.

    A header line is followed by one tab-indented "code - label" line per
    resolution.
    """
    print("Available resolutions:", file=f)
    for resolution_code, resolution_label in iter_resolutions(version_index):
        entry = f"\t{resolution_code} - {resolution_label}"
        print(entry, file=f)
def main():
    """CLI command.

    Reads its arguments from ``sys.argv``: a program page URL, then an
    optional version code, then an optional resolution code. With missing
    trailing arguments the available choices are listed instead of
    downloading.

    Returns:
        The process exit code: 0 on success or when only listing choices,
        1 on invalid input, otherwise the exit status of ffmpeg.
    """
    args = sys.argv[1:]
    if not args or args[0] == "-h" or args[0] == "--help":
        print(__doc__)
        return 0

    try:
        program_page_url = urlparse(args.pop(0))
        if program_page_url.hostname != "www.arte.tv":
            return fail("Not an ArteTV url")

        program_page_path = program_page_url.path.split("/")[1:]

        ui_language = program_page_path.pop(0)
        if ui_language not in ("fr", "de", "en", "es", "pl", "it"):
            return fail(f"Invalid url language code: {ui_language}")

        if program_page_path.pop(0) != "videos":
            return fail("Invalid ArteTV url")

        program_id = program_page_path.pop(0)
    except (ValueError, IndexError):
        # list.pop(0) on a too-short path raises IndexError, not ValueError.
        return fail("Invalid url")

    if not program_id:
        # e.g. ".../fr/videos/" leaves an empty identifier segment.
        return fail("Invalid url")

    try:
        config = load_config_api(ui_language, program_id)
    except ValueError:
        return fail("Invalid program")

    if not args:
        print_available_versions(config, sys.stdout)
        return 0

    version_index_url = select_version(config, args.pop(0))
    if version_index_url is None:
        fail("Invalid version")
        print_available_versions(config, sys.stderr)
        return 1

    version_index = load_version_index(version_index_url)

    if not args:
        print_available_resolutions(version_index, sys.stdout)
        return 0

    stream_info = select_resolution(version_index, args.pop(0))
    if stream_info is None:
        fail("Invalid resolution")
        print_available_resolutions(version_index, sys.stderr)
        # An invalid resolution is an error, like an invalid version.
        return 1

    video_index_url, audio_track, subtitles_track = stream_info

    # Stays None when the stream has no subtitles, so the cleanup below never
    # touches an unbound name.
    subtitle_file = None
    if subtitles_track:
        subtitles_lang, subtitles_index_url = subtitles_track
        subtitle_file = make_srt_tempfile(subtitles_index_url)
        subtitles_track = (subtitles_lang, subtitle_file)

    try:
        file_base_name = build_file_base_name(config)
        cmd = build_ffmpeg_cmd(
            video_index_url, audio_track, subtitles_track, file_base_name
        )
        # Propagate the ffmpeg exit status so that failures are visible.
        return subprocess.run(cmd).returncode
    finally:
        # Remove the temporary subtitles even if ffmpeg could not be run.
        if subtitle_file is not None:
            os.unlink(subtitle_file)


if __name__ == "__main__":
    sys.exit(main())