delarte/src/delarte/__init__.py

279 lines
8.9 KiB
Python

# License: GNU AGPL v3: http://www.gnu.org/licenses/
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
"""delarte - ArteTV downloader."""
__version__ = "0.1"
from .error import *
from .model import *
def load_program_sources(http, page_url):
    """Load the program sources listed on the given ArteTV page.

    The page at `page_url` is fetched through `http`, its page data is
    extracted and read, and one `ProgramSource` is returned per program
    found.  Each of them pairs a `Program` with the URL of the player
    configuration API object for that program.

    Raises `UnexpectedError("NO_PROGRAMS")` when no program is found.
    """
    from .www import extract_page_data, fetch_page_content, read_page_data

    programs = read_page_data(
        extract_page_data(fetch_page_content(http, page_url))
    )
    if not programs:
        raise UnexpectedError("NO_PROGRAMS")

    program_sources = []
    for program_id, language, title, subtitle in programs:
        program = Program(program_id, language, title, subtitle)
        config_player_url = (
            f"https://api.arte.tv/api/player/v2/config/{language}/{program_id}"
        )
        program_sources.append(ProgramSource(program, config_player_url))
    return program_sources
# Audio renditions keyed by ArteTV audio code.
# Each row reads: code, language, original version flag, visual impaired flag.
# "und" is the ISO 639-2 language code for "undetermined".
_AUDIO_RENDITIONS = {
    code: Rendition.Audio(language, original, visual_impaired)
    for code, language, original, visual_impaired in (
        ("VO", "und", True, False),
        ("VOF", "fra", True, False),
        ("VOA", "deu", True, False),
        ("VOEU", "und", True, False),
        ("VF", "fra", False, False),
        ("VA", "deu", False, False),
        ("VE", "und", False, False),
        ("VFAUD", "fra", False, True),
        ("VAAUD", "deu", False, True),
    )
}
# Subtitles renditions keyed by ArteTV subtitles code.
# Each row reads: code, language, hearing impaired flag.
# NOTE(review): "esp" is not the ISO 639-2 code for Spanish ("spa" is);
# confirm which value the rest of the program expects before changing it.
_SUBTITLES_RENDITIONS = {
    code: Rendition.Subtitles(language, hearing_impaired)
    for code, language, hearing_impaired in (
        ("STF", "fra", False),
        ("STA", "deu", False),
        ("STMF", "fra", True),
        ("STMA", "deu", True),
        ("STE[ANG]", "eng", False),
        ("STE[ESP]", "esp", False),
        ("STE[POL]", "pol", False),
        ("STE[ITA]", "ita", False),
    )
}
def _parse_rendition_code(code):
    """Split an ArteTV rendition code into audio and subtitles renditions.

    `code` is either a bare audio code (e.g. "VOF") or an audio code and a
    subtitles code joined by "-" (e.g. "VOA-STF").

    Returns an `(audio_rendition, subtitles_rendition)` pair; the second
    item is `None` when the code has no subtitles part.

    Raises `UnexpectedError("AUDIO_RENDITION_CODE")` or
    `UnexpectedError("SUBTITLES_RENDITION_CODE")` when the corresponding
    part is not a known code.
    """
    # Split on the first "-" only, so that a code holding several "-" is
    # reported as an unknown subtitles code rather than failing with a bare
    # `ValueError` while unpacking.
    audio_code, _, subtitles_code = code.partition("-")

    try:
        audio_rendition = _AUDIO_RENDITIONS[audio_code]
    except KeyError as error:
        raise UnexpectedError("AUDIO_RENDITION_CODE") from error

    if not subtitles_code:
        return audio_rendition, None

    try:
        subtitles_rendition = _SUBTITLES_RENDITIONS[subtitles_code]
    except KeyError as error:
        raise UnexpectedError("SUBTITLES_RENDITION_CODE") from error

    return audio_rendition, subtitles_rendition
def load_rendition_sources(http, program_source):
    """Fetch the rendition sources available for a given program.

    The player configuration object referenced by `program_source` is
    fetched through `http` and read as a program id and a list of streams.
    One `RenditionSource` is returned per stream, pairing the program and
    the `Rendition` parsed from the stream with its program index URL.

    Raises `UnexpectedError("PROGRAM_ID_MISMATCH")` when the configuration
    object is about another program, `UnexpectedError("NO_RENDITIONS")` when
    it lists no stream, and whatever `_parse_rendition_code` raises for an
    unknown rendition code.
    """
    from .api import fetch_api_object, read_config_player_object

    program, config_player_url = program_source

    program_id, streams = read_config_player_object(
        fetch_api_object(http, config_player_url)
    )
    if program_id != program.id:
        raise UnexpectedError("PROGRAM_ID_MISMATCH")
    if not streams:
        raise UnexpectedError("NO_RENDITIONS")

    parsed_codes = [_parse_rendition_code(code) for code, _, _, _ in streams]

    # Original-version audio tracks are sometimes a mix of "und" and one
    # actual language ("fra" or "deu").  When exactly that happens, the
    # actual language is applied to every original-version track.
    original_languages = {
        audio.language for audio, _ in parsed_codes if audio.original
    }
    if "und" in original_languages and len(original_languages) == 2:
        (actual_language,) = original_languages - {"und"}
        parsed_codes = [
            (
                audio._replace(language=actual_language)
                if audio.original
                else audio,
                subtitles,
            )
            for audio, subtitles in parsed_codes
        ]

    rendition_sources = []
    for (audio, subtitles), (_, label, protocol, program_index_url) in zip(
        parsed_codes, streams
    ):
        rendition = Rendition(label, audio, subtitles, protocol)
        rendition_sources.append(
            RenditionSource(program, rendition, program_index_url)
        )
    return rendition_sources
def _check_variant_renditions(rendition, audio, subtitles):
# Check wether the audio/subtitles rendition we figured
# from ArteTV rendition code do match the one found in the
# program index.
# Update track languages (in particular, will get rid of "und")
updated_audio = rendition.audio._replace(language=audio.language)
if updated_audio != audio:
raise UnexpectedError("AUDIO_RENDITION_MISMATCH")
updated_subtitles = (
rendition.subtitles._replace(language=subtitles.language)
if rendition.subtitles
else None
)
if updated_subtitles != subtitles:
raise UnexpectedError("SUBTITLES_RENDITION_MISMATCH")
return rendition._replace(audio=audio, subtitles=subtitles)
def load_variant_sources(http, rendition_source):
    """Load the variant sources available for a given rendition.

    The program index referenced by `rendition_source` is fetched through
    `http` and read as a list of variants plus one audio track and one
    subtitles track.  The rendition is checked against those tracks, then
    one `VariantSource` is returned per variant.

    Raises `UnsupportedHLSProtocol` when the rendition protocol is not
    "HLS_NG", `UnexpectedError("NO_VARIANTS")` when the program index lists
    no variant, and whatever `_check_variant_renditions` raises on mismatch.
    """
    from .hls import fetch_index, read_ng_program_index

    program, rendition, program_index_url = rendition_source
    if rendition.protocol != "HLS_NG":
        raise UnsupportedHLSProtocol()

    variants, audio_track, subtitles_track = read_ng_program_index(
        fetch_index(http, program_index_url)
    )
    audio, audio_index_url = audio_track
    subtitles, subtitles_index_url = subtitles_track

    if not variants:
        raise UnexpectedError("NO_VARIANTS")

    checked_rendition = _check_variant_renditions(rendition, audio, subtitles)

    variant_sources = []
    for variant, video_track_index in variants:
        variant_sources.append(
            VariantSource(
                program,
                checked_rendition,
                variant,
                video_track_index,
                audio_index_url,
                subtitles_index_url,
            )
        )
    return variant_sources
def _load_mp4_input(http, track_index_url):
    """Fetch an MP4 track index and return the first item read from it.

    NOTE(review): the returned item is presumably the media URL, since
    callers store it in the `url` slot of a target input -- confirm against
    `read_mp4_index`.
    """
    from .hls import fetch_index, read_mp4_index

    mp4_index = read_mp4_index(fetch_index(http, track_index_url))
    return mp4_index[0]
def _load_vtt_input(http, track_index_url):
    """Fetch a VTT track index and return what is read from it."""
    from .hls import fetch_index, read_vtt_index

    return read_vtt_index(fetch_index(http, track_index_url))
def fetch_targets(variant_sources: list[VariantSource], http, **naming_options):
    """Compile the download targets for the given variant sources.

    For each variant source, the video and audio track indexes (and the
    subtitles one, if any) are loaded through `http` and bundled in a
    `Target`, together with the program title and an output file name.

    `naming_options` are passed as-is to `file_name_builder`.

    Returns the list of targets, in the order of `variant_sources`.
    """
    from .naming import file_name_builder

    build_file_name = file_name_builder(**naming_options)

    targets = []
    for (
        program,
        rendition,
        variant,
        video_index_url,
        audio_index_url,
        subtitles_index_url,
    ) in variant_sources:
        video_input = Target.VideoInput(
            VideoTrack(
                variant.video.resolution[0],
                variant.video.resolution[1],
                variant.video.frame_rate,
            ),
            _load_mp4_input(http, video_index_url),
        )

        audio_input = Target.AudioInput(
            AudioTrack(
                f"Audio[{rendition.audio.language}]",
                rendition.audio.language,
                rendition.audio.original,
                rendition.audio.visual_impaired,
            ),
            _load_mp4_input(http, audio_index_url),
        )

        # Subtitles are optional: both the rendition and the index URL
        # must be there.
        if rendition.subtitles and subtitles_index_url:
            subtitles_input = Target.SubtitlesInput(
                SubtitlesTrack(
                    # Label the track as subtitles (it used to be labelled
                    # "Audio[...]", copied over from the audio track).
                    f"Subtitles[{rendition.subtitles.language}]",
                    rendition.subtitles.language,
                    rendition.subtitles.hearing_impaired,
                ),
                _load_vtt_input(http, subtitles_index_url),
            )
        else:
            subtitles_input = None

        # The title is a (title, subtitle) pair only when there is a subtitle.
        title = (
            (program.title, program.subtitle)
            if program.subtitle
            else program.title
        )

        targets.append(
            Target(
                video_input,
                audio_input,
                subtitles_input,
                title,
                build_file_name(program, rendition, variant),
            )
        )

    return targets
def download_targets(targets, http, on_progress):
    """Download and mux each of the given targets.

    Each target ends up as a "<output>.mkv" file; targets whose file already
    exists are skipped.  Video, audio and (if any) subtitles are first
    downloaded to intermediate files next to the output, then muxed, then
    the intermediate files are removed.

    The intermediate files are only removed once muxing went through: they
    are left in place when a download or the muxing raises.

    `on_progress` is handed over to the download and muxing functions.
    """
    import os

    from .download import download_mp4_media, download_vtt_media
    from .muxing import mux_target

    for target in targets:
        output_path = f"{target.output}.mkv"
        if os.path.isfile(output_path):
            print(f"Skipping {output_path!r}")
            continue

        video_path = target.output + ".video.mp4"
        audio_path = target.output + ".audio.mp4"
        subtitles_path = target.output + ".srt"

        download_mp4_media(target.video_input.url, video_path, http, on_progress)
        download_mp4_media(target.audio_input.url, audio_path, http, on_progress)
        if target.subtitles_input:
            download_vtt_media(
                target.subtitles_input.url, subtitles_path, http, on_progress
            )

        # Same target, with every input pointing at its local file.
        local_video = target.video_input._replace(url=video_path)
        local_audio = target.audio_input._replace(url=audio_path)
        if target.subtitles_input:
            local_subtitles = target.subtitles_input._replace(url=subtitles_path)
        else:
            local_subtitles = None
        local_target = target._replace(
            video_input=local_video,
            audio_input=local_audio,
            subtitles_input=local_subtitles,
        )

        mux_target(local_target, on_progress)

        for intermediate_path in (subtitles_path, audio_path, video_path):
            if os.path.isfile(intermediate_path):
                os.unlink(intermediate_path)