diff --git a/README.md b/README.md index 40ebc5d..ac36c10 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,7 @@ Because some programs would randomly fail 😒. Probably due to invalid _segment - [m3u8](https://pypi.org/project/m3u8/) to parse playlists. - [webvtt-py](https://pypi.org/project/webvtt-py/) to load `vtt` subtitles files. +- [requests](https://pypi.org/project/requests/) to handle HTTP traffic. ### 🤝 Help diff --git a/pyproject.toml b/pyproject.toml index 4cd3083..9a667bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ dynamic = ["version", "description"] dependencies = [ "m3u8", "webvtt-py", + "requests" ] [project.urls] diff --git a/src/delarte/__main__.py b/src/delarte/__main__.py index ab54f43..1228803 100644 --- a/src/delarte/__main__.py +++ b/src/delarte/__main__.py @@ -12,6 +12,8 @@ usage: delarte [-h|--help] - print this message import sys import time +import requests + from . import api from . import hls from . import muxing @@ -73,13 +75,15 @@ def main(): print(__doc__) return 0 + http_session = requests.sessions.Session() + try: www_lang, program_id = www.parse_url(args.pop(0)) except ValueError as e: return _fail(f"Invalid url: {e}") try: - config = api.load_config(www_lang, program_id) + config = api.load_config(http_session, www_lang, program_id) except ValueError: return _fail("Invalid program") @@ -93,7 +97,7 @@ def main(): _print_available_renditions(config, sys.stderr) return 1 - master_playlist = hls.load_master_playlist(master_playlist_url) + master_playlist = hls.load_master_playlist(http_session, master_playlist_url) if not args: _print_available_variants(master_playlist, sys.stdout) @@ -109,7 +113,7 @@ def main(): progress = create_progress() - with hls.download_inputs(remote_inputs, progress) as temp_inputs: + with hls.download_inputs(http_session, remote_inputs, progress) as temp_inputs: muxing.mux(temp_inputs, file_base_name, progress) diff --git a/src/delarte/api.py b/src/delarte/api.py index a2002ef..199ddab 100644 --- a/src/delarte/api.py +++ b/src/delarte/api.py @@ -3,35 +3,38 @@ """Provide ArteTV JSON API utilities.""" -import json -from http import HTTPStatus -from urllib.request import urlopen + +MIME_TYPE = "application/vnd.api+json; charset=utf-8" -def load_api_data(url): - """Retrieve the root node (infamous "data") of an API call response.""" - http_response = urlopen(url) +def _fetch_api_data(http_session, path, object_type): + # Fetch an API object. + url = "https://api.arte.tv/api/player/v2/" + path - if http_response.status != HTTPStatus.OK: - raise RuntimeError("API request failed") + r = http_session.get(url) + if r.status_code == 404: + raise ValueError(f"{url}: not found") - if ( - http_response.getheader("Content-Type") - != "application/vnd.api+json; charset=utf-8" - ): + r.raise_for_status() + + if r.headers["content-type"] != MIME_TYPE: raise ValueError("API response not supported") - return json.load(http_response)["data"] + obj = r.json()["data"] - -def load_config(lang, program_id): - """Retrieve a program config from API.""" - url = f"https://api.arte.tv/api/player/v2/config/{lang}/{program_id}" - config = load_api_data(url) - - if config["type"] != "ConfigPlayer": + if obj["type"] != object_type: raise ValueError("Invalid API response") + return obj + + +def load_config(http_session, lang, program_id): + """Retrieve a program config from API.""" + url = f"config/{lang}/{program_id}" + config = _fetch_api_data( + http_session, f"config/{lang}/{program_id}", "ConfigPlayer" + ) + if config["attributes"]["metadata"]["providerId"] != program_id: raise ValueError("Invalid API response") diff --git a/src/delarte/hls.py b/src/delarte/hls.py index db8ca69..8af0881 100644 --- a/src/delarte/hls.py +++ b/src/delarte/hls.py @@ -61,11 +61,8 @@ import contextlib import io import os import re -from http import HTTPStatus -from http.client import HTTPConnection, HTTPSConnection from tempfile import NamedTemporaryFile from urllib.parse import urlparse -from urllib.request import urlopen import m3u8 import webvtt @@ -98,9 +95,16 @@ def _is_relative_file_path(uri): return False -def load_master_playlist(url): +def _fetch_playlist(http_session, url): + # Fetch a M3U8 playlist + r = http_session.get(url) + r.raise_for_status() + return m3u8.loads(r.text, url) + + +def load_master_playlist(http_session, url): """Download and return a master playlist.""" - master_playlist = m3u8.load(url) + master_playlist = _fetch_playlist(http_session, url) if not master_playlist.playlists: raise ValueError("Unexpected missing playlists") @@ -184,8 +188,8 @@ def _parse_byterange(obj): return offset, offset + count - 1 -def _load_av_segments(media_playlist_url): - media_playlist = m3u8.load(media_playlist_url) +def _load_av_segments(http_session, media_playlist_url): + media_playlist = _fetch_playlist(http_session, media_playlist_url) file_name = media_playlist.segment_map[0].uri range_start, range_end = _parse_byterange(media_playlist.segment_map[0]) @@ -207,61 +211,45 @@ def _load_av_segments(media_playlist_url): chunks.append((range_start, range_end)) total = range_end + 1 - return urlparse(media_playlist.segment_map[0].absolute_uri), chunks + return media_playlist.segment_map[0].absolute_uri, chunks -def _download_av_stream(media_playlist_url, progress): +def _download_av_stream(http_session, media_playlist_url, progress): # Download an audio or video stream to temporary directory - url, ranges = _load_av_segments(media_playlist_url) + url, ranges = _load_av_segments(http_session, media_playlist_url) total = ranges[-1][1] - Connector = HTTPSConnection if url.scheme == "https" else HTTPConnection - connection = Connector(url.hostname) - connection.connect() - with ( NamedTemporaryFile( mode="w+b", delete=False, prefix="delarte.", suffix=".mp4" - ) as f, - contextlib.closing(connection) as c, + ) as f ): for range_start, range_end in ranges: - c.request( - "GET", - url.path, + r = http_session.get( + url, headers={ - "Accept": "*/*", - "Accept-Language": "fr,en;q=0.7,en-US;q=0.3", - "Accept-Encoding": "gzip, deflate, br, identity", "Range": f"bytes={range_start}-{range_end}", - "Origin": "https://www.arte.tv", - "Connection": "keep-alive", - "Referer": "https://www.arte.tv/", - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "cross-site", - "Sec-GPC": "1", - "DNT": "1", }, ) - r = c.getresponse() - if r.status != 206: + + r.raise_for_status() + + if r.status_code != 206: raise ValueError(f"Invalid response status {r.status}") - content = r.read() - if len(content) != range_end - range_start + 1: + if len(r.content) != range_end - range_start + 1: raise ValueError("Invalid range length") - f.write(content) + f.write(r.content) progress(range_end, total) return f.name -def _download_subtitles_input(index_url, progress): +def _download_subtitles_input(http_session, index_url, progress): # Return a temporary file name where VTT subtitle has been downloaded/converted to SRT - subtitles_index = m3u8.load(index_url) - urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files] + subtitles_index = _fetch_playlist(http_session, index_url) + urls = [s.absolute_uri for s in subtitles_index.segments] if not urls: raise ValueError("No subtitle files") @@ -270,11 +258,10 @@ def _download_subtitles_input(index_url, progress): raise ValueError("Multiple subtitle files") progress(0, 2) - http_response = urlopen(urls[0]) - if http_response.status != HTTPStatus.OK: - raise RuntimeError("Subtitle request failed") + r = http_session.get(urls[0]) + r.raise_for_status() - buffer = io.StringIO(http_response.read().decode("utf8")) + buffer = io.StringIO(r.text) progress(1, 2) with NamedTemporaryFile( @@ -296,7 +283,7 @@ def _download_subtitles_input(index_url, progress): @contextlib.contextmanager -def download_inputs(remote_inputs, progress): +def download_inputs(http_session, remote_inputs, progress): """Download inputs in temporary files.""" # It is implemented as a context manager that will delete temporary files on exit. @@ -308,18 +295,20 @@ def download_inputs(remote_inputs, progress): try: video_filename = _download_av_stream( - video_index_url, lambda i, n: progress("video", i, n) + http_session, video_index_url, lambda i, n: progress("video", i, n) ) (audio_lang, audio_index_url) = audio_track audio_filename = _download_av_stream( - audio_index_url, lambda i, n: progress("audio", i, n) + http_session, audio_index_url, lambda i, n: progress("audio", i, n) ) if subtitles_track: (subtitles_lang, subtitles_index_url) = subtitles_track subtitles_filename = _download_subtitles_input( - subtitles_index_url, lambda i, n: progress("subtitles", i, n) + http_session, + subtitles_index_url, + lambda i, n: progress("subtitles", i, n), ) yield (