Use the `requests` library instead of `urllib`
This enables by default: - gzip compression - connection pooling (a single session is shared by all requests)
This commit is contained in:
parent
458d4cbb6d
commit
88ffe31a94
|
@ -288,6 +288,7 @@ Because some programs would randomly fail 😒. Probably due to invalid _segment
|
|||
|
||||
- [m3u8](https://pypi.org/project/m3u8/) to parse playlists.
|
||||
- [webvtt-py](https://pypi.org/project/webvtt-py/) to load `vtt` subtitles files.
|
||||
- [requests](https://pypi.org/project/requests/) to handle HTTP traffic.
|
||||
|
||||
### 🤝 Help
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ dynamic = ["version", "description"]
|
|||
dependencies = [
|
||||
"m3u8",
|
||||
"webvtt-py",
|
||||
"requests"
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
|
@ -12,6 +12,8 @@ usage: delarte [-h|--help] - print this message
|
|||
import sys
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from . import api
|
||||
from . import hls
|
||||
from . import muxing
|
||||
|
@ -73,13 +75,15 @@ def main():
|
|||
print(__doc__)
|
||||
return 0
|
||||
|
||||
http_session = requests.sessions.Session()
|
||||
|
||||
try:
|
||||
www_lang, program_id = www.parse_url(args.pop(0))
|
||||
except ValueError as e:
|
||||
return _fail(f"Invalid url: {e}")
|
||||
|
||||
try:
|
||||
config = api.load_config(www_lang, program_id)
|
||||
config = api.load_config(http_session, www_lang, program_id)
|
||||
except ValueError:
|
||||
return _fail("Invalid program")
|
||||
|
||||
|
@ -93,7 +97,7 @@ def main():
|
|||
_print_available_renditions(config, sys.stderr)
|
||||
return 1
|
||||
|
||||
master_playlist = hls.load_master_playlist(master_playlist_url)
|
||||
master_playlist = hls.load_master_playlist(http_session, master_playlist_url)
|
||||
|
||||
if not args:
|
||||
_print_available_variants(master_playlist, sys.stdout)
|
||||
|
@ -109,7 +113,7 @@ def main():
|
|||
|
||||
progress = create_progress()
|
||||
|
||||
with hls.download_inputs(remote_inputs, progress) as temp_inputs:
|
||||
with hls.download_inputs(http_session, remote_inputs, progress) as temp_inputs:
|
||||
muxing.mux(temp_inputs, file_base_name, progress)
|
||||
|
||||
|
||||
|
|
|
@ -3,35 +3,38 @@
|
|||
|
||||
"""Provide ArteTV JSON API utilities."""
|
||||
|
||||
import json
|
||||
from http import HTTPStatus
|
||||
from urllib.request import urlopen
|
||||
|
||||
MIME_TYPE = "application/vnd.api+json; charset=utf-8"
|
||||
|
||||
|
||||
def load_api_data(url):
|
||||
"""Retrieve the root node (infamous "data") of an API call response."""
|
||||
http_response = urlopen(url)
|
||||
def _fetch_api_data(http_session, path, object_type):
    # Fetch an API object and return the "data" node of the JSON response.
    #
    # http_session: object whose get(url) returns a requests-style response.
    # path:         path appended to the player API base URL.
    # object_type:  value the "type" field of the returned object must have.
    #
    # Raises ValueError when the object is not found (404), when the response
    # content type is not the expected one, or when the object type differs.
    # Any other HTTP error status is raised by raise_for_status().
    url = "https://api.arte.tv/api/player/v2/" + path

    r = http_session.get(url)
    if r.status_code == 404:
        raise ValueError(f"{url}: not found")

    r.raise_for_status()

    # Use .get() so that a response without a Content-Type header is rejected
    # with the same ValueError as any other unsupported response, instead of
    # escaping as a KeyError.
    if r.headers.get("content-type") != MIME_TYPE:
        raise ValueError("API response not supported")

    obj = r.json()["data"]

    if obj["type"] != object_type:
        raise ValueError("Invalid API response")

    return obj
|
||||
|
||||
|
||||
def load_config(http_session, lang, program_id):
|
||||
"""Retrieve a program config from API."""
|
||||
url = f"config/{lang}/{program_id}"
|
||||
config = _fetch_api_data(
|
||||
http_session, f"config/{lang}/{program_id}", "ConfigPlayer"
|
||||
)
|
||||
|
||||
if config["attributes"]["metadata"]["providerId"] != program_id:
|
||||
raise ValueError("Invalid API response")
|
||||
|
||||
|
|
|
@ -61,11 +61,8 @@ import contextlib
|
|||
import io
|
||||
import os
|
||||
import re
|
||||
from http import HTTPStatus
|
||||
from http.client import HTTPConnection, HTTPSConnection
|
||||
from tempfile import NamedTemporaryFile
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import urlopen
|
||||
|
||||
import m3u8
|
||||
import webvtt
|
||||
|
@ -98,9 +95,16 @@ def _is_relative_file_path(uri):
|
|||
return False
|
||||
|
||||
|
||||
def load_master_playlist(url):
|
||||
def _fetch_playlist(http_session, url):
    # Fetch a M3U8 playlist and return it parsed.
    #
    # http_session: object whose get(url) returns a requests-style response.
    # url:          playlist location; also handed to the parser as the
    #               playlist's own URI.
    #
    # An HTTP error status is raised by raise_for_status().
    r = http_session.get(url)
    r.raise_for_status()

    # HLS playlists are UTF-8 by specification. Pin the encoding so that
    # r.text does not depend on the charset guessed from the response.
    r.encoding = "utf-8"

    return m3u8.loads(r.text, url)
|
||||
|
||||
|
||||
def load_master_playlist(http_session, url):
|
||||
"""Download and return a master playlist."""
|
||||
master_playlist = m3u8.load(url)
|
||||
master_playlist = _fetch_playlist(http_session, url)
|
||||
|
||||
if not master_playlist.playlists:
|
||||
raise ValueError("Unexpected missing playlists")
|
||||
|
@ -184,8 +188,8 @@ def _parse_byterange(obj):
|
|||
return offset, offset + count - 1
|
||||
|
||||
|
||||
def _load_av_segments(media_playlist_url):
|
||||
media_playlist = m3u8.load(media_playlist_url)
|
||||
def _load_av_segments(http_session, media_playlist_url):
|
||||
media_playlist = _fetch_playlist(http_session, media_playlist_url)
|
||||
|
||||
file_name = media_playlist.segment_map[0].uri
|
||||
range_start, range_end = _parse_byterange(media_playlist.segment_map[0])
|
||||
|
@ -207,61 +211,45 @@ def _load_av_segments(media_playlist_url):
|
|||
chunks.append((range_start, range_end))
|
||||
total = range_end + 1
|
||||
|
||||
return urlparse(media_playlist.segment_map[0].absolute_uri), chunks
|
||||
return media_playlist.segment_map[0].absolute_uri, chunks
|
||||
|
||||
|
||||
def _download_av_stream(media_playlist_url, progress):
|
||||
def _download_av_stream(http_session, media_playlist_url, progress):
    # Download an audio or video stream to a temporary file; return its name.
    #
    # http_session:       object whose get(url, headers=...) returns a
    #                     requests-style response.
    # media_playlist_url: media playlist describing the stream's byte ranges.
    # progress:           callable invoked as progress(range_end, total) after
    #                     each chunk has been written.
    #
    # The file is created with delete=False, so it outlives this function.
    # Raises ValueError when a chunk is not answered with 206 or has the wrong
    # length; other HTTP error statuses are raised by raise_for_status().
    url, ranges = _load_av_segments(http_session, media_playlist_url)
    total = ranges[-1][1]

    with NamedTemporaryFile(
        mode="w+b", delete=False, prefix="delarte.", suffix=".mp4"
    ) as f:
        for range_start, range_end in ranges:
            # Only the byte range is request specific; every other header is
            # left to the session.
            r = http_session.get(
                url,
                headers={"Range": f"bytes={range_start}-{range_end}"},
            )

            r.raise_for_status()

            # A successful status other than 206 means the range was not
            # honoured, so the body is not the chunk that was asked for.
            if r.status_code != 206:
                # The response exposes `status_code`; reading `status` here
                # raised AttributeError instead of the intended ValueError.
                raise ValueError(f"Invalid response status {r.status_code}")

            content = r.content
            if len(content) != range_end - range_start + 1:
                raise ValueError("Invalid range length")
            f.write(content)

            progress(range_end, total)

    return f.name
|
||||
|
||||
|
||||
def _download_subtitles_input(index_url, progress):
|
||||
def _download_subtitles_input(http_session, index_url, progress):
|
||||
# Return a temporary file name where VTT subtitle has been downloaded/converted to SRT
|
||||
subtitles_index = m3u8.load(index_url)
|
||||
urls = [subtitles_index.base_uri + "/" + f for f in subtitles_index.files]
|
||||
subtitles_index = _fetch_playlist(http_session, index_url)
|
||||
urls = [s.absolute_uri for s in subtitles_index.segments]
|
||||
|
||||
if not urls:
|
||||
raise ValueError("No subtitle files")
|
||||
|
@ -270,11 +258,10 @@ def _download_subtitles_input(index_url, progress):
|
|||
raise ValueError("Multiple subtitle files")
|
||||
|
||||
progress(0, 2)
|
||||
http_response = urlopen(urls[0])
|
||||
if http_response.status != HTTPStatus.OK:
|
||||
raise RuntimeError("Subtitle request failed")
|
||||
r = http_session.get(urls[0])
|
||||
r.raise_for_status()
|
||||
|
||||
buffer = io.StringIO(http_response.read().decode("utf8"))
|
||||
buffer = io.StringIO(r.text)
|
||||
progress(1, 2)
|
||||
|
||||
with NamedTemporaryFile(
|
||||
|
@ -296,7 +283,7 @@ def _download_subtitles_input(index_url, progress):
|
|||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def download_inputs(remote_inputs, progress):
|
||||
def download_inputs(http_session, remote_inputs, progress):
|
||||
"""Download inputs in temporary files."""
|
||||
# It is implemented as a context manager that will delete temporary files on exit.
|
||||
|
||||
|
@ -308,18 +295,20 @@ def download_inputs(remote_inputs, progress):
|
|||
|
||||
try:
|
||||
video_filename = _download_av_stream(
|
||||
video_index_url, lambda i, n: progress("video", i, n)
|
||||
http_session, video_index_url, lambda i, n: progress("video", i, n)
|
||||
)
|
||||
|
||||
(audio_lang, audio_index_url) = audio_track
|
||||
audio_filename = _download_av_stream(
|
||||
audio_index_url, lambda i, n: progress("audio", i, n)
|
||||
http_session, audio_index_url, lambda i, n: progress("audio", i, n)
|
||||
)
|
||||
|
||||
if subtitles_track:
|
||||
(subtitles_lang, subtitles_index_url) = subtitles_track
|
||||
subtitles_filename = _download_subtitles_input(
|
||||
subtitles_index_url, lambda i, n: progress("subtitles", i, n)
|
||||
http_session,
|
||||
subtitles_index_url,
|
||||
lambda i, n: progress("subtitles", i, n),
|
||||
)
|
||||
|
||||
yield (
|
||||
|
|
Loading…
Reference in New Issue