Use `urllib3` instead of `requests`

We were not (and probably won't be) using any worthwhile `requests`
features (besides `raise_for_status()`), and the `timeout` session
parameter propagation vs adapter plugging "thing" in requests just
annoys me deeply (not that kind of "... for Humans (TM)").
Barbagus 2023-02-13 09:35:33 +01:00
parent f90179e7c3
commit a108135141
9 changed files with 79 additions and 69 deletions
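
For readers who have not hit this before, here is a minimal illustrative sketch of the contrast the commit message alludes to. It is not part of the delarte code base, and `TimeoutAdapter` is a hypothetical name; the 5-second value simply mirrors the `PoolManager(timeout=5)` call in the diff below. `requests` has no session-wide timeout setting, so a default either has to be passed on every call or injected by mounting a custom adapter, whereas `urllib3` accepts it directly as a constructor argument:

```python
# Illustrative only, not delarte code. Requires both `requests` and `urllib3`.
import requests
import requests.adapters
import urllib3


class TimeoutAdapter(requests.adapters.HTTPAdapter):
    """Hypothetical adapter that injects a default timeout into every send."""

    def send(self, request, **kwargs):
        # requests passes timeout=None unless the caller sets it per request,
        # so a session-wide default means mounting an adapter like this one.
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = 5
        return super().send(request, **kwargs)


session = requests.Session()
session.mount("https://", TimeoutAdapter())
session.mount("http://", TimeoutAdapter())

# urllib3: the default timeout is just a PoolManager argument.
http = urllib3.PoolManager(timeout=5)
```

The flip side, visible throughout the diff, is that `urllib3` responses do not raise on 4xx/5xx status codes by themselves, hence the new `HTTPError` wrapper and the explicit `200 <= r.status < 300` checks that replace `raise_for_status()`.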


@@ -139,7 +139,7 @@ Because some programs would randomly fail 😒. Probably due to invalid _segment
 ## 📌 Dependencies
 - [m3u8](https://pypi.org/project/m3u8/) to parse indexes.
-- [requests](https://pypi.org/project/requests/) to handle HTTP traffic.
+- [urllib3](https://pypi.org/project/urllib3/) to handle HTTP traffic.
 - [docopt-ng](https://pypi.org/project/docopt-ng/) to parse command line.
 ## 🤝 Help


@@ -4,14 +4,14 @@ build-backend = "flit_core.buildapi"
 [project]
 name = "delarte"
-authors = [{name = "Barbagus", email = "barbagus@proton.me"}]
+authors = [{name = "Barbagus", email = "barbagus42@proton.me"}]
 readme = "README.md"
 license = {file = "LICENSE.md"}
 classifiers = ["License :: OSI Approved :: GNU Affero General Public License v3"]
 dynamic = ["version", "description"]
 dependencies = [
     "m3u8",
-    "requests",
+    "urllib3",
     "docopt-ng"
 ]
@@ -22,7 +22,6 @@ Home = "https://git.afpy.org/fcode/delarte.git"
 dev = [
     "black",
     "pydocstyle",
-    "toml"
 ]

 [project.scripts]


@@ -9,7 +9,7 @@ from .error import *
 from .model import *


-def fetch_program_sources(url, http_session):
+def fetch_program_sources(url, http):
     """Fetch program sources listed on given ArteTV page."""
     from .www import iter_programs
@@ -18,11 +18,11 @@ def fetch_program_sources(url, http_session):
             program,
             player_config_url,
         )
-        for program, player_config_url in iter_programs(url, http_session)
+        for program, player_config_url in iter_programs(url, http)
     ]


-def fetch_rendition_sources(program_sources, http_session):
+def fetch_rendition_sources(program_sources, http):
     """Fetch renditions for given programs."""
     from itertools import groupby
@@ -39,7 +39,7 @@ def fetch_rendition_sources(program_sources, http_session):
         for rendition, protocol, program_index_url in iter_renditions(
             program.id,
             player_config_url,
-            http_session,
+            http,
         )
     ]
@@ -54,7 +54,7 @@ def fetch_rendition_sources(program_sources, http_session):
     return sources


-def fetch_variant_sources(renditions_sources, http_session):
+def fetch_variant_sources(renditions_sources, http):
     """Fetch variants for given renditions."""
     from itertools import groupby
@@ -71,7 +71,7 @@ def fetch_variant_sources(renditions_sources, http_session):
         )
         for program, rendition, protocol, program_index_url in renditions_sources
         for variant, video, audio, subtitles in iter_variants(
-            protocol, program_index_url, http_session
+            protocol, program_index_url, http
         )
     ]
@@ -90,7 +90,7 @@ def fetch_variant_sources(renditions_sources, http_session):
     return sources


-def fetch_targets(variant_sources, http_session, **naming_options):
+def fetch_targets(variant_sources, http, **naming_options):
     """Compile download targets for given variants."""
     from .hls import fetch_mp4_media, fetch_vtt_media
     from .naming import file_name_builder
@@ -101,16 +101,16 @@ def fetch_targets(variant_sources, http_session, **naming_options):
         Target(
             Target.VideoInput(
                 video_media.track,
-                fetch_mp4_media(video_media.track_index_url, http_session),
+                fetch_mp4_media(video_media.track_index_url, http),
             ),
             Target.AudioInput(
                 audio_media.track,
-                fetch_mp4_media(audio_media.track_index_url, http_session),
+                fetch_mp4_media(audio_media.track_index_url, http),
             ),
             (
                 Target.SubtitlesInput(
                     subtitles_media.track,
-                    fetch_vtt_media(subtitles_media.track_index_url, http_session),
+                    fetch_vtt_media(subtitles_media.track_index_url, http),
                 )
                 if subtitles_media
                 else None
@@ -124,7 +124,7 @@ def fetch_targets(variant_sources, http_session, **naming_options):
     return targets


-def download_targets(targets, http_session, on_progress):
+def download_targets(targets, http, on_progress):
    """Download given target."""
    import os
@@ -142,17 +142,13 @@ def download_targets(targets, http_session, on_progress):
         audio_path = target.output + ".audio.mp4"
         subtitles_path = target.output + ".srt"

-        download_mp4_media(
-            target.video_input.url, video_path, http_session, on_progress
-        )
+        download_mp4_media(target.video_input.url, video_path, http, on_progress)

-        download_mp4_media(
-            target.audio_input.url, audio_path, http_session, on_progress
-        )
+        download_mp4_media(target.audio_input.url, audio_path, http, on_progress)

         if target.subtitles_input:
             download_vtt_media(
-                target.subtitles_input.url, subtitles_path, http_session, on_progress
+                target.subtitles_input.url, subtitles_path, http, on_progress
             )

         mux_target(


@@ -35,11 +35,12 @@ import sys
 import time

 import docopt
-import requests
+import urllib3

 from . import (
     ModuleError,
     UnexpectedError,
+    HTTPError,
     __version__,
     download_targets,
     fetch_program_sources,
@@ -136,24 +137,24 @@ def main():
     """CLI command."""
     args = docopt.docopt(__doc__, sys.argv[1:], version=__version__)

-    http_session = requests.sessions.Session()
+    http = urllib3.PoolManager(timeout=5)

     try:
-        program_sources = fetch_program_sources(args["URL"], http_session)
+        program_sources = fetch_program_sources(args["URL"], http)

         rendition_sources = _select_rendition_sources(
             args["RENDITION"],
-            fetch_rendition_sources(program_sources, http_session),
+            fetch_rendition_sources(program_sources, http),
         )

         variant_sources = _select_variant_sources(
             args["VARIANT"],
-            fetch_variant_sources(rendition_sources, http_session),
+            fetch_variant_sources(rendition_sources, http),
         )

         targets = fetch_targets(
             variant_sources,
-            http_session,
+            http,
             **{
                 k[7:].replace("-", "_"): v
                 for k, v in args.items()
@@ -161,7 +162,7 @@ def main():
             },
         )

-        download_targets(targets, http_session, _create_progress())
+        download_targets(targets, http, _create_progress())

     except UnexpectedError as e:
         if args["--debug"]:
@@ -188,7 +189,7 @@ def main():
         print(str(e))
         return 1

-    except requests.HTTPError as e:
+    except HTTPError as e:
         if args["--debug"]:
             raise e
         print("Network error.")


@@ -3,23 +3,26 @@
 """Provide ArteTV JSON API utilities."""

-from .error import UnexpectedAPIResponse
+import json
+
+from .error import UnexpectedAPIResponse, HTTPError
 from .model import Rendition

 MIME_TYPE = "application/vnd.api+json; charset=utf-8"


-def _fetch_api_object(http_session, url, object_type):
+def _fetch_api_object(http, url, object_type):
     # Fetch an API object.
-    r = http_session.get(url)
-    r.raise_for_status()
+    r = http.request("GET", url)
+    if not 200 <= r.status < 300:
+        raise HTTPError(r)

-    mime_type = r.headers["content-type"]
+    mime_type = r.getheader("content-type")
     if mime_type != MIME_TYPE:
         raise UnexpectedAPIResponse("MIME_TYPE", url, MIME_TYPE, mime_type)

-    obj = r.json()
+    obj = json.loads(r.data.decode("utf-8"))

     try:
         data_type = obj["data"]["type"]
@@ -32,9 +35,9 @@ def _fetch_api_object(http_session, url, object_type):
         raise UnexpectedAPIResponse("SCHEMA", url) from e


-def iter_renditions(program_id, player_config_url, http_session):
+def iter_renditions(program_id, player_config_url, http):
     """Iterate over renditions for the given program."""
-    obj = _fetch_api_object(http_session, player_config_url, "ConfigPlayer")
+    obj = _fetch_api_object(http, player_config_url, "ConfigPlayer")

     codes = set()
     try:


@@ -5,11 +5,12 @@
 import os

 from . import subtitles
+from .error import HTTPError

 _CHUNK = 64 * 1024


-def download_mp4_media(url, file_name, http_session, on_progress):
+def download_mp4_media(url, file_name, http, on_progress):
     """Download a MP4 (video or audio) to given file."""
     on_progress(file_name, 0, 0)
@@ -20,22 +21,28 @@ def download_mp4_media(url, file_name, http_session, on_progress):
     temp_file = f"{file_name}.tmp"

     with open(temp_file, "ab") as f:
-        r = http_session.get(
-            url, timeout=5, stream=True, headers={"Range": f"bytes={f.tell()}-"}
+        r = http.request(
+            "GET",
+            url,
+            headers={"Range": f"bytes={f.tell()}-"},
+            preload_content=False,
         )
-        r.raise_for_status()
+        if not 200 <= r.status < 300:
+            raise HTTPError(r)

-        _, total = r.headers["content-range"].split("/")
+        _, total = r.getheader("content-range").split("/")
         total = int(total)

-        for content in r.iter_content(_CHUNK):
+        for content in r.stream(_CHUNK, True):
             f.write(content)
             on_progress(file_name, f.tell(), total)

+        r.release_conn()
+
     os.rename(temp_file, file_name)


-def download_vtt_media(url, file_name, http_session, on_progress):
+def download_vtt_media(url, file_name, http, on_progress):
     """Download a VTT and SRT-convert it to to given file."""
     on_progress(file_name, 0, 0)
@@ -46,11 +53,11 @@ def download_vtt_media(url, file_name, http_session, on_progress):
     temp_file = f"{file_name}.tmp"

     with open(temp_file, "w", encoding="utf-8") as f:
-        r = http_session.get(url, timeout=5)
-        r.raise_for_status()
-        r.encoding = "utf-8"
+        r = http.request("GET", url)
+        if not 200 <= r.status < 300:
+            raise HTTPError(r)

-        subtitles.convert(r.text, f)
+        subtitles.convert(r.data.decode("utf-8"), f)
         on_progress(file_name, f.tell(), f.tell())

     os.rename(temp_file, file_name)


@@ -24,6 +24,10 @@ class UnexpectedError(ModuleError):
     """An error to report to developers."""


+class HTTPError(Exception):
+    """A wrapper around a filed HTTP response."""
+
+
 #
 # www
 #


@@ -6,7 +6,7 @@
 import m3u8

-from .error import UnexpectedHLSResponse, UnsupportedHLSProtocol
+from .error import UnexpectedHLSResponse, UnsupportedHLSProtocol, HTTPError
 from .model import AudioTrack, SubtitlesTrack, Variant, VideoTrack

 #
@@ -27,25 +27,24 @@ from .model import AudioTrack, SubtitlesTrack, Variant, VideoTrack
 MIME_TYPE = "application/x-mpegURL"


-def _fetch_index(url, http_session):
+def _fetch_index(http, url):
     # Fetch a M3U8 playlist
-    r = http_session.get(url)
-    r.raise_for_status()
+    r = http.request("GET", url)
+    if not 200 <= r.status < 300:
+        raise HTTPError(r)

-    if (_ := r.headers["content-type"]) != MIME_TYPE:
+    if (_ := r.getheader("content-type")) != MIME_TYPE:
         raise UnexpectedHLSResponse("MIME_TYPE", url, MIME_TYPE, _)

-    r.encoding = "utf-8"
-    return m3u8.loads(r.text, url)
+    return m3u8.loads(r.data.decode("utf-8"), url)


-def iter_variants(protocol, program_index_url, http_session):
+def iter_variants(protocol, program_index_url, http):
     """Iterate over variants for the given rendition."""
     if protocol != "HLS_NG":
         raise UnsupportedHLSProtocol(protocol, program_index_url)

-    program_index = _fetch_index(program_index_url, http_session)
+    program_index = _fetch_index(http, program_index_url)

     audio_media = None
     subtitles_media = None
@@ -152,9 +151,9 @@ def _convert_byterange(obj):
     return offset, offset + count - 1


-def fetch_mp4_media(track_index_url, http_session):
+def fetch_mp4_media(track_index_url, http):
     """Fetch an audio or video media."""
-    track_index = _fetch_index(track_index_url, http_session)
+    track_index = _fetch_index(http, track_index_url)

     file_name = track_index.segment_map[0].uri
     start, end = _convert_byterange(track_index.segment_map[0])
@@ -180,9 +179,9 @@ def fetch_mp4_media(track_index_url, http_session):
     return track_index.segment_map[0].absolute_uri


-def fetch_vtt_media(track_index_url, http_session):
+def fetch_vtt_media(track_index_url, http):
     """Fetch an audio or video media."""
-    track_index = _fetch_index(track_index_url, http_session)
+    track_index = _fetch_index(http, track_index_url)

     urls = [s.absolute_uri for s in track_index.segments]
     if not urls:


@@ -5,7 +5,7 @@
 import json

-from .error import InvalidPage, PageNotFound, PageNotSupported
+from .error import InvalidPage, PageNotFound, PageNotSupported, HTTPError
 from .model import Program

 _DATA_MARK = '<script id="__NEXT_DATA__" type="application/json">'
@@ -94,17 +94,18 @@ def _process_collections_page(page_value):
     raise InvalidPage("COLLECTIONS_PROGRAMS_COUNT")


-def iter_programs(page_url, http_session):
+def iter_programs(page_url, http):
     """Iterate over programs listed on given ArteTV page."""
-    r = http_session.get(page_url)
+    r = http.request("GET", page_url)

     # special handling of 404
-    if r.status_code == 404:
+    if r.status == 404:
         raise PageNotFound(page_url)

-    r.raise_for_status()
+    if not 200 <= r.status < 300:
+        raise HTTPError(r)

     # no HTML parsing required, whe just find the mark
-    html = r.text
+    html = r.data.decode("utf-8")
     start = html.find(_DATA_MARK)
     if start < 0:
         raise InvalidPage("DATA_MARK_NOT_FOUND", page_url)