From 305d8ab679419c75548237e356901a4561cb23f2 Mon Sep 17 00:00:00 2001 From: Barbagus Date: Tue, 27 Dec 2022 07:52:35 +0100 Subject: [PATCH] Refactor website URL parsing Lighter implementation and using `target_id` instead of `program_id`, preparing for #7 --- src/delarte/www.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/delarte/www.py b/src/delarte/www.py index 67a3273..53d3ace 100644 --- a/src/delarte/www.py +++ b/src/delarte/www.py @@ -3,9 +3,9 @@ """Provide ArteTV website utilities.""" -from urllib.parse import urlparse from . import common +BASE = "https://www.arte.tv/" LANGUAGES = ["fr", "de", "en", "es", "pl", "it"] @@ -13,26 +13,21 @@ class InvalidUrl(common.Error): """Invalid ArteTV URL.""" -def parse_url(program_page_url): - """Parse ArteTV web URL into UI language and program ID.""" - try: - url = urlparse(program_page_url) - except ValueError: - raise InvalidUrl("URL_PARSE", program_page_url, url.hostname) +def parse_url(url): + """Parse ArteTV web URL into web UI language and target ID.""" + if not url.startswith(BASE): + raise InvalidUrl("BASE", url) - if url.hostname != "www.arte.tv": - raise InvalidUrl("HOST_NAME", program_page_url, url.hostname) + path = url[len(BASE) :].split("/") - program_page_path = url.path.split("/")[1:] + www_lang = path.pop(0) - lang = program_page_path.pop(0) + if www_lang not in LANGUAGES: + raise InvalidUrl("WWW_LANG", url, www_lang) - if lang not in LANGUAGES: - raise InvalidUrl("WWW_LANGUAGE", program_page_url, lang) + if (_ := path.pop(0)) != "videos": + raise InvalidUrl("PATH", url, _) - if program_page_path.pop(0) != "videos": - raise InvalidUrl("PATH", program_page_url, program_page_path) + target_id = path.pop(0) - program_id = program_page_path.pop(0) - - return lang, program_id + return www_lang, target_id