Refactor website URL parsing
Lighter implementation that uses `target_id` instead of `program_id`, in preparation for #7
This commit is contained in:
parent
4c518993ef
commit
305d8ab679
|
@ -3,9 +3,9 @@
|
|||
|
||||
"""Provide ArteTV website utilities."""
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from . import common
|
||||
|
||||
BASE = "https://www.arte.tv/"
|
||||
LANGUAGES = ["fr", "de", "en", "es", "pl", "it"]
|
||||
|
||||
|
||||
|
@ -13,26 +13,21 @@ class InvalidUrl(common.Error):
|
|||
"""Invalid ArteTV URL."""
|
||||
|
||||
|
||||
def parse_url(program_page_url):
|
||||
"""Parse ArteTV web URL into UI language and program ID."""
|
||||
try:
|
||||
url = urlparse(program_page_url)
|
||||
except ValueError:
|
||||
raise InvalidUrl("URL_PARSE", program_page_url, url.hostname)
|
||||
def parse_url(url):
|
||||
"""Parse ArteTV web URL into web UI language and target ID."""
|
||||
if not url.startswith(BASE):
|
||||
raise InvalidUrl("BASE", url)
|
||||
|
||||
if url.hostname != "www.arte.tv":
|
||||
raise InvalidUrl("HOST_NAME", program_page_url, url.hostname)
|
||||
path = url[len(BASE) :].split("/")
|
||||
|
||||
program_page_path = url.path.split("/")[1:]
|
||||
www_lang = path.pop(0)
|
||||
|
||||
lang = program_page_path.pop(0)
|
||||
if www_lang not in LANGUAGES:
|
||||
raise InvalidUrl("WWW_LANG", url, www_lang)
|
||||
|
||||
if lang not in LANGUAGES:
|
||||
raise InvalidUrl("WWW_LANGUAGE", program_page_url, lang)
|
||||
if (_ := path.pop(0)) != "videos":
|
||||
raise InvalidUrl("PATH", url, _)
|
||||
|
||||
if program_page_path.pop(0) != "videos":
|
||||
raise InvalidUrl("PATH", program_page_url, program_page_path)
|
||||
target_id = path.pop(0)
|
||||
|
||||
program_id = program_page_path.pop(0)
|
||||
|
||||
return lang, program_id
|
||||
return www_lang, target_id
|
||||
|
|
Loading…
Reference in New Issue