88 lines
2.5 KiB
Python
88 lines
2.5 KiB
Python
# License: GNU AGPL v3: http://www.gnu.org/licenses/
|
|
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
|
|
|
|
"""Provide ArteTV website utilities."""
|
|
|
|
import json
|
|
from contextlib import contextmanager
|
|
|
|
from .error import InvalidPage, PageNotFound, PageNotSupported
|
|
from .model import ProgramMeta
|
|
|
|
# Opening tag of the <script> element that `fetch_program` searches for in the
# page HTML; the JSON payload it decodes starts right after this tag.
_DATA_MARK = '<script id="__NEXT_DATA__" type="application/json">'
|
|
|
|
|
|
@contextmanager
def _schema_guard(*context):
    """Turn lookup failures raised in the managed block into `InvalidPage`.

    Any `KeyError`, `IndexError` or `ValueError` escaping the ``with`` body is
    replaced by ``InvalidPage("SCHEMA", *context, error)``, where ``error`` is
    the original exception. Every other exception propagates untouched.

    Args:
        *context: extra values placed in the error arguments, between the
            ``"SCHEMA"`` code and the original exception.

    Raises:
        InvalidPage: when the managed block raises one of the lookup errors
            listed above.
    """
    try:
        yield
    except (KeyError, IndexError, ValueError) as e:
        # Explicit chaining: report the lookup failure as the direct cause of
        # the InvalidPage instead of as an error that happened while handling.
        raise InvalidPage("SCHEMA", *context, e) from e
|
|
|
|
|
|
def _process_programs_page(page_value):
    """Return the single program described by a "programs" page value.

    Zones whose ``code`` starts with ``program_content_`` are collected first;
    a `ProgramMeta` is then built for every item of type ``program`` found in
    the ``content.data`` of those zones.

    Args:
        page_value: decoded page data, read by key (``language``, ``zones``).

    Returns:
        The only `ProgramMeta` found on the page.

    Raises:
        InvalidPage: with code ``SCHEMA`` when a lookup fails (raised through
            `_schema_guard`), ``PROGRAMS_CONTENT_ZONES_COUNT`` when there is
            not exactly one content zone, or ``PROGRAMS_PROGRAMS_COUNT`` when
            there is not exactly one program.
    """
    with _schema_guard():
        language = page_value["language"]

        # first pass: keep only the zones holding program content
        program_zones = []
        for candidate in page_value["zones"]:
            if candidate["code"].startswith("program_content_"):
                program_zones.append(candidate)

        # second pass: gather every program item of the retained zones
        found = []
        for program_zone in program_zones:
            for item in program_zone["content"]["data"]:
                if item["type"] != "program":
                    continue
                found.append(
                    ProgramMeta(
                        language,
                        item["programId"],
                        item["title"],
                        item["subtitle"],
                    )
                )

    # the zone count is checked before the program count
    if len(program_zones) != 1:
        raise InvalidPage("PROGRAMS_CONTENT_ZONES_COUNT")

    if len(found) != 1:
        raise InvalidPage("PROGRAMS_PROGRAMS_COUNT")

    return found[0]
|
|
|
|
|
|
def fetch_program(http_session, url):
|
|
"""Load the ArteTV page at given URL and return list of programs on it."""
|
|
r = http_session.get(url)
|
|
|
|
# special handling of 404
|
|
if r.status_code == 404:
|
|
raise PageNotFound(url)
|
|
|
|
# other network errors
|
|
r.raise_for_status()
|
|
|
|
# no HTML parsing required, whe just find the mark
|
|
html = r.text
|
|
start = html.find(_DATA_MARK)
|
|
if start < 0:
|
|
raise InvalidPage("DATA_MARK_NOT_FOUND", url)
|
|
start += len(_DATA_MARK)
|
|
end = html.index("</script>", start)
|
|
|
|
try:
|
|
next_js_data = json.loads(html[start:end].strip())
|
|
except json.JSONDecodeError:
|
|
raise InvalidPage("INVALID_JSON_DATA", url)
|
|
|
|
with _schema_guard(url):
|
|
initial_page_value = next_js_data["props"]["pageProps"]["initialPage"]["value"]
|
|
initial_type = next_js_data["props"]["pageProps"]["initialType"]
|
|
|
|
try:
|
|
match initial_type:
|
|
case "programs":
|
|
return _process_programs_page(initial_page_value)
|
|
case _:
|
|
raise PageNotSupported("TYPE_NOT_SUPPORTED", url, initial_type)
|
|
except InvalidPage as e:
|
|
raise InvalidPage(e.args[0], url, *e.args[1:])
|