135 lines
4.1 KiB
Python
135 lines
4.1 KiB
Python
# License: GNU AGPL v3: http://www.gnu.org/licenses/
|
|
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
|
|
|
|
"""Provide ArteTV website utilities."""
|
|
|
|
import json
|
|
|
|
from .error import InvalidPage, PageNotFound, PageNotSupported
|
|
from .model import Program
|
|
|
|
# Opening tag of the embedded JSON <script> element; `iter_programs` searches
# the page HTML for it and parses the text between it and "</script>" as JSON.
_DATA_MARK = '<script id="__NEXT_DATA__" type="application/json">'
|
|
|
|
|
|
def _process_programs_page(page_value):
    """Yield `(Program, player_config)` pairs found in a "program" page value.

    Only zones whose code starts with ``program_content_`` are considered.
    `InvalidPage` is raised when there is not exactly one such zone, when that
    zone does not hold exactly one data item, or when an item is not of type
    ``program``.  Because this is a generator, a pair may already have been
    yielded by the time a count error is detected.
    """
    lang = page_value["language"]
    seen_zone = False
    seen_program = False

    for zone in page_value["zones"]:
        if not zone["code"].startswith("program_content_"):
            # not a zone we care about
            continue
        if seen_zone:
            raise InvalidPage("PROGRAMS_CONTENT_ZONES_COUNT")
        seen_zone = True

        for item in zone["content"]["data"]:
            if item["type"] != "program":
                raise InvalidPage("PROGRAMS_CONTENT_PROGRAM_TYPE")
            if seen_program:
                raise InvalidPage("PROGRAMS_CONTENT_PROGRAM_COUNT")
            seen_program = True

            # build the program first, then look up the player config,
            # so lookups happen in a fixed, predictable order
            program = Program(
                item["programId"],
                lang,
                item["title"],
                item["subtitle"],
            )
            yield program, item["player"]["config"]

    if not seen_zone:
        raise InvalidPage("PROGRAMS_CONTENT_ZONES_COUNT")

    if not seen_program:
        raise InvalidPage("PROGRAMS_CONTENT_PROGRAM_COUNT")
|
|
|
|
|
|
def _process_collections_page(page_value):
    """Yield `(Program, player_config_url)` pairs found in a "collection" page.

    Two kinds of zones are read, recognised by their code prefix:
    ``collection_videos_`` (the main zone) and ``collection_subcollection_``.
    Every data item in those zones must be of type ``teaser``.

    `InvalidPage` is raised when:
    - a second main zone shows up, or no main zone exists at all;
    - a main zone shows up after programs were already found, or the first
      sub-collection zone shows up after programs were already found;
    - a data item has a type other than ``teaser``;
    - no program was found.

    Because this is a generator, pairs may already have been yielded by the
    time one of these errors is detected.
    """
    language = page_value["language"]
    seen_main_zone = False
    seen_sub_zone = False
    seen_program = False

    for zone in page_value["zones"]:
        zone_code = zone["code"]

        if zone_code.startswith("collection_videos_"):
            if seen_main_zone:
                raise InvalidPage("COLLECTIONS_MAIN_ZONE_COUNT")
            if seen_program:
                raise InvalidPage("COLLECTIONS_MIXED_ZONES")
            seen_main_zone = True
        elif zone_code.startswith("collection_subcollection_"):
            # programs found before the very first sub-collection zone
            # means main and sub-collection content are mixed
            if seen_program and not seen_sub_zone:
                raise InvalidPage("COLLECTIONS_MIXED_ZONES")
            seen_sub_zone = True
        else:
            # not a zone we care about
            continue

        for data_item in zone["content"]["data"]:
            item_type = data_item["type"]
            if item_type != "teaser":
                raise InvalidPage("COLLECTIONS_INVALID_CONTENT_DATA_ITEM", item_type)
            seen_program = True

            program = Program(
                data_item["programId"],
                language,
                data_item["title"],
                data_item["subtitle"],
            )
            yield (
                program,
                f"https://api.arte.tv/api/player/v2/config/{language}/{data_item['programId']}",
            )

    if not seen_main_zone:
        raise InvalidPage("COLLECTIONS_MAIN_ZONE_COUNT")

    if not seen_program:
        raise InvalidPage("COLLECTIONS_PROGRAMS_COUNT")
|
|
|
|
|
|
def iter_programs(page_url, http_session):
|
|
"""Iterate over programs listed on given ArteTV page."""
|
|
r = http_session.get(page_url)
|
|
|
|
# special handling of 404
|
|
if r.status_code == 404:
|
|
raise PageNotFound(page_url)
|
|
r.raise_for_status()
|
|
|
|
# no HTML parsing required, whe just find the mark
|
|
html = r.text
|
|
start = html.find(_DATA_MARK)
|
|
if start < 0:
|
|
raise InvalidPage("DATA_MARK_NOT_FOUND", page_url)
|
|
start += len(_DATA_MARK)
|
|
end = html.index("</script>", start)
|
|
|
|
try:
|
|
next_js_data = json.loads(html[start:end].strip())
|
|
except json.JSONDecodeError:
|
|
raise InvalidPage("INVALID_JSON_DATA", page_url)
|
|
|
|
try:
|
|
page_value = next_js_data["props"]["pageProps"]["props"]["page"]["value"]
|
|
|
|
match page_value["type"]:
|
|
case "program":
|
|
yield from _process_programs_page(page_value)
|
|
case "collection":
|
|
yield from _process_collections_page(page_value)
|
|
case _:
|
|
raise PageNotSupported(page_url, page_value)
|
|
|
|
except (KeyError, IndexError, ValueError) as e:
|
|
raise InvalidPage("SCHEMA", page_url) from e
|
|
|
|
except InvalidPage as e:
|
|
raise InvalidPage(e.args[0], page_url) from e
|