# delarte/src/delarte/subtitles.py
#
# 54 lines
# 1.6 KiB
# Python

# License: GNU AGPL v3: http://www.gnu.org/licenses/
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
"""Provide WebVTT to SRT subtitles conversion."""
import re
from .error import WebVTTError
# Start of a cue timing line, e.g. "00:00:01.000 --> 00:00:02.500".
# Groups 1 and 2 capture the start time as "hh:mm:ss" and its milliseconds,
# groups 3 and 4 capture the same two parts of the end time.
# NOTE(review): the "(?:\d\d:)" group has no quantifier, so the hours field is
# mandatory; timing lines written as "mm:ss.ttt" do not match - confirm that
# this is intended.
RE_CUE_START = r"^((?:\d\d:)\d\d:\d\d)\.(\d\d\d) --> ((?:\d\d:)\d\d:\d\d)\.(\d\d\d)"
# A payload line wrapped as a whole in a "<c.NAME.bg_NAME>...</c>" span.
# Group 1 captures the first class name (convert() emits it as the font color),
# group 2 captures the wrapped text; the "bg_" class name is not captured.
RE_STYLED_CUE = r"^<c\.(\w+)\.bg_(?:\w+)>(.*)</c>$"
def convert(input, output):
    """Convert ArteTV's WebVTT string data to SRT and write it on output.

    This is a very (very) simple implementation based on what has actually
    been seen on ArteTV and is not at all a generic WebVTT solution.

    The input is split into blocks of consecutive non-empty lines. The first
    block must be the one-line "WEBVTT" header. Every following block whose
    first line matches RE_CUE_START is written as a numbered SRT cue; any
    other block is silently skipped.

    Args:
        input: the whole WebVTT document, as a string.
        output: text file object the SRT data is printed to.

    Raises:
        WebVTTError: with "INVALID_DATA" when input holds no non-empty line,
            "INVALID_HEADER" when the first block is not a single line
            starting with "WEBVTT", or "EMPTY_DATA" when no cue was written.
    """
    # Empty lines are block separators and nothing else. They must never be
    # stored in a block: a leading empty line would otherwise become part of
    # the header block, and repeated empty lines between two cues would put an
    # empty string in front of the timing line and make the cue unrecognized.
    blocks = []
    current = []
    for line in input.splitlines():
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    if not blocks:
        raise WebVTTError("INVALID_DATA")

    header, *cue_blocks = blocks
    if not (len(header) == 1 and header[0].startswith("WEBVTT")):
        raise WebVTTError("INVALID_HEADER")

    cue_count = 0
    for timing_line, *payload in cue_blocks:
        timing = re.match(RE_CUE_START, timing_line)
        if not timing:
            # Not a cue in the form we know how to handle: ignore the block.
            continue

        cue_count += 1
        print(f"{cue_count}", file=output)
        # SRT uses a comma as the milliseconds separator where WebVTT has a dot.
        print(
            f"{timing[1]},{timing[2]} --> {timing[3]},{timing[4]}",
            file=output,
        )
        for text in payload:
            if styled := re.match(RE_STYLED_CUE, text):
                # Turn the WebVTT color class into an SRT <font> tag.
                print(
                    f'<font color="{styled[1]}">{styled[2]}</font>',
                    file=output,
                )
            else:
                print(text, file=output)
        # An empty line terminates each SRT cue.
        print("", file=output)

    if cue_count == 0:
        raise WebVTTError("EMPTY_DATA")