Remove the dependency on `webvtt-py`, which was both too much and not enough for our use case. Implement a basic WebVTT-to-SRT converter based on ArteTV's usage of WebVTT features.
This commit is contained in:
parent
8d216215dd
commit
96f411cca0
|
@ -143,7 +143,7 @@ So we can be more granular about _renditions_ and _variants_ that we want.
|
|||
|
||||
### Why not use `VTT` subtitles directly ?
|
||||
|
||||
Because it fails 😒.
|
||||
Because FFMPEG does not support styles in WebVTT 😒.
|
||||
|
||||
### Why not use FFMPEG directly with the _media playlist_ URLs and let it do the download ?
|
||||
|
||||
|
@ -153,7 +153,6 @@ Because some programs would randomly fail 😒. Probably due to invalid _segment
|
|||
## 📌 Dependencies
|
||||
|
||||
- [m3u8](https://pypi.org/project/m3u8/) to parse playlists.
|
||||
- [webvtt-py](https://pypi.org/project/webvtt-py/) to load `vtt` subtitles files.
|
||||
- [requests](https://pypi.org/project/requests/) to handle HTTP traffic.
|
||||
|
||||
## 🤝 Help
|
||||
|
|
|
@ -11,7 +11,6 @@ classifiers = ["License :: OSI Approved :: GNU Affero General Public License v3"
|
|||
dynamic = ["version", "description"]
|
||||
dependencies = [
|
||||
"m3u8",
|
||||
"webvtt-py",
|
||||
"requests",
|
||||
"docopt-ng"
|
||||
]
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -58,15 +58,12 @@
|
|||
# preferences.
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
import m3u8
|
||||
import webvtt
|
||||
|
||||
from . import error, model
|
||||
from . import error, model, subtitles
|
||||
|
||||
#
|
||||
# WARNING !
|
||||
|
@ -279,24 +276,13 @@ def _download_subtitles_media(http_session, media_playlist_url, progress):
|
|||
progress(0, 2)
|
||||
r = http_session.get(url)
|
||||
r.raise_for_status()
|
||||
|
||||
buffer = io.StringIO(r.text)
|
||||
r.encoding = "utf-8"
|
||||
progress(1, 2)
|
||||
|
||||
with NamedTemporaryFile(
|
||||
"w", delete=False, prefix="delarte.", suffix=".srt", encoding="utf8"
|
||||
) as f:
|
||||
i = 1
|
||||
for caption in webvtt.read_buffer(buffer):
|
||||
print(i, file=f)
|
||||
print(
|
||||
re.sub(r"\.", ",", caption.start)
|
||||
+ " --> "
|
||||
+ re.sub(r"\.", ",", caption.end),
|
||||
file=f,
|
||||
)
|
||||
print(caption.text + "\n", file=f)
|
||||
i += 1
|
||||
subtitles.convert(r.text, f)
|
||||
progress(2, 2)
|
||||
return f.name
|
||||
|
||||
|
@ -313,14 +299,6 @@ def download_source(http_session, source, progress):
|
|||
subtitles_filename = None
|
||||
|
||||
try:
|
||||
video_filename = _download_av_media(
|
||||
http_session, source.video, lambda i, n: progress("video", i, n)
|
||||
)
|
||||
|
||||
audio_filename = _download_av_media(
|
||||
http_session, source.audio, lambda i, n: progress("audio", i, n)
|
||||
)
|
||||
|
||||
subtitles_filename = (
|
||||
_download_subtitles_media(
|
||||
http_session,
|
||||
|
@ -331,6 +309,14 @@ def download_source(http_session, source, progress):
|
|||
else None
|
||||
)
|
||||
|
||||
video_filename = _download_av_media(
|
||||
http_session, source.video, lambda i, n: progress("video", i, n)
|
||||
)
|
||||
|
||||
audio_filename = _download_av_media(
|
||||
http_session, source.audio, lambda i, n: progress("audio", i, n)
|
||||
)
|
||||
|
||||
yield model.Source(
|
||||
source.metadata,
|
||||
source.rendition,
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
# License: GNU AGPL v3: http://www.gnu.org/licenses/
|
||||
# This file is part of `delarte` (https://git.afpy.org/fcode/delarte.git)
|
||||
|
||||
"""Provide WebVTT to SRT subtitles conversion."""
|
||||
|
||||
import re
|
||||
|
||||
from . import error
|
||||
|
||||
|
||||
class Error(error.UnexpectedError):
    """Signal WebVTT data that the converter does not know how to handle.

    Raised by this module with a short code string as its argument
    (for instance ``"INVALID_HEADER"``).
    """
|
||||
|
||||
|
||||
# Cue timing line, e.g. `00:01:02.345 --> 00:01:04.000 line:90%`.
# Groups: 1 = start clock, 2 = start milliseconds, 3 = end clock,
# 4 = end milliseconds. WebVTT allows the hours field to be omitted
# (`mm:ss.ttt`), hence the optional leading `hh:` part.
RE_CUE_START = (
    r"^((?:\d{2,}:)?\d\d:\d\d)\.(\d\d\d) --> ((?:\d{2,}:)?\d\d:\d\d)\.(\d\d\d)"
)
# Whole-line class span as used for colored text, e.g.
# `<c.yellow.bg_black>text</c>`. Groups: 1 = first class (used as the color
# name), 2 = enclosed text.
RE_STYLED_CUE = r"^<c\.(\w+)\.bg_(?:\w+)>(.*)</c>$"


def _srt_timestamp(clock, millis):
    """Return an SRT timestamp, adding the hours field if WebVTT omitted it."""
    if clock.count(":") == 1:
        clock = f"00:{clock}"
    return f"{clock},{millis}"


def convert(input, output):
    """Convert input ArteTV's WebVTT string data and write it on output file.

    Parameters:
        input: the whole WebVTT document, as a string.
        output: a writable text file object receiving the SRT data.

    Raises:
        Error("INVALID_DATA"): the input contains no block at all.
        Error("INVALID_HEADER"): the first block is not a lone `WEBVTT` line.
        Error("EMPTY_DATA"): no cue was found after the header.
    """
    # This is a very (very) simple implementation based on what has actually
    # been seen on ArteTV and is not at all a generic WebVTT solution.

    # Split the document into blocks of consecutive non-empty lines. Empty
    # lines are separators only: they never end up inside a block, so several
    # blank lines in a row cannot corrupt the block that follows them.
    blocks = []
    block = []

    # A WebVTT file may start with a byte order mark; drop it so that it does
    # not hide the `WEBVTT` signature.
    for line in input.lstrip("\ufeff").splitlines():
        if line:
            block.append(line)
        elif block:
            blocks.append(block)
            block = []
    if block:
        blocks.append(block)

    if not blocks:
        raise Error("INVALID_DATA")

    header = blocks.pop(0)
    if not (len(header) == 1 and header[0].startswith("WEBVTT")):
        raise Error("INVALID_HEADER")

    counter = 1
    for block in blocks:
        timing, *payload = block
        m = re.match(RE_CUE_START, timing)
        if not m and payload:
            # The first line may be an optional cue identifier, in which case
            # the timing line comes second.
            timing, *payload = payload
            m = re.match(RE_CUE_START, timing)
        if not m:
            # Not a cue (e.g. a NOTE or STYLE block): nothing to convert.
            continue

        print(f"{counter}", file=output)
        start = _srt_timestamp(m[1], m[2])
        end = _srt_timestamp(m[3], m[4])
        print(f"{start} --> {end}", file=output)

        for line in payload:
            if styled := re.match(RE_STYLED_CUE, line):
                # SRT has no classes; keep the color through a <font> tag.
                print(
                    f'<font color="{styled[1]}">{styled[2]}</font>',
                    file=output,
                )
            else:
                print(line, file=output)

        # An empty line terminates each SRT entry.
        print("", file=output)
        counter += 1

    if counter == 1:
        raise Error("EMPTY_DATA")
|
Loading…
Reference in New Issue