pospell/pospell.py

125 lines
3.7 KiB
Python
Raw Normal View History

2018-07-27 13:49:58 +00:00
"""pospell is a spellchecker for po files containing reStructuredText.
"""
2018-07-23 15:37:50 +00:00
2018-07-27 09:38:17 +00:00
import re
2018-07-23 15:37:50 +00:00
import subprocess
2018-07-27 09:38:17 +00:00
import tempfile
from itertools import chain
2018-07-23 15:37:50 +00:00
from pathlib import Path
2018-07-27 09:38:17 +00:00
2018-07-23 15:37:50 +00:00
import polib
def strip_rst(line):
    """Remove reStructuredText and Sphinx-doc markup from a line.

    Every span matched by one of the alternatives in the pattern below
    (roles such as ``:role:`target```, inline literals, capitalized words,
    ``{tag}`` and ``|substitution|`` markers, ``%(name)s`` variables,
    single-letter ``-x`` flags, hyperlink references and ``*emphasis*``)
    is deleted; the rest of the line is returned untouched.
    """
    markup = re.compile(
        r"""(C-)?:[^:]*?:`[^`]*?` |
            ``.*?`` |
            \b[A-Z][a-zA-Z-]{2,}[a-zA-Z.-]*\b |  # Strip capitalized words and accronyms
            {[a-z]*?} |  # reStructuredText tag
            \|[a-z]+?\| |  # reStructuredText substitution
            %\([a-z_]+?\)s |  # Sphinx variable
            -[A-Za-z]\b |
            `[^`]*?`_ |
            \*[^*]*?\*
        """,
        re.VERBOSE,
    )
    return markup.sub("", line)
2018-07-23 15:37:50 +00:00
def clear(line):
    """Clear various other syntaxes we may encounter in a line.

    Currently this unwraps HTML anchors: ``<a href="...">text</a>`` is
    replaced by ``text``; anything else is returned unchanged.

    The link text is matched lazily so that each anchor is unwrapped on its
    own. A greedy match would span from the first opening tag to the last
    closing tag on the line and leave the tags in between in the output.
    """
    return re.sub(r"""<a href="[^"]*?">(.*?)</a>""", r"\1", line)
def po_to_text(po_path):
    """Convert a po file to text, keeping translations near their line.

    The msgids and all po syntax are dropped; only the msgstr of each entry
    is kept, cleaned by ``strip_rst`` and ``clear``. Entries whose msgstr is
    identical to their msgid are skipped. Blank lines are inserted before
    each kept msgstr so that it lands just after ``entry.linenum`` lines of
    output, which lets line numbers reported on the text be related back to
    the po file.

    Returns the resulting text as a single newline-joined string.
    """
    buffer = []
    # Number of text lines emitted so far; must match what "\n".join(buffer)
    # will actually produce, otherwise later entries are shifted.
    lines = 0
    entries = polib.pofile(po_path)
    for entry in entries:
        if entry.msgid == entry.msgstr:
            continue
        # Pad with blank lines up to the entry's position in the po file.
        while lines < entry.linenum:
            buffer.append("")
            lines += 1
        text = clear(strip_rst(entry.msgstr))
        buffer.append(text)
        # A msgstr may itself contain newlines: count every line it adds so
        # the padding of the following entries does not drift.
        lines += text.count("\n") + 1
    return "\n".join(buffer)
2018-07-23 15:37:50 +00:00
def main():
    """Module entry point.

    Parse the command line, convert every requested po file to text with
    ``po_to_text`` into a temporary directory, run ``hunspell`` on it and
    print one ``file:line:word`` line per misspelling reported.

    With ``--debug`` the converted text is printed instead and hunspell is
    not run.

    Exits with status 0 when no misspelling was reported, -1 otherwise.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Check spelling in po files containing restructuredText."
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="fr",
        help="Language to check, you'll have to install the corresponding "
        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
    )
    parser.add_argument(
        "--glob",
        type=str,
        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
        "like --glob '**/*.po'.",
    )
    parser.add_argument(
        "po_file",
        nargs="*",
        type=Path,
        help="Files to check, can optionally be mixed with --glob, or not, "
        "use the one that fit your needs.",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-p", "--personal-dict", type=str)
    args = parser.parse_args()
    personal_dict = ["-p", args.personal_dict] if args.personal_dict else []

    # Shape of the hunspell output lines that report a misspelled word;
    # compiled once instead of on every output line.
    report_line = re.compile(
        r"(?P<path>.*):(?P<line>[0-9]+): Locate: (?P<error>.*) \| Try: .*$"
    )
    errors = 0
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        # Files found through --glob come first, then the explicit ones.
        for po_file in chain(
            Path(".").glob(args.glob) if args.glob else [], args.po_file
        ):
            if args.debug:
                print(po_to_text(str(po_file)))
                continue
            # NOTE(review): only the base name is kept, so two inputs named
            # alike in different directories are indistinguishable in the
            # report printed below.
            text_file = tmpdir / po_file.name
            text_file.write_text(po_to_text(str(po_file)))
            output = subprocess.check_output(
                ["hunspell", "-d", args.language]
                + personal_dict
                + ["-u3", str(text_file)],
                universal_newlines=True,
            )
            for line in output.split("\n"):
                match = report_line.match(line)
                if match:
                    errors += 1
                    print(
                        # Drop the temporary directory prefix from the path.
                        match.group("path").replace(str(tmpdir), "").lstrip("/"),
                        match.group("line"),
                        match.group("error"),
                        sep=":",
                    )
    # sys.exit rather than the builtin exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(0 if errors == 0 else -1)
2018-07-23 15:37:50 +00:00
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()