2018-07-27 13:49:58 +00:00
|
|
|
"""pospell is a spellcheckers for po files containing reStructuedText.
|
|
|
|
"""
|
2018-07-23 15:37:50 +00:00
|
|
|
|
2018-07-27 09:38:17 +00:00
|
|
|
import re
|
2018-07-23 15:37:50 +00:00
|
|
|
import subprocess
|
2018-07-27 09:38:17 +00:00
|
|
|
import tempfile
|
2018-07-27 13:54:10 +00:00
|
|
|
from itertools import chain
|
2018-07-23 15:37:50 +00:00
|
|
|
from pathlib import Path
|
2018-07-27 09:38:17 +00:00
|
|
|
|
2018-07-23 15:37:50 +00:00
|
|
|
import polib
|
|
|
|
|
|
|
|
|
|
|
|
# Every kind of markup that must not reach the spell checker, as one verbose
# alternation.  Compiled once at import time and reused for every line.
_RST_MARKUP = re.compile(
    r"""(C-)?:[^:]*?:`[^`]*?` |
    ``.*?`` |
    \b[A-Z][a-zA-Z-]{2,}[a-zA-Z.-]*\b | # Strip capitalized words and accronyms
    {[a-z]*?} | # reStructuredText tag
    \|[a-z]+?\| | # reStructuredText substitution
    %\([a-z_]+?\)s | # Sphinx variable
    -[A-Za-z]\b |
    `[^`]*?`_ |
    \*[^*]*?\*
    """,
    re.VERBOSE,
)


def strip_rst(line):
    """Return *line* with reStructuredText and Sphinx-doc markup removed.

    Each match is deleted together with the text it wraps: roles such as
    ``:func:`print```, double-backquoted literals, capitalized words and
    acronyms, ``{tag}`` placeholders, ``|substitution|`` references,
    ``%(name)s`` variables, single-letter ``-x`` options, ```link`_``
    references and ``*emphasis*``.  Surrounding whitespace is left untouched.
    """
    return _RST_MARKUP.sub("", line)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
2018-07-27 09:01:09 +00:00
|
|
|
def clear(line):
    """Clear various other syntaxes we may encounter in a line.

    Currently this unwraps HTML anchors: ``<a href="...">text</a>`` is
    replaced by ``text`` so that only the visible link text is spell
    checked.

    The captured group is non-greedy so that each anchor on a line is
    unwrapped on its own; a greedy group would run from the first opening
    tag to the last closing tag and leave the markup in between untouched.
    """
    return re.sub(r"""<a href="[^"]*?">(.*?)</a>""", r"\1", line)
|
|
|
|
|
|
|
|
|
2018-07-27 13:54:10 +00:00
|
|
|
def po_to_text(po_path):
    """Turn the po file at *po_path* into plain text to be spell checked.

    Only the translations (msgstr) are kept, after being passed through
    ``strip_rst`` and ``clear``.  Entries whose msgstr equals their msgid
    are skipped.  Blank lines are inserted in front of each kept entry so
    that it sits at index ``entry.linenum`` of the output, which keeps line
    numbers in the text aligned with the po file.

    NOTE(review): an entry is stored as a single item even when its msgstr
    contains newlines, so following entries may be shifted -- confirm
    whether that matters for the inputs in use.
    """
    text_lines = []
    for entry in polib.pofile(po_path):
        if entry.msgstr == entry.msgid:
            continue
        # Pad with empty lines up to the entry's own line number.
        missing = entry.linenum - len(text_lines)
        if missing > 0:
            text_lines.extend([""] * missing)
        text_lines.append(clear(strip_rst(entry.msgstr)))
    return "\n".join(text_lines)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Module entry point.

    Parses the command line, converts every requested po file to plain
    text with ``po_to_text``, runs ``hunspell`` on it and prints one
    ``path:line:word`` record per misspelling found.

    With ``--debug`` the extracted text is printed instead and hunspell is
    not run.

    Misspellings are reported against the po file path as it was given (or
    globbed), not against the temporary copy, so that files sharing a name
    in different directories can be told apart.

    Exits through ``sys.exit`` with status 0 when no error was found and
    -1 otherwise.  ``sys.exit`` is used rather than the ``exit`` builtin
    because the latter is only injected by the ``site`` module.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Check spelling in po files containing restructuredText."
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="fr",
        help="Language to check, you'll have to install the corresponding "
        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
    )
    parser.add_argument(
        "--glob",
        type=str,
        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
        "like --glob '**/*.po'.",
    )
    parser.add_argument(
        "po_file",
        nargs="*",
        type=Path,
        help="Files to check, can optionally be mixed with --glob, or not, "
        "use the one that fit your needs.",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-p", "--personal-dict", type=str)
    args = parser.parse_args()
    personal_dict = ["-p", args.personal_dict] if args.personal_dict else []
    # Shape of one hunspell "-u3" report line; compiled once for all files.
    report_line = re.compile(
        r"(?P<path>.*):(?P<line>[0-9]+): Locate: (?P<error>.*) \| Try: .*$"
    )
    errors = 0
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        for po_file in chain(
            Path(".").glob(args.glob) if args.glob else [], args.po_file
        ):
            if args.debug:
                print(po_to_text(str(po_file)))
                continue
            # hunspell works on files, so write the extracted text to a
            # temporary file named after the po file.
            text_file = tmpdir / po_file.name
            text_file.write_text(po_to_text(str(po_file)))
            output = subprocess.check_output(
                ["hunspell", "-d", args.language]
                + personal_dict
                + ["-u3", str(text_file)],
                universal_newlines=True,
            )
            for line in output.split("\n"):
                match = report_line.match(line)
                if match:
                    errors += 1
                    # Report the real po file, not the temporary copy.
                    print(
                        po_file,
                        match.group("line"),
                        match.group("error"),
                        sep=":",
                    )
    sys.exit(0 if errors == 0 else -1)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
2018-07-27 13:54:10 +00:00
|
|
|
if __name__ == "__main__":
|
2018-07-23 15:37:50 +00:00
|
|
|
main()
|