2020-11-23 13:26:34 +00:00
|
|
|
|
"""pospell is a spellcheckers for po files containing reStructuedText."""
|
2018-07-27 19:57:44 +00:00
|
|
|
|
import io
|
2020-10-11 21:00:30 +00:00
|
|
|
|
from string import digits
|
|
|
|
|
from unicodedata import category
|
2018-07-31 22:20:03 +00:00
|
|
|
|
import logging
|
2018-07-23 15:37:50 +00:00
|
|
|
|
import subprocess
|
2018-07-28 22:58:20 +00:00
|
|
|
|
import sys
|
2020-11-23 13:26:34 +00:00
|
|
|
|
from typing import Dict
|
2019-08-20 14:38:03 +00:00
|
|
|
|
from contextlib import redirect_stderr
|
2018-07-27 13:54:10 +00:00
|
|
|
|
from itertools import chain
|
2018-07-23 15:37:50 +00:00
|
|
|
|
from pathlib import Path
|
2019-12-10 14:10:17 +00:00
|
|
|
|
from shutil import which
|
2018-07-27 09:38:17 +00:00
|
|
|
|
|
2018-07-27 19:57:44 +00:00
|
|
|
|
import docutils.frontend
|
|
|
|
|
import docutils.nodes
|
|
|
|
|
import docutils.parsers.rst
|
2018-07-28 22:58:20 +00:00
|
|
|
|
import polib
|
2018-07-27 19:57:44 +00:00
|
|
|
|
from docutils.parsers.rst import roles
|
|
|
|
|
from docutils.utils import new_document
|
|
|
|
|
|
2018-07-31 22:20:03 +00:00
|
|
|
|
import regex
|
|
|
|
|
|
2020-10-13 22:44:09 +00:00
|
|
|
|
# Version of pospell, shown by the --version command line option.
__version__ = "1.0.11"
|
2019-10-16 14:55:46 +00:00
|
|
|
|
|
|
|
|
|
# Languages for which capitalized words are dropped by default; main() falls
# back to False for any language not listed here.
DEFAULT_DROP_CAPITALIZED = {"fr": True, "fr_FR": True}
|
|
|
|
|
|
2020-07-01 15:35:13 +00:00
|
|
|
|
|
2020-10-13 22:44:05 +00:00
|
|
|
|
class POSpellException(Exception):
    """Base class of all exceptions raised by this module.

    main() catches it, prints the message on stderr and exits.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Unreachable(POSpellException):
    """The code encountered a state that should be unreachable."""
|
2020-10-13 22:44:05 +00:00
|
|
|
|
|
|
|
|
|
|
2018-07-28 22:58:20 +00:00
|
|
|
|
# Probe hunspell once at import time: keep the first line of its version
# output for --version, and stop right away when the binary is missing.
try:
    HUNSPELL_VERSION = subprocess.check_output(
        ["hunspell", "--version"],
        universal_newlines=True,
    ).partition("\n")[0]
except FileNotFoundError:
    print("hunspell not found, please install hunspell.", file=sys.stderr)
    sys.exit(1)
|
2018-07-28 22:58:20 +00:00
|
|
|
|
|
2018-07-27 19:57:44 +00:00
|
|
|
|
|
|
|
|
|
class DummyNodeClass(docutils.nodes.Inline, docutils.nodes.TextElement):
    """Used to represent any unknown roles, so we can parse any rst blindly.

    It is registered as a generic role by the wrapper built in
    monkey_patch_role, and is listed in NodeToTextVisitor.IGNORE_LIST so
    its content is not spell checked.
    """
|
2018-07-27 19:57:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def monkey_patch_role(role):
    """Wrap a role lookup function so it always finds a role.

    *role* is called as role(role_name, language_module, lineno, reporter)
    and must return a (role, messages) pair.  When the returned role is
    None, role_name is registered as a generic role backed by
    DummyNodeClass and the lookup is done a second time.

    Returns the wrapping function.
    """

    def role_or_generic(role_name, language_module, lineno, reporter):
        lookup = (role_name, language_module, lineno, reporter)
        found, messages = role(*lookup)
        if found is not None:
            return found, messages
        # Unknown role: register a catch-all for it, then look it up again.
        roles.register_generic_role(role_name, DummyNodeClass)
        found, messages = role(*lookup)
        return found, messages

    return role_or_generic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Replace docutils' role lookup with the wrapper, so lookups made through
# roles.role fall back to DummyNodeClass for unknown roles.
roles.role = monkey_patch_role(roles.role)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class NodeToTextVisitor(docutils.nodes.NodeVisitor):
    """Collect the text of a docutils tree as a Python string.

    Usage:

    >>> visitor = NodeToTextVisitor(document)
    >>> document.walk(visitor)
    >>> print(str(visitor))

    Nodes named in IGNORE_LIST are skipped together with their children,
    as we don't want them in hunspell (emphasis typically contains proper
    names that are unknown to dictionaries).
    """

    # NOTE: "Text" is listed, but visit_Text is defined below as a regular
    # method, so __getattr__ is never consulted for it and text is kept.
    IGNORE_LIST = (
        "emphasis",
        "superscript",
        "title_reference",
        "strong",
        "DummyNodeClass",
        "reference",
        "literal",
        "Text",
    )

    def __init__(self, document):
        """Start with an empty list of collected text fragments."""
        self.output = []
        super().__init__(document)

    def unknown_visit(self, node):
        """Mandatory implementation to visit unknown nodes: do nothing."""

    @staticmethod
    def ignore(node):
        """Raise SkipChildren, whatever the node is.

        This is what __getattr__ hands out as the visit_* method of
        every node type named in IGNORE_LIST.
        """
        raise docutils.nodes.SkipChildren

    def __getattr__(self, name):
        """Provide visit_<name> for each <name> of the IGNORE_LIST.

        Only called when regular attribute lookup failed; any other name
        raises AttributeError.
        """
        prefix = "visit_"
        if not name.startswith(prefix):
            raise AttributeError(name)
        if name[len(prefix):] not in self.IGNORE_LIST:
            raise AttributeError(name)
        return self.ignore

    def visit_Text(self, node):
        """Keep the raw source of text nodes: this is what gets spell checked."""
        self.output.append(node.rawsource)

    def __str__(self):
        """Return the collected fragments, separated by single spaces."""
        return " ".join(self.output)
|
|
|
|
|
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
|
def strip_rst(line):
    """Transform reStructuredText to plain text.

    The text is parsed with docutils, then flattened by a
    NodeToTextVisitor.  Anything docutils writes on stderr while parsing
    is captured and printed, followed by the text being parsed.
    """
    # A trailing "::" would cause "Literal block expected": drop it.
    text = line[:-2] if line.endswith("::") else line
    rst_parser = docutils.parsers.rst.Parser()
    # Settings are given by hand rather than through an option parser;
    # presumably these are the ones docutils needs to parse a snippet.
    option_values = {
        "report_level": 2,
        "halt_level": 4,
        "exit_status_level": 5,
        "debug": None,
        "warning_stream": None,
        "error_encoding": "utf-8",
        "error_encoding_error_handler": "backslashreplace",
        "language_code": "en",
        "id_prefix": "",
        "auto_id_prefix": "id",
        "pep_references": None,
        "pep_base_url": "http://www.python.org/dev/peps/",
        "pep_file_url_template": "pep-%04d",
        "rfc_references": None,
        "rfc_base_url": "http://tools.ietf.org/html/",
        "tab_width": 8,
        "trim_footnote_reference_space": None,
        "syntax_highlight": "long",
    }
    settings = docutils.frontend.Values(option_values)
    captured_stderr = io.StringIO()
    with redirect_stderr(captured_stderr):
        document = new_document("<rst-doc>", settings=settings)
        rst_parser.parse(text, document)
    complaints = captured_stderr.getvalue()
    if complaints:
        print(complaints.strip(), "while parsing:", text)
    text_collector = NodeToTextVisitor(document)
    document.walk(text_collector)
    return str(text_collector)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
|
|
2020-07-01 15:35:13 +00:00
|
|
|
|
def clear(line, drop_capitalized=False, po_path=""):
    """Clear various other syntaxes we may encounter in a line.

    Whitespace runs are collapsed to a single space and soft hyphens
    (U+00AD) are removed, then every match of the patterns below is
    replaced by a space.

    line: the text to clean.
    drop_capitalized: also drop capitalized words that are not at the
        beginning of a sentence.
    po_path: only used to prefix debug log messages.

    Returns the cleaned text.
    """
    # Normalize spaces
    line = regex.sub(r"\s+", " ", line).replace("\xad", "")

    # A list (not a set) so the combined pattern is the same on every run.
    to_drop = [
        r'<a href="[^"]*?">',
        r"{[a-z_]*?}",  # Sphinx variable
        r"%\([a-z_]+?\)[diouxXeEfFgGcrsa%]",  # Sphinx variable
        r"« . »",  # Single letter examples (typically in Unicode documentation)
    ]
    if drop_capitalized:
        to_drop.append(
            # Strip capitalized words in sentences
            r"(?<!\. |^|-)\b(\p{Letter}['’])?\b\p{Uppercase}\p{Letter}[\w.-]*\b"
        )
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        for pattern in to_drop:
            # finditer + group(0), not findall: with a capture group in the
            # pattern, findall yields the group instead of the dropped text.
            for match in regex.finditer(pattern, line):
                logging.debug(
                    "%s: dropping %r via %r due to from %r",
                    po_path,
                    match.group(0),
                    pattern,
                    line,
                )
    return regex.sub("|".join(to_drop), r" ", line)
|
2018-07-27 09:01:09 +00:00
|
|
|
|
|
|
|
|
|
|
2020-10-13 22:22:26 +00:00
|
|
|
|
def quote_for_hunspell(text):
    """Prefix each non-empty line with "^" so hunspell reads it as data.

    Quoting the manpage:
    It is recommended that programmatic interfaces prefix
    every data line with an uparrow to protect themselves
    against future changes in hunspell.

    Empty lines are left empty, and the number of lines is unchanged.
    """
    return "\n".join("^" + row if row else "" for row in text.split("\n"))
|
|
|
|
|
|
|
|
|
|
|
2019-10-16 14:55:46 +00:00
|
|
|
|
def po_to_text(po_path, drop_capitalized=False):
    """Convert a po file to a text file.

    This strips the msgids and all po syntax while keeping lines at
    their same position / line number.

    po_path: path of the po file, read as UTF-8.
    drop_capitalized: forwarded to clear().

    Returns the text, one line per kept entry, padded with empty lines.
    Raises POSpellException if the file cannot be read or parsed.
    """
    buffer = []
    try:
        # Decode explicitly rather than with the locale's default encoding.
        entries = polib.pofile(Path(po_path).read_text(encoding="utf-8"))
    except Exception as err:
        raise POSpellException(str(err)) from err
    for entry in entries:
        if entry.msgid == entry.msgstr:
            continue
        # Pad with empty lines so that entry.linenum lines precede the text.
        buffer.extend([""] * (entry.linenum - len(buffer)))
        buffer.append(clear(strip_rst(entry.msgstr), drop_capitalized, po_path=po_path))
    return "\n".join(buffer)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
|
|
2018-07-28 22:58:20 +00:00
|
|
|
|
def parse_args():
    """Parse command line arguments.

    Returns the argparse namespace.  Prints the help and exits with
    status 1 when both --drop-capitalized and --no-drop-capitalized are
    given, or when no source of files is given (neither positional
    files, nor --glob, nor --modified).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Check spelling in po files containing restructuredText."
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="fr",
        help="Language to check, you'll have to install the corresponding "
        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
    )
    parser.add_argument(
        "--glob",
        type=str,
        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
        "like --glob '**/*.po'.",
    )
    parser.add_argument(
        "--drop-capitalized",
        action="store_true",
        help="Always drop capitalized words in sentences"
        " (defaults according to the language).",
    )
    parser.add_argument(
        "--no-drop-capitalized",
        action="store_true",
        help="Never drop capitalized words in sentences"
        " (defaults according to the language).",
    )
    parser.add_argument(
        "po_file",
        nargs="*",
        type=Path,
        help="Files to check, can optionally be mixed with --glob, or not, "
        "use the one that fit your needs.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="More output, use -vv, -vvv, and so on.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__ + " using hunspell: " + HUNSPELL_VERSION,
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-p", "--personal-dict", type=str)
    parser.add_argument(
        "--modified", "-m", action="store_true", help="Use git to find modified files."
    )
    args = parser.parse_args()
    if args.drop_capitalized and args.no_drop_capitalized:
        print("Error: don't provide both --drop-capitalized AND --no-drop-capitalized.")
        parser.print_help()
        sys.exit(1)
    # --glob alone is a valid way to designate files (see the po_file help),
    # so only bail out when no source of files was given at all.
    if not args.po_file and not args.modified and not args.glob:
        parser.print_help()
        sys.exit(1)
    return args
|
2018-07-28 22:58:20 +00:00
|
|
|
|
|
|
|
|
|
|
2020-10-11 21:00:30 +00:00
|
|
|
|
def look_like_a_word(word):
    """Tell whether the given str looks like a word.

    Non-words like `---` or `-0700` are filtered out with this so they
    don't get reported: they typically are not errors.

    False is returned for an empty string, and for anything containing
    an ASCII digit, a dash, or more than one uppercase letter.
    """
    if not word or "-" in word:
        return False
    if any(char in digits for char in word):
        return False
    uppercase_count = sum(1 for char in word if category(char) == "Lu")
    # Several capitals: probably an acronym, or a name like CPython, macOS,
    # SQLite, ...
    return uppercase_count <= 1
|
|
|
|
|
|
|
|
|
|
|
2019-10-16 14:55:46 +00:00
|
|
|
|
def spell_check(
    po_files,
    personal_dict=None,
    language="en_US",
    drop_capitalized=False,
    debug_only=False,
):
    """Check for spelling mistakes in the given po_files.

    (po format, containing restructuredtext), for the given language.
    personal_dict allow to pass a personal dict (-p) option, to hunspell.

    Debug only will show what's passed to Hunspell instead of passing it.

    Returns the number of errors found (0 in debug mode or when there is
    no file to check), or -1 if hunspell exited with a non-zero status.
    """
    personal_dict_arg = ["-p", personal_dict] if personal_dict else []
    if debug_only:
        for po_file in po_files:
            print(po_to_text(str(po_file), drop_capitalized))
        return 0
    texts_for_hunspell = {
        po_file: po_to_text(str(po_file), drop_capitalized) for po_file in po_files
    }
    if not texts_for_hunspell:
        # Nothing to check: don't run hunspell on an empty input, there
        # would be no file to attribute its output to.
        return 0
    try:
        output = subprocess.run(
            ["hunspell", "-d", language, "-a"] + personal_dict_arg,
            universal_newlines=True,
            input=quote_for_hunspell("\n".join(texts_for_hunspell.values())),
            stdout=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError:
        return -1
    return parse_hunspell_output(texts_for_hunspell, output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_hunspell_output(hunspell_input: Dict[str, str], hunspell_output) -> int:
    """Parse `hunspell -a` output.

    Print one line per error on stdout, of the following format:

    FILE:LINE:ERROR

    Returns the number of errors.

    hunspell_input contains a dict of files: all_lines_for_this_file.
    hunspell_output is the finished hunspell process; only its `stdout`
    attribute (a str) is read.

    Raises Unreachable if the output ends before all input is accounted for.
    """
    errors = 0
    checked_files = iter(hunspell_input.items())
    try:
        checked_file_name, checked_text = next(checked_files)
    except StopIteration:
        # No file was checked at all, so there is nothing to report.
        return errors
    # The first line of each file is consumed up-front and the counter
    # starts at 1; each blank output line then advances by one input line.
    checked_lines = iter(checked_text.split("\n"))
    next(checked_lines)
    current_line_number = 1
    # The first output line is skipped (hunspell's banner in -a mode).
    for line in hunspell_output.stdout.split("\n")[1:]:
        if not line:
            # A blank line ends the results for one input line: move to the
            # next input line, or to the next file when this one is done.
            try:
                next(checked_lines)
                current_line_number += 1
            except StopIteration:
                try:
                    checked_file_name, checked_text = next(checked_files)
                except StopIteration:
                    return errors
                checked_lines = iter(checked_text.split("\n"))
                next(checked_lines)
                current_line_number = 1
            continue
        if line == "*":  # OK
            continue
        # "& word ..." is a misspelling with suggestions, "# word offset" a
        # misspelling without any: both are errors.
        if line[0] in "&#":
            _, original, *_ = line.split()
            if look_like_a_word(original):
                print(checked_file_name, current_line_number, original, sep=":")
                errors += 1
    raise Unreachable("Got this one? I'm sorry, read XKCD 2200, then open an issue.")
|
2018-12-24 14:49:54 +00:00
|
|
|
|
|
|
|
|
|
|
2019-12-10 14:10:17 +00:00
|
|
|
|
def gracefull_handling_of_missing_dicts(language):
    """Check if hunspell dictionary for given language is installed.

    Runs `hunspell -D` and collects the last path component of each line
    of its output (stdout and stderr together).  Returns None when
    *language* is one of them; otherwise prints installation hints on
    stderr and exits with status 1.
    """
    hunspell_dash_d = subprocess.check_output(
        ["hunspell", "-D"], universal_newlines=True, stderr=subprocess.STDOUT
    )
    # The output is one big str: split it into lines first, as iterating
    # over it directly would yield single characters.
    languages = {Path(line.strip()).name for line in hunspell_dash_d.splitlines()}

    def error(*args, file=sys.stderr, **kwargs):
        print(*args, file=file, **kwargs)

    if language in languages:
        return
    error(
        "The hunspell dictionary for your language is missing, please install it.",
        end="\n\n",
    )
    if which("apt"):
        error("Maybe try something like:")
        error(" sudo apt install hunspell-{}".format(language))
    else:
        error(
            """I don't know your environment, but I bet the package name looks like:

hunspell-{language}

If you find it, please tell me (by opening an issue or a PR on
https://github.com/JulienPalard/pospell/) so I can enhance this error message.
""".format(
                language=language
            )
        )
    sys.exit(1)
|
2019-12-10 14:10:17 +00:00
|
|
|
|
|
|
|
|
|
|
2018-12-24 14:49:54 +00:00
|
|
|
|
def _po_files_from_git_status(git_status):
    """Extract the po files to check from `git status --porcelain` output."""
    po_files = []
    for status_line in git_status.split("\n"):
        if not status_line:
            continue
        # Lines are "XY PATH" or "XY ORIG -> PATH": two status characters,
        # a space, then the path(s).  Slicing instead of splitting on
        # whitespace keeps paths containing spaces in one piece.
        status, filename = status_line[:2], status_line[3:]
        if status.strip() == "D":
            continue  # A deleted file cannot be checked.
        if " -> " in filename:
            filename = filename.split(" -> ", 1)[1]  # Keep the new name.
        if len(filename) >= 2 and filename[0] == filename[-1] == '"':
            # git quotes unusual paths; escapes inside are left as is.
            filename = filename[1:-1]
        if filename.endswith(".po"):
            po_files.append(Path(filename))
    return po_files


def main():
    """Entry point (for command-line).

    Gathers the files to check (positional arguments, --glob matches and,
    with --modified, po files reported by git), runs the spell check and
    exits with status 0 when no error was found, -1 otherwise.
    """
    args = parse_args()
    # Each -v lowers the logging threshold by one level, starting at CRITICAL.
    logging.basicConfig(level=50 - 10 * args.verbose)
    if args.drop_capitalized:
        drop_capitalized = True
    elif args.no_drop_capitalized:
        drop_capitalized = False
    else:
        drop_capitalized = DEFAULT_DROP_CAPITALIZED.get(args.language, False)
    args.po_file = list(
        chain(Path(".").glob(args.glob) if args.glob else [], args.po_file)
    )
    if args.modified:
        git_status = subprocess.check_output(
            ["git", "status", "--porcelain"], encoding="utf-8"
        )
        args.po_file.extend(_po_files_from_git_status(git_status))
    try:
        errors = spell_check(
            args.po_file,
            args.personal_dict,
            args.language,
            drop_capitalized,
            args.debug,
        )
    except POSpellException as err:
        print(err, file=sys.stderr)
        sys.exit(-1)
    if errors == -1:
        # hunspell failed: a missing dictionary is the usual suspect.
        gracefull_handling_of_missing_dicts(args.language)
    sys.exit(0 if errors == 0 else -1)
|
2018-07-23 15:37:50 +00:00
|
|
|
|
|
|
|
|
|
|
2018-07-27 13:54:10 +00:00
|
|
|
|
# Run the command line entry point when executed as a script.
if __name__ == "__main__":
    main()
|