pospell/pospell.py

"""pospell is a spellcheckers for po files containing reStructuedText.
"""
import io
import logging
import subprocess
import sys
import tempfile
from contextlib import redirect_stderr
from itertools import chain
from pathlib import Path
from shutil import which

import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import polib
from docutils.parsers.rst import roles
from docutils.utils import new_document

import regex

# Version of pospell itself, shown by the --version option.
__version__ = "1.0.3"

# Per-language default for dropping capitalized words found inside
# sentences; main() falls back to False for languages not listed here.
DEFAULT_DROP_CAPITALIZED = {"fr": True, "fr_FR": True}

# Probe hunspell once at import time: the first line of its --version
# output is kept for our own --version message, and a missing binary
# aborts immediately with a readable error instead of a traceback.
try:
    HUNSPELL_VERSION = subprocess.check_output(
        ["hunspell", "--version"], universal_newlines=True
    ).split("\n")[0]
except FileNotFoundError:
    print("hunspell not found, please install hunspell.", file=sys.stderr)
    exit(1)


class DummyNodeClass(docutils.nodes.Inline, docutils.nodes.TextElement):
    """Inline node type registered as a generic role for role names that
    docutils cannot resolve (see monkey_patch_role)."""
    pass


def monkey_patch_role(role):
    """Wrap a role lookup function so that unknown roles become generic ones.

    ``role`` is called with ``(role_name, language_module, lineno,
    reporter)`` and must return a ``(role_function, messages)`` pair.
    The returned wrapper has the same signature: when the first lookup
    yields ``None`` as role function, the role name is registered as a
    generic role backed by ``DummyNodeClass`` and the lookup is retried.
    """

    def role_or_generic(role_name, language_module, lineno, reporter):
        lookup_args = (role_name, language_module, lineno, reporter)
        resolved, messages = role(*lookup_args)
        if resolved is not None:
            return resolved, messages
        # Unknown role: register a placeholder under that name, then
        # look it up again so the caller gets a usable role function.
        roles.register_generic_role(role_name, DummyNodeClass)
        resolved, messages = role(*lookup_args)
        return resolved, messages

    return role_or_generic


# Replace the docutils role lookup with the wrapper above, so role names
# it cannot resolve get registered as generic roles on first lookup.
roles.role = monkey_patch_role(roles.role)


class NodeToTextVisitor(docutils.nodes.NodeVisitor):
    """Collect the text of a docutils tree, ignoring inline markup.

    Every visited ``Text`` node contributes its raw source to
    ``self.output``; the children of emphasis, superscript, title
    reference, strong, reference, literal and ``DummyNodeClass`` nodes
    are skipped altogether. ``str(visitor)`` gives the collected pieces
    joined by single spaces.
    """

    def __init__(self, document):
        self.output = []  # text fragments, in visiting order
        self.depth = 0  # current nesting level, handy when debugging
        super().__init__(document)

    def dispatch_visit(self, node):
        self.depth += 1
        super().dispatch_visit(node)

    def dispatch_departure(self, node):
        self.depth -= 1
        super().dispatch_departure(node)

    def unknown_visit(self, node):
        """Accept silently any node type without a dedicated visit method.
        """

    def unknown_departure(self, node):
        """Accept silently the departure from any node type.
        """

    def _skip_children(self, node):
        """Shared handler: do not descend into this node.
        """
        raise docutils.nodes.SkipChildren

    # Node types whose content should not be collected; the visitor
    # dispatch looks handlers up by their ``visit_<node class>`` name.
    visit_emphasis = _skip_children
    visit_superscript = _skip_children
    visit_title_reference = _skip_children
    visit_strong = _skip_children
    visit_DummyNodeClass = _skip_children
    visit_reference = _skip_children
    visit_literal = _skip_children

    def visit_Text(self, node):
        self.output.append(node.rawsource)

    def __str__(self):
        return " ".join(self.output)


def strip_rst(line):
    """Parse ``line`` as reStructuredText and return its collected text.

    The text is parsed with docutils, then walked with
    ``NodeToTextVisitor``. Anything docutils writes to stderr while
    parsing is captured and printed together with the offending text.
    """
    # A trailing "::" would make docutils complain that a literal block
    # is expected, so it is removed before parsing.
    source = line[:-2] if line.endswith("::") else line
    rst_parser = docutils.parsers.rst.Parser()
    option_parser = docutils.frontend.OptionParser(
        components=(docutils.parsers.rst.Parser,)
    )
    settings = option_parser.get_default_values()
    captured = io.StringIO()
    with redirect_stderr(captured):
        document = new_document("<rst-doc>", settings=settings)
        rst_parser.parse(source, document)
    complaints = captured.getvalue()
    if complaints:
        print(complaints.strip(), "while parsing:", source)
    collector = NodeToTextVisitor(document)
    document.walk(collector)
    return str(collector)


def clear(po_path, line, drop_capitalized=False):
    """Clear various other syntaxes we may encounter in a line.

    Runs of whitespace are first collapsed to a single space, then every
    fragment matching one of the ``to_drop`` patterns is removed.

    po_path is only used to prefix debug log messages.
    line is the text to clean.
    drop_capitalized additionally removes capitalized words that are not
    at the start of the line or right after ". " or "-".

    Returns the cleaned line.
    """
    # Normalize spaces
    line = regex.sub(r"\s+", " ", line)
    # An ordered sequence, not a set: the patterns are joined into one
    # alternation below and alternatives are tried left to right, so the
    # order has to be the same on every run.
    to_drop = [
        r'<a href="[^"]*?">',
        # Strip accronyms
        r"\b[\w-]*\p{Uppercase}{2,}[0-9.\w-]*\b",
        r"---?",  # -- and --- separators to be ignored
        r" - ",  # Drop lone dashes (sometimes used in place of -- or ---)
        r"-\\ ",  # Ignore "MINUS BACKSLASH SPACE" typically used in
        # formulas, like '-\ *π*' but *π* gets removed too
        r"{[a-z_]*?}",  # Sphinx variable
        r"'?-?\b([0-9]+\.)*[0-9]+\.[0-9abcrx]+\b'?",  # Versions
        r"[0-9]+h",  # Hours
        r"%\([a-z_]+?\)[diouxXeEfFgGcrsa%]",  # Sphinx variable
        r"« . »",  # Single letter examples (typically in Unicode documentation)
        "\xad",  # soft hyphen
    ]
    if drop_capitalized:
        to_drop.append(
            # Strip capitalized words in sentences
            r"(?<!\. |^|-)\b(\p{Letter}['’])?\b\p{Uppercase}\p{Letter}[\w.-]*\b"
        )
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        for pattern in to_drop:
            # finditer + group(0) reports the whole dropped fragment;
            # findall would report only the capture groups for the
            # patterns that contain some.
            for match in regex.finditer(pattern, line):
                logging.debug(
                    "%s: dropping %r due to %r from %r",
                    po_path,
                    match.group(0),
                    pattern,
                    line,
                )
    return regex.sub("|".join(to_drop), r"", line)


def po_to_text(po_path, drop_capitalized=False):
    """Turn a po file into plain text ready for spell checking.

    Only the translations (msgstr) are kept, stripped from their rst
    markup and cleaned by ``clear``. Entries whose msgstr equals their
    msgid are ignored. Empty lines are inserted as padding so that each
    kept translation sits at the position given by its entry's
    ``linenum``, keeping reported line numbers meaningful.
    """
    text_lines = []
    for entry in polib.pofile(po_path):
        if entry.msgstr == entry.msgid:
            continue
        # Pad with empty lines up to the entry's line number (a
        # non-positive count adds nothing).
        text_lines.extend([""] * (entry.linenum - len(text_lines)))
        stripped = strip_rst(entry.msgstr)
        text_lines.append(clear(po_path, stripped, drop_capitalized))
    return "\n".join(text_lines)


def parse_args():
    """Parse command line arguments.

    Returns the argparse namespace. The process exits with status 1,
    after printing the help, when both --drop-capitalized and
    --no-drop-capitalized are given, or when no source of files to check
    is given at all (no positional file, no --glob, no --modified).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Check spelling in po files containing restructuredText."
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="fr",
        help="Language to check, you'll have to install the corresponding "
        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
    )
    parser.add_argument(
        "--glob",
        type=str,
        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
        "like --glob '**/*.po'.",
    )
    parser.add_argument(
        "--drop-capitalized",
        action="store_true",
        help="Always drop capitalized words in sentences (defaults according to the language).",
    )
    parser.add_argument(
        "--no-drop-capitalized",
        action="store_true",
        help="Never drop capitalized words in sentences (defaults according to the language).",
    )
    parser.add_argument(
        "po_file",
        nargs="*",
        type=Path,
        help="Files to check, can optionally be mixed with --glob, or not, "
        "use the one that fit your needs.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="More output, use -vv, -vvv, and so on.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__ + " using hunspell: " + HUNSPELL_VERSION,
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-p", "--personal-dict", type=str)
    parser.add_argument(
        "--modified", "-m", action="store_true", help="Use git to find modified files."
    )
    args = parser.parse_args()
    if args.drop_capitalized and args.no_drop_capitalized:
        print("Error: don't provide both --drop-capitalized AND --no-drop-capitalized.")
        parser.print_help()
        sys.exit(1)
    # --glob is a source of files on its own (main() expands it), so it
    # must count here too: without it, using --glob alone was rejected.
    if not (args.po_file or args.modified or args.glob):
        parser.print_help()
        sys.exit(1)
    return args


def spell_check(
    po_files, personal_dict, language, drop_capitalized=False, debug_only=False
):
    """Spell check the translations of the given po files with hunspell.

    po_files is an iterable of paths to po files containing
    restructuredtext, language is the hunspell dictionary to use, and
    personal_dict, when not empty, is given to hunspell as its personal
    dictionary (-p).

    With debug_only, the text that would be given to hunspell is printed
    instead, and hunspell is not run.

    Returns the number of misspellings found (each one is printed as
    ``file:line:word``), or -1 as soon as hunspell exits with an error.
    """
    hunspell_command = ["hunspell", "-d", language]
    if personal_dict:
        hunspell_command += ["-p", personal_dict]
    hunspell_command.append("-u3")
    found = 0
    with tempfile.TemporaryDirectory() as tmpdirname:
        workdir = Path(tmpdirname)
        for po_file in po_files:
            text = po_to_text(str(po_file), drop_capitalized)
            if debug_only:
                print(text)
                continue
            # hunspell works on a plain text copy stored under the same
            # file name in the temporary directory.
            text_path = workdir / po_file.name
            text_path.write_text(text)
            try:
                output = subprocess.check_output(
                    hunspell_command + [str(text_path)], universal_newlines=True
                )
            except subprocess.CalledProcessError:
                return -1
            for report in output.split("\n"):
                match = regex.match(
                    r"(?P<path>.*):(?P<line>[0-9]+): Locate: (?P<error>.*) \| Try: .*$",
                    report,
                )
                if not match:
                    continue
                found += 1
                print(po_file, match.group("line"), match.group("error"), sep=":")
    return found


def gracefull_handling_of_missing_dicts(language):
    """Check if hunspell dictionary for given language is installed.

    The output of ``hunspell -D`` is read line by line and the last path
    component of each line is taken as a candidate dictionary name.
    When ``language`` is one of them the function simply returns;
    otherwise an installation hint is printed on stderr and the process
    exits with status 1.
    """
    hunspell_dash_d = subprocess.check_output(
        ["hunspell", "-D"], universal_newlines=True, stderr=subprocess.STDOUT
    )
    # Split into lines first: iterating over the string itself yields
    # single characters, which never form a dictionary name.
    languages = {Path(line.strip()).name for line in hunspell_dash_d.splitlines()}

    def error(*args, file=sys.stderr, **kwargs):
        print(*args, file=file, **kwargs)

    if language in languages:
        return
    error(
        "The hunspell dictionary for your language is missing, please install it.",
        end="\n\n",
    )
    if which("apt"):
        error("Maybe try something like:")
        error("  sudo apt install hunspell-{}".format(language))
    else:
        error(
            """I don't know your environment, but I bet the package name looks like:

    hunspell-{language}

If you find it, please tell me (by opening an issue or a PR on
https://github.com/JulienPalard/pospell/) so I can enhance this error message.
""".format(
                language=language
            )
        )
    sys.exit(1)


def main():
    """Module entry point.

    Parses the command line, gathers the po files to check (positional
    arguments, --glob matches relative to the current directory, and
    with --modified the .po files listed by ``git status``), runs the
    spell check and exits with status 0 when no error was found, -1
    otherwise. When hunspell itself failed, a missing dictionary is
    looked for first, to give a helpful message.
    """
    args = parse_args()
    # Each -v lowers the logging threshold by one level (50 is CRITICAL).
    logging.basicConfig(level=50 - 10 * args.verbose)
    default_drop_capitalized = DEFAULT_DROP_CAPITALIZED.get(args.language, False)
    if args.drop_capitalized:
        drop_capitalized = True
    elif args.no_drop_capitalized:
        drop_capitalized = False
    else:
        drop_capitalized = default_drop_capitalized
    args.po_file = list(
        chain(Path(".").glob(args.glob) if args.glob else [], args.po_file)
    )
    if args.modified:
        git_status = subprocess.check_output(
            ["git", "status", "--porcelain"], encoding="utf-8"
        )
        for status_line in git_status.splitlines():
            # Lines look like "XY path" or "XY old -> new". Split once
            # only, so a path containing spaces stays in one piece.
            fields = status_line.split(maxsplit=1)
            if len(fields) != 2:
                continue
            status, filename = fields
            # For a rename, the file to check is the new name.
            filename = filename.rpartition(" -> ")[2]
            # Entries reported as deleted have nothing left to check.
            # NOTE(review): paths git wraps in double quotes (special
            # characters) are not unquoted here.
            if filename.endswith(".po") and "D" not in status:
                args.po_file.append(Path(filename))
    errors = spell_check(
        args.po_file, args.personal_dict, args.language, drop_capitalized, args.debug
    )
    if errors == -1:
        gracefull_handling_of_missing_dicts(args.language)
    sys.exit(0 if errors == 0 else -1)


# Run the command line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
-												Module docstring.

											
										
										
											2018-07-27 13:49:58 +00:00
+								"""pospell is a spellcheckers for po files containing reStructuedText.
 								"""
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								import io
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								import logging
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								import subprocess
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
+								import sys
-												isort

											
										
										
											2018-07-27 09:38:17 +00:00
+								import tempfile
-												Ignore versions like 1.6a1.

											
										
										
											2019-08-20 14:38:03 +00:00
+								from contextlib import redirect_stderr
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								from itertools import chain
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								from pathlib import Path
-												Gracefull handling of missing dicts. (#11)


											
										
										
											2019-12-10 14:10:17 +00:00
+								from shutil import which
-												isort

											
										
										
											2018-07-27 09:38:17 +00:00
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								import docutils.frontend
 								import docutils.nodes
 								import docutils.parsers.rst
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
+								import polib
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								from docutils.parsers.rst import roles
 								from docutils.utils import new_document
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								import regex
-												Bump version: 1.0.2 → 1.0.3

											
										
										
											2019-10-17 07:46:30 +00:00
+								__version__ = "1.0.3"
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
 								DEFAULT_DROP_CAPITALIZED = {"fr": True, "fr_FR": True}
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
+								try:
 								    HUNSPELL_VERSION = subprocess.check_output(
 								        ["hunspell", "--version"], universal_newlines=True
 								    ).split("\n")[0]
 								except FileNotFoundError:
 								    print("hunspell not found, please install hunspell.", file=sys.stderr)
 								    exit(1)
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
 								class DummyNodeClass(docutils.nodes.Inline, docutils.nodes.TextElement):
 								    pass
 								def monkey_patch_role(role):
 								    def role_or_generic(role_name, language_module, lineno, reporter):
 								        base_role, message = role(role_name, language_module, lineno, reporter)
 								        if base_role is None:
 								            roles.register_generic_role(role_name, DummyNodeClass)
 								            base_role, message = role(role_name, language_module, lineno, reporter)
 								        return base_role, message
 								    return role_or_generic
 								roles.role = monkey_patch_role(roles.role)
 								class NodeToTextVisitor(docutils.nodes.NodeVisitor):
 								    def __init__(self, document):
 								        self.output = []
-												Compatibility with docutils 0.15

											
										
										
											2019-07-26 15:40:48 +00:00
+								        self.depth = 0
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								        super().__init__(document)
-												Compatibility with docutils 0.15

											
										
										
											2019-07-26 15:40:48 +00:00
+								    def dispatch_visit(self, node):
 								        self.depth += 1
 								        super().dispatch_visit(node)
 								    def dispatch_departure(self, node):
 								        self.depth -= 1
 								        super().dispatch_departure(node)
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								    def unknown_visit(self, node):
-												Compatibility with docutils 0.15

											
										
										
											2019-07-26 15:40:48 +00:00
+								        """Mandatory implementation to visit unknwon nodes.
 								        """
 								        # print(" " * self.depth * 4, node.__class__.__name__, ":", node)
 								    def unknown_departure(self, node):
 								        """To help debugging tree.
 								        """
 								        # print(node, repr(node), node.__class__.__name__)
 								    def visit_emphasis(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_superscript(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_title_reference(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_strong(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_DummyNodeClass(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_reference(self, node):
 								        raise docutils.nodes.SkipChildren
 								    def visit_literal(self, node):
 								        raise docutils.nodes.SkipChildren
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
 								    def visit_Text(self, node):
 								        self.output.append(node.rawsource)
 								    def __str__(self):
 								        return " ".join(self.output)
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
 								def strip_rst(line):
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								    if line.endswith("::"):
 								        # Drop :: at the end, it would cause Literal block expected
 								        line = line[:-2]
 								    parser = docutils.parsers.rst.Parser()
 								    components = (docutils.parsers.rst.Parser,)
 								    settings = docutils.frontend.OptionParser(
 								        components=components
 								    ).get_default_values()
 								    stderr_stringio = io.StringIO()
 								    with redirect_stderr(stderr_stringio):
-												Ignore versions like 1.6a1.

											
										
										
											2019-08-20 14:38:03 +00:00
+								        document = new_document("<rst-doc>", settings=settings)
-												Use docutils to parse rst.

											
										
										
											2018-07-27 19:57:44 +00:00
+								        parser.parse(line, document)
 								    stderr = stderr_stringio.getvalue()
 								    if stderr:
 								        print(stderr.strip(), "while parsing:", line)
 								    visitor = NodeToTextVisitor(document)
 								    document.walk(visitor)
 								    return str(visitor)
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								def clear(po_path, line, drop_capitalized=False):
-												Strip sphinx variables and a href links.

											
										
										
											2018-07-27 09:01:09 +00:00
+								    """Clear various other syntaxes we may encounter in a line.
 								    """
-												FIX: Double spaces were breaking start-of-sentence detection.

											
										
										
											2019-09-16 08:44:18 +00:00
+								    # Normalize spaces
 								    line = regex.sub(r"\s+", " ", line)
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								    to_drop = {
 								        r'<a href="[^"]*?">',
-												make dropping capitalized words optional

											
										
										
											2019-10-09 11:06:45 +00:00
+								        # Strip accronyms
-												Also drop .1 in POSIX.1.

											
										
										
											2019-10-16 15:51:50 +00:00
+								        r"\b[\w-]*\p{Uppercase}{2,}[0-9.\w-]*\b",
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								        r"---?",  # -- and --- separators to be ignored
-												FIX: ignore lone dashes.

											
										
										
											2019-10-16 15:03:15 +00:00
+								        r" - ",  # Drop lone dashes (sometimes used in place of -- or ---)
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								        r"-\\ ",  # Ignore "MINUS BACKSLASH SPACE" typically used in
 								        # formulas, like '-\ *π*' but *π* gets removed too
-												allow underscors in variables

											
										
										
											2019-11-16 13:47:22 +00:00
+								        r"{[a-z_]*?}",  # Sphinx variable
-												Allow versions like 1.6a1 (found on docs.python.org).

											
										
										
											2019-08-20 15:34:30 +00:00
+								        r"'?-?\b([0-9]+\.)*[0-9]+\.[0-9abcrx]+\b'?",  # Versions
-												Dropping hours.

											
										
										
											2019-05-23 18:45:04 +00:00
+								        r"[0-9]+h",  # Hours
-												allow full list of conversion types in printf-style variables

See https://docs.python.org/3/library/stdtypes.html#old-string-formatting

											
										
										
											2019-11-18 08:52:00 +00:00
+								        r"%\([a-z_]+?\)[diouxXeEfFgGcrsa%]",  # Sphinx variable
-												FIX: Double spaces were breaking start-of-sentence detection.

											
										
										
											2019-09-16 08:44:18 +00:00
+								        r"« . »",  # Single letter examples (typically in Unicode documentation)
-												drop soft hyphens fixes #4

											
										
										
											2019-09-30 11:37:48 +00:00
+								        "\xad",  # soft hyphen
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								    }
-												make dropping capitalized words optional

											
										
										
											2019-10-09 11:06:45 +00:00
+								    if drop_capitalized:
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								        to_drop.add(
-												make dropping capitalized words optional

											
										
										
											2019-10-09 11:06:45 +00:00
+								            # Strip capitalized words in sentences
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								            r"(?<!\. |^|-)\b(\p{Letter}['’])?\b\p{Uppercase}\p{Letter}[\w.-]*\b"
 								        )
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								    if logging.getLogger().isEnabledFor(logging.DEBUG):
 								        for pattern in to_drop:
 								            for dropped in regex.findall(pattern, line):
 								                logging.debug("%s: dropping %r due to from %r", po_path, dropped, line)
 								    return regex.sub("|".join(to_drop), r"", line)
-												Strip sphinx variables and a href links.

											
										
										
											2018-07-27 09:01:09 +00:00
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								def po_to_text(po_path, drop_capitalized=False):
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    """Converts a po file to a text file, by stripping the msgids and all
 								    po syntax, but by keeping the kept lines at their same position /
 								    line number.
 								    """
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								    buffer = []
-												Report line numbers.

											
										
										
											2018-07-23 17:24:10 +00:00
+								    lines = 0
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    entries = polib.pofile(po_path)
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								    for entry in entries:
-												Don't spellcheck untranslated messages, they're typically in english (licence text, project names, functions prototypes, ...)

											
										
										
											2018-07-27 08:03:21 +00:00
+								        if entry.msgid == entry.msgstr:
 								            continue
-												Report line numbers.

											
										
										
											2018-07-23 17:24:10 +00:00
+								        while lines < entry.linenum:
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								            buffer.append("")
-												Report line numbers.

											
										
										
											2018-07-23 17:24:10 +00:00
+								            lines += 1
-												add --keep-capitalized argument

											
										
										
											2019-10-09 11:07:09 +00:00
+								        buffer.append(clear(po_path, strip_rst(entry.msgstr), drop_capitalized))
-												Report line numbers.

											
										
										
											2018-07-23 17:24:10 +00:00
+								        lines += 1
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    return "\n".join(buffer)
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
+								def parse_args():
 								    """Parse command line arguments.
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    """
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								    import argparse
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
+								    parser = argparse.ArgumentParser(
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								        description="Check spelling in po files containing restructuredText."
 								    )
 								    parser.add_argument(
 								        "-l",
 								        "--language",
 								        type=str,
 								        default="fr",
 								        help="Language to check, you'll have to install the corresponding "
 								        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
 								    )
 								    parser.add_argument(
 								        "--glob",
 								        type=str,
 								        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
 								        "like --glob '**/*.po'.",
 								    )
-												add --keep-capitalized argument

											
										
										
											2019-10-09 11:07:09 +00:00
+								    parser.add_argument(
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								        "--drop-capitalized",
-												add --keep-capitalized argument

											
										
										
											2019-10-09 11:07:09 +00:00
+								        action="store_true",
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								        help="Always drop capitalized words in sentences (defaults according to the language).",
 								    )
 								    parser.add_argument(
 								        "--no-drop-capitalized",
 								        action="store_true",
 								        help="Never drop capitalized words in sentences (defaults according to the language).",
-												add --keep-capitalized argument

											
										
										
											2019-10-09 11:07:09 +00:00
+								    )
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    parser.add_argument(
 								        "po_file",
 								        nargs="*",
-												FIX: Don't scan recursively by default, and use Path instead of strings.

											
										
										
											2018-07-27 14:21:28 +00:00
+								        type=Path,
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								        help="Files to check, can optionally be mixed with --glob, or not, "
 								        "use the one that fit your needs.",
 								    )
-												Better handling of capitalized words..

											
										
										
											2018-07-31 22:20:03 +00:00
+								    parser.add_argument(
 								        "-v",
 								        "--verbose",
 								        action="count",
 								        default=0,
 								        help="More output, use -vv, -vvv, and so on.",
 								    )
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
+								    parser.add_argument(
 								        "--version",
 								        action="version",
 								        version="%(prog)s " + __version__ + " using hunspell: " + HUNSPELL_VERSION,
 								    )
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
+								    parser.add_argument("--debug", action="store_true")
 								    parser.add_argument("-p", "--personal-dict", type=str)
-												Implement '--modified' option, to check spell only on modified files according to git.

											
										
										
											2018-12-24 14:49:54 +00:00
+								    parser.add_argument(
 								        "--modified", "-m", action="store_true", help="Use git to find modified files."
 								    )
 								    args = parser.parse_args()
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
+								    if args.drop_capitalized and args.no_drop_capitalized:
 								        print("Error: don't provide both --drop-capitalized AND --no-drop-capitalized.")
 								        parser.print_help()
 								        exit(1)
-												Implement '--modified' option, to check spell only on modified files according to git.

											
										
										
											2018-12-24 14:49:54 +00:00
+								    if not args.po_file and not args.modified:
 								        parser.print_help()
 								        exit(1)
 								    return args
-												Adding --version.

											
										
										
											2018-07-28 22:58:20 +00:00
-												Add default values for capitalized words droppings, and add --no-drop-capitalized.

											
										
										
											2019-10-16 14:55:46 +00:00
def spell_check(
    po_files, personal_dict, language, drop_capitalized=False, debug_only=False
):
    """Spell check the given po files with hunspell.

    Each file in po_files (po format, containing reStructuredText) is
    converted to plain text with po_to_text, written to a temporary
    directory, and handed to hunspell using the dictionary named by
    language.

    personal_dict, when truthy, is forwarded to hunspell through its
    -p option.
    drop_capitalized is passed through to po_to_text.
    debug_only prints the text that would be given to hunspell instead
    of running hunspell on it.

    Every reported mistake is printed as ``file:line:word``.

    Return the number of mistakes found, or -1 as soon as a hunspell
    invocation exits with a non-zero status.
    """
    # Shape of the hunspell output lines that describe a mistake.
    report_line = regex.compile(
        r"(?P<path>.*):(?P<line>[0-9]+): Locate: (?P<error>.*) \| Try: .*$"
    )
    extra_args = []
    if personal_dict:
        extra_args = ["-p", personal_dict]
    mistakes = 0
    with tempfile.TemporaryDirectory() as workdir:
        for po_file in po_files:
            if debug_only:
                print(po_to_text(str(po_file), drop_capitalized))
                continue
            # hunspell works on files, so dump the extracted text to disk.
            text_file = Path(workdir) / po_file.name
            text_file.write_text(po_to_text(str(po_file), drop_capitalized))
            command = ["hunspell", "-d", language]
            command += extra_args
            command += ["-u3", str(text_file)]
            try:
                output = subprocess.check_output(command, universal_newlines=True)
            except subprocess.CalledProcessError:
                return -1
            for line in output.split("\n"):
                found = report_line.match(line)
                if not found:
                    continue
                mistakes += 1
                # Report against the original po file, not the temporary copy.
                print(po_file, found.group("line"), found.group("error"), sep=":")
    return mistakes
-												Gracefull handling of missing dicts. (#11)


											
										
										
											2019-12-10 14:10:17 +00:00
def gracefull_handling_of_missing_dicts(language):
    """Exit with installation advice if the hunspell dictionary is missing.

    Runs ``hunspell -D`` (stdout and stderr combined) and collects the
    last path component of every line of its output. If language is one
    of those names, the function returns without doing anything.
    Otherwise it prints an installation hint on stderr and exits the
    process with status 1.
    """
    hunspell_dash_d = subprocess.check_output(
        ["hunspell", "-D"], universal_newlines=True, stderr=subprocess.STDOUT
    )
    # Iterate over the *lines* of the output: iterating over the string
    # itself yields single characters, so no dictionary name could ever match.
    languages = {
        Path(line.strip()).name
        for line in hunspell_dash_d.splitlines()
        if line.strip()
    }
    if language in languages:
        return

    def error(*args, file=sys.stderr, **kwargs):
        """Print to stderr by default."""
        print(*args, file=file, **kwargs)

    error(
        "The hunspell dictionary for your language is missing, please install it.",
        end="\n\n",
    )
    if which("apt"):
        error("Maybe try something like:")
        error("  sudo apt install hunspell-{}".format(language))
    else:
        error(
            """I don't know your environment, but I bet the package name looks like:
    hunspell-{language}
If you find it, please tell me (by opening an issue or a PR on
https://github.com/JulienPalard/pospell/) so I can enhance this error message.
""".format(
                language=language
            )
        )
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(1)
-												Implement '--modified' option, to check spell only on modified files according to git.

											
										
										
											2018-12-24 14:49:54 +00:00
def main():
    """Module entry point.

    Parses the command line, resolves whether capitalized words are
    dropped (explicit flags win over the per-language default from
    DEFAULT_DROP_CAPITALIZED), builds the list of po files from the
    glob, the positional arguments and, with --modified, the output of
    ``git status --porcelain``, then runs spell_check.

    Exits with status 0 when no mistake was found and -1 otherwise. When
    spell_check returns -1, gracefull_handling_of_missing_dicts is called
    first.
    """
    args = parse_args()
    # 50 is logging.CRITICAL; each verbosity step lowers the threshold by 10.
    logging.basicConfig(level=50 - 10 * args.verbose)
    if args.drop_capitalized:
        drop_capitalized = True
    elif args.no_drop_capitalized:
        drop_capitalized = False
    else:
        drop_capitalized = DEFAULT_DROP_CAPITALIZED.get(args.language, False)
    args.po_file = list(
        chain(Path(".").glob(args.glob) if args.glob else [], args.po_file)
    )
    if args.modified:
        git_status = subprocess.check_output(
            ["git", "status", "--porcelain"], encoding="utf-8"
        )
        for line in git_status.split("\n"):
            # Split once only: the path part may itself contain whitespace
            # (renames are printed as "old -> new"), and a third field would
            # break the two-name unpacking below.
            fields = line.split(maxsplit=1)
            if len(fields) != 2:
                continue
            status, filename = fields
            # For renames, check the new name; a no-op for other entries.
            filename = filename.rpartition(" -> ")[2]
            # Entries reported as deleted cannot be spell checked.
            if status != "D" and filename.endswith(".po"):
                args.po_file.append(Path(filename))
    errors = spell_check(
        args.po_file, args.personal_dict, args.language, drop_capitalized, args.debug
    )
    if errors == -1:
        gracefull_handling_of_missing_dicts(args.language)
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(0 if errors == 0 else -1)
-												Initial commit

											
										
										
											2018-07-23 15:37:50 +00:00
-												Allow for multiple files to be passed positionally.

											
										
										
											2018-07-27 13:54:10 +00:00
# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()