pospell/pospell.py

"""pospell is a spellcheckers for po files containing reStructuedText.
"""

import re
import subprocess
import tempfile
from itertools import chain
from pathlib import Path

import polib


def strip_rst(line):
    """Remove reStructuredText and Sphinx-doc markup from a line.

    Every span matched by one of the alternatives of the pattern below
    (roles, inline literals, capitalized words, substitutions, Sphinx
    variables, one-letter options, hyperlink references, emphasis) is
    replaced by the empty string; everything else is returned unchanged.
    """
    # The pattern is written in verbose mode: whitespace and ``#`` comments
    # inside it are ignored by the regex engine.
    markup = re.compile(
        r"""(C-)?:[^:]*?:`[^`]*?` |
            ``.*?``               |
            \b[A-Z][a-zA-Z-]{2,}[a-zA-Z.-]*\b |  # Strip capitalized words and accronyms
            {[a-z]*?}             | # reStructuredText tag
            \|[a-z]+?\|           | # reStructuredText substitution
            %\([a-z_]+?\)s        | # Sphinx variable
            -[A-Za-z]\b           |
            `[^`]*?`_             |
            \*[^*]*?\*
        """,
        re.VERBOSE,
    )
    return markup.sub("", line)


def clear(line):
    """Clear various other syntaxes we may encounter in a line.

    Currently this unwraps HTML anchors: ``<a href="...">text</a>`` is
    replaced by ``text``. Text without such anchors is returned unchanged.
    """
    # The link text is captured non-greedily so that each anchor is unwrapped
    # on its own; a greedy capture would run from the first link's text to the
    # last ``</a>`` of the line and leave the inner tags in the output.
    return re.sub(r"""<a href="[^"]*?">(.*?)</a>""", r"\1", line)


def po_to_text(po_path):
    """Converts a po file to a text file, by stripping the msgids and all
    po syntax, but by keeping the kept lines at their same position /
    line number.

    ``po_path`` is handed to ``polib.pofile``. Entries whose msgstr equals
    their msgid are skipped. For every other entry, empty lines are emitted
    until the output reaches ``entry.linenum``, then the msgstr is appended
    after being cleaned by ``strip_rst`` and ``clear``.

    Returns the resulting text as a single string.
    """
    buffer = []
    lines = 0  # Number of text lines emitted so far.
    entries = polib.pofile(po_path)
    for entry in entries:
        if entry.msgid == entry.msgstr:
            continue
        # Pad with empty lines up to the line number polib reports for
        # this entry.
        while lines < entry.linenum:
            buffer.append("")
            lines += 1
        text = clear(strip_rst(entry.msgstr))
        buffer.append(text)
        # A msgstr may contain embedded newlines and then occupies several
        # lines once joined; count all of them, otherwise every following
        # entry would be shifted away from its original line number.
        lines += 1 + text.count("\n")
    return "\n".join(buffer)


def main():
    """Module entry point.

    Parses the command line, converts each selected po file to plain text
    in a temporary directory, runs ``hunspell`` on it and prints every
    reported error as ``file:line:word``. With ``--debug`` the converted
    text is printed instead and hunspell is not run.

    Terminates the process through ``sys.exit``: status 0 when no error was
    reported, -1 otherwise. ``subprocess.CalledProcessError`` propagates if
    hunspell exits with a non-zero status.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Check spelling in po files containing restructuredText."
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="fr",
        help="Language to check, you'll have to install the corresponding "
        "hunspell dictionary, on Debian see apt list 'hunspell-*'.",
    )
    parser.add_argument(
        "--glob",
        type=str,
        help="Provide a glob pattern, to be interpreted by pospell, to find po files, "
        "like --glob '**/*.po'.",
    )
    parser.add_argument(
        "po_file",
        nargs="*",
        type=Path,
        help="Files to check, can optionally be mixed with --glob, or not, "
        "use the one that fit your needs.",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-p", "--personal-dict", type=str)
    args = parser.parse_args()
    personal_dict = ["-p", args.personal_dict] if args.personal_dict else []
    # Shape of the hunspell output lines we report; compiled once because it
    # is applied to every output line of every file.
    error_line = re.compile(
        r"(?P<path>.*):(?P<line>[0-9]+): Locate: (?P<error>.*) \| Try: .*$"
    )
    errors = 0
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        for po_file in chain(
            Path(".").glob(args.glob) if args.glob else [], args.po_file
        ):
            if args.debug:
                print(po_to_text(str(po_file)))
                continue
            # hunspell is run on a stripped copy of the po file, stored under
            # the same file name inside the temporary directory.
            text_file = tmpdir / po_file.name
            text_file.write_text(po_to_text(str(po_file)))
            output = subprocess.check_output(
                ["hunspell", "-d", args.language]
                + personal_dict
                + ["-u3", str(text_file)],
                universal_newlines=True,
            )
            for line in output.split("\n"):
                match = error_line.match(line)
                if match:
                    errors += 1
                    print(
                        # Hide the temporary directory from the reported path.
                        match.group("path").replace(str(tmpdir), "").lstrip("/"),
                        match.group("line"),
                        match.group("error"),
                        sep=":",
                    )
    # ``sys.exit`` rather than the ``exit`` helper: the latter is injected by
    # the ``site`` module and is not available in every interpreter setup.
    sys.exit(0 if errors == 0 else -1)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Module docstring. 2018-07-27 13:49:58 +00:00			`"""pospell is a spellcheckers for po files containing reStructuedText.`
			`"""`
Initial commit 2018-07-23 15:37:50 +00:00
isort 2018-07-27 09:38:17 +00:00			`import re`
Initial commit 2018-07-23 15:37:50 +00:00			`import subprocess`
isort 2018-07-27 09:38:17 +00:00			`import tempfile`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`from itertools import chain`
Initial commit 2018-07-23 15:37:50 +00:00			`from pathlib import Path`
isort 2018-07-27 09:38:17 +00:00
Initial commit 2018-07-23 15:37:50 +00:00			`import polib`


			`def strip_rst(line):`
Strip sphinx variables and a href links. 2018-07-27 09:01:09 +00:00			`"""Strip out reStructuredText and Sphinx-doc tags from a line.`
			`"""`
Initial commit 2018-07-23 15:37:50 +00:00			`return re.sub(`
Enhance exclusions thanks to a new debug mode. 2018-07-23 22:00:52 +00:00			r"""(C-)?:[^:]?:`[^`]?` \|
Adding new reStructuredText markers. 2018-07-27 08:02:43 +00:00			``.*?`` \|
			`\b[A-Z][a-zA-Z-]{2,}[a-zA-Z.-]*\b \| # Strip capitalized words and accronyms`
			`{[a-z]*?} \| # reStructuredText tag`
			`\\|[a-z]+?\\| \| # reStructuredText substitution`
Strip sphinx variables and a href links. 2018-07-27 09:01:09 +00:00			`%\([a-z_]+?\)s \| # Sphinx variable`
Adding new reStructuredText markers. 2018-07-27 08:02:43 +00:00			`-[A-Za-z]\b \|`
			`[^`]*?`_ \|
Initial commit 2018-07-23 15:37:50 +00:00			`\[^]?\`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`""",`
			`"",`
			`line,`
			`flags=re.VERBOSE,`
			`)`
Initial commit 2018-07-23 15:37:50 +00:00

Strip sphinx variables and a href links. 2018-07-27 09:01:09 +00:00			`def clear(line):`
			`"""Clear various other syntaxes we may encounter in a line.`
			`"""`
			`return re.sub(r"""<a href="[^"]?">(.)</a>""", r"\1", line)`


Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`def po_to_text(po_path):`
			`"""Converts a po file to a text file, by stripping the msgids and all`
			`po syntax, but by keeping the kept lines at their same position /`
			`line number.`
			`"""`
Initial commit 2018-07-23 15:37:50 +00:00			`buffer = []`
Report line numbers. 2018-07-23 17:24:10 +00:00			`lines = 0`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`entries = polib.pofile(po_path)`
Initial commit 2018-07-23 15:37:50 +00:00			`for entry in entries:`
Don't spellcheck untranslated messages, they're typically in english (licence text, project names, functions prototypes, ...) 2018-07-27 08:03:21 +00:00			`if entry.msgid == entry.msgstr:`
			`continue`
Report line numbers. 2018-07-23 17:24:10 +00:00			`while lines < entry.linenum:`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`buffer.append("")`
Report line numbers. 2018-07-23 17:24:10 +00:00			`lines += 1`
Strip sphinx variables and a href links. 2018-07-27 09:01:09 +00:00			`buffer.append(clear(strip_rst(entry.msgstr)))`
Report line numbers. 2018-07-23 17:24:10 +00:00			`lines += 1`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`return "\n".join(buffer)`
Initial commit 2018-07-23 15:37:50 +00:00

			`def main():`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`"""Module entry point.`
			`"""`
Initial commit 2018-07-23 15:37:50 +00:00			`import argparse`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00
Initial commit 2018-07-23 15:37:50 +00:00			`parser = argparse.ArgumentParser(`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`description="Check spelling in po files containing restructuredText."`
			`)`
			`parser.add_argument(`
			`"-l",`
			`"--language",`
			`type=str,`
			`default="fr",`
			`help="Language to check, you'll have to install the corresponding "`
			`"hunspell dictionary, on Debian see apt list 'hunspell-*'.",`
			`)`
			`parser.add_argument(`
			`"--glob",`
			`type=str,`
			`help="Provide a glob pattern, to be interpreted by pospell, to find po files, "`
			`"like --glob '*/.po'.",`
			`)`
			`parser.add_argument(`
			`"po_file",`
			`nargs="*",`
FIX: Don't scan recursively by default, and use Path instead of strings. 2018-07-27 14:21:28 +00:00			`type=Path,`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`help="Files to check, can optionally be mixed with --glob, or not, "`
			`"use the one that fit your needs.",`
			`)`
			`parser.add_argument("--debug", action="store_true")`
			`parser.add_argument("-p", "--personal-dict", type=str)`
Initial commit 2018-07-23 15:37:50 +00:00			`args = parser.parse_args()`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`personal_dict = ["-p", args.personal_dict] if args.personal_dict else []`
Exit -1 on error. 2018-07-23 22:28:13 +00:00			`errors = 0`
Initial commit 2018-07-23 15:37:50 +00:00			`with tempfile.TemporaryDirectory() as tmpdirname:`
			`tmpdir = Path(tmpdirname)`
FIX: Don't scan recursively by default, and use Path instead of strings. 2018-07-27 14:21:28 +00:00			`for po_file in chain(`
			`Path(".").glob(args.glob) if args.glob else [], args.po_file`
			`):`
Enhance exclusions thanks to a new debug mode. 2018-07-23 22:00:52 +00:00			`if args.debug:`
			`print(po_to_text(str(po_file)))`
			`continue`
Initial commit 2018-07-23 15:37:50 +00:00			`(tmpdir / po_file.name).write_text(po_to_text(str(po_file)))`
			`output = subprocess.check_output(`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`["hunspell", "-d", args.language]`
			`+ personal_dict`
			`+ ["-u3", str(tmpdir / po_file.name)],`
			`universal_newlines=True,`
			`)`
			`for line in output.split("\n"):`
			`match = re.match(`
			`r"(?P<path>.):(?P<line>[0-9]+): Locate: (?P<error>.) \\| Try: .*$",`
			`line,`
			`)`
Report line numbers. 2018-07-23 17:24:10 +00:00			`if match:`
Exit -1 on error. 2018-07-23 22:28:13 +00:00			`errors += 1`
Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`print(`
			`match.group("path").replace(str(tmpdir), "").lstrip("/"),`
			`match.group("line"),`
			`match.group("error"),`
			`sep=":",`
			`)`
Exit -1 on error. 2018-07-23 22:28:13 +00:00			`exit(0 if errors == 0 else -1)`
Initial commit 2018-07-23 15:37:50 +00:00

Allow for multiple files to be passed positionally. 2018-07-27 13:54:10 +00:00			`if __name__ == "__main__":`
Initial commit 2018-07-23 15:37:50 +00:00			`main()`