128 lines
4.5 KiB
Python
128 lines
4.5 KiB
Python
"""Checker for grammar errors."""
|
||
|
||
import re
|
||
from pathlib import Path
|
||
from typing import Set, Optional
|
||
|
||
import requests
|
||
import simplelogging
|
||
from pygrammalecte import (
|
||
GrammalecteGrammarMessage,
|
||
GrammalecteMessage,
|
||
GrammalecteSpellingMessage,
|
||
grammalecte_text,
|
||
)
|
||
|
||
from padpo.checkers.baseclass import Checker, replace_quotes
|
||
from padpo.checkers.glossary import glossary
|
||
from padpo.pofile import PoFile, PoItem
|
||
|
||
log = simplelogging.get_logger()
|
||
|
||
|
||
class GrammalecteChecker(Checker):
|
||
"""Checker for grammar errors."""
|
||
|
||
name = "Grammalecte"
|
||
|
||
def __init__(self):
|
||
"""Initialiser."""
|
||
super().__init__()
|
||
self.personal_dict: Set[str] = set()
|
||
|
||
def check_file(self, pofile: PoFile):
|
||
"""Check a `*.po` file."""
|
||
if not isinstance(pofile, PoFile):
|
||
log.error("%s is not an instance of PoFile", str(pofile))
|
||
text = pofile.rst2txt()
|
||
text = re.sub(r"«\s(.*?)\s»", replace_quotes, text)
|
||
warnings = grammalecte_text(text)
|
||
self.manage_warnings(warnings, pofile)
|
||
|
||
def check_item(self, item: PoItem):
|
||
"""Check an item in a `*.po` file (does nothing)."""
|
||
pass
|
||
|
||
def manage_warnings(self, warnings: GrammalecteMessage, pofile: PoFile) -> None:
|
||
"""Manage warnings returned by grammalecte."""
|
||
for warning in warnings:
|
||
if self.filter_out_grammar_error(warning) or self.filter_out_spelling_error(
|
||
warning
|
||
):
|
||
continue
|
||
item_index = warning.line // 2
|
||
item = pofile.content[item_index]
|
||
start = max(0, warning.start - 40)
|
||
end = warning.end + 10
|
||
item.add_warning(
|
||
self.name,
|
||
f"{warning.message} => " f"###{item.msgstr_rst2txt[start:end]}###",
|
||
)
|
||
|
||
def filter_out_grammar_error(self, warning: GrammalecteMessage) -> bool:
|
||
"""Return True when grammalecte error should be ignored."""
|
||
if not isinstance(warning, GrammalecteGrammarMessage):
|
||
return False
|
||
if warning.rule in (
|
||
"esp_milieu_ligne", # double space
|
||
"nbsp_avant_deux_points", # NBSP
|
||
"nbsp_avant_double_ponctuation", # NBSP
|
||
):
|
||
return True
|
||
if "typo_guillemets_typographiques_simples" in warning.rule:
|
||
return True # ignore ' quotes
|
||
if warning.message in (
|
||
"Accord de genre erroné : « ABC » est masculin.",
|
||
"Accord de genre erroné : « PEP » est masculin.",
|
||
"Accord de nombre erroné : « PEP » devrait être au pluriel.",
|
||
"Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.",
|
||
):
|
||
return True
|
||
if "S’il s’agit d’un impératif" in warning.message:
|
||
if warning.start == 0:
|
||
# ignore imperative conjugation at begining of 1st sentence
|
||
return True
|
||
return False
|
||
|
||
def filter_out_spelling_error(self, warning: GrammalecteMessage) -> bool:
|
||
"""Return True when grammalecte error should be ignored."""
|
||
if not isinstance(warning, GrammalecteSpellingMessage):
|
||
return False
|
||
if set(warning.word) == {"x"}:
|
||
return True # word is xxxxx or xxxxxxxx…
|
||
if warning.word.strip() in self.personal_dict:
|
||
return True # white list
|
||
if warning.word.endswith("_"):
|
||
return True
|
||
if warning.word.lower() in glossary:
|
||
return True
|
||
if warning.word.lower() == "uplet": # partially italic word in glossary
|
||
return True
|
||
return False
|
||
|
||
def _get_personal_dict(self, dict_path: str) -> None:
|
||
if "://" in dict_path:
|
||
download_request = requests.get(dict_path)
|
||
download_request.raise_for_status()
|
||
lines = download_request.text
|
||
else:
|
||
lines = Path(dict_path).read_text(encoding="UTF-8")
|
||
for line in lines.splitlines():
|
||
word = line.strip()
|
||
self.personal_dict.add(word)
|
||
self.personal_dict.add(word.title())
|
||
|
||
def add_arguments(self, parser):
|
||
parser.add_argument(
|
||
"--dict",
|
||
nargs="*",
|
||
dest="dicts",
|
||
help="Personal dict files or URLs. Should contain onw word per line.",
|
||
)
|
||
|
||
def configure(self, args):
|
||
"""Store the result of parse_args, to get back arguments from self.add_arguments."""
|
||
if args.dicts:
|
||
for dict_path in args.dicts:
|
||
self._get_personal_dict(dict_path)
|