padpo/padpo/checkers/grammalecte.py

128 lines
4.5 KiB
Python
Raw Normal View History

2019-11-22 11:50:24 +00:00
"""Checker for grammar errors."""
import re
from pathlib import Path
from typing import Iterable, Optional, Set
2019-11-22 11:50:24 +00:00
import requests
import simplelogging
2020-08-02 19:32:56 +00:00
from pygrammalecte import (
GrammalecteGrammarMessage,
GrammalecteMessage,
GrammalecteSpellingMessage,
grammalecte_text,
2020-08-02 19:32:56 +00:00
)
2019-11-22 11:50:24 +00:00
from padpo.checkers.baseclass import Checker, replace_quotes
from padpo.checkers.glossary import glossary
from padpo.pofile import PoFile, PoItem
2019-11-22 11:50:24 +00:00
# Module-level logger obtained from simplelogging; used below to report errors.
log = simplelogging.get_logger()
class GrammalecteChecker(Checker):
    """Checker for grammar and spelling errors reported by Grammalecte.

    The text of a whole `*.po` file is passed to ``grammalecte_text`` at once;
    every returned message that is not filtered out is attached as a warning
    to an item of the file. Words listed in personal dictionaries (see
    ``add_arguments`` / ``configure``) are never reported as spelling errors.
    """

    name = "Grammalecte"

    # Timeout, in seconds, applied when a personal dictionary is downloaded.
    _DOWNLOAD_TIMEOUT = 30

    # Grammar rules whose messages are always ignored.
    _IGNORED_RULES = (
        "esp_milieu_ligne",  # double space
        "nbsp_avant_deux_points",  # NBSP
        "nbsp_avant_double_ponctuation",  # NBSP
    )

    # Exact grammar messages that are always ignored.
    # NOTE(review): Grammalecte may emit non-breaking spaces inside « »;
    # confirm that these literals match the real messages byte for byte.
    _IGNORED_MESSAGES = (
        "Accord de genre erroné : « ABC » est masculin.",
        "Accord de genre erroné : « PEP » est masculin.",
        "Accord de nombre erroné : « PEP » devrait être au pluriel.",
        "Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.",
    )

    def __init__(self):
        """Initialiser."""
        super().__init__()
        # Words that must never be reported as spelling errors.
        self.personal_dict: Set[str] = set()

    def check_file(self, pofile: PoFile):
        """Check a `*.po` file.

        The file is converted to plain text with ``pofile.rst2txt()``, the
        « … » quotations are rewritten through ``replace_quotes``, and the
        result is analysed by ``grammalecte_text``.
        """
        if not isinstance(pofile, PoFile):
            # Only logged: the check still proceeds with the object given.
            log.error("%s is not an instance of PoFile", str(pofile))
        text = pofile.rst2txt()
        text = re.sub(r"«\s(.*?)\s»", replace_quotes, text)
        self.manage_warnings(grammalecte_text(text), pofile)

    def check_item(self, item: PoItem):
        """Check an item in a `*.po` file (does nothing)."""

    def manage_warnings(
        self, warnings: Iterable[GrammalecteMessage], pofile: PoFile
    ) -> None:
        """Manage warnings returned by grammalecte.

        Each message that is not filtered out is added as a warning to the
        matching item of *pofile*, together with an excerpt of the item text
        around the reported position.
        """
        for warning in warnings:
            if self.filter_out_grammar_error(
                warning
            ) or self.filter_out_spelling_error(warning):
                continue
            # NOTE(review): presumably rst2txt() emits two text lines per
            # item, hence the integer division by 2 — confirm in PoFile.
            item = pofile.content[warning.line // 2]
            # Excerpt: up to 40 characters before the error, 10 after it.
            start = max(0, warning.start - 40)
            end = warning.end + 10
            item.add_warning(
                self.name,
                f"{warning.message} => ###{item.msgstr_rst2txt[start:end]}###",
            )

    def filter_out_grammar_error(self, warning: GrammalecteMessage) -> bool:
        """Return True when grammalecte error should be ignored.

        Messages that are not grammar messages are never ignored here.
        """
        if not isinstance(warning, GrammalecteGrammarMessage):
            return False
        if warning.rule in self._IGNORED_RULES:
            return True
        if "typo_guillemets_typographiques_simples" in warning.rule:
            return True  # ignore ' quotes
        if warning.message in self._IGNORED_MESSAGES:
            return True
        # Ignore imperative conjugation at the beginning of the 1st sentence.
        return warning.start == 0 and "S’il s’agit d’un impératif" in warning.message

    def filter_out_spelling_error(self, warning: GrammalecteMessage) -> bool:
        """Return True when grammalecte error should be ignored.

        Messages that are not spelling messages are never ignored here.
        """
        if not isinstance(warning, GrammalecteSpellingMessage):
            return False
        word = warning.word
        if set(word) == {"x"}:
            return True  # word is xxxxx or xxxxxxxx…
        if word.strip() in self.personal_dict:
            return True  # white list
        if word.endswith("_"):
            return True
        lowered = word.lower()
        # "uplet" is a partially italic word in the glossary.
        return lowered in glossary or lowered == "uplet"

    def _get_personal_dict(self, dict_path: str) -> None:
        """Load the words of one personal dictionary into ``personal_dict``.

        *dict_path* is downloaded with ``requests`` when it contains "://",
        otherwise it is read as a UTF-8 text file. Every non-empty line is
        stripped and added both as written and in title case.

        Raises whatever ``requests`` raises on a failed or timed-out
        download, and ``OSError`` when a local file cannot be read.
        """
        if "://" in dict_path:
            # A timeout keeps an unresponsive server from blocking forever.
            response = requests.get(dict_path, timeout=self._DOWNLOAD_TIMEOUT)
            response.raise_for_status()
            content = response.text
        else:
            content = Path(dict_path).read_text(encoding="UTF-8")
        for line in content.splitlines():
            word = line.strip()
            if not word:
                continue  # blank lines carry no word
            self.personal_dict.update((word, word.title()))

    def add_arguments(self, parser):
        """Register the ``--dict`` option (stored as ``dicts``) on *parser*."""
        parser.add_argument(
            "--dict",
            nargs="*",
            dest="dicts",
            help="Personal dict files or URLs. Should contain one word per line.",
        )

    def configure(self, args):
        """Store the result of parse_args, to get back arguments from self.add_arguments."""
        for dict_path in args.dicts or ():
            self._get_personal_dict(dict_path)