2019-11-22 11:50:24 +00:00
|
|
|
|
"""Checker for grammar errors."""
|
|
|
|
|
|
|
|
|
|
import re
|
2019-12-03 08:48:32 +00:00
|
|
|
|
from typing import Iterable, Set
|
2019-11-22 11:50:24 +00:00
|
|
|
|
|
2020-09-07 06:45:50 +00:00
|
|
|
|
import requests
|
|
|
|
|
import simplelogging
|
2020-08-02 19:32:56 +00:00
|
|
|
|
from pygrammalecte import (
|
|
|
|
|
GrammalecteGrammarMessage,
|
|
|
|
|
GrammalecteMessage,
|
|
|
|
|
GrammalecteSpellingMessage,
|
2020-09-07 06:45:50 +00:00
|
|
|
|
grammalecte_text,
|
2020-08-02 19:32:56 +00:00
|
|
|
|
)
|
2019-11-22 11:50:24 +00:00
|
|
|
|
|
|
|
|
|
from padpo.checkers.baseclass import Checker, replace_quotes
|
2021-02-02 13:30:21 +00:00
|
|
|
|
from padpo.checkers.glossary import glossary
|
2020-09-07 06:45:50 +00:00
|
|
|
|
from padpo.pofile import PoFile, PoItem
|
2019-11-22 11:50:24 +00:00
|
|
|
|
|
|
|
|
|
# Module-level logger, used by the checker below to report invalid arguments.
log = simplelogging.get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GrammalecteChecker(Checker):
    """Checker for grammar errors.

    The text of a `*.po` file is submitted to Grammalecte (through
    `pygrammalecte.grammalecte_text`); every message that is not discarded
    by the grammar and spelling filters is attached as a warning to the
    corresponding item of the file.
    """

    name = "Grammalecte"

    # Spelling white list, taken from the python-docs-fr repository.
    PERSONAL_DICT_URL = (
        "https://raw.githubusercontent.com/python/python-docs-fr/3.9/dict"
    )
    # Seconds granted to the white list download before giving up, so that a
    # stalled connection cannot block the checker forever.
    PERSONAL_DICT_TIMEOUT = 10

    # Grammalecte rules that are never reported.
    _IGNORED_RULES = (
        "esp_milieu_ligne",  # double space
        "nbsp_avant_deux_points",  # NBSP
        "nbsp_avant_double_ponctuation",  # NBSP
    )
    # Exact Grammalecte messages that are never reported.
    _IGNORED_MESSAGES = (
        "Accord de genre erroné : « ABC » est masculin.",
        "Accord de genre erroné : « PEP » est masculin.",
        "Accord de nombre erroné : « PEP » devrait être au pluriel.",
        "Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.",
    )

    def __init__(self):
        """Initialise the checker and download the spelling white list."""
        super().__init__()
        # Words that must not be reported as spelling errors.
        self.personal_dict: Set[str] = set()
        self.get_personal_dict()

    def check_file(self, pofile: PoFile):
        """Check a `*.po` file.

        The file is converted to text with `pofile.rst2txt()`, analysed by
        `grammalecte_text` and the resulting messages are handed over to
        `manage_warnings`. When *pofile* is not a `PoFile`, an error is
        logged and nothing is checked.
        """
        if not isinstance(pofile, PoFile):
            log.error("%s is not an instance of PoFile", pofile)
            # Nothing can be checked on an object of the wrong type.
            return
        text = pofile.rst2txt()
        # Every « … » quotation is rewritten by `replace_quotes` (imported
        # from padpo.checkers.baseclass) before the text is analysed.
        text = re.sub(r"«\s(.*?)\s»", replace_quotes, text)
        self.manage_warnings(grammalecte_text(text), pofile)

    def check_item(self, item: PoItem):
        """Check an item in a `*.po` file (does nothing)."""

    def manage_warnings(
        self, warnings: Iterable[GrammalecteMessage], pofile: PoFile
    ) -> None:
        """Manage warnings returned by grammalecte.

        Each message that is not filtered out is added to the matching item
        of *pofile*, followed by an excerpt of the translation around the
        reported span (40 characters before, 10 characters after).
        """
        for warning in warnings:
            if self.filter_out_grammar_error(
                warning
            ) or self.filter_out_spelling_error(warning):
                continue
            # NOTE(review): presumably `rst2txt()` emits two lines per item,
            # hence the integer division - confirm against PoFile.rst2txt.
            item = pofile.content[warning.line // 2]
            start = max(0, warning.start - 40)
            end = warning.end + 10
            excerpt = item.msgstr_rst2txt[start:end]
            item.add_warning(self.name, f"{warning.message} => ###{excerpt}###")

    def filter_out_grammar_error(self, warning: GrammalecteMessage) -> bool:
        """Return True when grammalecte error should be ignored.

        Only `GrammalecteGrammarMessage` instances can be ignored here; any
        other kind of message yields False.
        """
        if not isinstance(warning, GrammalecteGrammarMessage):
            return False
        if warning.rule in self._IGNORED_RULES:
            return True
        if "typo_guillemets_typographiques_simples" in warning.rule:
            return True  # ignore ' quotes
        if warning.message in self._IGNORED_MESSAGES:
            return True
        if "S’il s’agit d’un impératif" in warning.message and warning.start == 0:
            # ignore imperative conjugation at beginning of 1st sentence
            return True
        return False

    def filter_out_spelling_error(self, warning: GrammalecteMessage) -> bool:
        """Return True when grammalecte error should be ignored.

        Only `GrammalecteSpellingMessage` instances can be ignored here; any
        other kind of message yields False.
        """
        if not isinstance(warning, GrammalecteSpellingMessage):
            return False
        word = warning.word
        if set(word) == {"x"}:
            return True  # word is xxxxx or xxxxxxxx…
        if word.strip() in self.personal_dict:
            return True  # white list
        if word.endswith("_"):
            # NOTE(review): presumably residue of reST markup - confirm.
            return True
        lowered = word.lower()
        # "uplet": partially italic word in glossary
        return lowered in glossary or lowered == "uplet"

    def get_personal_dict(self):
        """Add spelling white list.

        The list found at `PERSONAL_DICT_URL` is downloaded; each non-blank
        line is added to `self.personal_dict`, both as written and in its
        title-cased form.

        Raises:
            requests.RequestException: when the download fails, exceeds
                `PERSONAL_DICT_TIMEOUT` seconds or ends with an HTTP error
                status.
        """
        response = requests.get(
            self.PERSONAL_DICT_URL, timeout=self.PERSONAL_DICT_TIMEOUT
        )
        response.raise_for_status()
        for line in response.text.splitlines():
            word = line.strip()
            if not word:
                continue  # blank line: nothing to white-list
            self.personal_dict.add(word)
            self.personal_dict.add(word.title())
|