"""Checker for grammar errors.""" import re from pathlib import Path from typing import Set, Optional import requests import simplelogging from pygrammalecte import ( GrammalecteGrammarMessage, GrammalecteMessage, GrammalecteSpellingMessage, grammalecte_text, ) from padpo.checkers.baseclass import Checker, replace_quotes from padpo.checkers.glossary import glossary from padpo.pofile import PoFile, PoItem log = simplelogging.get_logger() class GrammalecteChecker(Checker): """Checker for grammar errors.""" name = "Grammalecte" def __init__(self): """Initialiser.""" super().__init__() self.personal_dict: Set[str] = set() def check_file(self, pofile: PoFile): """Check a `*.po` file.""" if not isinstance(pofile, PoFile): log.error("%s is not an instance of PoFile", str(pofile)) text = pofile.rst2txt() text = re.sub(r"«\s(.*?)\s»", replace_quotes, text) warnings = grammalecte_text(text) self.manage_warnings(warnings, pofile) def check_item(self, item: PoItem): """Check an item in a `*.po` file (does nothing).""" pass def manage_warnings(self, warnings: GrammalecteMessage, pofile: PoFile) -> None: """Manage warnings returned by grammalecte.""" for warning in warnings: if self.filter_out_grammar_error(warning) or self.filter_out_spelling_error( warning ): continue item_index = warning.line // 2 item = pofile.content[item_index] start = max(0, warning.start - 40) end = warning.end + 10 item.add_warning( self.name, f"{warning.message} => " f"###{item.msgstr_rst2txt[start:end]}###", ) def filter_out_grammar_error(self, warning: GrammalecteMessage) -> bool: """Return True when grammalecte error should be ignored.""" if not isinstance(warning, GrammalecteGrammarMessage): return False if warning.rule in ( "esp_milieu_ligne", # double space "nbsp_avant_deux_points", # NBSP "nbsp_avant_double_ponctuation", # NBSP ): return True if "typo_guillemets_typographiques_simples" in warning.rule: return True # ignore ' quotes if warning.message in ( "Accord de genre erroné : « ABC » est masculin.", "Accord de genre erroné : « PEP » est masculin.", "Accord de nombre erroné : « PEP » devrait être au pluriel.", "Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.", ): return True if "S’il s’agit d’un impératif" in warning.message: if warning.start == 0: # ignore imperative conjugation at begining of 1st sentence return True return False def filter_out_spelling_error(self, warning: GrammalecteMessage) -> bool: """Return True when grammalecte error should be ignored.""" if not isinstance(warning, GrammalecteSpellingMessage): return False if set(warning.word) == {"x"}: return True # word is xxxxx or xxxxxxxx… if warning.word.strip() in self.personal_dict: return True # white list if warning.word.endswith("_"): return True if warning.word.lower() in glossary: return True if warning.word.lower() == "uplet": # partially italic word in glossary return True return False def _get_personal_dict(self, dict_path: str) -> None: if "://" in dict_path: download_request = requests.get(dict_path) download_request.raise_for_status() lines = download_request.text else: lines = Path(dict_path).read_text(encoding="UTF-8") for line in lines.splitlines(): word = line.strip() self.personal_dict.add(word) self.personal_dict.add(word.title()) def add_arguments(self, parser): parser.add_argument( "--dict", nargs="*", dest="dicts", help="Personal dict files or URLs. Should contain onw word per line.", ) def configure(self, args): """Store the result of parse_args, to get back arguments from self.add_arguments.""" if args.dicts: for dict_path in args.dicts: self._get_personal_dict(dict_path)