130 lines
4.4 KiB
Python
130 lines
4.4 KiB
Python
"""Checker for grammar errors."""
|
||
|
||
import json
|
||
import re
|
||
import subprocess
|
||
import tempfile
|
||
from pathlib import Path
|
||
from zipfile import ZipFile
|
||
|
||
import requests
|
||
import simplelogging
|
||
|
||
from padpo.checkers.baseclass import Checker, replace_quotes
|
||
from padpo.pofile import PoItem, PoFile
|
||
|
||
log = simplelogging.get_logger()
|
||
|
||
|
||
class GrammalecteChecker(Checker):
    """Checker for grammar errors, backed by the Grammalecte command-line tool."""

    name = "Grammalecte"

    def __init__(self):
        """Initialiser."""
        super().__init__()
        self.dir = None

    @staticmethod
    def run_grammalecte(filename: str) -> subprocess.CompletedProcess:
        """Run ``grammalecte-cli.py`` on a text file.

        The command is invoked with ``-f <filename> -off apos --json
        --only_when_errors``; its standard output and error streams are
        captured as text.

        Args:
            filename: path of the plain-text file to analyse.

        Returns:
            The completed process; ``stdout`` holds the JSON report, if any.

        Raises:
            FileNotFoundError: if the ``grammalecte-cli.py`` executable cannot
                be found.
        """
        return subprocess.run(
            [
                "grammalecte-cli.py",
                "-f",
                filename,
                "-off",
                "apos",
                "--json",
                "--only_when_errors",
            ],
            capture_output=True,
            text=True,
        )

    def check_file(self, pofile: PoFile):
        """Check a `*.po` file.

        The text version of the file is written to a temporary file, analysed
        by Grammalecte, and every grammar error that is not filtered out by
        `filter_out` is attached as a warning to the matching item of
        ``pofile.content``. If the Grammalecte executable is missing, an
        installation is attempted once before retrying.

        Args:
            pofile: the parsed `*.po` file to check. Anything that is not a
                `PoFile` is reported through the logger and ignored.

        Raises:
            FileNotFoundError: if the failure to launch Grammalecte is not
                due to a missing ``grammalecte-cli.py``, or if it is still
                missing after the installation attempt.
        """
        if not isinstance(pofile, PoFile):
            log.error("%s is not an instance of PoFile", str(pofile))
            return

        # Build the text before creating the temporary file so that a failure
        # here cannot leave a stray file or an open descriptor behind.
        text = re.sub(r"«\s(.*?)\s»", replace_quotes, pofile.rst2txt())

        fd, filename = tempfile.mkstemp(
            suffix=".txt", prefix="padpo_", text=True
        )
        try:
            # Opening the descriptor itself (rather than the path) makes the
            # `with` block close it, so it does not leak.
            with open(fd, "w", encoding="utf8") as f:
                f.write(text)

            try:
                result = self.run_grammalecte(filename)
            except FileNotFoundError as e:
                if e.filename != "grammalecte-cli.py":
                    # Not the "grammalecte is not installed" case: let the
                    # caller see the real error.
                    raise
                install_grammalecte()
                result = self.run_grammalecte(filename)

            if result.stdout:
                warnings = json.loads(result.stdout)
                for warning in warnings["data"]:
                    for error in warning["lGrammarErrors"]:
                        if self.filter_out(error):
                            continue
                        # NOTE(review): the paragraph index is halved to get
                        # the item index — presumably rst2txt() emits two
                        # lines per item; confirm against PoFile.rst2txt.
                        item_index = int(warning["iParagraph"]) // 2
                        item = pofile.content[item_index]
                        # Quote the error with up to 40 characters of context
                        # before it and 10 after it.
                        start = max(0, int(error["nStart"]) - 40)
                        end = max(0, int(error["nEnd"]) + 10)
                        # TODO: include the rule id (error["sRuleId"]) in the
                        # checker name passed to add_warning.
                        item.add_warning(
                            self.name,
                            error["sMessage"]
                            + " => ###"
                            + item.msgstr_rst2txt[start:end]
                            + "###",
                        )
        finally:
            # Always remove the temporary file, even when the analysis fails.
            Path(filename).unlink()

    def check_item(self, item: PoItem):
        """Check an item in a `*.po` file (does nothing)."""
        pass

    def filter_out(self, error):
        """Return True when grammalecte error should be ignored.

        Args:
            error: one entry of Grammalecte's ``lGrammarErrors`` list; the
                keys ``sRuleId``, ``sMessage`` and ``nStart`` are read.
        """
        rule_id = error["sRuleId"]
        if rule_id in (
            "esp_milieu_ligne",  # double space
            "nbsp_avant_deux_points",  # NBSP
            "nbsp_avant_double_ponctuation",  # NBSP
        ):
            return True
        if "typo_guillemets_typographiques_simples" in rule_id:
            return True  # ignore ' quotes

        msg_text = error["sMessage"]
        if msg_text in (
            "Accord de genre erroné : « ABC » est masculin.",
            "Accord de genre erroné : « PEP » est masculin.",
            "Accord de nombre erroné : « PEP » devrait être au pluriel.",
            "Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.",
        ):
            return True

        # ignore imperative conjugation at beginning of 1st sentence
        if "S’il s’agit d’un impératif" in msg_text and error["nStart"] == 0:
            return True
        return False
|
||
|
||
|
||
def install_grammalecte():
    """Install grammalecte CLI.

    Downloads the Grammalecte v1.5.0 archive from grammalecte.net, extracts
    it into a private temporary directory (removed afterwards) and installs
    it with pip for the interpreter currently running this code.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
        subprocess.CalledProcessError: if ``pip install`` exits with a
            non-zero status.
    """
    import sys  # local import: only needed to locate the running interpreter

    log.warning("Missing grammalecte, trying to install it")

    archive_name = "Grammalecte-fr-v1.5.0"
    # A timeout keeps a stalled server from blocking the checker forever.
    download_request = requests.get(
        f"https://grammalecte.net/grammalecte/zip/{archive_name}.zip",
        timeout=60,
    )
    download_request.raise_for_status()

    # A fresh, unpredictable directory instead of a fixed path under /tmp:
    # portable, not pre-creatable by another user, and cleaned up on exit.
    with tempfile.TemporaryDirectory(prefix="padpo_") as tmpdirname:
        tmpdir = Path(tmpdirname)
        zip_file = tmpdir / f"{archive_name}.zip"
        zip_file.write_bytes(download_request.content)

        source_dir = tmpdir / archive_name
        with ZipFile(zip_file, "r") as zip_obj:
            zip_obj.extractall(source_dir)

        # `python -m pip` installs into the environment of the running
        # interpreter; a bare `pip` may belong to a different one.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", str(source_dir)],
            check=True,
        )
|