2019-11-22 11:50:24 +00:00
|
|
|
|
"""Checker for grammar errors."""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import re
|
|
|
|
|
import subprocess
|
|
|
|
|
import tempfile
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from zipfile import ZipFile
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
import simplelogging
|
|
|
|
|
|
|
|
|
|
from padpo.checkers.baseclass import Checker, replace_quotes
|
|
|
|
|
from padpo.pofile import PoItem, PoFile
|
|
|
|
|
|
|
|
|
|
# Module-level logger used by the checker and by install_grammalecte() below.
log = simplelogging.get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GrammalecteChecker(Checker):
    """Checker for grammar errors.

    The whole `*.po` file is converted to plain text, written to a temporary
    file and handed to the external ``grammalecte-cli.py`` program; the JSON
    report it prints is then turned into warnings on the matching items.
    """

    name = "Grammalecte"

    def __init__(self):
        """Initialiser."""
        super().__init__()
        self.dir = None

    @staticmethod
    def run_grammalecte(filename: str) -> subprocess.CompletedProcess:
        """Run ``grammalecte-cli.py`` on *filename* and return the result.

        stdout/stderr are captured as text; the caller parses stdout as JSON.
        ``FileNotFoundError`` propagates when the executable cannot be
        found, which `check_file` uses as the trigger to install it.
        """
        return subprocess.run(
            [
                "grammalecte-cli.py",
                "-f",
                filename,
                "-off",
                "apos",
                "--json",
                "--only_when_errors",
            ],
            capture_output=True,
            text=True,
        )

    def check_file(self, pofile: PoFile):
        """Check a `*.po` file.

        Grammar warnings are attached to the items of *pofile*. When
        grammalecte is missing, one installation attempt is made before
        retrying. The temporary text file is always removed.
        """
        if not isinstance(pofile, PoFile):
            log.error("%s is not an instance of PoFile", str(pofile))
            # Nothing can be checked on a foreign object; stop here instead
            # of failing later on a missing attribute.
            return

        # Build the text first so no temporary file is left behind when the
        # conversion itself fails.
        text = pofile.rst2txt()
        text = re.sub(r"«\s(.*?)\s»", replace_quotes, text)

        # delete=False: the file must outlive the `with` so the external
        # process can read it; it is removed in the `finally` below. Using
        # NamedTemporaryFile (rather than mkstemp + open) guarantees the
        # underlying descriptor is closed.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".txt",
            prefix="padpo_",
            encoding="utf8",
            delete=False,
        ) as f:
            filename = f.name
            f.write(text)

        try:
            try:
                result = self.run_grammalecte(filename)
            except FileNotFoundError as e:
                if e.filename != "grammalecte-cli.py":
                    # Some other file is missing: do not hide the error.
                    raise
                install_grammalecte()
                result = self.run_grammalecte(filename)
            if result.stdout:
                warnings = json.loads(result.stdout)
                self.manage_grammar_errors(warnings, pofile)
        finally:
            Path(filename).unlink()

    def check_item(self, item: PoItem):
        """Check an item in a `*.po` file (does nothing)."""

    def manage_grammar_errors(self, warnings, pofile: PoFile):
        """Attach the grammalecte errors found in *warnings* to *pofile*.

        *warnings* is the decoded JSON report: its ``"data"`` entries each
        carry an ``"iParagraph"`` number and a list of ``"lGrammarErrors"``.
        Errors rejected by `filter_out_grammar_error` are skipped.
        """
        for warning in warnings["data"]:
            for error in warning["lGrammarErrors"]:
                if self.filter_out_grammar_error(error):
                    continue
                # NOTE(review): paragraph -> item mapping presumably relies on
                # rst2txt() emitting two paragraphs per item; confirm there.
                item_index = int(warning["iParagraph"]) // 2
                item = pofile.content[item_index]
                # Show some context around the error: 40 characters before
                # its start and 10 after its end.
                start = max(0, int(error["nStart"]) - 40)
                end = max(0, int(error["nEnd"]) + 10)
                item.add_warning(
                    self.name,
                    error["sMessage"]
                    + " => ###"
                    + item.msgstr_rst2txt[start:end]
                    + "###",
                )

    def filter_out_grammar_error(self, error):
        """Return True when grammalecte error should be ignored."""
        msg = error["sRuleId"]
        if msg in (
            "esp_milieu_ligne",  # double space
            "nbsp_avant_deux_points",  # NBSP
            "nbsp_avant_double_ponctuation",  # NBSP
        ):
            return True
        if "typo_guillemets_typographiques_simples" in msg:
            return True  # ignore ' quotes
        msg_text = error["sMessage"]
        if msg_text in (
            "Accord de genre erroné : « ABC » est masculin.",
            "Accord de genre erroné : « PEP » est masculin.",
            "Accord de nombre erroné : « PEP » devrait être au pluriel.",
            "Accord de genre erroné : « une entrée » est féminin, « utilisateur » est masculin.",
        ):
            return True
        if "S’il s’agit d’un impératif" in msg_text:
            if error["nStart"] == 0:
                # ignore imperative conjugation at beginning of 1st sentence
                return True
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def install_grammalecte():
    """Install grammalecte CLI.

    Downloads the Grammalecte 1.5.0 archive, extracts it into a private
    temporary directory and installs it with pip into the environment of
    the running interpreter.

    Raises:
        requests.HTTPError: when the download answers with an error status.
        subprocess.CalledProcessError: when pip exits with a failure.
    """
    # Local import: only this function needs the interpreter path.
    import sys

    log.warning("Missing grammalecte, trying to install it")
    # A fresh, private directory (removed on exit) replaces the fixed
    # "/tmp/_padpo_gramma" path, which was predictable and POSIX-only.
    with tempfile.TemporaryDirectory(prefix="padpo_") as tmpdirname:
        tmpdirname = Path(tmpdirname)
        download_request = requests.get(
            "https://grammalecte.net/grammalecte/zip/Grammalecte-fr-v1.5.0.zip",
            # Without a timeout a stalled server would hang the checker.
            timeout=60,
        )
        download_request.raise_for_status()
        zip_file = tmpdirname / "Grammalecte-fr-v1.5.0.zip"
        zip_file.write_bytes(download_request.content)
        with ZipFile(zip_file, "r") as zip_obj:
            zip_obj.extractall(tmpdirname / "Grammalecte-fr-v1.5.0")
        # "python -m pip" targets the interpreter running padpo rather than
        # whichever "pip" happens to come first on PATH; check=True surfaces
        # a failed installation instead of silently continuing.
        subprocess.run(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                str(tmpdirname / "Grammalecte-fr-v1.5.0"),
            ],
            check=True,
        )
|