padpo/padpo/checkers/grammalecte.py
2019-11-22 13:26:31 +01:00

122 lines
3.9 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Checker for grammar errors."""
import json
import re
import subprocess
import tempfile
from pathlib import Path
from zipfile import ZipFile
import requests
import simplelogging
from padpo.checkers.baseclass import Checker, replace_quotes
from padpo.pofile import PoItem, PoFile
log = simplelogging.get_logger()
class GrammalecteChecker(Checker):
"""Checker for grammar errors."""
name = "Grammalecte"
def __init__(self):
"""Initialiser."""
super().__init__()
self.dir = None
def check_file(self, pofile: PoFile):
"""Check a `*.po` file."""
if not isinstance(pofile, PoFile):
log.error("%s is not an instance of PoFile", str(pofile))
fd, name = tempfile.mkstemp(suffix=".txt", prefix="padpo_", text=True)
with open(name, "w", encoding="utf8") as f:
text = pofile.rst2txt()
text = re.sub(r"«\s(.*?)\", replace_quotes, text)
f.write(text)
try:
result = subprocess.run(
[
"grammalecte-cli.py",
"-f",
name,
"-off",
"apos",
"--json",
"--only_when_errors",
],
capture_output=True,
text=True,
)
except FileNotFoundError as e:
if e.filename == "grammalecte-cli.py":
install_grammalecte()
result = subprocess.run(
[
"grammalecte-cli.py",
"-f",
name,
"-off",
"apos",
"--json",
"--only_when_errors",
],
capture_output=True,
text=True,
)
if result.stdout:
warnings = json.loads(result.stdout)
for warning in warnings["data"]:
for error in warning["lGrammarErrors"]:
if self.filter_out(error):
continue
item_index = int(warning["iParagraph"]) // 2
item = pofile.content[item_index]
start = max(0, int(error["nStart"]) - 40)
end = max(0, int(error["nEnd"]) + 10)
item.add_warning(
self.name,
# self.name + " " + error["sRuleId"], # TODO
error["sMessage"]
+ " => ###"
+ item.msgstr_rst2txt[start:end]
+ "###",
)
Path(name).unlink()
def check_item(self, item: PoItem):
"""Check an item in a `*.po` file (does nothing)."""
pass
def filter_out(self, error):
"""Return True when grammalecte error should be ignored."""
msg = error["sRuleId"]
if msg == "esp_milieu_ligne":
return True # double space
if msg == "nbsp_avant_deux_points":
return True
if msg == "nbsp_avant_double_ponctuation":
return True
return False
def install_grammalecte():
"""Install grammalecte CLI."""
log.warning("Missing grammalecte, trying to install it")
# with tempfile.TemporaryDirectory(prefix="padpo_") as tmpdirname:
tmpdirname = "/tmp/_padpo_gramma"
tmpdirname = Path(tmpdirname)
tmpdirname.mkdir(exist_ok=True)
download_request = requests.get(
"https://grammalecte.net/grammalecte/zip/Grammalecte-fr-v1.5.0.zip"
)
download_request.raise_for_status()
zip_file = tmpdirname / "Grammalecte-fr-v1.5.0.zip"
zip_file.write_bytes(download_request.content)
with ZipFile(zip_file, "r") as zip_obj:
zip_obj.extractall(tmpdirname / "Grammalecte-fr-v1.5.0")
subprocess.run(
["pip", "install", str(tmpdirname / "Grammalecte-fr-v1.5.0")]
)