FIX: Double spaces were breaking start-of-sentence detection.

This commit is contained in:
Julien Palard 2019-09-16 10:44:18 +02:00
parent b66ad5d2b9
commit 10bfe72a5e
2 changed files with 8 additions and 1 deletion

View File

@@ -122,6 +122,8 @@ def strip_rst(line):
def clear(po_path, line):
"""Clear various other syntaxes we may encounter in a line.
"""
# Normalize spaces
line = regex.sub(r"\s+", " ", line)
to_drop = {
r'<a href="[^"]*?">',
# Strip capitalized words and acronyms in sentences
@@ -135,7 +137,7 @@ def clear(po_path, line):
r"'?-?\b([0-9]+\.)*[0-9]+\.[0-9abcrx]+\b'?", # Versions
r"[0-9]+h", # Hours
r"%\([a-z_]+?\)s", # Sphinx variable
r"« . »", # Single letter examples (typically in Unicode documentation)
r"« . »", # Single letter examples (typically in Unicode documentation)
}
if logging.getLogger().isEnabledFor(logging.DEBUG):
for pattern in to_drop:

View File

@@ -38,3 +38,8 @@ def test_clear():
clear("test", "under python 1.6a1, 1.5.2, and earlier.")
== "under python , , and earlier."
)
# Double space should change nothing
assert clear("test", "Test. Aujourd'hui, j'ai faim.") == clear(
"test", "Test. Aujourd'hui, j'ai faim."
)