FIX: Double spaces were breaking start-of-sentence detection.
This commit is contained in:
parent
b66ad5d2b9
commit
10bfe72a5e
|
@ -122,6 +122,8 @@ def strip_rst(line):
|
|||
def clear(po_path, line):
|
||||
"""Clear various other syntaxes we may encounter in a line.
|
||||
"""
|
||||
# Normalize spaces
|
||||
line = regex.sub(r"\s+", " ", line)
|
||||
to_drop = {
|
||||
r'<a href="[^"]*?">',
|
||||
# Strip capitalized words and acronyms in sentences
|
||||
|
@ -135,7 +137,7 @@ def clear(po_path, line):
|
|||
r"'?-?\b([0-9]+\.)*[0-9]+\.[0-9abcrx]+\b'?", # Versions
|
||||
r"[0-9]+h", # Hours
|
||||
r"%\([a-z_]+?\)s", # Sphinx variable
|
||||
r"« . »", # Single letter examples (typically in Unicode documentation)
|
||||
r"« . »", # Single letter examples (typically in Unicode documentation)
|
||||
}
|
||||
if logging.getLogger().isEnabledFor(logging.DEBUG):
|
||||
for pattern in to_drop:
|
||||
|
|
|
@ -38,3 +38,8 @@ def test_clear():
|
|||
clear("test", "under python 1.6a1, 1.5.2, and earlier.")
|
||||
== "under python , , and earlier."
|
||||
)
|
||||
|
||||
# Double space should change nothing
|
||||
assert clear("test", "Test. Aujourd'hui, j'ai faim.") == clear(
|
||||
"test", "Test.  Aujourd'hui, j'ai faim."
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user