Remove French dict from slug generation: it looks biased against minorities :(

This commit is contained in:
Julien Palard 2020-09-15 14:31:48 +02:00
parent 3ef7baccac
commit 05f763c3a5
4 changed files with 14 additions and 4183 deletions

View File

@ -51,10 +51,3 @@ If you're in production collect static files:
Run it:
./manage.py runserver
## Words generation
To generate the French dict, I used:
$ unmunch <(grep po:adj /usr/share/hunspell/fr_FR.dic) /usr/share/hunspell/fr_FR.aff | LC_ALL=C grep '^[a-z]*$' | grep '^[a-z]\{2,7\}$' | uniq > dict/french

File diff suppressed because it is too large Load Diff

View File

@ -1,46 +1,22 @@
import string
import random
from string import digits, ascii_uppercase
from random import choice, choices
import shortuuid
import os
import re
from webtools import settings
from functools import lru_cache
from .models import Paste
@lru_cache()
def find_words():
    """Load the short words from the configured dictionary file.

    Reads the file named by ``settings.DICT`` and keeps every stripped
    line that matches the pattern ``[a-z]{2,5}$``. The return value is
    memoized by ``lru_cache``, so later calls reuse the first result.

    Returns:
        The list of matching words (possibly empty), or ``None`` when
        ``settings.DICT`` is falsy or the file does not exist.
    """
    dict_path = settings.DICT
    if not dict_path:
        return None
    try:
        with open(dict_path) as handle:
            candidates = [raw.strip() for raw in handle]
    except FileNotFoundError:
        # A missing dictionary is not an error: callers treat None as
        # "no word list available".
        return None
    return [word for word in candidates if re.match("[a-z]{2,5}$", word)]
def random_id(model):
"""Returns a short uuid for the slug of the given model.
If a DICT is given in the settings, try to use it to generate nicer URLS like:
If a DICT is given in the settings, try to use it to generate
nicer URLS like:
"""
short_words = find_words()
if short_words:
slug = (
random.choice(string.digits)
+ random.choice(string.ascii_uppercase)
+ "-"
+ random.choice(short_words)
)
if not model.objects.filter(slug=slug):
return slug
# else, fallback to the shortuuid strategy:
uuid = random.choice("0123456789") + shortuuid.uuid()
pool = digits + ascii_uppercase
slug = choice(digits) + choice(ascii_uppercase) + "-" + "".join(choices(pool, k=2))
if not model.objects.filter(slug=slug):
return slug
# fallback to the shortuuid strategy:
uuid = choice("0123456789") + shortuuid.uuid()
for i in range(3, len(uuid)):
potential_uuid = uuid[:i]
if not model.objects.filter(slug=potential_uuid):

View File

@ -14,7 +14,6 @@ SECRET_KEY = "change_me"
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
TIME_ZONE = "Europe/Brussels"
LANGUAGE_CODE = "fr-FR"
DICT = os.path.join(SITE_ROOT, "dict", "french")
DEBUG = True
TEMPLATE_DEBUG = DEBUG
ADMINS = (("user", "user@hostname.domain"),)
@ -116,7 +115,10 @@ LOGGING = {
"filters": ["require_debug_false"],
"class": "django.utils.log.AdminEmailHandler",
},
"console": {"level": "DEBUG", "class": "logging.StreamHandler",},
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
},
"logfile": {
"level": "DEBUG",
"class": "logging.handlers.RotatingFileHandler",