Remove French dict from slug generation: it looks biased against minorities :(
This commit is contained in:
parent
3ef7baccac
commit
05f763c3a5
|
@ -51,10 +51,3 @@ If you're in production collect static files:
|
|||
Run it:
|
||||
|
||||
./manage.py runserver
|
||||
|
||||
|
||||
## Words generation
|
||||
|
||||
To generate the French dict, I used:
|
||||
|
||||
$ unmunch <(grep po:adj /usr/share/hunspell/fr_FR.dic) /usr/share/hunspell/fr_FR.aff | LC_ALL=C grep '^[a-z]*$' | grep '^[a-z]\{2,7\}$' | uniq > dict/french
|
||||
|
|
4140
dict/french
4140
dict/french
File diff suppressed because it is too large
Load Diff
|
@ -1,46 +1,22 @@
|
|||
import string
|
||||
import random
|
||||
from string import digits, ascii_uppercase
|
||||
from random import choice, choices
|
||||
import shortuuid
|
||||
import os
|
||||
import re
|
||||
from webtools import settings
|
||||
from functools import lru_cache
|
||||
from .models import Paste
|
||||
|
||||
|
||||
@lru_cache()
def find_words():
    """Load the short words usable in slugs from the configured dictionary.

    Reads the file named by ``settings.DICT`` line by line, strips each
    line, and keeps those made of two to five lowercase ASCII letters.
    Because of ``lru_cache``, the value returned by the first call is
    reused by later calls.

    Returns the list of matching words, or ``None`` when ``settings.DICT``
    is falsy or the file does not exist.
    """
    dict_path = settings.DICT
    if not dict_path:
        # No dictionary configured: callers fall back to another strategy.
        return None
    try:
        with open(dict_path) as handle:
            candidates = (raw.strip() for raw in handle)
            return [word for word in candidates if re.match("[a-z]{2,5}$", word)]
    except FileNotFoundError:
        # A configured but missing dictionary is treated like no dictionary.
        return None
|
||||
|
||||
|
||||
def random_id(model):
|
||||
"""Returns a short uuid for the slug of the given model.
|
||||
|
||||
If a DICT is given in the settings, try to use it to generate nicer URLS like:
|
||||
If a DICT is given in the settings, try to use it to generate
|
||||
nicer URLS like:
|
||||
"""
|
||||
short_words = find_words()
|
||||
if short_words:
|
||||
slug = (
|
||||
random.choice(string.digits)
|
||||
+ random.choice(string.ascii_uppercase)
|
||||
+ "-"
|
||||
+ random.choice(short_words)
|
||||
)
|
||||
if not model.objects.filter(slug=slug):
|
||||
return slug
|
||||
# else, fallback to the shortuuid strategy:
|
||||
uuid = random.choice("0123456789") + shortuuid.uuid()
|
||||
pool = digits + ascii_uppercase
|
||||
slug = choice(digits) + choice(ascii_uppercase) + "-" + "".join(choices(pool, k=2))
|
||||
if not model.objects.filter(slug=slug):
|
||||
return slug
|
||||
# fallback to the shortuuid strategy:
|
||||
uuid = choice("0123456789") + shortuuid.uuid()
|
||||
for i in range(3, len(uuid)):
|
||||
potential_uuid = uuid[:i]
|
||||
if not model.objects.filter(slug=potential_uuid):
|
||||
|
|
|
@ -14,7 +14,6 @@ SECRET_KEY = "change_me"
|
|||
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
|
||||
TIME_ZONE = "Europe/Brussels"
|
||||
LANGUAGE_CODE = "fr-FR"
|
||||
DICT = os.path.join(SITE_ROOT, "dict", "french")
|
||||
DEBUG = True
|
||||
TEMPLATE_DEBUG = DEBUG
|
||||
ADMINS = (("user", "user@hostname.domain"),)
|
||||
|
@ -116,7 +115,10 @@ LOGGING = {
|
|||
"filters": ["require_debug_false"],
|
||||
"class": "django.utils.log.AdminEmailHandler",
|
||||
},
|
||||
"console": {"level": "DEBUG", "class": "logging.StreamHandler",},
|
||||
"console": {
|
||||
"level": "DEBUG",
|
||||
"class": "logging.StreamHandler",
|
||||
},
|
||||
"logfile": {
|
||||
"level": "DEBUG",
|
||||
"class": "logging.handlers.RotatingFileHandler",
|
||||
|
|
Loading…
Reference in New Issue