Remove French dict from slug generation: it looks biased against minorities :(
This commit is contained in:
parent
3ef7baccac
commit
05f763c3a5
|
@ -51,10 +51,3 @@ If you're in production collect static files:
|
||||||
Run it:
|
Run it:
|
||||||
|
|
||||||
./manage.py runserver
|
./manage.py runserver
|
||||||
|
|
||||||
|
|
||||||
## Words generation
|
|
||||||
|
|
||||||
To generate the French dict, I used:
|
|
||||||
|
|
||||||
$ unmunch <(grep po:adj /usr/share/hunspell/fr_FR.dic) /usr/share/hunspell/fr_FR.aff | LC_ALL=C grep '^[a-z]*$' | grep '^[a-z]\{2,7\}$' | uniq > dict/french
|
|
||||||
|
|
4140
dict/french
4140
dict/french
File diff suppressed because it is too large
Load Diff
|
@ -1,46 +1,22 @@
|
||||||
import string
|
from string import digits, ascii_uppercase
|
||||||
import random
|
from random import choice, choices
|
||||||
import shortuuid
|
import shortuuid
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from webtools import settings
|
from webtools import settings
|
||||||
from functools import lru_cache
|
|
||||||
from .models import Paste
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def find_words():
|
|
||||||
if not settings.DICT:
|
|
||||||
return None
|
|
||||||
short_words = []
|
|
||||||
try:
|
|
||||||
with open(settings.DICT) as dictionary:
|
|
||||||
for line in dictionary:
|
|
||||||
line = line.strip()
|
|
||||||
if re.match("[a-z]{2,5}$", line):
|
|
||||||
short_words.append(line)
|
|
||||||
return short_words
|
|
||||||
except FileNotFoundError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def random_id(model):
|
def random_id(model):
|
||||||
"""Returns a short uuid for the slug of the given model.
|
"""Returns a short uuid for the slug of the given model.
|
||||||
|
|
||||||
If a DICT is given in the settings, try to use it to generate nicer URLS like:
|
If a DICT is given in the settings, try to use it to generate
|
||||||
|
nicer URLS like:
|
||||||
"""
|
"""
|
||||||
short_words = find_words()
|
pool = digits + ascii_uppercase
|
||||||
if short_words:
|
slug = choice(digits) + choice(ascii_uppercase) + "-" + "".join(choices(pool, k=2))
|
||||||
slug = (
|
if not model.objects.filter(slug=slug):
|
||||||
random.choice(string.digits)
|
return slug
|
||||||
+ random.choice(string.ascii_uppercase)
|
# fallback to the shortuuid strategy:
|
||||||
+ "-"
|
uuid = choice("0123456789") + shortuuid.uuid()
|
||||||
+ random.choice(short_words)
|
|
||||||
)
|
|
||||||
if not model.objects.filter(slug=slug):
|
|
||||||
return slug
|
|
||||||
# else, fallback to the shortuuid strategy:
|
|
||||||
uuid = random.choice("0123456789") + shortuuid.uuid()
|
|
||||||
for i in range(3, len(uuid)):
|
for i in range(3, len(uuid)):
|
||||||
potential_uuid = uuid[:i]
|
potential_uuid = uuid[:i]
|
||||||
if not model.objects.filter(slug=potential_uuid):
|
if not model.objects.filter(slug=potential_uuid):
|
||||||
|
|
|
@ -14,7 +14,6 @@ SECRET_KEY = "change_me"
|
||||||
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
|
ALLOWED_HOSTS = ["localhost", "127.0.0.1"]
|
||||||
TIME_ZONE = "Europe/Brussels"
|
TIME_ZONE = "Europe/Brussels"
|
||||||
LANGUAGE_CODE = "fr-FR"
|
LANGUAGE_CODE = "fr-FR"
|
||||||
DICT = os.path.join(SITE_ROOT, "dict", "french")
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
TEMPLATE_DEBUG = DEBUG
|
TEMPLATE_DEBUG = DEBUG
|
||||||
ADMINS = (("user", "user@hostname.domain"),)
|
ADMINS = (("user", "user@hostname.domain"),)
|
||||||
|
@ -116,7 +115,10 @@ LOGGING = {
|
||||||
"filters": ["require_debug_false"],
|
"filters": ["require_debug_false"],
|
||||||
"class": "django.utils.log.AdminEmailHandler",
|
"class": "django.utils.log.AdminEmailHandler",
|
||||||
},
|
},
|
||||||
"console": {"level": "DEBUG", "class": "logging.StreamHandler",},
|
"console": {
|
||||||
|
"level": "DEBUG",
|
||||||
|
"class": "logging.StreamHandler",
|
||||||
|
},
|
||||||
"logfile": {
|
"logfile": {
|
||||||
"level": "DEBUG",
|
"level": "DEBUG",
|
||||||
"class": "logging.handlers.RotatingFileHandler",
|
"class": "logging.handlers.RotatingFileHandler",
|
||||||
|
|
Loading…
Reference in New Issue