132 lines
2.8 KiB
Python
132 lines
2.8 KiB
Python
|
from functools import lru_cache, partial
|
||
|
from typing import List
|
||
|
from urllib.parse import urlparse
|
||
|
|
||
|
import bleach
|
||
|
import markdown
|
||
|
import pygments
|
||
|
from django.conf import settings
|
||
|
from markdown.extensions.codehilite import CodeHiliteExtension
|
||
|
from pygments.formatters import HtmlFormatter
|
||
|
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
|
||
|
|
||
|
# HTML tags that survive sanitization. Used as the fallback whitelist when
# the Django settings do not define their own ``ALLOWED_TAGS``.
ALLOWED_TAGS = [
    # Bleach defaults
    "a", "abbr", "acronym", "b", "blockquote", "code",
    "em", "i", "li", "ol", "strong", "ul",
] + [
    # Custom additions
    "br", "caption", "cite", "col", "colgroup",
    "dd", "del", "details", "div", "dl", "dt",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "hr", "img", "p", "pre", "span",
    "sub", "summary", "sup",
    "table", "tbody", "td", "th", "thead", "tr",
    "tt", "kbd", "var",
]
|
||
|
|
||
|
# Per-tag attribute whitelist (``"*"`` applies to every tag). Used as the
# fallback when the Django settings do not define ``ALLOWED_ATTRIBUTES``.
ALLOWED_ATTRIBUTES = {
    # Bleach defaults
    **{
        "a": ["href", "title"],
        "abbr": ["title"],
        "acronym": ["title"],
    },
    # Custom additions
    **{
        "*": ["id"],
        "hr": ["class"],
        "img": ["src", "width", "height", "alt", "align", "class"],
        "span": ["class"],
        "div": ["class"],
        "th": ["align"],
        "td": ["align"],
        "code": ["class"],
        "p": ["align", "class"],
    },
}
|
||
|
|
||
|
|
||
|
def _set_target(attrs, new=False):
    """Bleach linkify callback deciding how a link should open.

    Args:
        attrs: Attribute mapping supplied by bleach's ``LinkifyFilter``.
            Keys are ``(namespace, name)`` tuples, plus the special
            ``"_text"`` key holding the link text.
        new: True when bleach is about to create a link from bare text,
            False when the ``<a>`` element was already in the markup.

    Returns:
        ``None`` for bare text that does not start with ``http:`` or
        ``https:`` (per bleach's callback contract this prevents the link
        from being created); otherwise the ``attrs`` mapping, possibly
        mutated in place.
    """
    link_text = attrs["_text"]
    # Only auto-link bare text that is explicitly an http(s) URL.
    if new and not link_text.startswith(("http:", "https:")):
        return None
    try:
        url = urlparse(attrs[(None, "href")])
    except KeyError:
        # An anchor without href: nothing to decide, leave it untouched.
        return attrs
    # Relative, fragment-only and other host-less hrefs have an empty netloc;
    # they point at the current site and must not be treated as external.
    if url.netloc and url.netloc not in settings.ALLOWED_HOSTS:
        attrs[(None, "target")] = "_blank"
        # Deny the opened page access to window.opener: the link target is
        # user-supplied and therefore untrusted.
        attrs[(None, "rel")] = "noopener noreferrer"
    else:
        attrs.pop((None, "target"), None)
    return attrs
|
||
|
|
||
|
|
||
|
def markdown_to_html(text):
    """Render Markdown ``text`` to sanitized HTML.

    The text is converted with python-markdown (``fenced_code``,
    ``codehilite``, ``tables`` and ``admonition`` extensions) and the result
    is passed through a bleach ``Cleaner`` which:

    - keeps only the tags and attributes listed in the ``ALLOWED_TAGS`` /
      ``ALLOWED_ATTRIBUTES`` Django settings, falling back to the
      module-level constants of the same names when a setting is absent;
    - runs bleach's ``LinkifyFilter`` with ``_set_target`` as its only
      callback, skipping ``<pre>`` content and not parsing e-mail addresses.
    """
    linkify = partial(
        bleach.linkifier.LinkifyFilter,
        callbacks=[_set_target],
        skip_tags=["pre"],
        parse_email=False,
    )
    cleaner = bleach.sanitizer.Cleaner(
        tags=getattr(settings, "ALLOWED_TAGS", ALLOWED_TAGS),
        attributes=getattr(settings, "ALLOWED_ATTRIBUTES", ALLOWED_ATTRIBUTES),
        filters=[linkify],
    )
    markdown_extensions = [
        "fenced_code",
        CodeHiliteExtension(guess_lang=False, css_class="highlight"),
        "tables",
        "admonition",
    ]
    raw_html = markdown.markdown(text, extensions=markdown_extensions)
    return cleaner.clean(raw_html)
|
||
|
|
||
|
|
||
|
def pygmentize(filename, filecontents):
    """Return ``filecontents`` highlighted as HTML by Pygments.

    The lexer is chosen from ``filename``; when Pygments finds no lexer for
    that name, the lexer named by ``settings.PASTE["default_language"]`` is
    used instead. Output is produced by an ``HtmlFormatter`` using the
    ``emacs`` style.
    """
    try:
        chosen_lexer = get_lexer_for_filename(filename)
    except pygments.util.ClassNotFound:
        fallback_language = settings.PASTE["default_language"]
        chosen_lexer = get_lexer_by_name(fallback_language)
    return pygments.highlight(
        filecontents,
        chosen_lexer,
        HtmlFormatter(style="emacs"),
    )
|