# pasteque/paste/utils.py
#
# 132 lines
# 2.8 KiB
# Python

from functools import lru_cache, partial
from typing import List
from urllib.parse import urlparse
import bleach
import markdown
import pygments
from django.conf import settings
from markdown.extensions.codehilite import CodeHiliteExtension
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
# HTML elements kept by the sanitiser in ``markdown_to_html`` unless the
# Django settings provide their own ``ALLOWED_TAGS``.
ALLOWED_TAGS = [
    # Bleach defaults
    "a", "abbr", "acronym", "b", "blockquote", "code",
    "em", "i", "li", "ol", "strong", "ul",
    # Custom additions
    "br", "caption", "cite", "col", "colgroup",
    "dd", "del", "details", "div", "dl", "dt",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "hr", "img", "p", "pre", "span", "sub", "summary", "sup",
    "table", "tbody", "td", "th", "thead", "tr",
    "tt", "kbd", "var",
]
# Per-tag attribute allow-list used by ``markdown_to_html`` unless the Django
# settings provide their own ``ALLOWED_ATTRIBUTES``. The "*" entry applies to
# every tag.
ALLOWED_ATTRIBUTES = {
    # Bleach defaults
    "a": ["href", "title"],
    "abbr": ["title"],
    "acronym": ["title"],

    # Custom additions
    "*": ["id"],
    "hr": ["class"],
    "img": [
        "src",
        "width",
        "height",
        "alt",
        "align",
        "class",
    ],
    "span": ["class"],
    "div": ["class"],
    "th": ["align"],
    "td": ["align"],
    "code": ["class"],
    "p": ["align", "class"],
}
def _set_target(attrs, new=False):
link_text = attrs["_text"]
if new and not link_text.startswith(("http:", "https:")):
return None
try:
url = urlparse(attrs[(None, "href")])
except KeyError:
return attrs
if url.netloc not in settings.ALLOWED_HOSTS:
attrs[(None, "target")] = "_blank"
else:
attrs.pop((None, "target"), None)
return attrs
def markdown_to_html(text):
    """Render markdown ``text`` to sanitised HTML.

    Two steps are applied:

    - The markdown is rendered with the fenced-code, code-highlighting,
      tables and admonition extensions.
    - The resulting HTML goes through a bleach cleaner that keeps only the
      allowed tags and attributes (values from the Django settings take
      precedence over the module-level defaults) and linkifies URLs outside
      ``<pre>`` blocks, passing every link through ``_set_target``.
    """
    permitted_tags = getattr(settings, "ALLOWED_TAGS", ALLOWED_TAGS)
    permitted_attributes = getattr(
        settings, "ALLOWED_ATTRIBUTES", ALLOWED_ATTRIBUTES
    )

    # E-mail addresses are deliberately not turned into links.
    linkify_filter = partial(
        bleach.linkifier.LinkifyFilter,
        callbacks=[_set_target],
        skip_tags=["pre"],
        parse_email=False,
    )
    cleaner = bleach.sanitizer.Cleaner(
        tags=permitted_tags,
        attributes=permitted_attributes,
        filters=[linkify_filter],
    )

    markdown_extensions = [
        "fenced_code",
        CodeHiliteExtension(guess_lang=False, css_class="highlight"),
        "tables",
        "admonition",
    ]
    rendered = markdown.markdown(text, extensions=markdown_extensions)
    return cleaner.clean(rendered)
def pygmentize(filename, filecontents):
    """Return ``filecontents`` highlighted by Pygments with the "emacs" style.

    The lexer is looked up from ``filename``. When Pygments knows no lexer
    for that name, the one named by ``settings.PASTE["default_language"]``
    is used instead.
    """
    try:
        chosen_lexer = get_lexer_for_filename(filename)
    except pygments.util.ClassNotFound:
        fallback_name = settings.PASTE["default_language"]
        chosen_lexer = get_lexer_by_name(fallback_name)

    return pygments.highlight(
        filecontents,
        chosen_lexer,
        HtmlFormatter(style="emacs"),
    )