from functools import lru_cache, partial
from typing import List
from urllib.parse import urlparse

import bleach
import markdown
import pygments
from django.conf import settings
from markdown.extensions.codehilite import CodeHiliteExtension
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename

# Default tag whitelist used by markdown_to_html() when the Django settings
# do not define their own ALLOWED_TAGS.
ALLOWED_TAGS = [
    # Bleach Defaults
    "a",
    "abbr",
    "acronym",
    "b",
    "blockquote",
    "code",
    "em",
    "i",
    "li",
    "ol",
    "strong",
    "ul",
    # Custom Additions
    "br",
    "caption",
    "cite",
    "col",
    "colgroup",
    "dd",
    "del",
    "details",
    "div",
    "dl",
    "dt",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "hr",
    "img",
    "p",
    "pre",
    "span",
    "sub",
    "summary",
    "sup",
    "table",
    "tbody",
    "td",
    "th",
    "thead",
    "tr",
    "tt",
    "kbd",
    "var",
]

# Default per-tag attribute whitelist used by markdown_to_html() when the
# Django settings do not define their own ALLOWED_ATTRIBUTES.  The "*" key
# applies to every tag.
ALLOWED_ATTRIBUTES = {
    # Bleach Defaults
    "a": ["href", "title"],
    "abbr": ["title"],
    "acronym": ["title"],
    # Custom Additions
    "*": ["id"],
    "hr": ["class"],
    "img": ["src", "width", "height", "alt", "align", "class"],
    "span": ["class"],
    "div": ["class"],
    "th": ["align"],
    "td": ["align"],
    "code": ["class"],
    "p": ["align", "class"],
}


def _set_target(attrs, new=False):
    """Linkify callback: open off-site links in a new tab.

    Args:
        attrs: Attribute dict handed over by bleach's linkifier.  Attribute
            names are ``(namespace, name)`` tuples; the link text is stored
            under the special ``"_text"`` key.
        new: ``True`` when bleach is about to create the link from bare
            text, ``False`` when the ``<a>`` tag was already in the input.

    Returns:
        The (mutated) ``attrs`` dict, or ``None`` to tell bleach not to turn
        this piece of text into a link.
    """
    link_text = attrs["_text"]
    if new and not link_text.startswith(("http:", "https:")):
        # Only auto-link bare text that spells out an http(s) URL.
        return None

    try:
        url = urlparse(attrs[(None, "href")])
    except KeyError:
        # An anchor without href has no destination to classify.
        return attrs

    # A URL without a network location ("/page", "#anchor", "mailto:...")
    # cannot point to another site, so it must not be treated as external:
    # an empty netloc is never listed in ALLOWED_HOSTS.
    is_external = bool(url.netloc) and url.netloc not in settings.ALLOWED_HOSTS
    if is_external:
        attrs[(None, "target")] = "_blank"
    else:
        attrs.pop((None, "target"), None)
    return attrs


def markdown_to_html(text):
    """Render Markdown *text* to sanitized HTML.

    The text is converted with ``markdown.markdown`` using the
    ``fenced_code``, ``codehilite`` (CSS class ``highlight``, no language
    guessing), ``tables`` and ``admonition`` extensions.  The resulting HTML
    is then passed through a bleach ``Cleaner`` which:

    - keeps only the tags and attributes listed in ``settings.ALLOWED_TAGS``
      and ``settings.ALLOWED_ATTRIBUTES``, falling back to the module-level
      ``ALLOWED_TAGS`` / ``ALLOWED_ATTRIBUTES`` when the settings do not
      define them;
    - linkifies URLs outside ``<pre>`` blocks (e-mail addresses are left
      alone), with ``_set_target`` deciding each link's ``target``.

    Args:
        text: Markdown source.

    Returns:
        The sanitized HTML as a string.
    """
    linkify = partial(
        bleach.linkifier.LinkifyFilter,
        callbacks=[_set_target],
        skip_tags=["pre"],
        parse_email=False,
    )
    cleaner = bleach.sanitizer.Cleaner(
        tags=getattr(settings, "ALLOWED_TAGS", ALLOWED_TAGS),
        attributes=getattr(settings, "ALLOWED_ATTRIBUTES", ALLOWED_ATTRIBUTES),
        filters=[linkify],
    )
    rendered = markdown.markdown(
        text,
        extensions=[
            "fenced_code",
            CodeHiliteExtension(guess_lang=False, css_class="highlight"),
            "tables",
            "admonition",
        ],
    )
    return cleaner.clean(rendered)


def pygmentize(filename, filecontents):
    """Return *filecontents* highlighted as HTML by Pygments.

    The lexer is chosen from *filename*; when Pygments has no lexer for it,
    the language named by ``settings.PASTE["default_language"]`` is used
    instead.  Output is produced by ``HtmlFormatter`` with the ``emacs``
    style.

    Args:
        filename: File name used to pick the lexer.
        filecontents: Source text to highlight.

    Returns:
        The highlighted HTML as a string.
    """
    try:
        lexer = get_lexer_for_filename(filename)
    except pygments.util.ClassNotFound:
        lexer = get_lexer_by_name(settings.PASTE["default_language"])
    formatter = HtmlFormatter(style="emacs")
    return pygments.highlight(filecontents, lexer, formatter)