Initial commit

2024-08-27 20:33:44 +02:00
commit 1f1832267d
14794 changed files with 1599592 additions and 0 deletions
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/init.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/init.py
@@ -0,0 +1,32 @@
+from django.conf import settings
+from django.utils.module_loading import import_string
+
+from wagtail.admin.rich_text.editors.draftail import DraftailRichTextArea  # NOQA: F401
+
+# Built-in editor configuration. get_rich_text_editor_widget() copies this dict
+# and overlays the WAGTAILADMIN_RICH_TEXT_EDITORS setting on top of it, so a
+# project can replace the "default" entry or add further named editors.
+DEFAULT_RICH_TEXT_EDITORS = {
+    "default": {"WIDGET": "wagtail.admin.rich_text.DraftailRichTextArea"}
+}
+
+
+def get_rich_text_editor_widget(name="default", features=None):
+    """
+    Instantiate the rich text widget configured under ``name``.
+
+    The WAGTAILADMIN_RICH_TEXT_EDITORS setting (when defined) is layered over
+    DEFAULT_RICH_TEXT_EDITORS. An explicit ``features`` argument takes
+    precedence over a 'features' entry in the editor's OPTIONS, and features
+    are only handed to widget classes that set ``accepts_features``.
+
+    Raises KeyError if no editor is configured under ``name``.
+    """
+    available_editors = dict(DEFAULT_RICH_TEXT_EDITORS)
+    available_editors.update(getattr(settings, "WAGTAILADMIN_RICH_TEXT_EDITORS", {}))
+
+    editor = available_editors[name]
+    options = editor.get("OPTIONS", None)
+
+    widget_kwargs = {}
+    if options is not None:
+        if features is None:
+            # fall back on 'features' list within OPTIONS, if any
+            features = options.get("features", None)
+        widget_kwargs["options"] = options
+
+    widget_class = import_string(editor["WIDGET"])
+    if getattr(widget_class, "accepts_features", False):
+        widget_kwargs["features"] = features
+
+    return widget_class(**widget_kwargs)
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/pycache/init.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/pycache/init.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/init.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/init.py
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/init.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/init.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/contentstate.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/contentstate.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/contentstate_models.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/contentstate_models.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/editor_html.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/editor_html.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/html_ruleset.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/html_ruleset.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/html_to_contentstate.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/pycache/html_to_contentstate.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/contentstate.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/contentstate.py
@@ -0,0 +1,149 @@
+import json
+import logging
+import re
+
+from draftjs_exporter.defaults import render_children
+from draftjs_exporter.dom import DOM
+from draftjs_exporter.html import HTML as HTMLExporter
+
+from wagtail.admin.rich_text.converters.html_to_contentstate import (
+    BLOCK_KEY_NAME,
+    HtmlToContentStateHandler,
+)
+from wagtail.rich_text import features as feature_registry
+from wagtail.whitelist import check_url
+
+
+def link_entity(props):
+    """
+    Render a link entity as an <a> element, e.g.
+    <a linktype="page" id="1">internal page link</a>
+
+    Entities that carry an "id" become page links; any other entity gets an
+    href built by passing its "url" value through check_url().
+    """
+    page_id = props.get("id")
+
+    if page_id is None:
+        attributes = {"href": check_url(props.get("url"))}
+    else:
+        attributes = {"linktype": "page", "id": page_id}
+
+    return DOM.create_element("a", attributes, props["children"])
+
+
+def br(props):
+    """
+    Composite decorator component for newline characters.
+
+    Inside a "code-block" block the matched children are returned untouched;
+    in every other block type a <br> element is emitted instead.
+    """
+    inside_code_block = props["block"]["type"] == "code-block"
+    return props["children"] if inside_code_block else DOM.create_element("br")
+
+
+def block_fallback(props):
+    """
+    Fallback renderer for block types with no exporter configuration.
+
+    Logs an error naming the block type and returns None, which drops the
+    block from the rendered output.
+    """
+    type_ = props["block"]["type"]
+    # Pass the value as a logging argument rather than pre-formatting with %:
+    # formatting is deferred until a handler emits the record, and an unusual
+    # value (e.g. a tuple) cannot break the %-interpolation at the call site.
+    logging.error('Missing config for "%s". Deleting block.', type_)
+    return None
+
+
+def entity_fallback(props):
+    """
+    Fallback renderer for entity types with no exporter configuration.
+
+    Logs a warning naming the entity type and returns None, which drops the
+    entity from the rendered output.
+    """
+    type_ = props["entity"]["type"]
+    # Deferred logging arguments: no eager %-formatting at the call site, so
+    # an unusual value (e.g. a tuple) cannot raise while building the message.
+    logging.warning('Missing config for "%s". Deleting entity', type_)
+    return None
+
+
+def style_fallback(props):
+    """
+    Fallback renderer for inline styles with no exporter configuration.
+
+    Logs a warning naming the style and returns the children unchanged, so
+    the text survives and only the styling is lost.
+    """
+    type_ = props["inline_style_range"]["style"]
+    # Deferred logging arguments: no eager %-formatting at the call site, so
+    # an unusual value (e.g. a tuple) cannot raise while building the message.
+    logging.warning('Missing config for "%s". Deleting style.', type_)
+    return props["children"]
+
+
+def persist_key_for_block(config):
+    """
+    Wrap a draftjs_exporter block config so the rendered element keeps the block key.
+
+    ``config`` may be a dict (optionally holding "element", "props", "wrapper"
+    and "wrapper_props" entries), a plain element name, or a function. The
+    returned dict preserves any wrapper entries and replaces "element" with a
+    function that adds the block's key as a BLOCK_KEY_NAME attribute.
+    """
+    # For any block level element config for draft js exporter, return a config that retains the
+    # block key in a data attribute
+    if isinstance(config, dict):
+        # Wrapper elements don't retain a key - we can keep them in the config as-is
+        new_config = {
+            key: value
+            for key, value in config.items()
+            if key in {"wrapper", "wrapper_props"}
+        }
+        element = config.get("element")
+        element_props = config.get("props", {})
+    else:
+        # The config is either a simple string element name, or a function
+        new_config = {}
+        element_props = {}
+        element = config
+
+    def element_with_uuid(props):
+        # The block key may be absent from the block dict, in which case the attribute value is None
+        added_props = {BLOCK_KEY_NAME: props["block"].get("key")}
+        try:
+            # See if the element is a function - if so, we can only run it and modify its return value to include the data attribute
+            elt = element(props)
+            if elt is not None:
+                elt.attr.update(added_props)
+            return elt
+        except TypeError:
+            # Otherwise we can do the normal process of creating a DOM element with the right element type
+            # and simply adding the data attribute to its props
+            # NOTE(review): a TypeError raised *inside* a callable element also lands here,
+            # not only the "element is not callable" case - confirm this is intended.
+            # Configured props are applied last, so they win over the key attribute on a name clash.
+            added_props.update(element_props)
+            return DOM.create_element(element, added_props, props["children"])
+
+    new_config["element"] = element_with_uuid
+    return new_config
+
+
+class ContentstateConverter:
+    """
+    Converts rich text between database-format HTML and Draftail contentstate JSON.
+
+    ``features`` is an iterable of rich text feature names; each feature's
+    "contentstate" converter rule (if registered) contributes block, style and
+    entity mappings to the exporter. When ``features`` is None the feature
+    registry's default feature list is used.
+    """
+
+    def __init__(self, features=None):
+        if features is None:
+            # None is the declared default, but the loop below iterates over the
+            # feature list - resolve it to the registry defaults (the same
+            # fallback EditorHTMLConverter applies) instead of failing with
+            # a TypeError.
+            features = feature_registry.get_default_features()
+
+        self.features = features
+        self.html_to_contentstate_handler = HtmlToContentStateHandler(features)
+
+        exporter_config = {
+            "block_map": {
+                "unstyled": persist_key_for_block("p"),
+                "atomic": render_children,
+                "fallback": block_fallback,
+            },
+            "style_map": {
+                "FALLBACK": style_fallback,
+            },
+            "entity_decorators": {
+                "FALLBACK": entity_fallback,
+            },
+            "composite_decorators": [
+                {
+                    # newlines inside a block are rendered through the br() component
+                    "strategy": re.compile(r"\n"),
+                    "component": br,
+                },
+            ],
+            "engine": DOM.STRING,
+        }
+
+        for feature in self.features:
+            rule = feature_registry.get_converter_rule("contentstate", feature)
+            if rule is not None:
+                feature_config = rule["to_database_format"]
+                # every block-level config is wrapped so that the block key is
+                # written out as a data attribute
+                exporter_config["block_map"].update(
+                    {
+                        block_type: persist_key_for_block(config)
+                        for block_type, config in feature_config.get(
+                            "block_map", {}
+                        ).items()
+                    }
+                )
+                exporter_config["style_map"].update(feature_config.get("style_map", {}))
+                exporter_config["entity_decorators"].update(
+                    feature_config.get("entity_decorators", {})
+                )
+
+        self.exporter = HTMLExporter(exporter_config)
+
+    def from_database_format(self, html):
+        """Parse database-format HTML and return the contentstate as a JSON string."""
+        # the handler is reused between calls, so clear any state left by a previous parse
+        self.html_to_contentstate_handler.reset()
+        self.html_to_contentstate_handler.feed(html)
+        self.html_to_contentstate_handler.close()
+
+        return self.html_to_contentstate_handler.contentstate.as_json(
+            indent=4, separators=(",", ": ")
+        )
+
+    def to_database_format(self, contentstate_json):
+        """Render a contentstate JSON string to database-format HTML."""
+        return self.exporter.render(json.loads(contentstate_json))
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/contentstate_models.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/contentstate_models.py
@@ -0,0 +1,93 @@
+import json
+import random
+import string
+
+# Character pool (lowercase ASCII letters and digits) from which Block draws
+# its randomly generated keys.
+ALPHANUM = string.ascii_lowercase + string.digits
+
+
+class Block:
+    """
+    One contentstate block: a type, a nesting depth, its text, and the inline
+    style / entity ranges that apply to that text.
+    """
+
+    def __init__(self, typ, depth=0, key=None):
+        self.type = typ
+        self.depth = depth
+        self.text = ""
+        if not key:
+            # no (or empty) key supplied - generate a random 5-character one
+            key = "".join(random.choice(ALPHANUM) for _ in range(5))
+        self.key = key
+        self.inline_style_ranges = []
+        self.entity_ranges = []
+
+    def as_dict(self):
+        """Return the block as a plain dict using contentstate's camelCase keys."""
+        style_ranges = [style_range.as_dict() for style_range in self.inline_style_ranges]
+        entity_ranges = [entity_range.as_dict() for entity_range in self.entity_ranges]
+        return dict(
+            key=self.key,
+            type=self.type,
+            depth=self.depth,
+            text=self.text,
+            inlineStyleRanges=style_ranges,
+            entityRanges=entity_ranges,
+        )
+
+
+class InlineStyleRange:
+    """
+    A span of block text carrying one inline style. ``offset`` and ``length``
+    start out as None and are filled in by whoever builds the range.
+    """
+
+    def __init__(self, style):
+        self.style = style
+        self.offset = self.length = None
+
+    def as_dict(self):
+        """Return the range as a plain dict (offset, length, style)."""
+        return dict(offset=self.offset, length=self.length, style=self.style)
+
+
+class Entity:
+    """A contentstate entity: a type, a mutability value and a data payload."""
+
+    def __init__(self, entity_type, mutability, data):
+        self.entity_type, self.mutability, self.data = entity_type, mutability, data
+
+    def as_dict(self):
+        """Return the entity as a plain dict; the type is emitted under "type"."""
+        return dict(mutability=self.mutability, type=self.entity_type, data=self.data)
+
+
+class EntityRange:
+    """
+    A span of block text attached to the entity stored under ``key``.
+    ``offset`` and ``length`` start out as None and are filled in later.
+    """
+
+    def __init__(self, key):
+        self.key = key
+        self.offset = self.length = None
+
+    def as_dict(self):
+        """Return the range as a plain dict (key, offset, length)."""
+        return dict(key=self.key, offset=self.offset, length=self.length)
+
+
+class ContentState:
+    """Pythonic representation of a Draftail contentState structure"""
+
+    def __init__(self):
+        self.blocks = []
+        self.entity_map = {}
+        self.entity_count = 0
+
+    def add_entity(self, entity):
+        """Store ``entity`` under the next sequential integer key and return that key."""
+        new_key = self.entity_count
+        self.entity_count = new_key + 1
+        self.entity_map[new_key] = entity
+        return new_key
+
+    def as_dict(self):
+        """Return the blocks and entity map as nested plain dicts / lists."""
+        serialized_blocks = [block.as_dict() for block in self.blocks]
+        serialized_entities = {
+            key: entity.as_dict() for key, entity in self.entity_map.items()
+        }
+        return {"blocks": serialized_blocks, "entityMap": serialized_entities}
+
+    def as_json(self, **kwargs):
+        """Serialize as_dict() with json.dumps, forwarding any keyword arguments."""
+        return json.dumps(self.as_dict(), **kwargs)
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/editor_html.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/editor_html.py
@@ -0,0 +1,183 @@
+from django.utils.functional import cached_property
+from django.utils.html import escape
+
+from wagtail.models import Page
+from wagtail.rich_text import features as feature_registry
+from wagtail.rich_text.rewriters import EmbedRewriter, LinkRewriter, MultiRuleRewriter
+from wagtail.whitelist import Whitelister, allow_without_attributes
+
+
+class WhitelistRule:
+    """Pairs an element name with the handler used to whitelist that element."""
+
+    def __init__(self, element, handler):
+        self.element, self.handler = element, handler
+
+
+class EmbedTypeRule:
+    """Pairs an embed type name with the handler responsible for that embed type."""
+
+    def __init__(self, embed_type, handler):
+        self.embed_type, self.handler = embed_type, handler
+
+
+class LinkTypeRule:
+    """Pairs a link type name with the handler responsible for that link type."""
+
+    def __init__(self, link_type, handler):
+        self.link_type, self.handler = link_type, handler
+
+
+# Whitelist rules which are always active regardless of the rich text features that are enabled.
+# DbWhitelister copies this dict and then adds / overrides entries from the
+# WhitelistRule instances it is given. Every element listed here is handled by
+# allow_without_attributes.
+
+BASE_WHITELIST_RULES = {
+    "[document]": allow_without_attributes,
+    "p": allow_without_attributes,
+    "div": allow_without_attributes,
+    "br": allow_without_attributes,
+}
+
+
+class DbWhitelister(Whitelister):
+    """
+    A custom whitelisting engine to convert the HTML as returned by the rich text editor
+    into the pseudo-HTML format stored in the database (in which images, documents and other
+    linked objects are identified by ID rather than URL):
+
+    * accepts a list of WhitelistRules to extend the initial set in BASE_WHITELIST_RULES;
+    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
+      attributes supplied by the handler for that type as defined in embed_handlers;
+    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
+      determined by the handler for that type defined in link_handlers, while keeping the
+      element content intact.
+    """
+
+    def __init__(self, converter_rules):
+        self.converter_rules = converter_rules
+        # start from a copy so that the shared base rules are never mutated
+        self.element_rules = BASE_WHITELIST_RULES.copy()
+        for rule in self.converter_rules:
+            if isinstance(rule, WhitelistRule):
+                self.element_rules[rule.element] = rule.handler
+
+    @cached_property
+    def embed_handlers(self):
+        """Mapping of embed type name to handler, built from the EmbedTypeRules."""
+        return {
+            rule.embed_type: rule.handler
+            for rule in self.converter_rules
+            if isinstance(rule, EmbedTypeRule)
+        }
+
+    @cached_property
+    def link_handlers(self):
+        """Mapping of link type name to handler, built from the LinkTypeRules."""
+        return {
+            rule.link_type: rule.handler
+            for rule in self.converter_rules
+            if isinstance(rule, LinkTypeRule)
+        }
+
+    def clean_tag_node(self, doc, tag):
+        """
+        Clean a single tag in place.
+
+        Embeds are replaced by <embed> elements, typed links get their attributes
+        rewritten by the matching link handler, <div> is renamed to <p>, and
+        everything else is delegated to the parent class.
+        """
+        if "data-embedtype" in tag.attrs:
+            embed_type = tag["data-embedtype"]
+            # fetch the appropriate embed handler for this embedtype
+            try:
+                embed_handler = self.embed_handlers[embed_type]
+            except KeyError:
+                # discard embeds with unrecognised embedtypes
+                tag.decompose()
+                return
+
+            embed_attrs = embed_handler.get_db_attributes(tag)
+            embed_attrs["embedtype"] = embed_type
+
+            embed_tag = doc.new_tag("embed", **embed_attrs)
+            embed_tag.can_be_empty_element = True
+            tag.replace_with(embed_tag)
+        elif tag.name == "a" and "data-linktype" in tag.attrs:
+            # first, whitelist the contents of this tag.
+            # Cleaning a child can remove or replace it (decompose / unwrap /
+            # replace_with), which mutates tag.contents - iterate over a snapshot
+            # so that no sibling is skipped.
+            for child in list(tag.contents):
+                self.clean_node(doc, child)
+
+            link_type = tag["data-linktype"]
+            try:
+                link_handler = self.link_handlers[link_type]
+            except KeyError:
+                # discard links with unrecognised linktypes
+                tag.unwrap()
+                return
+
+            link_attrs = link_handler.get_db_attributes(tag)
+            link_attrs["linktype"] = link_type
+            tag.attrs.clear()
+            tag.attrs.update(**link_attrs)
+        else:
+            if tag.name == "div":
+                tag.name = "p"
+
+            super().clean_tag_node(doc, tag)
+
+
+class EditorHTMLConverter:
+    """
+    Converts rich text between editor HTML and the database format, driven by
+    the "editorhtml" converter rules of the enabled features. When no feature
+    list is given, the feature registry's defaults are used.
+    """
+
+    def __init__(self, features=None):
+        if features is None:
+            features = feature_registry.get_default_features()
+
+        self.converter_rules = []
+        for feature in features:
+            feature_rules = feature_registry.get_converter_rule("editorhtml", feature)
+            if feature_rules is None:
+                continue
+            # feature_rules should be a list of WhitelistRule() instances -
+            # fold it into the main converter_rules list
+            self.converter_rules.extend(feature_rules)
+
+    @cached_property
+    def whitelister(self):
+        return DbWhitelister(self.converter_rules)
+
+    def to_database_format(self, html):
+        """Clean editor HTML into the database format using the whitelister."""
+        return self.whitelister.clean(html)
+
+    @cached_property
+    def html_rewriter(self):
+        embed_rules, link_rules = {}, {}
+        for rule in self.converter_rules:
+            if isinstance(rule, EmbedTypeRule):
+                target, type_name = embed_rules, rule.embed_type
+            elif isinstance(rule, LinkTypeRule):
+                target, type_name = link_rules, rule.link_type
+            else:
+                # plain whitelist rules play no part in rewriting
+                continue
+            target[type_name] = rule.handler.expand_db_attributes
+
+        rewriters = [LinkRewriter(rules=link_rules), EmbedRewriter(rules=embed_rules)]
+        return MultiRuleRewriter(rewriters)
+
+    def from_database_format(self, html):
+        """Expand database-format HTML into editor HTML using the rewriter."""
+        return self.html_rewriter(html)
+
+
+class PageLinkHandler:
+    """
+    PageLinkHandler will be invoked whenever we encounter an <a> element in HTML content
+    with an attribute of data-linktype="page". The resulting element in the database
+    representation will be:
+    <a linktype="page" id="42">hello world</a>
+    """
+
+    @staticmethod
+    def get_db_attributes(tag):
+        """
+        Given an <a> tag that we've identified as a page link embed (because it has a
+        data-linktype="page" attribute), return a dict of the attributes we should
+        have on the resulting <a linktype="page"> element.
+        """
+        page_id = tag["data-id"]
+        return {"id": page_id}
+
+    @staticmethod
+    def expand_db_attributes(attrs):
+        """
+        Build the opening <a ...> tag for the editor from the stored attributes.
+        A bare "<a>" is returned when the page lookup raises Page.DoesNotExist.
+        """
+        try:
+            page = Page.objects.get(id=attrs["id"])
+
+            attr_parts = ['data-linktype="page" data-id="%d" ' % page.id]
+            parent_page = page.get_parent()
+            if parent_page:
+                attr_parts.append('data-parent-id="%d" ' % parent_page.id)
+            attr_string = "".join(attr_parts)
+
+            return f'<a {attr_string}href="{escape(page.localized.specific.url)}">'
+        except Page.DoesNotExist:
+            return "<a>"
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/html_ruleset.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/html_ruleset.py
@@ -0,0 +1,108 @@
+import re
+from collections.abc import Mapping
+
+# Selector patterns recognised by HTMLRuleset.add_rule. In every pattern,
+# element and attribute names consist of word characters and hyphens.
+
+# bare element name, e.g. a
+ELEMENT_SELECTOR = re.compile(r"^([\w-]+)$")
+# element with an attribute present, e.g. a[href]
+ELEMENT_WITH_ATTR_SELECTOR = re.compile(r"^([\w-]+)\[([\w-]+)\]$")
+# element with an exact attribute value in single quotes, e.g. a[linktype='page']
+ELEMENT_WITH_ATTR_EXACT_SINGLE_QUOTE_SELECTOR = re.compile(
+    r"^([\w-]+)\[([\w-]+)='(.*)'\]$"
+)
+# element with an exact attribute value in double quotes, e.g. a[linktype="page"]
+ELEMENT_WITH_ATTR_EXACT_DOUBLE_QUOTE_SELECTOR = re.compile(
+    r'^([\w-]+)\[([\w-]+)="(.*)"\]$'
+)
+# element with an exact, unquoted attribute value (word characters and hyphens
+# only), e.g. a[linktype=page]
+ELEMENT_WITH_ATTR_EXACT_UNQUOTED_SELECTOR = re.compile(
+    r"^([\w-]+)\[([\w-]+)=([\w-]+)\]$"
+)
+
+
+class HTMLRuleset:
+    """
+    Maintains a set of rules for matching HTML elements.
+    Each rule defines a mapping from a CSS-like selector to an arbitrary result object.
+
+    The following forms of rule are currently supported:
+    'a' = matches any <a> element
+    'a[href]' = matches any <a> element with an 'href' attribute
+    'a[linktype="page"]' = matches any <a> element with a 'linktype' attribute equal to 'page'
+    """
+
+    def __init__(self, rules=None):
+        # mapping of element name to a sorted list of (precedence, attr_check, result) tuples
+        # where attr_check is a callable that takes an attr dict and returns True if they match
+        self.element_rules = {}
+
+        if rules:
+            self.add_rules(rules)
+
+    def add_rules(self, rules):
+        """Add several rules, given as a {selector: result} dict or (selector, result) pairs."""
+        pairs = rules.items() if isinstance(rules, Mapping) else rules
+        for selector, result in pairs:
+            self.add_rule(selector, result)
+
+    def _register(self, name, precedence, attr_check, result):
+        # append the rule to the list for element `name`, then re-sort the list
+        # so that lower precedence numbers are tested first
+        entries = self.element_rules.setdefault(name, [])
+        entries.append((precedence, attr_check, result))
+        entries.sort(key=lambda entry: entry[0])
+
+    def _add_element_rule(self, name, result):
+        # matches any element called `name`; element-only rules have priority 2 (lower)
+        self._register(name, 2, (lambda attrs: True), result)
+
+    def _add_element_with_attr_rule(self, name, attr, result):
+        # matches any element called `name` that has the attribute `attr`;
+        # element-and-attr rules have priority 1 (higher)
+        self._register(name, 1, (lambda attrs: attr in attrs), result)
+
+    def _add_element_with_attr_exact_rule(self, name, attr, value, result):
+        # matches any element called `name` whose attribute `attr` equals `value`;
+        # element-and-attr rules have priority 1 (higher)
+        self._register(
+            name, 1, (lambda attrs: attr in attrs and attrs[attr] == value), result
+        )
+
+    def add_rule(self, selector, result):
+        """
+        Parse ``selector`` and register ``result`` for it. A selector that fits
+        none of the supported forms is ignored.
+        """
+        element_only = ELEMENT_SELECTOR.match(selector)
+        if element_only:
+            self._add_element_rule(element_only.group(1), result)
+            return
+
+        with_attr = ELEMENT_WITH_ATTR_SELECTOR.match(selector)
+        if with_attr:
+            name, attr = with_attr.groups()
+            self._add_element_with_attr_rule(name, attr, result)
+            return
+
+        exact_value_patterns = (
+            ELEMENT_WITH_ATTR_EXACT_SINGLE_QUOTE_SELECTOR,
+            ELEMENT_WITH_ATTR_EXACT_DOUBLE_QUOTE_SELECTOR,
+            ELEMENT_WITH_ATTR_EXACT_UNQUOTED_SELECTOR,
+        )
+        for pattern in exact_value_patterns:
+            exact = pattern.match(selector)
+            if exact:
+                name, attr, value = exact.groups()
+                self._add_element_with_attr_exact_rule(name, attr, value, result)
+                return
+
+    def match(self, name, attrs):
+        """
+        Look for a rule matching an HTML element with the given name and attribute dict,
+        and return the corresponding result object. If no rule matches, return None.
+        Rules are tried in precedence order (attribute rules before element-only
+        rules) and the first one whose check passes is returned.
+        """
+        for _precedence, attr_check, result in self.element_rules.get(name, ()):
+            if attr_check(attrs):
+                return result
+        return None
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/html_to_contentstate.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/converters/html_to_contentstate.py
@@ -0,0 +1,446 @@
+import re
+from html.parser import HTMLParser
+
+from wagtail.admin.rich_text.converters.contentstate_models import (
+    Block,
+    ContentState,
+    Entity,
+    EntityRange,
+    InlineStyleRange,
+)
+from wagtail.admin.rich_text.converters.html_ruleset import HTMLRuleset
+from wagtail.models import Page
+from wagtail.rich_text import features as feature_registry
+
+# constants to keep track of what to do with leading whitespace on the next text node we encounter
+# (stored in HandlerState.leading_whitespace)
+# STRIP_WHITESPACE: the initial mode, also set whenever a new block is started
+STRIP_WHITESPACE = 0
+KEEP_WHITESPACE = 1
+# FORCE_WHITESPACE: a space is pending; the inline element handlers append it
+# to the block text before opening a new style / entity range
+FORCE_WHITESPACE = 2
+
+# match one or more consecutive normal spaces, new-lines, tabs and form-feeds
+# (carriage returns are included in the character class as well)
+WHITESPACE_RE = re.compile(r"[ \t\n\f\r]+")
+
+# the attribute name to persist the Draftail block key between FE and db
+BLOCK_KEY_NAME = "data-block-key"
+
+
+class HandlerState:
+    """
+    Mutable parsing state shared by the element handlers, with a push() / pop()
+    stack for the parts of it that must be restored when a nested scope ends.
+    """
+
+    # attributes saved by push() and reinstated by pop()
+    _STACKED_ATTRS = (
+        "current_block",
+        "current_inline_styles",
+        "current_entity_ranges",
+        "leading_whitespace",
+        "list_depth",
+        "list_item_type",
+    )
+
+    def __init__(self):
+        self.current_block = None
+        self.current_inline_styles = []
+        self.current_entity_ranges = []
+
+        # what to do with leading whitespace on the next text node we encounter: strip, keep or force
+        self.leading_whitespace = STRIP_WHITESPACE
+        self.list_depth = 0
+        self.list_item_type = None
+
+        # an atomic block which is NOT preceded by a non-atomic block must have a spacer
+        # paragraph inserted before it
+        # NB This is not included in pushed/popped state, because after a pop() this
+        # should still indicate the status of the most recent block, not the one preceding
+        # the corresponding push()
+        self.has_preceding_nonatomic_block = False
+
+        self.pushed_states = []
+
+    def push(self):
+        """Save the current values of the stacked attributes (by reference, not copied)."""
+        snapshot = {attr: getattr(self, attr) for attr in self._STACKED_ATTRS}
+        self.pushed_states.append(snapshot)
+
+    def pop(self):
+        """Restore the stacked attributes from the most recent push()."""
+        snapshot = self.pushed_states.pop()
+        for attr in self._STACKED_ATTRS:
+            setattr(self, attr, snapshot[attr])
+
+
+def add_paragraph_block(state, contentstate):
+    """
+    Append a fresh unstyled (paragraph) block to contentstate and make it the
+    current block. Intended for element handlers that are not paragraph
+    elements themselves but need a paragraph to exist to stay correct.
+    """
+    paragraph = Block("unstyled", depth=state.list_depth)
+    contentstate.blocks.append(paragraph)
+
+    # a new block starts with whitespace stripping, and counts as a
+    # non-atomic block for any atomic block that follows
+    state.has_preceding_nonatomic_block = True
+    state.leading_whitespace = STRIP_WHITESPACE
+    state.current_block = paragraph
+
+
+class ListElementHandler:
+    """Handler for <ul> / <ol> tags"""
+
+    def __init__(self, list_item_type):
+        self.list_item_type = list_item_type
+
+    def handle_starttag(self, name, attrs, state, contentstate):
+        # remember the enclosing list context so that handle_endtag can restore it
+        state.push()
+
+        already_in_list = state.list_item_type is not None
+        if already_in_list:
+            # nested inside another list => start the next nesting level
+            # (a top-level list leaves the depth unchanged)
+            state.list_depth += 1
+
+        state.list_item_type = self.list_item_type
+
+    def handle_endtag(self, name, state, contentstate):
+        state.pop()
+
+
+class BlockElementHandler:
+    """Handler for elements that open a new contentstate block of a fixed type."""
+
+    def __init__(self, block_type):
+        self.block_type = block_type
+
+    def create_block(self, name, attrs, state, contentstate):
+        """Build the Block for this element, reusing the persisted block key if the element has one."""
+        persisted_key = attrs.get(BLOCK_KEY_NAME)
+        return Block(self.block_type, depth=state.list_depth, key=persisted_key)
+
+    def handle_starttag(self, name, attrs, state, contentstate):
+        # attrs arrives as a list of (name, value) tuples; create_block wants a dict
+        new_block = self.create_block(name, dict(attrs), state, contentstate)
+        contentstate.blocks.append(new_block)
+
+        state.current_block = new_block
+        state.leading_whitespace = STRIP_WHITESPACE
+        state.has_preceding_nonatomic_block = True
+
+    def handle_endtag(self, name, state, contentState):
+        # every inline style and entity opened inside the block must be closed by now
+        if state.current_inline_styles:
+            raise AssertionError(
+                "End of block reached without closing inline style elements"
+            )
+        if state.current_entity_ranges:
+            raise AssertionError(
+                "End of block reached without closing entity elements"
+            )
+        state.current_block = None
+
+
+class ListItemElementHandler(BlockElementHandler):
+    """Handler for <li> tag"""
+
+    def __init__(self):
+        # deliberately does not set self.block_type: the block type is taken
+        # from the enclosing list at the time the block is created
+        pass
+
+    def create_block(self, name, attrs, state, contentstate):
+        item_type = state.list_item_type
+        assert item_type is not None, (
+            "%s element found outside of an enclosing list element" % name
+        )
+        persisted_key = attrs.get(BLOCK_KEY_NAME)
+        return Block(item_type, depth=state.list_depth, key=persisted_key)
+
+
+class InlineStyleElementHandler:
+    """Handler for elements that apply a single inline style to the text they enclose."""
+
+    def __init__(self, style):
+        self.style = style
+
+    def handle_starttag(self, name, attrs, state, contentstate):
+        if state.current_block is None:
+            # Inline style element encountered at the top level -
+            # start a new paragraph block to contain it
+            add_paragraph_block(state, contentstate)
+
+        block = state.current_block
+        if state.leading_whitespace == FORCE_WHITESPACE:
+            # any pending whitespace should be output before handling this tag,
+            # and subsequent whitespace should be collapsed into it (= stripped)
+            block.text += " "
+            state.leading_whitespace = STRIP_WHITESPACE
+
+        # open a range at the current end of the text; its length is set on the end tag
+        style_range = InlineStyleRange(self.style)
+        style_range.offset = len(block.text)
+        block.inline_style_ranges.append(style_range)
+        state.current_inline_styles.append(style_range)
+
+    def handle_endtag(self, name, state, contentstate):
+        style_range = state.current_inline_styles.pop()
+        assert style_range.style == self.style
+        text_length = len(state.current_block.text)
+        style_range.length = text_length - style_range.offset
+
+
+class InlineEntityElementHandler:
+    """
+    Abstract superclass for elements that will be represented as inline entities.
+    Subclasses should define a `mutability` property
+    """
+
+    def __init__(self, entity_type):
+        self.entity_type = entity_type
+
+    def handle_starttag(self, name, attrs, state, contentstate):
+        if state.current_block is None:
+            # Inline entity element encountered at the top level -
+            # start a new paragraph block to contain it
+            add_paragraph_block(state, contentstate)
+
+        block = state.current_block
+        if state.leading_whitespace == FORCE_WHITESPACE:
+            # any pending whitespace should be output before handling this tag,
+            # and subsequent whitespace should be collapsed into it (= stripped)
+            block.text += " "
+            state.leading_whitespace = STRIP_WHITESPACE
+
+        # get_attribute_data works with a dict, but attrs arrives as a
+        # list of (name, value) tuples
+        attr_dict = dict(attrs)
+        entity_data = self.get_attribute_data(attr_dict)
+        entity_key = contentstate.add_entity(
+            Entity(self.entity_type, self.mutability, entity_data)
+        )
+
+        # open a range at the current end of the text; its length is set on the end tag
+        open_range = EntityRange(entity_key)
+        open_range.offset = len(block.text)
+        block.entity_ranges.append(open_range)
+        state.current_entity_ranges.append(open_range)
+
+    def get_attribute_data(self, attrs):
+        """
+        Given a dict of attributes found on the source element, return the data dict
+        to be associated with the resulting entity
+        """
+        return {}
+
+    def handle_endtag(self, name, state, contentstate):
+        closing_range = state.current_entity_ranges.pop()
+        text_length = len(state.current_block.text)
+        closing_range.length = text_length - closing_range.offset
+
+
+class LinkElementHandler(InlineEntityElementHandler):
+    """Base handler for link elements; supplies the mutability used for the entities it creates."""
+
+    # read by InlineEntityElementHandler.handle_starttag when building the Entity
+    mutability = "MUTABLE"
+
+
+class ExternalLinkElementHandler(LinkElementHandler):
+    """Link handler whose entity data is just the element's href, stored as "url"."""
+
+    def get_attribute_data(self, attrs):
+        target_url = attrs["href"]
+        return {"url": target_url}
+
+
+class PageLinkElementHandler(LinkElementHandler):
+    """Link handler for page links: entity data holds the page's id, url and parent id."""
+
+    def get_attribute_data(self, attrs):
+        try:
+            page = Page.objects.get(id=attrs["id"]).specific
+        except Page.DoesNotExist:
+            # retain ID so that it's still identified as a page link (albeit a broken one)
+            return {"id": int(attrs["id"]), "url": None, "parentId": None}
+
+        parent_page = page.get_parent()
+        parent_id = parent_page.id if parent_page else None
+
+        return {"id": page.id, "url": page.url, "parentId": parent_id}
+
+
+class AtomicBlockEntityElementHandler:
+    """
+    Handler for elements like <img> that exist as a single immutable item at the block level.
+
+    handle_starttag calls self.create_entity(name, attrs, state, contentstate), which is not
+    defined on this class; subclasses are expected to supply it.
+    """
+
+    def handle_starttag(self, name, attrs, state, contentstate):
+        """
+        Append an "atomic" block for this element to contentstate.blocks.
+
+        If a block is currently open, it is split around the atomic block: its open inline
+        style and entity ranges are ended at the current text position, and a new block of the
+        same type and depth, carrying matching ranges that start at offset 0, is appended after
+        the atomic block and becomes the current block.
+        """
+        if state.current_block:
+            # Placing an atomic block inside another block (e.g. a paragraph) is invalid in
+            # contentstate; we will recover from this by forcibly closing the block along with all
+            # of its inline styles / entities, and opening a new identical one afterwards.
+
+            # Construct a new block of the same type and depth as the currently open one; this will
+            # become the new 'current block' after we've added the atomic block.
+            next_block = Block(
+                state.current_block.type, depth=state.current_block.depth
+            )
+
+            for inline_style_range in state.current_inline_styles:
+                # set this inline style to end at the current text position
+                inline_style_range.length = (
+                    len(state.current_block.text) - inline_style_range.offset
+                )
+                # start a new one of the same type, which will begin at the next block
+                new_inline_style = InlineStyleRange(inline_style_range.style)
+                new_inline_style.offset = 0
+                next_block.inline_style_ranges.append(new_inline_style)
+
+            for entity_range in state.current_entity_ranges:
+                # set this inline entity to end at the current text position
+                entity_range.length = (
+                    len(state.current_block.text) - entity_range.offset
+                )
+                # start a new entity range, pointing to the same entity, to begin at the next block
+                new_entity_range = EntityRange(entity_range.key)
+                new_entity_range.offset = 0
+                next_block.entity_ranges.append(new_entity_range)
+
+            state.current_block = None
+        else:
+            next_block = None
+
+        if not state.has_preceding_nonatomic_block:
+            # if this block is NOT preceded by a non-atomic block,
+            # need to insert a spacer paragraph
+            add_paragraph_block(state, contentstate)
+            # immediately set this as not the current block, so that any subsequent invocations
+            # of this handler don't think we're inside it
+            state.current_block = None
+
+        attr_dict = dict(
+            attrs
+        )  # convert attrs from list of (name, value) tuples to a dict
+        entity = self.create_entity(name, attr_dict, state, contentstate)
+        key = contentstate.add_entity(entity)
+
+        # the atomic block's text is a single space, covered in full by one entity range
+        block = Block("atomic", depth=state.list_depth)
+        contentstate.blocks.append(block)
+        block.text = " "
+        entity_range = EntityRange(key)
+        entity_range.offset = 0
+        entity_range.length = 1
+        block.entity_ranges.append(entity_range)
+        state.has_preceding_nonatomic_block = False
+
+        if next_block:
+            # take the replica that we made of the previous block and its inline styles / entities,
+            # and make that the new current block. Now, when we encounter the closing tags for
+            # those styles/entities further on in the document, they will close the range that
+            # began here.
+            contentstate.blocks.append(next_block)
+            state.current_block = next_block
+            state.current_inline_styles = next_block.inline_style_ranges.copy()
+            state.current_entity_ranges = next_block.entity_ranges.copy()
+            state.has_preceding_nonatomic_block = True
+            state.leading_whitespace = STRIP_WHITESPACE
+
+    def handle_endtag(self, name, state, contentstate):
+        # all of the work is done in handle_starttag; nothing to do on the closing tag
+        pass
+
+
+class HorizontalRuleHandler(AtomicBlockEntityElementHandler):
+    """
+    Atomic block handler that supplies a "HORIZONTAL_RULE" entity
+    """
+
+    def create_entity(self, name, attrs, state, contentstate):
+        # none of the arguments are used: the entity is always built from
+        # "HORIZONTAL_RULE", "IMMUTABLE" and an empty dict
+        return Entity("HORIZONTAL_RULE", "IMMUTABLE", {})
+
+
+class LineBreakHandler:
+    def handle_starttag(self, name, attrs, state, contentstate):
+        if state.current_block is None:
+            # ignore line breaks that exist at the top level
+            return
+
+        state.current_block.text += "\n"
+
+    def handle_endtag(self, name, state, contentstate):
+        pass
+
+
+class HtmlToContentStateHandler(HTMLParser):
+    def __init__(self, features=()):
+        self.paragraph_handler = BlockElementHandler("unstyled")
+        self.element_handlers = HTMLRuleset(
+            {
+                "p": self.paragraph_handler,
+                "br": LineBreakHandler(),
+            }
+        )
+        for feature in features:
+            rule = feature_registry.get_converter_rule("contentstate", feature)
+            if rule is not None:
+                self.element_handlers.add_rules(rule["from_database_format"])
+
+        super().__init__(convert_charrefs=True)
+
+    def reset(self):
+        self.state = HandlerState()
+        self.contentstate = ContentState()
+
+        # stack of (name, handler) tuples for the elements we're currently inside
+        self.open_elements = []
+
+        super().reset()
+
+    def handle_starttag(self, name, attrs):
+        attr_dict = dict(
+            attrs
+        )  # convert attrs from list of (name, value) tuples to a dict
+        element_handler = self.element_handlers.match(name, attr_dict)
+
+        if element_handler is None and not self.open_elements:
+            # treat unrecognised top-level elements as paragraphs
+            element_handler = self.paragraph_handler
+
+        self.open_elements.append((name, element_handler))
+
+        if element_handler:
+            element_handler.handle_starttag(name, attrs, self.state, self.contentstate)
+
+    def handle_endtag(self, name):
+        if not self.open_elements:
+            return  # avoid a pop from an empty list if we have an extra end tag
+        expected_name, element_handler = self.open_elements.pop()
+        assert name == expected_name, "Unmatched tags: expected {}, got {}".format(
+            expected_name,
+            name,
+        )
+        if element_handler:
+            element_handler.handle_endtag(name, self.state, self.contentstate)
+
+    def handle_data(self, content):
+        # normalise whitespace sequences to a single space unless whitespace is contained in <pre> tag,
+        # in which case, leave it alone
+        # This is in line with https://www.w3.org/TR/html4/struct/text.html#h-9.1
+        content = re.sub(WHITESPACE_RE, " ", content)
+
+        if self.state.current_block is None:
+            if content == " ":
+                # ignore top-level whitespace
+                return
+            else:
+                # create a new paragraph block for this content
+                add_paragraph_block(self.state, self.contentstate)
+
+        if content == " ":
+            # if leading_whitespace = strip, this whitespace node is not significant
+            #   and should be skipped.
+            # For other cases, _don't_ output the whitespace yet, but set leading_whitespace = force
+            # so that a space is forced before the next text node or inline element. If no such node
+            # appears (= we reach the end of the block), the whitespace can rightfully be dropped.
+            if self.state.leading_whitespace != STRIP_WHITESPACE:
+                self.state.leading_whitespace = FORCE_WHITESPACE
+        else:
+            # strip or add leading whitespace according to the leading_whitespace flag
+            if self.state.leading_whitespace == STRIP_WHITESPACE:
+                content = content.lstrip()
+            elif (
+                self.state.leading_whitespace == FORCE_WHITESPACE
+                and not content.startswith(" ")
+            ):
+                content = " " + content
+            if content.endswith(" "):
+                # don't output trailing whitespace yet, because we want to discard it if the end
+                # of the block follows. Instead, we'll set leading_whitespace = force so that
+                # any following text or inline element will be prefixed by a space
+                content = content.rstrip()
+                self.state.leading_whitespace = FORCE_WHITESPACE
+            else:
+                # no trailing whitespace here - any leading whitespace at the start of the
+                # next text node should be respected
+                self.state.leading_whitespace = KEEP_WHITESPACE
+
+            self.state.current_block.text += content
+
+    def close(self):
+        # if content ends in an atomic block (or is empty), need to append a spacer paragraph
+        if not self.state.has_preceding_nonatomic_block:
+            add_paragraph_block(self.state, self.contentstate)
+        super().close()
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/init.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/init.py
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/pycache/init.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/pycache/init.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/init.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/init.py
@@ -0,0 +1,109 @@
+import json
+import warnings
+
+from django.core.serializers.json import DjangoJSONEncoder
+from django.forms import Media, widgets
+from django.utils.functional import cached_property
+
+from wagtail.admin.rich_text.converters.contentstate import ContentstateConverter
+from wagtail.admin.staticfiles import versioned_static
+from wagtail.rich_text import features as feature_registry
+from wagtail.telepath import register
+from wagtail.widget_adapters import WidgetAdapter
+
+
+class DraftailRichTextArea(widgets.HiddenInput):
+    template_name = "wagtailadmin/widgets/draftail_rich_text_area.html"
+    is_hidden = False
+
+    # this class's constructor accepts a 'features' kwarg
+    accepts_features = True
+
+    # Draftail has its own commenting
+    show_add_comment_button = False
+
+    def __init__(self, *args, **kwargs):
+        # note: this constructor will receive an 'options' kwarg taken from the WAGTAILADMIN_RICH_TEXT_EDITORS setting,
+        # but we don't currently recognise any options from there (other than 'features', which is passed here as a separate kwarg)
+        kwargs.pop("options", None)
+        self.options = {}
+        self.plugins = []
+
+        self.features = kwargs.pop("features", None)
+        if self.features is None:
+            self.features = feature_registry.get_default_features()
+
+        for feature in self.features:
+            plugin = feature_registry.get_editor_plugin("draftail", feature)
+            if plugin is None:
+                warnings.warn(
+                    f"Draftail received an unknown feature '{feature}'.",
+                    category=RuntimeWarning,
+                )
+            else:
+                plugin.construct_options(self.options)
+                self.plugins.append(plugin)
+
+        self.converter = ContentstateConverter(self.features)
+
+        default_attrs = {
+            "data-draftail-input": True,
+            "data-controller": "w-init",
+            "data-w-init-event-value": "w-draftail:init",
+        }
+        attrs = kwargs.get("attrs")
+        if attrs:
+            default_attrs.update(attrs)
+        kwargs["attrs"] = default_attrs
+
+        super().__init__(*args, **kwargs)
+
+    def format_value(self, value):
+        # Convert database rich text representation to the format required by
+        # the input field
+        value = super().format_value(value)
+
+        if value is None:
+            value = ""
+
+        return self.converter.from_database_format(value)
+
+    def get_context(self, name, value, attrs):
+        context = super().get_context(name, value, attrs)
+        context["widget"]["attrs"]["data-w-init-detail-value"] = json.dumps(
+            self.options,
+            cls=DjangoJSONEncoder,
+        )
+        return context
+
+    def value_from_datadict(self, data, files, name):
+        original_value = super().value_from_datadict(data, files, name)
+        if original_value is None:
+            return None
+        return self.converter.to_database_format(original_value)
+
+    @cached_property
+    def media(self):
+        media = Media(
+            js=[
+                versioned_static("wagtailadmin/js/draftail.js"),
+            ],
+            css={"all": [versioned_static("wagtailadmin/css/panels/draftail.css")]},
+        )
+
+        for plugin in self.plugins:
+            media += plugin.media
+
+        return media
+
+
+class DraftailRichTextAreaAdapter(WidgetAdapter):
+    js_constructor = "wagtail.widgets.DraftailRichTextArea"
+
+    def js_args(self, widget):
+        return [
+            widget.options,
+        ]
+
+
+register(DraftailRichTextAreaAdapter(), DraftailRichTextArea)
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/pycache/init.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/pycache/init.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/pycache/features.cpython-310.pyc
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/pycache/features.cpython-310.pyc
--- a/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/features.py
+++ b/env/lib/python3.10/site-packages/wagtail/admin/rich_text/editors/draftail/features.py
@@ -0,0 +1,94 @@
+from django.forms import Media
+
+from wagtail.admin.staticfiles import versioned_static
+
+# Feature objects: these are mapped to feature identifiers within the rich text
+# feature registry (wagtail.rich_text.features). Each one implements
+# a `construct_options` method which modifies an options dict as appropriate to
+# enable that feature.
+
+# Additionally, a Feature object defines a media property
+# (https://docs.djangoproject.com/en/stable/topics/forms/media/) to specify css/js
+# files to import when the feature is active.
+
+
+class Feature:
+    def __init__(self, js=None, css=None):
+        self.js = js or []
+        self.css = css or {}
+
+    @property
+    def media(self):
+        js = [versioned_static(js_file) for js_file in self.js]
+        css = {}
+        for media_type, css_files in self.css.items():
+            css[media_type] = [versioned_static(css_file) for css_file in css_files]
+
+        return Media(js=js, css=css)
+
+
+class BooleanFeature(Feature):
+    """
+    A feature which is enabled by a boolean flag at the top level of
+    the options dict
+    """
+
+    def __init__(self, option_name, **kwargs):
+        # option_name: key of the flag within the options dict;
+        # all remaining keyword arguments are passed on to Feature
+        super().__init__(**kwargs)
+        self.option_name = option_name
+
+    def construct_options(self, options):
+        # modifies the given options dict in place, setting this feature's flag to True
+        options[self.option_name] = True
+
+
+class ListFeature(Feature):
+    """
+    Abstract class for features that are defined in a list within the options dict.
+    Subclasses must define option_name
+    """
+
+    def __init__(self, data, **kwargs):
+        super().__init__(**kwargs)
+        self.data = data
+
+    def construct_options(self, options):
+        if self.option_name not in options:
+            options[self.option_name] = []
+
+        options[self.option_name].append(self.data)
+
+
+class EntityFeature(ListFeature):
+    """A feature whose data is listed in the entityTypes list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "entityTypes"
+
+
+class BlockFeature(ListFeature):
+    """A feature whose data is listed in the blockTypes list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "blockTypes"
+
+
+class InlineStyleFeature(ListFeature):
+    """A feature whose data is listed in the inlineStyles list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "inlineStyles"
+
+
+class DecoratorFeature(ListFeature):
+    """A feature whose data is listed in the decorators list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "decorators"
+
+
+class ControlFeature(ListFeature):
+    """A feature whose data is listed in the controls list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "controls"
+
+
+class PluginFeature(ListFeature):
+    """A feature whose data is listed in the plugins list of the options"""
+
+    # key of the list within the options dict that this feature's data is appended to
+    option_name = "plugins"