Initial commit

This commit is contained in:
2024-08-27 20:33:44 +02:00
commit 1f1832267d
14794 changed files with 1599592 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
from django.conf import settings
from django.utils.module_loading import import_string
from wagtail.admin.rich_text.editors.draftail import DraftailRichTextArea # NOQA: F401
# Built-in editor configuration, keyed by editor name. Each entry names its
# widget class as a dotted import path under "WIDGET"; entries may also carry
# an "OPTIONS" dict. get_rich_text_editor_widget() layers the
# WAGTAILADMIN_RICH_TEXT_EDITORS setting on top of these defaults.
DEFAULT_RICH_TEXT_EDITORS = {
    "default": {"WIDGET": "wagtail.admin.rich_text.DraftailRichTextArea"}
}
def get_rich_text_editor_widget(name="default", features=None):
    """
    Instantiate the rich text editor widget registered under `name`.

    The editor definitions are DEFAULT_RICH_TEXT_EDITORS overlaid with the
    WAGTAILADMIN_RICH_TEXT_EDITORS setting (if defined). A KeyError is raised
    if `name` is not present in the combined mapping.

    `features` is only passed to the widget when its class declares
    `accepts_features = True`; if the caller gives no features, the 'features'
    entry of the editor's OPTIONS (if any) is used instead.
    """
    available_editors = dict(DEFAULT_RICH_TEXT_EDITORS)
    available_editors.update(getattr(settings, "WAGTAILADMIN_RICH_TEXT_EDITORS", {}))

    editor_config = available_editors[name]
    editor_options = editor_config.get("OPTIONS")

    if features is None and editor_options is not None:
        # fall back on 'features' list within OPTIONS, if any
        features = editor_options.get("features")

    widget_class = import_string(editor_config["WIDGET"])

    widget_kwargs = {}
    if editor_options is not None:
        widget_kwargs["options"] = editor_options
    if getattr(widget_class, "accepts_features", False):
        widget_kwargs["features"] = features

    return widget_class(**widget_kwargs)

View File

@@ -0,0 +1,149 @@
import json
import logging
import re
from draftjs_exporter.defaults import render_children
from draftjs_exporter.dom import DOM
from draftjs_exporter.html import HTML as HTMLExporter
from wagtail.admin.rich_text.converters.html_to_contentstate import (
BLOCK_KEY_NAME,
HtmlToContentStateHandler,
)
from wagtail.rich_text import features as feature_registry
from wagtail.whitelist import check_url
def link_entity(props):
    """
    Render a link entity as an <a> element.

    Entities carrying an "id" become internal page links:
    <a linktype="page" id="1">internal page link</a>

    All other entities become plain links whose href is the entity's "url"
    passed through check_url.
    """
    page_id = props.get("id")

    if page_id is not None:
        attributes = {"linktype": "page", "id": page_id}
    else:
        attributes = {"href": check_url(props.get("url"))}

    return DOM.create_element("a", attributes, props["children"])
def br(props):
    """
    Render a matched line break: inside a "code-block" block the matched text
    is passed through unchanged, elsewhere it becomes a <br> element.
    """
    inside_code_block = props["block"]["type"] == "code-block"
    return props["children"] if inside_code_block else DOM.create_element("br")
def block_fallback(props):
    """
    Fallback renderer for blocks whose type has no exporter configuration.

    Logs an error naming the block type and returns None, so the block is
    dropped from the output.
    """
    type_ = props["block"]["type"]
    # Pass the type as a logging argument rather than %-formatting eagerly: the
    # message is only built if the record is emitted, and a tuple-valued type
    # can no longer break the format operation.
    logging.error('Missing config for "%s". Deleting block.', type_)
    return None
def entity_fallback(props):
    """
    Fallback renderer for entities whose type has no exporter configuration.

    Logs a warning naming the entity type and returns None, so the entity is
    dropped from the output.
    """
    type_ = props["entity"]["type"]
    # Lazy logging arguments: formatting happens only if the record is emitted,
    # and a tuple-valued type cannot break the format operation.
    logging.warning('Missing config for "%s". Deleting entity', type_)
    return None
def style_fallback(props):
    """
    Fallback renderer for inline styles that have no exporter configuration.

    Logs a warning naming the style and returns the children unchanged, so the
    text is kept but the styling is dropped.
    """
    type_ = props["inline_style_range"]["style"]
    # Lazy logging arguments: formatting happens only if the record is emitted,
    # and a tuple-valued style cannot break the format operation.
    logging.warning('Missing config for "%s". Deleting style.', type_)
    return props["children"]
def persist_key_for_block(config):
    """
    Wrap a draftjs_exporter block-level element config so that the rendered
    element retains the block key in a data attribute (BLOCK_KEY_NAME).

    `config` may be a dict (with optional "element", "props", "wrapper" and
    "wrapper_props" entries), a plain element name string, or a callable
    component. The returned value is always a dict config whose "element" is a
    component that adds the key attribute.
    """
    if isinstance(config, dict):
        # Wrapper elements don't retain a key - we can keep them in the config as-is
        new_config = {
            key: value
            for key, value in config.items()
            if key in {"wrapper", "wrapper_props"}
        }
        element = config.get("element")
        element_props = config.get("props", {})
    else:
        # The config is either a simple string element name, or a function
        new_config = {}
        element_props = {}
        element = config

    def element_with_uuid(props):
        added_props = {BLOCK_KEY_NAME: props["block"].get("key")}

        if callable(element):
            # The element is a component: we can only run it and modify its return
            # value to include the data attribute. Testing callable() up front
            # (rather than catching TypeError from the call) means a TypeError
            # raised inside the component propagates instead of being mistaken
            # for "not a function".
            elt = element(props)
            if elt is not None:
                elt.attr.update(added_props)
            return elt

        # Otherwise do the normal process of creating a DOM element with the right
        # element type, simply adding the data attribute to its props. Configured
        # props are applied last, so they take precedence over the key attribute.
        added_props.update(element_props)
        return DOM.create_element(element, added_props, props["children"])

    new_config["element"] = element_with_uuid
    return new_config
class ContentstateConverter:
    """
    Converts rich text between the HTML stored in the database and the
    contentstate JSON used by Draftail, for a given set of rich text features.
    """

    def __init__(self, features=None):
        """
        Set up the HTML parser and the draftjs_exporter configuration.

        `features` is an iterable of feature identifiers. It is iterated more
        than once, so it should be a re-iterable collection. When None, the
        feature registry's default feature set is used.
        """
        if features is None:
            # Iterating None below would raise a TypeError, which made the
            # default argument unusable; fall back to the registry defaults.
            features = feature_registry.get_default_features()

        self.features = features
        self.html_to_contentstate_handler = HtmlToContentStateHandler(features)

        # Baseline exporter configuration; feature rules extend the three maps.
        exporter_config = {
            "block_map": {
                "unstyled": persist_key_for_block("p"),
                "atomic": render_children,
                "fallback": block_fallback,
            },
            "style_map": {
                "FALLBACK": style_fallback,
            },
            "entity_decorators": {
                "FALLBACK": entity_fallback,
            },
            "composite_decorators": [
                {
                    "strategy": re.compile(r"\n"),
                    "component": br,
                },
            ],
            "engine": DOM.STRING,
        }

        for feature in self.features:
            rule = feature_registry.get_converter_rule("contentstate", feature)
            if rule is None:
                continue

            feature_config = rule["to_database_format"]
            # Block-level configs are wrapped so that the block key is written
            # out as a data attribute on the rendered element.
            exporter_config["block_map"].update(
                {
                    block_type: persist_key_for_block(config)
                    for block_type, config in feature_config.get(
                        "block_map", {}
                    ).items()
                }
            )
            exporter_config["style_map"].update(feature_config.get("style_map", {}))
            exporter_config["entity_decorators"].update(
                feature_config.get("entity_decorators", {})
            )

        self.exporter = HTMLExporter(exporter_config)

    def from_database_format(self, html):
        """Parse database-format HTML and return the contentstate as a JSON string."""
        self.html_to_contentstate_handler.reset()
        self.html_to_contentstate_handler.feed(html)
        self.html_to_contentstate_handler.close()

        return self.html_to_contentstate_handler.contentstate.as_json(
            indent=4, separators=(",", ": ")
        )

    def to_database_format(self, contentstate_json):
        """Render a contentstate JSON string to database-format HTML."""
        return self.exporter.render(json.loads(contentstate_json))

View File

@@ -0,0 +1,93 @@
import json
import random
import string
# Alphabet from which auto-generated block keys are drawn
ALPHANUM = string.ascii_lowercase + string.digits


class Block:
    """A single contentstate block: a run of text plus its style/entity ranges."""

    def __init__(self, typ, depth=0, key=None):
        self.type = typ
        self.depth = depth
        self.text = ""
        # A missing (or empty) key is replaced by five random characters from ALPHANUM
        self.key = key or "".join(random.choice(ALPHANUM) for _ in range(5))
        self.inline_style_ranges = []
        self.entity_ranges = []

    def as_dict(self):
        """Return the block as a dict using contentstate's camelCase keys."""
        style_ranges = [
            style_range.as_dict() for style_range in self.inline_style_ranges
        ]
        entity_ranges = [
            entity_range.as_dict() for entity_range in self.entity_ranges
        ]
        return {
            "key": self.key,
            "type": self.type,
            "depth": self.depth,
            "text": self.text,
            "inlineStyleRanges": style_ranges,
            "entityRanges": entity_ranges,
        }
class InlineStyleRange:
    """
    A span of a block's text carrying an inline style. `offset` and `length`
    start out as None and are filled in by whoever creates the range.
    """

    def __init__(self, style):
        self.style = style
        self.offset = None
        self.length = None

    def as_dict(self):
        """Return the range as a contentstate dict (offset, length, style)."""
        return dict(offset=self.offset, length=self.length, style=self.style)
class Entity:
    """A contentstate entity: a type, a mutability setting and a data payload."""

    def __init__(self, entity_type, mutability, data):
        self.entity_type = entity_type
        self.mutability = mutability
        self.data = data

    def as_dict(self):
        """Return the entity as a contentstate dict (mutability, type, data)."""
        return dict(mutability=self.mutability, type=self.entity_type, data=self.data)
class EntityRange:
    """
    A span of a block's text attached to the entity stored under `key`.
    `offset` and `length` start out as None and are filled in by the creator.
    """

    def __init__(self, key):
        self.key = key
        self.offset = None
        self.length = None

    def as_dict(self):
        """Return the range as a contentstate dict (key, offset, length)."""
        return dict(key=self.key, offset=self.offset, length=self.length)
class ContentState:
    """Pythonic representation of a Draftail contentState structure"""

    def __init__(self):
        self.blocks = []
        self.entity_count = 0
        self.entity_map = {}

    def add_entity(self, entity):
        """Store `entity` under the next sequential integer key and return that key."""
        new_key = self.entity_count
        self.entity_count = new_key + 1
        self.entity_map[new_key] = entity
        return new_key

    def as_dict(self):
        """Return the blocks and the entity map as plain dicts and lists."""
        serialised_blocks = [block.as_dict() for block in self.blocks]
        serialised_entities = {
            key: entity.as_dict() for key, entity in self.entity_map.items()
        }
        return {"blocks": serialised_blocks, "entityMap": serialised_entities}

    def as_json(self, **kwargs):
        """Serialise as_dict() with json.dumps; `kwargs` are passed through to it."""
        return json.dumps(self.as_dict(), **kwargs)

View File

@@ -0,0 +1,183 @@
from django.utils.functional import cached_property
from django.utils.html import escape
from wagtail.models import Page
from wagtail.rich_text import features as feature_registry
from wagtail.rich_text.rewriters import EmbedRewriter, LinkRewriter, MultiRuleRewriter
from wagtail.whitelist import Whitelister, allow_without_attributes
class WhitelistRule:
    """Pairs an element name with the handler used to whitelist that element."""

    def __init__(self, element, handler):
        self.element, self.handler = element, handler
class EmbedTypeRule:
    """Pairs an embed type identifier with the handler for embeds of that type."""

    def __init__(self, embed_type, handler):
        self.embed_type, self.handler = embed_type, handler
class LinkTypeRule:
    """Pairs a link type identifier with the handler for links of that type."""

    def __init__(self, link_type, handler):
        self.link_type, self.handler = link_type, handler
# Whitelist rules which are always active regardless of the rich text features that are enabled.
# Maps an element name to its handler; every element listed here is allowed
# through with its attributes removed. DbWhitelister starts from a copy of this
# dict and adds the handlers supplied by WhitelistRule instances.
BASE_WHITELIST_RULES = {
    "[document]": allow_without_attributes,
    "p": allow_without_attributes,
    "div": allow_without_attributes,
    "br": allow_without_attributes,
}
class DbWhitelister(Whitelister):
    """
    A custom whitelisting engine to convert the HTML as returned by the rich text editor
    into the pseudo-HTML format stored in the database (in which images, documents and other
    linked objects are identified by ID rather than URL):

    * accepts a list of WhitelistRules to extend the initial set in BASE_WHITELIST_RULES;
    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
      attributes supplied by the handler for that type as defined in embed_handlers;
    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
      determined by the handler for that type defined in link_handlers, while keeping the
      element content intact.
    """

    def __init__(self, converter_rules):
        """
        `converter_rules` is an iterable of WhitelistRule, EmbedTypeRule and
        LinkTypeRule instances; it is kept and re-read by the handler properties.
        """
        self.converter_rules = converter_rules
        self.element_rules = BASE_WHITELIST_RULES.copy()
        for rule in self.converter_rules:
            if isinstance(rule, WhitelistRule):
                self.element_rules[rule.element] = rule.handler

    @cached_property
    def embed_handlers(self):
        """Mapping of embed type to handler, built from the EmbedTypeRules."""
        return {
            rule.embed_type: rule.handler
            for rule in self.converter_rules
            if isinstance(rule, EmbedTypeRule)
        }

    @cached_property
    def link_handlers(self):
        """Mapping of link type to handler, built from the LinkTypeRules."""
        return {
            rule.link_type: rule.handler
            for rule in self.converter_rules
            if isinstance(rule, LinkTypeRule)
        }

    def clean_tag_node(self, doc, tag):
        """
        Clean a single tag in place: convert embeds and typed links to their
        database form, and defer to the parent class for everything else
        (after renaming <div> to <p>).
        """
        if "data-embedtype" in tag.attrs:
            embed_type = tag["data-embedtype"]
            # fetch the appropriate embed handler for this embedtype
            try:
                embed_handler = self.embed_handlers[embed_type]
            except KeyError:
                # discard embeds with unrecognised embedtypes
                tag.decompose()
                return

            embed_attrs = embed_handler.get_db_attributes(tag)
            embed_attrs["embedtype"] = embed_type

            embed_tag = doc.new_tag("embed", **embed_attrs)
            embed_tag.can_be_empty_element = True
            tag.replace_with(embed_tag)
        elif tag.name == "a" and "data-linktype" in tag.attrs:
            # first, whitelist the contents of this tag.
            # NB cleaning a child can remove, unwrap or replace it, which changes
            # tag.contents while we are looping; iterate over a snapshot so that
            # no sibling is skipped.
            for child in list(tag.contents):
                self.clean_node(doc, child)

            link_type = tag["data-linktype"]
            try:
                link_handler = self.link_handlers[link_type]
            except KeyError:
                # discard links with unrecognised linktypes
                tag.unwrap()
                return

            link_attrs = link_handler.get_db_attributes(tag)
            link_attrs["linktype"] = link_type
            # replace every attribute on the element with the database ones
            tag.attrs.clear()
            tag.attrs.update(**link_attrs)
        else:
            if tag.name == "div":
                tag.name = "p"

            super().clean_tag_node(doc, tag)
class EditorHTMLConverter:
    """
    Converts between editor HTML and the database representation, using the
    'editorhtml' converter rules registered for the active features.
    """

    def __init__(self, features=None):
        """Collect converter rules for `features` (registry defaults when None)."""
        if features is None:
            features = feature_registry.get_default_features()

        self.converter_rules = []
        for feature_name in features:
            feature_rules = feature_registry.get_converter_rule(
                "editorhtml", feature_name
            )
            if feature_rules is None:
                continue
            # each feature contributes a list of rule objects, which is merged
            # into the main converter_rules list
            self.converter_rules.extend(feature_rules)

    @cached_property
    def whitelister(self):
        """DbWhitelister built from the collected rules (created on first use)."""
        return DbWhitelister(self.converter_rules)

    def to_database_format(self, html):
        """Clean editor HTML into the database representation."""
        return self.whitelister.clean(html)

    @cached_property
    def html_rewriter(self):
        """
        Rewriter that expands database links and embeds, built from the
        expand_db_attributes methods of the link / embed rule handlers.
        """
        link_rules = {}
        embed_rules = {}
        for converter_rule in self.converter_rules:
            if isinstance(converter_rule, EmbedTypeRule):
                expander = converter_rule.handler.expand_db_attributes
                embed_rules[converter_rule.embed_type] = expander
            elif isinstance(converter_rule, LinkTypeRule):
                expander = converter_rule.handler.expand_db_attributes
                link_rules[converter_rule.link_type] = expander

        rewriters = [LinkRewriter(rules=link_rules), EmbedRewriter(rules=embed_rules)]
        return MultiRuleRewriter(rewriters)

    def from_database_format(self, html):
        """Expand database-format HTML into the HTML used by the editor."""
        return self.html_rewriter(html)
class PageLinkHandler:
    """
    PageLinkHandler will be invoked whenever we encounter an <a> element in HTML content
    with an attribute of data-linktype="page". The resulting element in the database
    representation will be:
    <a linktype="page" id="42">hello world</a>
    """

    @staticmethod
    def get_db_attributes(tag):
        """
        Given an <a> tag that we've identified as a page link embed (because it has a
        data-linktype="page" attribute), return a dict of the attributes we should
        have on the resulting <a linktype="page"> element.
        """
        page_id = tag["data-id"]
        return {"id": page_id}

    @staticmethod
    def expand_db_attributes(attrs):
        """
        Return the opening <a ...> tag used in the editor for the page whose ID
        is in attrs["id"], or a bare "<a>" if no such page exists.
        """
        try:
            page = Page.objects.get(id=attrs["id"])

            link_attrs = 'data-linktype="page" data-id="%d" ' % page.id
            parent_page = page.get_parent()
            if parent_page:
                link_attrs += 'data-parent-id="%d" ' % parent_page.id

            return f'<a {link_attrs}href="{escape(page.localized.specific.url)}">'
        except Page.DoesNotExist:
            return "<a>"

View File

@@ -0,0 +1,108 @@
import re
from collections.abc import Mapping
# Selector forms understood by HTMLRuleset.add_rule:
#   name                  e.g.  a
ELEMENT_SELECTOR = re.compile(r"^([\w-]+)$")
#   name[attr]            e.g.  a[href]
ELEMENT_WITH_ATTR_SELECTOR = re.compile(r"^([\w-]+)\[([\w-]+)\]$")
#   name[attr='value']
ELEMENT_WITH_ATTR_EXACT_SINGLE_QUOTE_SELECTOR = re.compile(
    r"^([\w-]+)\[([\w-]+)='(.*)'\]$"
)
#   name[attr="value"]
ELEMENT_WITH_ATTR_EXACT_DOUBLE_QUOTE_SELECTOR = re.compile(
    r'^([\w-]+)\[([\w-]+)="(.*)"\]$'
)
#   name[attr=value]
ELEMENT_WITH_ATTR_EXACT_UNQUOTED_SELECTOR = re.compile(
    r"^([\w-]+)\[([\w-]+)=([\w-]+)\]$"
)


class HTMLRuleset:
    """
    Maintains a set of rules for matching HTML elements.
    Each rule defines a mapping from a CSS-like selector to an arbitrary result object.

    The following forms of rule are currently supported:
    'a' = matches any <a> element
    'a[href]' = matches any <a> element with an 'href' attribute
    'a[linktype="page"]' = matches any <a> element with a 'linktype' attribute equal to 'page'
    """

    def __init__(self, rules=None):
        # mapping of element name to a sorted list of (precedence, attr_check, result) tuples
        # where attr_check is a callable that takes an attr dict and returns True if they match
        self.element_rules = {}

        if rules:
            self.add_rules(rules)

    def add_rules(self, rules):
        """Add several rules, given as a {selector: result} mapping or (selector, result) pairs."""
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        for selector, result in pairs:
            self.add_rule(selector, result)

    def _store_rule(self, name, precedence, attr_check, result):
        # file the rule under its element name, keeping the list ordered by
        # precedence (lower number = tested first)
        bucket = self.element_rules.setdefault(name, [])
        bucket.append((precedence, attr_check, result))
        bucket.sort(key=lambda entry: entry[0])

    def _add_element_rule(self, name, result):
        # add a rule that matches on any element with name `name`;
        # element-only rules have priority 2 (lower)
        def always_matches(attrs):
            return True

        self._store_rule(name, 2, always_matches, result)

    def _add_element_with_attr_rule(self, name, attr, result):
        # add a rule that matches any element with name `name` which has the attribute `attr`;
        # element-and-attr rules have priority 1 (higher)
        def has_attribute(attrs):
            return attr in attrs

        self._store_rule(name, 1, has_attribute, result)

    def _add_element_with_attr_exact_rule(self, name, attr, value, result):
        # add a rule that matches any element with name `name` which has an
        # attribute `attr` equal to `value`;
        # element-and-attr rules have priority 1 (higher)
        def attribute_equals(attrs):
            return attr in attrs and attrs[attr] == value

        self._store_rule(name, 1, attribute_equals, result)

    def add_rule(self, selector, result):
        """
        Register `result` under `selector`. A selector that matches none of the
        supported forms is ignored without error.
        """
        found = ELEMENT_SELECTOR.match(selector)
        if found:
            self._add_element_rule(found.group(1), result)
            return

        found = ELEMENT_WITH_ATTR_SELECTOR.match(selector)
        if found:
            name, attr = found.groups()
            self._add_element_with_attr_rule(name, attr, result)
            return

        exact_value_patterns = (
            ELEMENT_WITH_ATTR_EXACT_SINGLE_QUOTE_SELECTOR,
            ELEMENT_WITH_ATTR_EXACT_DOUBLE_QUOTE_SELECTOR,
            ELEMENT_WITH_ATTR_EXACT_UNQUOTED_SELECTOR,
        )
        for pattern in exact_value_patterns:
            found = pattern.match(selector)
            if found:
                name, attr, value = found.groups()
                self._add_element_with_attr_exact_rule(name, attr, value, result)
                return

    def match(self, name, attrs):
        """
        Look for a rule matching an HTML element with the given name and attribute dict,
        and return the corresponding result object. If no rule matches, return None.
        Rules that test an attribute are tried before element-only rules; rules of
        equal precedence are tried in the order they were added.
        """
        for _precedence, attr_check, result in self.element_rules.get(name, ()):
            if attr_check(attrs):
                return result
        return None

View File

@@ -0,0 +1,446 @@
import re
from html.parser import HTMLParser
from wagtail.admin.rich_text.converters.contentstate_models import (
Block,
ContentState,
Entity,
EntityRange,
InlineStyleRange,
)
from wagtail.admin.rich_text.converters.html_ruleset import HTMLRuleset
from wagtail.models import Page
from wagtail.rich_text import features as feature_registry
# constants to keep track of what to do with leading whitespace on the next text node we encounter:
# STRIP = drop it, KEEP = leave it as it appears in the text,
# FORCE = make sure a single space precedes the next text or inline element
STRIP_WHITESPACE = 0
KEEP_WHITESPACE = 1
FORCE_WHITESPACE = 2
# match one or more consecutive normal spaces, tabs, new-lines, form-feeds and carriage returns
WHITESPACE_RE = re.compile(r"[ \t\n\f\r]+")
# the attribute name used to persist the Draftail block key between the front end and the database
BLOCK_KEY_NAME = "data-block-key"
class HandlerState:
    """
    Mutable parsing state shared between the element handlers, with a
    push/pop stack for saving and restoring it.
    """

    # attributes saved by push() and restored by pop(), in snapshot key order
    _SAVED_ATTRIBUTES = (
        "current_block",
        "current_inline_styles",
        "current_entity_ranges",
        "leading_whitespace",
        "list_depth",
        "list_item_type",
    )

    def __init__(self):
        self.current_block = None
        self.current_inline_styles = []
        self.current_entity_ranges = []

        # what to do with leading whitespace on the next text node we encounter: strip, keep or force
        self.leading_whitespace = STRIP_WHITESPACE
        self.list_depth = 0
        self.list_item_type = None

        # an atomic block which is NOT preceded by a non-atomic block must have a spacer
        # paragraph inserted before it
        # NB This is not included in pushed/popped state, because after a pop() this
        # should still indicate the status of the most recent block, not the one preceding
        # the corresponding push()
        self.has_preceding_nonatomic_block = False

        self.pushed_states = []

    def push(self):
        """Save the current values of the saved attributes onto the stack."""
        # values are stored by reference; lists are not copied
        snapshot = {
            attribute: getattr(self, attribute)
            for attribute in self._SAVED_ATTRIBUTES
        }
        self.pushed_states.append(snapshot)

    def pop(self):
        """Restore the most recently pushed snapshot and remove it from the stack."""
        snapshot = self.pushed_states.pop()
        for attribute in self._SAVED_ATTRIBUTES:
            setattr(self, attribute, snapshot[attribute])
def add_paragraph_block(state, contentstate):
    """
    Utility function for adding an unstyled (paragraph) block to contentstate;
    useful for element handlers that aren't paragraph elements themselves, but need
    to insert paragraphs to ensure correctness.

    The new block becomes the state's current block, at the current list depth.
    """
    paragraph = Block("unstyled", depth=state.list_depth)
    contentstate.blocks.append(paragraph)

    state.current_block = paragraph
    state.has_preceding_nonatomic_block = True
    # a fresh block starts with no significant leading whitespace
    state.leading_whitespace = STRIP_WHITESPACE
class ListElementHandler:
    """Handler for <ul> / <ol> tags"""

    def __init__(self, list_item_type):
        # block type given to the <li> items found inside this kind of list
        self.list_item_type = list_item_type

    def handle_starttag(self, name, attrs, state, contentstate):
        """Save the state, then enter this list (one level deeper if already inside a list)."""
        state.push()

        already_in_list = state.list_item_type is not None
        if already_in_list:
            # start the next nesting level; a top-level list keeps the depth unchanged
            state.list_depth += 1

        state.list_item_type = self.list_item_type

    def handle_endtag(self, name, state, contentstate):
        """Restore the state saved when the list was opened."""
        state.pop()
class BlockElementHandler:
    """Handler for elements that map onto a contentstate block of a fixed type."""

    def __init__(self, block_type):
        self.block_type = block_type

    def create_block(self, name, attrs, state, contentstate):
        """
        Build the Block for this element. `attrs` is a dict; a block key found
        under BLOCK_KEY_NAME is reused.
        """
        existing_key = attrs.get(BLOCK_KEY_NAME)
        return Block(self.block_type, depth=state.list_depth, key=existing_key)

    def handle_starttag(self, name, attrs, state, contentstate):
        """Open a new block and make it the state's current block."""
        # convert attrs from list of (name, value) tuples to a dict
        new_block = self.create_block(name, dict(attrs), state, contentstate)
        contentstate.blocks.append(new_block)

        state.current_block = new_block
        state.leading_whitespace = STRIP_WHITESPACE
        state.has_preceding_nonatomic_block = True

    def handle_endtag(self, name, state, contentState):
        """Close the current block; all inline styles and entities must already be closed."""
        open_styles = state.current_inline_styles
        assert (
            not open_styles
        ), "End of block reached without closing inline style elements"

        open_entities = state.current_entity_ranges
        assert (
            not open_entities
        ), "End of block reached without closing entity elements"

        state.current_block = None
class ListItemElementHandler(BlockElementHandler):
    """Handler for <li> tag"""

    def __init__(self):
        # Deliberately does not set self.block_type: the block type comes from
        # the enclosing list, via state.list_item_type.
        pass

    def create_block(self, name, attrs, state, contentstate):
        """Build a block whose type is the enclosing list's item type."""
        item_type = state.list_item_type
        assert item_type is not None, (
            "%s element found outside of an enclosing list element" % name
        )
        existing_key = attrs.get(BLOCK_KEY_NAME)
        return Block(item_type, depth=state.list_depth, key=existing_key)
class InlineStyleElementHandler:
    """Handler for elements that apply an inline style (`style`) to a range of text."""

    def __init__(self, style):
        self.style = style

    def handle_starttag(self, name, attrs, state, contentstate):
        """Open a style range starting at the current end of the block's text."""
        if state.current_block is None:
            # Inline style element encountered at the top level -
            # start a new paragraph block to contain it
            add_paragraph_block(state, contentstate)

        block = state.current_block

        if state.leading_whitespace == FORCE_WHITESPACE:
            # any pending whitespace should be output before handling this tag,
            # and subsequent whitespace should be collapsed into it (= stripped)
            block.text += " "
            state.leading_whitespace = STRIP_WHITESPACE

        style_range = InlineStyleRange(self.style)
        style_range.offset = len(block.text)

        block.inline_style_ranges.append(style_range)
        state.current_inline_styles.append(style_range)

    def handle_endtag(self, name, state, contentstate):
        """Close the innermost open style range, fixing its length."""
        style_range = state.current_inline_styles.pop()
        assert style_range.style == self.style

        text_length = len(state.current_block.text)
        style_range.length = text_length - style_range.offset
class InlineEntityElementHandler:
    """
    Abstract superclass for elements that will be represented as inline entities.
    Subclasses should define a `mutability` property
    """

    def __init__(self, entity_type):
        self.entity_type = entity_type

    def handle_starttag(self, name, attrs, state, contentstate):
        """Register a new entity and open an entity range at the current text position."""
        if state.current_block is None:
            # Inline entity element encountered at the top level -
            # start a new paragraph block to contain it
            add_paragraph_block(state, contentstate)

        block = state.current_block

        if state.leading_whitespace == FORCE_WHITESPACE:
            # any pending whitespace should be output before handling this tag,
            # and subsequent whitespace should be collapsed into it (= stripped)
            block.text += " "
            state.leading_whitespace = STRIP_WHITESPACE

        # convert attrs from a list of (name, value) tuples to a dict
        # for get_attribute_data to work with
        attr_dict = dict(attrs)
        entity_data = self.get_attribute_data(attr_dict)
        entity_key = contentstate.add_entity(
            Entity(self.entity_type, self.mutability, entity_data)
        )

        entity_range = EntityRange(entity_key)
        entity_range.offset = len(block.text)

        block.entity_ranges.append(entity_range)
        state.current_entity_ranges.append(entity_range)

    def get_attribute_data(self, attrs):
        """
        Given a dict of attributes found on the source element, return the data dict
        to be associated with the resulting entity
        """
        return {}

    def handle_endtag(self, name, state, contentstate):
        """Close the innermost open entity range, fixing its length."""
        entity_range = state.current_entity_ranges.pop()
        text_length = len(state.current_block.text)
        entity_range.length = text_length - entity_range.offset
class LinkElementHandler(InlineEntityElementHandler):
    """Base handler for link elements; link entities are created as MUTABLE."""

    mutability = "MUTABLE"
class ExternalLinkElementHandler(LinkElementHandler):
    """Link handler whose entity data is the element's href, stored as "url"."""

    def get_attribute_data(self, attrs):
        # raises KeyError if the element has no href attribute
        target = attrs["href"]
        return {"url": target}
class PageLinkElementHandler(LinkElementHandler):
    """Link handler for internal page links, identified by the element's id attribute."""

    def get_attribute_data(self, attrs):
        """
        Return the page's id, url and parent id. If no page has that id, the id
        is still returned (as an int) with url and parentId set to None.
        """
        page_id = attrs["id"]
        try:
            page = Page.objects.get(id=page_id).specific
        except Page.DoesNotExist:
            # retain ID so that it's still identified as a page link (albeit a broken one)
            return {"id": int(page_id), "url": None, "parentId": None}

        parent = page.get_parent()
        return {
            "id": page.id,
            "url": page.url,
            "parentId": parent.id if parent else None,
        }
class AtomicBlockEntityElementHandler:
    """
    Handler for elements like <img> that exist as a single immutable item at the block level.

    Subclasses supply create_entity(name, attrs, state, contentstate), which
    returns the entity to attach to the atomic block.
    """

    def handle_starttag(self, name, attrs, state, contentstate):
        """Append an atomic block for this element, splitting any block currently open."""
        resumed_block = None

        if state.current_block:
            # Placing an atomic block inside another block (e.g. a paragraph) is invalid in
            # contentstate; we will recover from this by forcibly closing the block along with all
            # of its inline styles / entities, and opening a new identical one afterwards.
            interrupted_block = state.current_block
            text_length = len(interrupted_block.text)

            # Construct a new block of the same type and depth as the currently open one; this will
            # become the new 'current block' after we've added the atomic block.
            resumed_block = Block(interrupted_block.type, depth=interrupted_block.depth)

            for open_style in state.current_inline_styles:
                # set this inline style to end at the current text position
                open_style.length = text_length - open_style.offset

                # start a new one of the same type, which will begin at the next block
                continued_style = InlineStyleRange(open_style.style)
                continued_style.offset = 0
                resumed_block.inline_style_ranges.append(continued_style)

            for open_entity in state.current_entity_ranges:
                # set this inline entity to end at the current text position
                open_entity.length = text_length - open_entity.offset

                # start a new entity range, pointing to the same entity, to begin at the next block
                continued_entity = EntityRange(open_entity.key)
                continued_entity.offset = 0
                resumed_block.entity_ranges.append(continued_entity)

            state.current_block = None

        if not state.has_preceding_nonatomic_block:
            # if this block is NOT preceded by a non-atomic block,
            # need to insert a spacer paragraph
            add_paragraph_block(state, contentstate)
            # immediately set this as not the current block, so that any subsequent invocations
            # of this handler don't think we're inside it
            state.current_block = None

        # convert attrs from list of (name, value) tuples to a dict
        entity = self.create_entity(name, dict(attrs), state, contentstate)
        entity_key = contentstate.add_entity(entity)

        # the atomic block is a single space, entirely covered by the entity
        atomic_block = Block("atomic", depth=state.list_depth)
        contentstate.blocks.append(atomic_block)
        atomic_block.text = " "

        covering_range = EntityRange(entity_key)
        covering_range.offset = 0
        covering_range.length = 1
        atomic_block.entity_ranges.append(covering_range)

        state.has_preceding_nonatomic_block = False

        if resumed_block:
            # take the replica that we made of the previous block and its inline styles / entities,
            # and make that the new current block. Now, when we encounter the closing tags for
            # those styles/entities further on in the document, they will close the range that
            # began here.
            contentstate.blocks.append(resumed_block)
            state.current_block = resumed_block
            state.current_inline_styles = resumed_block.inline_style_ranges.copy()
            state.current_entity_ranges = resumed_block.entity_ranges.copy()
            state.has_preceding_nonatomic_block = True
            state.leading_whitespace = STRIP_WHITESPACE

    def handle_endtag(self, name, state, contentstate):
        """Nothing to do: the atomic block is complete once its start tag is handled."""
        pass
class HorizontalRuleHandler(AtomicBlockEntityElementHandler):
    """Atomic block handler producing an immutable HORIZONTAL_RULE entity with no data."""

    def create_entity(self, name, attrs, state, contentstate):
        rule_entity = Entity("HORIZONTAL_RULE", "IMMUTABLE", {})
        return rule_entity
class LineBreakHandler:
    """Handler for line breaks: appends a newline to the current block's text."""

    def handle_starttag(self, name, attrs, state, contentstate):
        block = state.current_block
        # line breaks that exist at the top level (outside any block) are ignored
        if block is not None:
            block.text += "\n"

    def handle_endtag(self, name, state, contentstate):
        """Nothing to do when the element closes."""
        pass
class HtmlToContentStateHandler(HTMLParser):
    """
    HTML parser that builds a ContentState (available as `self.contentstate`)
    from the HTML fed to it, dispatching each element to the handler registered
    for it by the active features.
    """

    def __init__(self, features=()):
        self.paragraph_handler = BlockElementHandler("unstyled")

        # handlers that are always available, regardless of features
        builtin_handlers = {
            "p": self.paragraph_handler,
            "br": LineBreakHandler(),
        }
        self.element_handlers = HTMLRuleset(builtin_handlers)

        for feature_name in features:
            rule = feature_registry.get_converter_rule("contentstate", feature_name)
            if rule is None:
                continue
            self.element_handlers.add_rules(rule["from_database_format"])

        super().__init__(convert_charrefs=True)

    def reset(self):
        """Discard all parsing state and start a fresh ContentState."""
        self.state = HandlerState()
        self.contentstate = ContentState()

        # stack of (name, handler) tuples for the elements we're currently inside
        self.open_elements = []

        super().reset()

    def handle_starttag(self, name, attrs):
        """Look up the handler for this element, record it as open, and run the handler."""
        # convert attrs from list of (name, value) tuples to a dict for rule matching
        handler = self.element_handlers.match(name, dict(attrs))

        at_top_level = not self.open_elements
        if handler is None and at_top_level:
            # treat unrecognised top-level elements as paragraphs
            handler = self.paragraph_handler

        self.open_elements.append((name, handler))

        if handler:
            # handlers receive the original list of (name, value) tuples
            handler.handle_starttag(name, attrs, self.state, self.contentstate)

    def handle_endtag(self, name):
        """Close the innermost open element, which must be the one named `name`."""
        if not self.open_elements:
            return  # avoid a pop from an empty list if we have an extra end tag

        expected_name, handler = self.open_elements.pop()
        assert name == expected_name, "Unmatched tags: expected {}, got {}".format(
            expected_name,
            name,
        )

        if handler:
            handler.handle_endtag(name, self.state, self.contentstate)

    def handle_data(self, content):
        """Append text to the current block, collapsing and trimming whitespace."""
        state = self.state

        # normalise every whitespace sequence to a single space
        # This is in line with https://www.w3.org/TR/html4/struct/text.html#h-9.1
        content = WHITESPACE_RE.sub(" ", content)
        only_whitespace = content == " "

        if state.current_block is None:
            if only_whitespace:
                # ignore top-level whitespace
                return
            # create a new paragraph block for this content
            add_paragraph_block(state, self.contentstate)

        if only_whitespace:
            # if leading_whitespace = strip, this whitespace node is not significant
            # and should be skipped.
            # For other cases, _don't_ output the whitespace yet, but set leading_whitespace = force
            # so that a space is forced before the next text node or inline element. If no such node
            # appears (= we reach the end of the block), the whitespace can rightfully be dropped.
            if state.leading_whitespace != STRIP_WHITESPACE:
                state.leading_whitespace = FORCE_WHITESPACE
            return

        # strip or add leading whitespace according to the leading_whitespace flag
        if state.leading_whitespace == STRIP_WHITESPACE:
            content = content.lstrip()
        elif (
            state.leading_whitespace == FORCE_WHITESPACE
            and not content.startswith(" ")
        ):
            content = " " + content

        if content.endswith(" "):
            # don't output trailing whitespace yet, because we want to discard it if the end
            # of the block follows. Instead, we'll set leading_whitespace = force so that
            # any following text or inline element will be prefixed by a space
            content = content.rstrip()
            state.leading_whitespace = FORCE_WHITESPACE
        else:
            # no trailing whitespace here - any leading whitespace at the start of the
            # next text node should be respected
            state.leading_whitespace = KEEP_WHITESPACE

        state.current_block.text += content

    def close(self):
        """Finish parsing, making sure the content does not end on an atomic block."""
        # if content ends in an atomic block (or is empty), need to append a spacer paragraph
        needs_spacer = not self.state.has_preceding_nonatomic_block
        if needs_spacer:
            add_paragraph_block(self.state, self.contentstate)
        super().close()

View File

@@ -0,0 +1,109 @@
import json
import warnings
from django.core.serializers.json import DjangoJSONEncoder
from django.forms import Media, widgets
from django.utils.functional import cached_property
from wagtail.admin.rich_text.converters.contentstate import ContentstateConverter
from wagtail.admin.staticfiles import versioned_static
from wagtail.rich_text import features as feature_registry
from wagtail.telepath import register
from wagtail.widget_adapters import WidgetAdapter
class DraftailRichTextArea(widgets.HiddenInput):
    """
    Form widget for the Draftail rich text editor. The field value is exchanged
    with the editor as contentstate JSON and converted to and from the database
    representation by a ContentstateConverter.
    """

    template_name = "wagtailadmin/widgets/draftail_rich_text_area.html"
    is_hidden = False

    # this class's constructor accepts a 'features' kwarg
    accepts_features = True

    # Draftail has its own commenting
    show_add_comment_button = False

    def __init__(self, *args, **kwargs):
        # note: this constructor will receive an 'options' kwarg taken from the WAGTAILADMIN_RICH_TEXT_EDITORS setting,
        # but we don't currently recognise any options from there (other than 'features', which is passed here as a separate kwarg)
        kwargs.pop("options", None)

        requested_features = kwargs.pop("features", None)
        if requested_features is None:
            requested_features = feature_registry.get_default_features()
        self.features = requested_features

        # editor options dict, filled in by each recognised feature's plugin
        self.options = {}
        self.plugins = []

        for feature_name in self.features:
            plugin = feature_registry.get_editor_plugin("draftail", feature_name)
            if plugin is None:
                warnings.warn(
                    f"Draftail received an unknown feature '{feature_name}'.",
                    category=RuntimeWarning,
                )
                continue
            plugin.construct_options(self.options)
            self.plugins.append(plugin)

        self.converter = ContentstateConverter(self.features)

        # attributes used to initialise the editor; caller-supplied attrs take precedence
        widget_attrs = {
            "data-draftail-input": True,
            "data-controller": "w-init",
            "data-w-init-event-value": "w-draftail:init",
        }
        supplied_attrs = kwargs.get("attrs")
        if supplied_attrs:
            widget_attrs.update(supplied_attrs)
        kwargs["attrs"] = widget_attrs

        super().__init__(*args, **kwargs)

    def format_value(self, value):
        # Convert database rich text representation to the format required by
        # the input field
        formatted = super().format_value(value)
        if formatted is None:
            formatted = ""

        return self.converter.from_database_format(formatted)

    def get_context(self, name, value, attrs):
        """Add the JSON-encoded editor options to the widget's attributes."""
        context = super().get_context(name, value, attrs)
        encoded_options = json.dumps(self.options, cls=DjangoJSONEncoder)
        context["widget"]["attrs"]["data-w-init-detail-value"] = encoded_options
        return context

    def value_from_datadict(self, data, files, name):
        """Convert the submitted contentstate JSON to the database format (None stays None)."""
        submitted = super().value_from_datadict(data, files, name)
        if submitted is None:
            return None
        return self.converter.to_database_format(submitted)

    @cached_property
    def media(self):
        """Draftail's own assets combined with the media of every active plugin."""
        combined = Media(
            js=[
                versioned_static("wagtailadmin/js/draftail.js"),
            ],
            css={"all": [versioned_static("wagtailadmin/css/panels/draftail.css")]},
        )

        for plugin in self.plugins:
            combined += plugin.media

        return combined
class DraftailRichTextAreaAdapter(WidgetAdapter):
    """Telepath adapter that passes the widget's editor options to the JS constructor."""

    js_constructor = "wagtail.widgets.DraftailRichTextArea"

    def js_args(self, widget):
        editor_options = widget.options
        return [editor_options]


# make DraftailRichTextArea instances packable via the adapter above
register(DraftailRichTextAreaAdapter(), DraftailRichTextArea)

View File

@@ -0,0 +1,94 @@
from django.forms import Media
from wagtail.admin.staticfiles import versioned_static
# Feature objects: these are mapped to feature identifiers within the rich text
# feature registry (wagtail.rich_text.features). Each one implements
# a `construct_options` method which modifies an options dict as appropriate to
# enable that feature.
# Additionally, a Feature object defines a media property
# (https://docs.djangoproject.com/en/stable/topics/forms/media/) to specify css/js
# files to import when the feature is active.
class Feature:
    """
    Base class for editor features; holds the js / css files the feature needs.

    `js` is a list of file paths and `css` a dict mapping media type to a list
    of file paths.
    """

    def __init__(self, js=None, css=None):
        self.js = js if js else []
        self.css = css if css else {}

    @property
    def media(self):
        """A Media object for this feature, with each file passed through versioned_static."""
        scripts = [versioned_static(path) for path in self.js]
        stylesheets = {
            media_type: [versioned_static(path) for path in paths]
            for media_type, paths in self.css.items()
        }
        return Media(js=scripts, css=stylesheets)
class BooleanFeature(Feature):
    """
    A feature which is enabled by a boolean flag at the top level of
    the options dict
    """

    def __init__(self, option_name, **kwargs):
        super().__init__(**kwargs)
        # key in the options dict that switches this feature on
        self.option_name = option_name

    def construct_options(self, options):
        """Switch the feature on by setting its flag to True in `options`."""
        options.update({self.option_name: True})
class ListFeature(Feature):
    """
    Abstract class for features that are defined in a list within the options dict.
    Subclasses must define option_name
    """

    def __init__(self, data, **kwargs):
        super().__init__(**kwargs)
        # the item this feature adds to its list in the options dict
        self.data = data

    def construct_options(self, options):
        """Append this feature's data to the options list named by option_name, creating it if needed."""
        options.setdefault(self.option_name, []).append(self.data)
class EntityFeature(ListFeature):
    """List feature whose data is added to the "entityTypes" list of the options."""

    option_name = "entityTypes"
class BlockFeature(ListFeature):
    """List feature whose data is added to the "blockTypes" list of the options."""

    option_name = "blockTypes"
class InlineStyleFeature(ListFeature):
    """List feature whose data is added to the "inlineStyles" list of the options."""

    option_name = "inlineStyles"
class DecoratorFeature(ListFeature):
    """List feature whose data is added to the "decorators" list of the options."""

    option_name = "decorators"
class ControlFeature(ListFeature):
    """List feature whose data is added to the "controls" list of the options."""

    option_name = "controls"
class PluginFeature(ListFeature):
    """List feature whose data is added to the "plugins" list of the options."""

    option_name = "plugins"