Initial commit

This commit is contained in:
2024-08-27 20:33:44 +02:00
commit 1f1832267d
14794 changed files with 1599592 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
from typing import Any, Dict, Optional
from draftjs_exporter.types import HTML, Element, Tag
# Attribute mapping (attribute name -> string value) accepted by the
# engines' create_tag methods.
Attr = Dict[str, str]
class DOMEngine:
    """
    Common base for every DOM engine implementation.

    All operations are static methods. The versions defined here only
    raise ``NotImplementedError``; concrete engines override them.
    """

    @staticmethod
    def create_tag(type_: Tag, attr: Optional[Attr] = None) -> Any:
        """
        Build a new tree node of type ``type_`` carrying the attributes
        in ``attr`` (if any), and return it.
        """
        raise NotImplementedError

    @staticmethod
    def parse_html(markup: HTML) -> Element:
        """
        Turn the given HTML markup into nodes.

        Note: only component implementations call this method; the
        exporter itself does not need it to operate.
        """
        raise NotImplementedError

    @staticmethod
    def append_child(elt: Element, child: Element) -> Any:
        """
        Add ``child`` to the children of ``elt``.
        """
        raise NotImplementedError

    @staticmethod
    def render(elt: Element) -> HTML:
        """
        Serialise ``elt`` to HTML.
        """
        raise NotImplementedError

    @staticmethod
    def render_debug(elt: Element) -> HTML:
        """
        Serialise ``elt`` to HTML.

        Note: only draftjs_exporter's own tests call this method; the
        exporter itself does not need it to operate.
        """
        raise NotImplementedError

View File

@@ -0,0 +1,45 @@
import re
from typing import Optional
from draftjs_exporter.engines.base import Attr, DOMEngine
from draftjs_exporter.types import HTML, Element, Tag
try:
    # A failed bs4 import is swallowed below rather than propagated.
    from bs4 import BeautifulSoup
    # Cache empty soup so we can create tags in isolation without the performance overhead.
    soup = BeautifulSoup("", "html5lib")
except ImportError:
    # `BeautifulSoup` and `soup` stay undefined in this case, so code that
    # uses them raises NameError when called.
    pass
# Matches attribute-less opening and closing fragment, body, html and head tags.
RENDER_RE = re.compile(r"</?(fragment|body|html|head)>")
# Matches attribute-less opening and closing body, html and head tags
# (fragment tags are not matched).
RENDER_DEBUG_RE = re.compile(r"</?(body|html|head)>")
class DOM_HTML5LIB(DOMEngine):
    """
    html5lib implementation of the DOM API.

    Nodes are BeautifulSoup objects; new tags are created from the
    module-level cached ``soup``.
    """

    @staticmethod
    def create_tag(type_: Tag, attr: Optional[Attr] = None) -> Element:
        """
        Creates a new tag of the given type with the given attributes.

        The attributes are passed as a single ``attrs`` dictionary instead
        of being unpacked into keyword arguments: unpacking fails with a
        TypeError for attribute names that match one of ``new_tag``'s own
        parameters (e.g. ``name``, as in ``<input name="...">``).
        """
        # NOTE(review): relies on new_tag accepting an `attrs` dict —
        # confirm against the minimum supported bs4 version.
        return soup.new_tag(type_, attrs=attr or {})

    @staticmethod
    def parse_html(markup: HTML) -> Element:
        """
        Parses the markup with BeautifulSoup's "html5lib" parser and
        returns the resulting soup object.
        """
        return BeautifulSoup(markup, "html5lib")

    @staticmethod
    def append_child(elt: Element, child: Element) -> None:
        """
        Appends the given child to elt, in place.
        """
        elt.append(child)

    @staticmethod
    def render(elt: Element) -> HTML:
        """
        Renders elt to a string, removing every substring matched by
        RENDER_RE (fragment, body, html and head tags).
        """
        return RENDER_RE.sub("", str(elt))

    @staticmethod
    def render_debug(elt: Element) -> HTML:
        """
        Renders elt to a string, removing every substring matched by
        RENDER_DEBUG_RE (body, html and head tags); fragment tags are kept.
        """
        return RENDER_DEBUG_RE.sub("", str(elt))

View File

@@ -0,0 +1,54 @@
import re
from typing import Optional
from draftjs_exporter.engines.base import Attr, DOMEngine
from draftjs_exporter.types import HTML, Tag
try:
    # A failed lxml import is swallowed below rather than propagated.
    from lxml import etree, html
except ImportError:
    # `etree` and `html` stay undefined in this case.
    pass
# Namespace prefix -> URI mapping; used when creating tags that carry an
# xlink:href attribute.
NSMAP = {"xlink": "http://www.w3.org/1999/xlink"}
# Matches attribute-less opening and closing fragment tags.
RENDER_RE = re.compile(r"</?fragment>")
class DOM_LXML(DOMEngine):
    """
    lxml implementation of the DOM API.

    The ``etree.Element`` annotations are written as strings so they are
    not evaluated when the class is defined; otherwise defining the class
    would raise NameError whenever the guarded lxml import has failed.
    """

    @staticmethod
    def create_tag(type_: Tag, attr: Optional[Attr] = None) -> "etree.Element":
        """
        Creates an lxml element of the given type with the given attributes.

        An ``xlink:href`` attribute is re-keyed to its namespace-qualified
        form (``{namespace-uri}href``) and the element is created with the
        xlink namespace map. The caller's ``attr`` dictionary is never
        modified.
        """
        nsmap = None
        if attr and "xlink:href" in attr:
            # Re-key on a copy so the dictionary owned by the caller keeps
            # its original "xlink:href" entry.
            attr = dict(attr)
            attr[f"{{{NSMAP['xlink']}}}href"] = attr.pop("xlink:href")
            nsmap = NSMAP
        return etree.Element(type_, attrib=attr, nsmap=nsmap)

    @staticmethod
    def parse_html(markup: HTML) -> "etree.Element":
        """
        Parses the markup with ``lxml.html.fromstring`` and returns the result.
        """
        return html.fromstring(markup)

    @staticmethod
    def append_child(elt: "etree.Element", child: "etree.Element") -> None:
        """
        Appends the given child to elt, in place.

        A child without a ``tag`` attribute is treated as text: it is
        wrapped in a ``fragment`` element, whose tags ``render`` strips
        from the output.
        """
        if hasattr(child, "tag"):
            elt.append(child)
        else:
            c = etree.Element("fragment")
            c.text = child
            elt.append(c)

    @staticmethod
    def render(elt: "etree.Element") -> HTML:
        """
        Serialises elt as HTML and removes the fragment tags matched by
        RENDER_RE.
        """
        return RENDER_RE.sub(
            "", etree.tostring(elt, method="html", encoding="unicode")
        )

    @staticmethod
    def render_debug(elt: "etree.Element") -> HTML:
        """
        Serialises elt as HTML, leaving fragment tags in place.
        """
        return etree.tostring(elt, method="html", encoding="unicode")

View File

@@ -0,0 +1,122 @@
from html import escape
from typing import List, Optional, Sequence, Union
from draftjs_exporter.engines.base import Attr, DOMEngine
from draftjs_exporter.types import HTML, Tag
# Tag names the string engine renders in the self-closing `<tag/>` form,
# without children or a closing tag.
# http://w3c.github.io/html/single-page.html#void-elements
# https://github.com/html5lib/html5lib-python/blob/0cae52b2073e3f2220db93a7650901f2200f2a13/html5lib/constants.py#L560
VOID_ELEMENTS = (
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
)
class Elt:
    """
    A DOM element that the string engine manipulates.
    This class doesn't do much, but the exporter relies on
    comparing elements by reference so it's useful nonetheless.

    No ``__eq__`` is defined, so two elements only compare equal when they
    are the same object.
    """

    __slots__ = ("type", "attr", "children", "markup")

    def __init__(
        self, type_: Tag, attr: Optional[Attr], markup: Optional[HTML] = None
    ):
        """
        Stores the tag type, its attributes (or None) and, for elements
        built from ready-made HTML, the markup to emit as-is.
        """
        self.type = type_
        self.attr = attr
        # Children are either nested elements or plain text.
        self.children: List[Union[HTML, "Elt"]] = []
        self.markup = markup

    @classmethod
    def from_html(cls, markup: HTML) -> "Elt":
        """
        Alternate constructor: wraps existing markup in an element of type
        "escaped_html", with no attributes.
        """
        return cls("escaped_html", None, markup)
class DOMString(DOMEngine):
    """
    String concatenation implementation of the DOM API.
    """

    @staticmethod
    def create_tag(type_: Tag, attr: Optional[Attr] = None) -> Elt:
        """
        Creates a new element of the given type with the given attributes.
        """
        return Elt(type_, attr)

    @staticmethod
    def parse_html(markup: HTML) -> Elt:
        """
        Allows inserting arbitrary HTML into the exporter output.
        Treats the HTML as if it had been escaped and was safe already.
        """
        return Elt.from_html(markup)

    @staticmethod
    def append_child(elt: Elt, child: Elt) -> None:
        """
        Appends child to elt's children, unless child is an element that
        elt already contains.
        """
        # This check is necessary because the current wrapper_state implementation
        # has an issue where it inserts elements multiple times.
        # This must be skipped for text, which can be duplicated.
        # The type test comes first so that text, which is always appended,
        # does not trigger a scan of the existing children.
        is_existing_ref = isinstance(child, Elt) and child in elt.children
        if not is_existing_ref:
            elt.children.append(child)

    @staticmethod
    def render_attrs(attr: Attr) -> str:
        """
        Renders attributes as ` key="value"` pairs, in the mapping's own
        order, with the values HTML-escaped.
        """
        attrs = [f' {k}="{escape(v)}"' for k, v in attr.items()]
        return "".join(attrs)

    @staticmethod
    def render_children(children: Sequence[Union[HTML, Elt]]) -> HTML:
        """
        Renders and concatenates children: elements go through `render`,
        text is HTML-escaped with quotes left untouched.
        """
        return "".join(
            [
                DOMString.render(c)
                if isinstance(c, Elt)
                else escape(c, quote=False)
                for c in children
            ]
        )

    @staticmethod
    def _render_tag(elt: Elt, children: HTML) -> HTML:
        """
        Serialises a single element around its already-rendered children.
        Shared by `render` and `render_debug`.
        """
        type_ = elt.type
        if type_ == "escaped_html":
            # Ready-made markup is emitted untouched.
            return elt.markup  # type: ignore
        attr = DOMString.render_attrs(elt.attr) if elt.attr else ""
        if type_ in VOID_ELEMENTS:
            # Void elements have neither children nor a closing tag.
            return f"<{type_}{attr}/>"
        return f"<{type_}{attr}>{children}</{type_}>"

    @staticmethod
    def render(elt: Elt) -> HTML:
        """
        Renders a given element to HTML. Fragments are transparent: only
        their children are rendered.
        """
        children = (
            DOMString.render_children(elt.children) if elt.children else ""
        )
        if elt.type == "fragment":
            return children
        return DOMString._render_tag(elt, children)

    @staticmethod
    def render_debug(elt: Elt) -> HTML:
        """
        Renders a given element to HTML, keeping the tags of elt itself
        when it is a fragment. Children are still rendered with `render`.
        """
        children = (
            DOMString.render_children(elt.children) if elt.children else ""
        )
        return DOMString._render_tag(elt, children)

View File

@@ -0,0 +1,69 @@
from html import escape
from typing import Sequence, Union
from draftjs_exporter.engines.base import Attr
from draftjs_exporter.engines.string import DOMString, Elt, VOID_ELEMENTS
from draftjs_exporter.types import HTML
class DOMStringCompat(DOMString):
    """
    DOMString variant that keeps as much backwards-compatibility as
    possible: attributes are emitted in sorted order and quotes in text
    are escaped.
    """

    @staticmethod
    def render_attrs(attr: Attr) -> str:
        """
        Renders attributes as ` key="value"` pairs with escaped values,
        sorted alphabetically.
        """
        # Compat: reverts "Remove HTML attributes alphabetical sorting of default string engine ([#129](https://github.com/springload/draftjs_exporter/pull/129))"
        return "".join(
            sorted(
                f' {name}="{escape(value)}"' for name, value in attr.items()
            )
        )

    @staticmethod
    def render_children(children: Sequence[Union[HTML, Elt]]) -> HTML:
        """
        Renders and concatenates children: elements go through this class'
        `render`, text is HTML-escaped including quotes.
        """
        parts = []
        for child in children:
            if isinstance(child, Elt):
                parts.append(DOMStringCompat.render(child))
            else:
                # Compat: reverts "Disable single and double quotes escaping outside of attributes for string engine ([#129](https://github.com/springload/draftjs_exporter/pull/129))"
                parts.append(escape(child, quote=True))
        return "".join(parts)

    @staticmethod
    def render(elt: Elt) -> HTML:
        """
        Renders a given element to HTML. For a fragment, only its
        children are rendered.
        """
        tag = elt.type

        rendered_attrs = ""
        if elt.attr:
            rendered_attrs = DOMStringCompat.render_attrs(elt.attr)

        inner = ""
        if elt.children:
            inner = DOMStringCompat.render_children(elt.children)

        if tag == "fragment":
            return inner
        if tag in VOID_ELEMENTS:
            return f"<{tag}{rendered_attrs}/>"
        if tag == "escaped_html":
            return elt.markup  # type: ignore
        return f"<{tag}{rendered_attrs}>{inner}</{tag}>"

    @staticmethod
    def render_debug(elt: Elt) -> HTML:
        """
        Renders a given element to HTML without the fragment special case,
        so the tags of a fragment passed in as elt are kept.
        """
        tag = elt.type

        rendered_attrs = ""
        if elt.attr:
            rendered_attrs = DOMStringCompat.render_attrs(elt.attr)

        inner = ""
        if elt.children:
            inner = DOMStringCompat.render_children(elt.children)

        if tag in VOID_ELEMENTS:
            return f"<{tag}{rendered_attrs}/>"
        if tag == "escaped_html":
            return elt.markup  # type: ignore
        return f"<{tag}{rendered_attrs}>{inner}</{tag}>"