import json from unittest.mock import patch from django.test import TestCase from draftjs_exporter.dom import DOM from draftjs_exporter.html import HTML as HTMLExporter from wagtail.admin.rich_text.converters.contentstate import ( ContentstateConverter, persist_key_for_block, ) from wagtail.embeds.models import Embed def content_state_equal(v1, v2, match_keys=False): "Test whether two contentState structures are equal, ignoring 'key' properties if match_keys=False" if type(v1) != type(v2): return False if isinstance(v1, dict): if set(v1.keys()) != set(v2.keys()): return False return all( (k == "key" and not match_keys) or content_state_equal(v, v2[k], match_keys=match_keys) for k, v in v1.items() ) elif isinstance(v1, list): if len(v1) != len(v2): return False return all( content_state_equal(a, b, match_keys=match_keys) for a, b in zip(v1, v2) ) else: return v1 == v2 class TestHtmlToContentState(TestCase): fixtures = ["test.json"] def assertContentStateEqual(self, v1, v2, match_keys=False): "Assert that two contentState structures are equal, ignoring 'key' properties if match_keys is False" self.assertTrue( content_state_equal(v1, v2, match_keys=match_keys), "%s does not match %s" % (json.dumps(v1, indent=4), json.dumps(v2, indent=4)), ) def test_paragraphs(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """
Hello world!
Goodbye world!
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "Hello world!", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "Goodbye world!", "depth": 0, "type": "unstyled", "key": "00001", "entityRanges": [], }, ], }, match_keys=True, ) def test_unknown_block_becomes_paragraph(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """Goodbye world!
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "Hello world!", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "I said hello world!", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "Goodbye world!", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_bare_text_becomes_paragraph(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """ beforeparagraph
betweenparagraph
after """ ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "before", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "paragraph", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "between", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "paragraph", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "after", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_ignore_unrecognised_tags_in_blocks(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """Hello
You do not talk about Fight Club.
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [ {"offset": 4, "length": 11, "style": "BOLD"}, {"offset": 7, "length": 3, "style": "ITALIC"}, ], "text": "You do not talk about Fight Club.", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_inline_styles_at_top_level(self): converter = ContentstateConverter(features=["bold", "italic"]) result = json.loads( converter.from_database_format( """ You do not talk about Fight Club. """ ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [ {"offset": 4, "length": 11, "style": "BOLD"}, {"offset": 7, "length": 3, "style": "ITALIC"}, ], "text": "You do not talk about Fight Club.", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_inline_styles_at_start_of_bare_block(self): converter = ContentstateConverter(features=["bold", "italic"]) result = json.loads( converter.from_database_format( """Seriously, stop talking about Fight Club already.""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [ {"offset": 0, "length": 9, "style": "BOLD"}, {"offset": 30, "length": 10, "style": "ITALIC"}, ], "text": "Seriously, stop talking about Fight Club already.", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_inline_styles_depend_on_features(self): converter = ContentstateConverter(features=["italic", "just-made-it-up"]) result = json.loads( converter.from_database_format( """You do not talk about Fight Club.
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [ {"offset": 7, "length": 3, "style": "ITALIC"} ], "text": "You do not talk about Fight Club.", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_ordered_list(self): converter = ContentstateConverter(features=["h1", "ol", "bold", "italic"]) result = json.loads( converter.from_database_format( """an external link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": {"url": "http://wagtail.org"}, } }, "blocks": [ { "inlineStyleRanges": [], "text": "an external link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 3, "length": 8, "key": 0}], }, ], }, ) def test_link_in_bare_text(self): converter = ContentstateConverter(features=["link"]) result = json.loads( converter.from_database_format( """an external link""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": {"url": "http://wagtail.org"}, } }, "blocks": [ { "inlineStyleRanges": [], "text": "an external link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 3, "length": 8, "key": 0}], }, ], }, ) def test_link_at_start_of_bare_text(self): converter = ContentstateConverter(features=["link"]) result = json.loads( converter.from_database_format( """an external link and another""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": {"url": "http://wagtail.org"}, }, "1": { "mutability": "MUTABLE", "type": "LINK", "data": {"url": "http://torchbox.com"}, }, }, "blocks": [ { "inlineStyleRanges": [], "text": "an external link and another", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [ {"offset": 0, "length": 16, "key": 0}, {"offset": 21, "length": 7, "key": 1}, ], }, ], }, ) def test_page_link(self): converter = ContentstateConverter(features=["link"]) result = json.loads( converter.from_database_format( """an internal link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": {"id": 3, "url": "/events/", "parentId": 2}, } }, "blocks": [ { "inlineStyleRanges": [], "text": "an internal link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 3, "length": 8, "key": 0}], }, ], }, ) def test_broken_page_link(self): converter = ContentstateConverter(features=["link"]) result = json.loads( converter.from_database_format( """an internal link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": { "id": 9999, "url": None, "parentId": None, }, } }, "blocks": [ { "inlineStyleRanges": [], "text": "an internal link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 3, "length": 8, "key": 0}], }, ], }, ) def test_link_to_root_page(self): converter = ContentstateConverter(features=["link"]) result = json.loads( converter.from_database_format( """an internal link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "LINK", "data": {"id": 1, "url": None, "parentId": None}, } }, "blocks": [ { "inlineStyleRanges": [], "text": "an internal link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 3, "length": 8, "key": 0}], }, ], }, ) def test_document_link(self): converter = ContentstateConverter(features=["document-link"]) result = json.loads( converter.from_database_format( """a document link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "DOCUMENT", "data": { "id": 1, "url": "/documents/1/test.pdf", "filename": "test.pdf", }, } }, "blocks": [ { "inlineStyleRanges": [], "text": "a document link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 2, "length": 8, "key": 0}], }, ], }, ) def test_broken_document_link(self): converter = ContentstateConverter(features=["document-link"]) result = json.loads( converter.from_database_format( """a document link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": { "mutability": "MUTABLE", "type": "DOCUMENT", "data": {"id": 9999}, } }, "blocks": [ { "inlineStyleRanges": [], "text": "a document link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 2, "length": 8, "key": 0}], }, ], }, ) def test_document_link_with_missing_id(self): converter = ContentstateConverter(features=["document-link"]) result = json.loads( converter.from_database_format( """a document link
""" ) ) self.assertContentStateEqual( result, { "entityMap": { "0": {"mutability": "MUTABLE", "type": "DOCUMENT", "data": {}} }, "blocks": [ { "inlineStyleRanges": [], "text": "a document link", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [{"offset": 2, "length": 8, "key": 0}], }, ], }, ) def test_image_embed(self): converter = ContentstateConverter(features=["image"]) result = json.loads( converter.from_database_format( """before
after
""" ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], "entityMap": { "0": { "data": { "format": "left", "alt": "an image", "id": "1", "src": "/media/not-found", }, "mutability": "IMMUTABLE", "type": "IMAGE", } }, }, ) def test_add_spacer_paragraph_between_image_embeds(self): converter = ContentstateConverter(features=["image"]) result = json.loads( converter.from_database_format( """ """ ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 1, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, ], "entityMap": { "0": { "data": { "format": "left", "alt": "an image", "id": "1", "src": "/media/not-found", }, "mutability": "IMMUTABLE", "type": "IMAGE", }, "1": { "data": { "format": "left", "alt": "an image", "id": "1", "src": "/media/not-found", }, "mutability": "IMMUTABLE", "type": "IMAGE", }, }, }, ) def test_image_after_list(self): """ There should be no spacer paragraph inserted between a list and an image """ converter = ContentstateConverter(features=["ul", "image"]) result = json.loads( converter.from_database_format( """before
after
""" ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], "entityMap": { "0": { "data": { "thumbnail": "http://test/thumbnail.url", "embedType": "video", "providerName": "YouTube", "title": "what are birds", "authorName": "look around you", "url": "https://www.youtube.com/watch?v=Kh0Y2hVe_bw", }, "mutability": "IMMUTABLE", "type": "EMBED", } }, }, ) @patch("wagtail.embeds.embeds.get_embed") def test_add_spacer_paras_between_media_embeds(self, get_embed): get_embed.return_value = Embed( url="https://www.youtube.com/watch?v=Kh0Y2hVe_bw", max_width=None, type="video", html="test html", title="what are birds", author_name="look around you", provider_name="YouTube", thumbnail_url="http://test/thumbnail.url", width=1000, height=1000, ) converter = ContentstateConverter(features=["embed"]) result = json.loads( converter.from_database_format( """ """ ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 1, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "", "type": "unstyled", }, ], "entityMap": { "0": { "data": { "thumbnail": "http://test/thumbnail.url", "embedType": "video", "providerName": "YouTube", "title": "what are birds", "authorName": "look around you", "url": "https://www.youtube.com/watch?v=Kh0Y2hVe_bw", }, "mutability": "IMMUTABLE", "type": "EMBED", }, "1": { "data": { "thumbnail": "http://test/thumbnail.url", "embedType": "video", "providerName": "YouTube", "title": "what are birds", "authorName": "look around you", "url": "https://www.youtube.com/watch?v=Kh0Y2hVe_bw", }, "mutability": "IMMUTABLE", "type": "EMBED", }, }, }, ) def test_hr(self): converter = ContentstateConverter(features=["hr"]) result = json.loads( converter.from_database_format( """before
after
""" ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], "entityMap": { "0": { "data": {}, "mutability": "IMMUTABLE", "type": "HORIZONTAL_RULE", } }, }, ) def test_add_spacer_paragraph_between_hrs(self): converter = ContentstateConverter(features=["hr"]) result = json.loads( converter.from_database_format( """before
before
after
before
after
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], }, ) def test_block_element_in_empty_paragraph(self): converter = ContentstateConverter(features=["hr"]) result = json.loads( converter.from_database_format( """Arthur "two sheds" Jackson <the third> & his wife
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": 'Arthur "two sheds" JacksonMultiple whitespaces: should be reduced
Multiple non-breaking whitespace characters: \xa0\xa0\xa0 should be preserved
""" ) ) self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "Multiple whitespaces: should be reduced", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, { "inlineStyleRanges": [], "text": "Multiple non-breaking whitespace characters: \xa0\xa0\xa0 should be preserved", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_extra_end_tag_before(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """Before
""" ) ) # The leading tag should be ignored instead of blowing up with a # pop from empty list error self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "Before", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_extra_end_tag_after(self): converter = ContentstateConverter(features=[]) result = json.loads( converter.from_database_format( """After
""" ) ) # The tailing tag should be ignored instead of blowing up with a # pop from empty list error self.assertContentStateEqual( result, { "entityMap": {}, "blocks": [ { "inlineStyleRanges": [], "text": "After", "depth": 0, "type": "unstyled", "key": "00000", "entityRanges": [], }, ], }, ) def test_p_with_class(self): # Test support for custom conversion rules which require correct treatment of # CSS precedence in HTMLRuleset. Here,should match the # 'p[class="intro"]' rule rather than 'p' and thus become an 'intro-paragraph' block converter = ContentstateConverter(features=["intro"]) result = json.loads( converter.from_database_format( """
before
after
""" ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "intro-paragraph", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], "entityMap": {}, }, ) def test_image_inside_paragraph(self): # In Draftail's data model, images are block-level elements and therefore # split up preceding / following text into their own paragraphs converter = ContentstateConverter(features=["image"]) result = json.loads( converter.from_database_format( """before after
""" ) ) self.assertContentStateEqual( result, { "blocks": [ { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "before", "type": "unstyled", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [{"key": 0, "offset": 0, "length": 1}], "depth": 0, "text": " ", "type": "atomic", }, { "key": "00000", "inlineStyleRanges": [], "entityRanges": [], "depth": 0, "text": "after", "type": "unstyled", }, ], "entityMap": { "0": { "data": { "format": "left", "alt": "an image", "id": "1", "src": "/media/not-found", }, "mutability": "IMMUTABLE", "type": "IMAGE", } }, }, ) def test_image_inside_style(self): # https://github.com/wagtail/wagtail/issues/4602 - ensure that an