From 7441aa34e4929e0093a26bbda1184f216ae8fbb9 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sun, 9 Feb 2025 12:47:50 +1300 Subject: [PATCH] LibWeb/HTML: Bail from HTML parsing when EOF hit on document.close This fixes a crash in the included test that regressed in 0adf261, and is hit by the following HTML: ```html ``` I am not 100% sure this is fully the correct fix and there are other cases which would not work properly. But it's definitely an improvement to make the confuisingly named 'insert_an_eof' function of the tokenizer actually do something. --- Libraries/LibWeb/HTML/Parser/HTMLParser.cpp | 3 + .../quirks.window.txt | 10 +++ .../quirks.window.html | 8 ++ .../opening-the-input-stream/quirks.window.js | 74 +++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 Tests/LibWeb/Text/expected/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.txt create mode 100644 Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.html create mode 100644 Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.js diff --git a/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 039baef404..e4e8305507 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -199,6 +199,9 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_string()); + if (token.is_end_of_file() && m_tokenizer.is_eof_inserted()) + break; + // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher // As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher: if (m_stack_of_open_elements.is_empty() diff --git a/Tests/LibWeb/Text/expected/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.txt b/Tests/LibWeb/Text/expected/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.txt new file mode 100644 index 0000000000..4cc431a0fa --- /dev/null +++ b/Tests/LibWeb/Text/expected/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.txt @@ -0,0 +1,10 @@ +Harness status: OK + +Found 4 tests + +3 Pass +1 Fail +Fail document.open() sets document to no-quirks mode (write no doctype) +Pass document.open() sets document to no-quirks mode (write old doctype) +Pass document.open() sets document to no-quirks mode (write new doctype) +Pass document.open() sets document to no-quirks mode, not limited-quirks mode \ No newline at end of file diff --git a/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.html b/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.html new file mode 100644 index 0000000000..d97a48718a --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.html @@ -0,0 +1,8 @@ + + + + + + +
+ diff --git a/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.js b/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.js new file mode 100644 index 0000000000..0ff0bb9944 --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.js @@ -0,0 +1,74 @@ +test(t => { + const frame = document.body.appendChild(document.createElement("iframe")); + t.add_cleanup(() => frame.contentDocument.close()); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); + frame.contentDocument.open(); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + frame.contentDocument.close(); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); +}, "document.open() sets document to no-quirks mode (write no doctype)"); + +test(t => { + const frame = document.body.appendChild(document.createElement("iframe")); + t.add_cleanup(() => frame.contentDocument.close()); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); + frame.contentDocument.open(); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + frame.contentDocument.write(""); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); + frame.contentDocument.close(); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); +}, "document.open() sets document to no-quirks mode (write old doctype)"); + +test(t => { + const frame = document.body.appendChild(document.createElement("iframe")); + t.add_cleanup(() => frame.contentDocument.close()); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); + frame.contentDocument.open(); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + frame.contentDocument.write(""); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + frame.contentDocument.close(); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); +}, "document.open() sets document to no-quirks mode (write new doctype)"); + +// This tests the document.open() call in fact sets the document to no-quirks +// mode, not limited-quirks mode. It is derived from +// quirks/blocks-ignore-line-height.html in WPT, as there is no direct way to +// distinguish between a no-quirks document and a limited-quirks document. It +// assumes that the user agent passes the linked test, which at the time of +// writing is all major web browsers. +test(t => { + const frame = document.body.appendChild(document.createElement("iframe")); + t.add_cleanup(() => frame.contentDocument.close()); + assert_equals(frame.contentDocument.compatMode, "BackCompat"); + frame.contentDocument.open(); + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + + // Create the DOM tree manually rather than going through document.write() to + // bypass the parser, which resets the document mode. + const html = frame.contentDocument.appendChild(frame.contentDocument.createElement("html")); + const body = html.appendChild(frame.contentDocument.createElement("body")); + assert_equals(frame.contentDocument.body, body); + body.innerHTML = ` + +
x
+ x +
x
+ `; + assert_equals(frame.contentDocument.compatMode, "CSS1Compat"); + + const idTest = frame.contentDocument.getElementById("test"); + const idRef = frame.contentDocument.getElementById("ref"); + const idSRef = frame.contentDocument.getElementById("s_ref"); + assert_equals(frame.contentWindow.getComputedStyle(idTest).height, + frame.contentWindow.getComputedStyle(idSRef).height); + assert_not_equals(frame.contentWindow.getComputedStyle(idTest).height, + frame.contentWindow.getComputedStyle(idRef).height); +}, "document.open() sets document to no-quirks mode, not limited-quirks mode");