mirror of
https://github.com/fergalmoran/ladybird.git
synced 2026-01-06 16:45:03 +00:00
LibWeb: Start implementing character token parsing
Now that we've gotten rid of the misguided character buffering in the tokenizer, it actually spits out character tokens that we have to deal with in the parser. This patch implements enough to bring us back to speed with simple.html
This commit is contained in:
@@ -24,11 +24,13 @@
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/Utf32View.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
#include <LibWeb/DOM/DocumentType.h>
|
||||
#include <LibWeb/DOM/ElementFactory.h>
|
||||
#include <LibWeb/DOM/HTMLFormElement.h>
|
||||
#include <LibWeb/DOM/HTMLHeadElement.h>
|
||||
#include <LibWeb/DOM/Text.h>
|
||||
#include <LibWeb/Parser/HTMLDocumentParser.h>
|
||||
#include <LibWeb/Parser/HTMLToken.h>
|
||||
|
||||
@@ -54,41 +56,45 @@ void HTMLDocumentParser::run()
|
||||
auto& token = optional_token.value();
|
||||
|
||||
dbg() << "[" << insertion_mode_name() << "] " << token.to_string();
|
||||
process_using_the_rules_for(m_insertion_mode, token);
|
||||
}
|
||||
}
|
||||
|
||||
switch (m_insertion_mode) {
|
||||
case InsertionMode::Initial:
|
||||
handle_initial(token);
|
||||
break;
|
||||
case InsertionMode::BeforeHTML:
|
||||
handle_before_html(token);
|
||||
break;
|
||||
case InsertionMode::BeforeHead:
|
||||
handle_before_head(token);
|
||||
break;
|
||||
case InsertionMode::InHead:
|
||||
handle_in_head(token);
|
||||
break;
|
||||
case InsertionMode::InHeadNoscript:
|
||||
handle_in_head_noscript(token);
|
||||
break;
|
||||
case InsertionMode::AfterHead:
|
||||
handle_after_head(token);
|
||||
break;
|
||||
case InsertionMode::InBody:
|
||||
handle_in_body(token);
|
||||
break;
|
||||
case InsertionMode::AfterBody:
|
||||
handle_after_body(token);
|
||||
break;
|
||||
case InsertionMode::AfterAfterBody:
|
||||
handle_after_after_body(token);
|
||||
break;
|
||||
case InsertionMode::Text:
|
||||
handle_text(token);
|
||||
break;
|
||||
default:
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token)
|
||||
{
|
||||
switch (mode) {
|
||||
case InsertionMode::Initial:
|
||||
handle_initial(token);
|
||||
break;
|
||||
case InsertionMode::BeforeHTML:
|
||||
handle_before_html(token);
|
||||
break;
|
||||
case InsertionMode::BeforeHead:
|
||||
handle_before_head(token);
|
||||
break;
|
||||
case InsertionMode::InHead:
|
||||
handle_in_head(token);
|
||||
break;
|
||||
case InsertionMode::InHeadNoscript:
|
||||
handle_in_head_noscript(token);
|
||||
break;
|
||||
case InsertionMode::AfterHead:
|
||||
handle_after_head(token);
|
||||
break;
|
||||
case InsertionMode::InBody:
|
||||
handle_in_body(token);
|
||||
break;
|
||||
case InsertionMode::AfterBody:
|
||||
handle_after_body(token);
|
||||
break;
|
||||
case InsertionMode::AfterAfterBody:
|
||||
handle_after_after_body(token);
|
||||
break;
|
||||
case InsertionMode::Text:
|
||||
handle_text(token);
|
||||
break;
|
||||
default:
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,6 +112,10 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
|
||||
|
||||
void HTMLDocumentParser::handle_before_html(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character() && token.is_parser_whitespace()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (token.is_start_tag() && token.tag_name() == "html") {
|
||||
auto element = create_element_for(token);
|
||||
document().append_child(element);
|
||||
@@ -151,6 +161,10 @@ RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
|
||||
|
||||
void HTMLDocumentParser::handle_before_head(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character() && token.is_parser_whitespace()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (token.is_start_tag() && token.tag_name() == "head") {
|
||||
auto element = insert_html_element(token);
|
||||
m_head_element = to<HTMLHeadElement>(element);
|
||||
@@ -183,9 +197,32 @@ void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&)
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::insert_character(u32 data)
|
||||
{
|
||||
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
|
||||
if (adjusted_insertion_location->is_document())
|
||||
return;
|
||||
if (adjusted_insertion_location->last_child() && adjusted_insertion_location->last_child()->is_text()) {
|
||||
auto& existing_text_node = to<Text>(*adjusted_insertion_location->last_child());
|
||||
StringBuilder builder;
|
||||
builder.append(existing_text_node.data());
|
||||
builder.append(Utf32View { &data, 1 });
|
||||
existing_text_node.set_data(builder.to_string());
|
||||
return;
|
||||
}
|
||||
StringBuilder builder;
|
||||
builder.append(Utf32View { &data, 1 });
|
||||
adjusted_insertion_location->append_child(adopt(*new Text(document(), builder.to_string())));
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::handle_after_head(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character()) {
|
||||
if (token.is_parser_whitespace()) {
|
||||
insert_character(token.codepoint());
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
@@ -249,6 +286,11 @@ void HTMLDocumentParser::generate_implied_end_tags()
|
||||
|
||||
void HTMLDocumentParser::handle_after_body(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character() && token.is_parser_whitespace()) {
|
||||
process_using_the_rules_for(InsertionMode::InBody, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (token.is_end_tag() && token.tag_name() == "html") {
|
||||
if (m_parsing_fragment) {
|
||||
ASSERT_NOT_REACHED();
|
||||
@@ -261,6 +303,11 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token)
|
||||
|
||||
void HTMLDocumentParser::handle_after_after_body(HTMLToken& token)
|
||||
{
|
||||
if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == "html")) {
|
||||
process_using_the_rules_for(InsertionMode::InBody, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (token.is_end_of_file()) {
|
||||
dbg() << "Stop parsing! :^)";
|
||||
return;
|
||||
@@ -268,8 +315,27 @@ void HTMLDocumentParser::handle_after_after_body(HTMLToken& token)
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
|
||||
{
|
||||
if (m_list_of_active_formatting_elements.is_empty())
|
||||
return;
|
||||
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::handle_in_body(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character()) {
|
||||
if (token.codepoint() == 0) {
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
if (token.is_parser_whitespace()) {
|
||||
reconstruct_the_active_formatting_elements();
|
||||
insert_character(token.codepoint());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (token.is_end_tag() && token.tag_name() == "body") {
|
||||
if (!m_stack_of_open_elements.has_in_scope("body")) {
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
Reference in New Issue
Block a user