mirror of
https://github.com/fergalmoran/ladybird.git
synced 2026-01-04 15:45:25 +00:00
LibXML: Read code points when parsing names
This commit is contained in:
@@ -41,3 +41,9 @@ TEST_CASE(predefined_character_reference)
|
|||||||
auto const& content = node.children[0]->content.get<XML::Node::Text>();
|
auto const& content = node.children[0]->content.get<XML::Node::Text>();
|
||||||
EXPECT_EQ(content.builder.string_view(), "Well hello &, <, >, ', and \"!");
|
EXPECT_EQ(content.builder.string_view(), "Well hello &, <, >, ', and \"!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test: an attribute whose name consists of non-ASCII characters
// ("中文") must be accepted by the parser, which requires names to be read as
// code points rather than as individual bytes.
TEST_CASE(unicode_name)
|
||||||
|
{
|
||||||
|
// Minimal document: a single <div> element carrying one empty attribute with a non-ASCII name.
XML::Parser parser("<div 中文=\"\"></div>"sv);
|
||||||
|
// Only successful parsing is checked here; the resulting document is not inspected.
TRY_OR_FAIL(parser.parse());
|
||||||
|
}
|
||||||
|
|||||||
@@ -545,16 +545,30 @@ ErrorOr<Name, ParseError> Parser::parse_name()
|
|||||||
auto rule = enter_rule();
|
auto rule = enter_rule();
|
||||||
|
|
||||||
// Name ::= NameStartChar (NameChar)*
|
// Name ::= NameStartChar (NameChar)*
|
||||||
auto start = TRY(expect(s_name_start_characters, "a NameStartChar"sv));
|
|
||||||
|
// FIXME: This is a hacky workaround to read code points instead of bytes.
|
||||||
|
// Replace this once we have a unicode-aware lexer.
|
||||||
|
auto start = m_lexer.tell();
|
||||||
|
StringView remaining = m_lexer.input().substring_view(start);
|
||||||
|
Utf8View view { remaining };
|
||||||
|
auto code_points = view.begin();
|
||||||
|
if (code_points.done() || !s_name_start_characters.contains(*code_points)) {
|
||||||
|
if (m_options.treat_errors_as_fatal)
|
||||||
|
return parse_error(m_lexer.current_position(), Expectation { "a NameStartChar"sv });
|
||||||
|
}
|
||||||
|
|
||||||
|
m_lexer.ignore(code_points.underlying_code_point_length_in_bytes());
|
||||||
|
++code_points;
|
||||||
|
|
||||||
auto accept = accept_rule();
|
auto accept = accept_rule();
|
||||||
|
|
||||||
auto rest = m_lexer.consume_while(s_name_characters);
|
while (!code_points.done() && s_name_characters.contains(*code_points)) {
|
||||||
StringBuilder builder;
|
m_lexer.ignore(code_points.underlying_code_point_length_in_bytes());
|
||||||
builder.append(start);
|
++code_points;
|
||||||
builder.append(rest);
|
}
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return builder.to_byte_string();
|
return remaining.substring_view(0, m_lexer.tell() - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.8.28. doctypedecl, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-doctypedecl
|
// 2.8.28. doctypedecl, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-doctypedecl
|
||||||
|
|||||||
Reference in New Issue
Block a user