mirror of
https://github.com/fergalmoran/ladybird.git
synced 2026-01-06 16:45:03 +00:00
LibRegex+Everywhere: Make LibRegex more unicode-aware
This commit makes LibRegex (mostly) capable of operating on any of the three main string views:
- StringView for raw strings
- Utf8View for UTF-8 encoded strings
- Utf32View for raw Unicode strings

As a result, regexps with Unicode strings should be able to properly handle UTF-8 and not stop in the middle of a code point.
A future commit will update LibJS to use the correct type of string depending on the flags.
This commit is contained in:
committed by
Ali Mohammad Pur
parent
e5af15a6e9
commit
f364fcec5d
@@ -249,7 +249,7 @@ TEST_CASE(char_utf8)
|
||||
Regex<PosixExtended> re("😀");
|
||||
RegexResult result;
|
||||
|
||||
EXPECT_EQ((result = match("Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界", re, PosixFlags::Global)).success, true);
|
||||
EXPECT_EQ((result = match(Utf8View { "Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界" }, re, PosixFlags::Global)).success, true);
|
||||
EXPECT_EQ(result.count, 2u);
|
||||
}
|
||||
|
||||
@@ -312,7 +312,6 @@ TEST_CASE(match_all_character_class)
|
||||
EXPECT_EQ(result.matches.at(0).view, "W");
|
||||
EXPECT_EQ(result.matches.at(1).view, "i");
|
||||
EXPECT_EQ(result.matches.at(2).view, "n");
|
||||
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
|
||||
}
|
||||
|
||||
TEST_CASE(match_character_class_with_assertion)
|
||||
|
||||
Reference in New Issue
Block a user