mirror of
https://github.com/fergalmoran/ladybird.git
synced 2025-12-22 09:19:03 +00:00
AK: Replace UTF-16 validation and length computation with simdutf
This commit is contained in:
committed by
Andreas Kling
parent
a2bcb2ab8d
commit
32ffe9bbfc
@@ -1,9 +1,11 @@
|
||||
/*
|
||||
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#define AK_DONT_REPLACE_STD
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Concepts.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
@@ -12,6 +14,8 @@
|
||||
#include <AK/Utf32View.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
#include <simdutf.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
static constexpr u16 high_surrogate_min = 0xd800;
|
||||
@@ -233,27 +237,27 @@ bool Utf16View::starts_with(Utf16View const& needle) const
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Utf16View::validate() const
|
||||
{
|
||||
return simdutf::validate_utf16(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units) const
|
||||
{
|
||||
valid_code_units = 0;
|
||||
auto result = simdutf::validate_utf16_with_errors(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
valid_code_units = result.count;
|
||||
|
||||
for (auto const* ptr = begin_ptr(); ptr < end_ptr(); ++ptr) {
|
||||
if (is_high_surrogate(*ptr)) {
|
||||
if ((++ptr >= end_ptr()) || !is_low_surrogate(*ptr))
|
||||
return false;
|
||||
++valid_code_units;
|
||||
} else if (is_low_surrogate(*ptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++valid_code_units;
|
||||
}
|
||||
|
||||
return true;
|
||||
return result.error == simdutf::SUCCESS;
|
||||
}
|
||||
|
||||
size_t Utf16View::calculate_length_in_code_points() const
|
||||
{
|
||||
// FIXME: simdutf's code point length method assumes valid UTF-16, whereas Utf16View uses U+FFFD as a replacement
|
||||
// for invalid code points. If we change Utf16View to only accept valid encodings as an invariant, we can
|
||||
// remove this branch.
|
||||
if (validate()) [[likely]]
|
||||
return simdutf::count_utf16(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
|
||||
size_t code_points = 0;
|
||||
for ([[maybe_unused]] auto code_point : *this)
|
||||
++code_points;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
@@ -113,12 +113,8 @@ public:
|
||||
|
||||
bool starts_with(Utf16View const&) const;
|
||||
|
||||
bool validate() const;
|
||||
bool validate(size_t& valid_code_units) const;
|
||||
bool validate() const
|
||||
{
|
||||
size_t valid_code_units;
|
||||
return validate(valid_code_units);
|
||||
}
|
||||
|
||||
bool equals_ignoring_case(Utf16View const&) const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user