AK+LibUnicode: Add a method to trim non-ASCII whitespace from a String

Required by WebDriver.
This commit is contained in:
Timothy Flynn
2024-11-03 17:13:56 -05:00
committed by Tim Flynn
parent 969ee0f3e0
commit cfcb29bdfd
5 changed files with 118 additions and 2 deletions

View File

@@ -1,11 +1,12 @@
/*
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/ICU.h>
#include <unicode/bytestream.h>
@@ -155,4 +156,36 @@ Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_
return {};
}
// Returns this string with code points carrying the Unicode White_Space property stripped from
// its edges. TrimMode::Left strips only the leading run, TrimMode::Right strips only the trailing
// run, and any other mode strips both.
//
// If nothing remains after trimming, an empty String is returned. If there was nothing to trim,
// a copy of this String is returned. Otherwise the result is produced by
// substring_from_byte_offset_with_shared_superstring().
ErrorOr<String> String::trim_whitespace(TrimMode mode) const
{
    auto const total_bytes = bytes_as_string_view().length();
    auto view = this->code_points();

    // Byte offset of the first non-whitespace code point (unset while none has been seen), and
    // the byte offset one past the end of the last non-whitespace code point.
    Optional<size_t> first_byte;
    size_t end_byte = 0;

    for (auto iterator = view.begin(); iterator != view.end(); ++iterator) {
        if (!Unicode::code_point_has_white_space_property(*iterator)) {
            auto const position = view.byte_offset_of(iterator);
            end_byte = position + iterator.underlying_code_point_length_in_bytes();

            if (!first_byte.has_value())
                first_byte = position;
        }
    }

    // A one-sided trim leaves the opposite edge of the string untouched.
    if (mode == TrimMode::Right)
        first_byte = 0;
    else if (mode == TrimMode::Left)
        end_byte = total_bytes;

    // Either no non-whitespace code point exists, or the kept range is empty.
    if (!first_byte.has_value())
        return String {};
    if (*first_byte == end_byte)
        return String {};

    // The kept range spans the whole string, so there is nothing to cut.
    if (*first_byte == 0uz && end_byte == total_bytes)
        return *this;

    return substring_from_byte_offset_with_shared_superstring(*first_byte, end_byte - *first_byte);
}
}