mirror of
https://github.com/fergalmoran/ladybird.git
synced 2026-02-13 11:16:53 +00:00
Note: We keep locale parsing and syntactic validation as-is. ECMA-402 places additional restrictions on locales beyond what the Unicode spec requires. ICU doesn't provide methods that let us easily check those restrictions, whereas LibLocale does. Other browsers also implement their own validators here. This introduces a locale cache to reuse parsed locale data and various related structures (not doing so has a non-negligible performance impact on Intl tests). The existing APIs for canonicalization and display names are pretty intertwined, so they must both be adapted at once here. The results of canonicalization are slightly different in some edge cases, but the changed results are now aligned with Chrome and Safari.
718 lines
22 KiB
C++
718 lines
22 KiB
C++
/*
|
|
* Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#define AK_DONT_REPLACE_STD
|
|
|
|
#include <AK/AllOf.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/QuickSort.h>
|
|
#include <AK/ScopeGuard.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <LibLocale/DateTimeFormat.h>
|
|
#include <LibLocale/ICU.h>
|
|
#include <LibLocale/Locale.h>
|
|
#include <LibUnicode/CharacterTypes.h>
|
|
|
|
#include <unicode/localebuilder.h>
|
|
#include <unicode/locid.h>
|
|
#include <unicode/ucurr.h>
|
|
|
|
namespace Locale {
|
|
|
|
// Returns true when `key` is exactly two characters long: an ASCII alphanumeric
// character followed by an ASCII letter.
static bool is_key(StringView key)
{
    // key = alphanum alpha
    return (key.length() == 2)
        && is_ascii_alphanumeric(key[0])
        && is_ascii_alpha(key[1]);
}
|
|
|
|
// Returns true when `type` is a single 3-8 character segment made up solely of
// ASCII alphanumeric characters.
static bool is_single_type(StringView type)
{
    // type = alphanum{3,8} (sep alphanum{3,8})*
    // Note: Consecutive types are not handled here, that is left to the caller.
    auto const length = type.length();
    bool const has_valid_length = (length >= 3) && (length <= 8);

    return has_valid_length && all_of(type, is_ascii_alphanumeric);
}
|
|
|
|
// Returns true when `type` is 3-8 characters long and consists only of ASCII
// alphanumeric characters.
static bool is_attribute(StringView type)
{
    // attribute = alphanum{3,8}
    switch (type.length()) {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
        return all_of(type, is_ascii_alphanumeric);
    default:
        return false;
    }
}
|
|
|
|
// Returns true when `key` is exactly two characters long: an ASCII letter
// followed by an ASCII digit.
static bool is_transformed_key(StringView key)
{
    // tkey = alpha digit
    if (key.length() == 2) {
        auto const first = key[0];
        auto const second = key[1];
        return is_ascii_alpha(first) && is_ascii_digit(second);
    }

    return false;
}
|
|
|
|
// Returns true when `value` is a single 3-8 character segment made up solely of
// ASCII alphanumeric characters.
static bool is_single_transformed_value(StringView value)
{
    // tvalue = (sep alphanum{3,8})+
    // Note: Consecutive values are not handled here, that is left to the caller.
    if (value.length() < 3)
        return false;
    if (value.length() > 8)
        return false;

    return all_of(value, is_ascii_alphanumeric);
}
|
|
|
|
// Consumes the next segment (a run of characters up to the next '-' or '_') from the lexer.
//
// When `with_separator` is true, the segment must be preceded by a separator, which is consumed
// as well. If no separator is present, or the segment is empty, an empty Optional is returned
// and the lexer is left where it was before the call.
static Optional<StringView> consume_next_segment(GenericLexer& lexer, bool with_separator = true)
{
    constexpr auto is_separator = is_any_of("-_"sv);

    if (with_separator && !lexer.next_is(is_separator))
        return {};
    if (with_separator)
        lexer.ignore();

    auto segment = lexer.consume_until(is_separator);
    if (!segment.is_empty())
        return segment;

    // Nothing followed the separator; put the separator back (if we consumed one).
    if (with_separator)
        lexer.retreat(1);

    return {};
}
|
|
|
|
// Returns true when `identifier` is a non-empty sequence of separator-delimited segments, each
// of which satisfies is_single_type(), and nothing is left over once those segments are consumed.
bool is_type_identifier(StringView identifier)
{
    // type = alphanum{3,8} (sep alphanum{3,8})*
    GenericLexer lexer { identifier };

    for (;;) {
        // Only the very first segment is not preceded by a separator.
        bool const expect_separator = lexer.tell() > 0;

        auto segment = consume_next_segment(lexer, expect_separator);
        if (!segment.has_value())
            break;

        if (!is_single_type(*segment))
            return false;
    }

    if (lexer.tell() == 0)
        return false;

    return lexer.is_eof();
}
|
|
|
|
// Parses a unicode_language_id starting at the lexer's current position.
//
// Returns an empty Optional if a segment cannot be consumed or the first segment is neither a
// language nor a script subtag. Otherwise returns the parsed LanguageID. Parsing stops at the
// first segment that is not part of the language identifier; that segment (and its separator)
// is handed back to the lexer so the caller can continue from there. If the lexer is already
// at EOF, the loop body never runs and a default-constructed LanguageID is returned.
static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#Unicode_language_identifier
    //
    // unicode_language_id = "root"
    //     OR
    // unicode_language_id = ((unicode_language_subtag (sep unicode_script_subtag)?) | unicode_script_subtag)
    //                       (sep unicode_region_subtag)?
    //                       (sep unicode_variant_subtag)*
    LanguageID language_id {};

    if (lexer.consume_specific("root"sv)) {
        constexpr auto is_separator = is_any_of("-_"sv);

        // Only treat "root" as the root locale when it forms a complete segment. Otherwise it is
        // merely the prefix of a longer subtag (e.g. "roots"), which must be parsed like any other
        // language subtag, so hand the consumed characters back to the lexer.
        if (lexer.is_eof() || lexer.next_is(is_separator)) {
            language_id.is_root = true;
            return language_id;
        }

        lexer.retreat("root"sv.length());
    }

    enum class ParseState {
        ParsingLanguageOrScript,
        ParsingScript,
        ParsingRegion,
        ParsingVariant,
        Done,
    };

    auto state = ParseState::ParsingLanguageOrScript;

    while (!lexer.is_eof() && (state != ParseState::Done)) {
        // Every segment except the very first one must be preceded by a separator.
        auto segment = consume_next_segment(lexer, state != ParseState::ParsingLanguageOrScript);
        if (!segment.has_value())
            return {};

        switch (state) {
        case ParseState::ParsingLanguageOrScript:
            if (is_unicode_language_subtag(*segment)) {
                state = ParseState::ParsingScript;
                language_id.language = MUST(String::from_utf8(*segment));
            } else if (is_unicode_script_subtag(*segment)) {
                state = ParseState::ParsingRegion;
                language_id.script = MUST(String::from_utf8(*segment));
            } else {
                return {};
            }
            break;

        case ParseState::ParsingScript:
            if (is_unicode_script_subtag(*segment)) {
                state = ParseState::ParsingRegion;
                language_id.script = MUST(String::from_utf8(*segment));
                break;
            }

            // The script is optional; try the same segment as a region.
            state = ParseState::ParsingRegion;
            [[fallthrough]];

        case ParseState::ParsingRegion:
            if (is_unicode_region_subtag(*segment)) {
                state = ParseState::ParsingVariant;
                language_id.region = MUST(String::from_utf8(*segment));
                break;
            }

            // The region is optional; try the same segment as a variant.
            state = ParseState::ParsingVariant;
            [[fallthrough]];

        case ParseState::ParsingVariant:
            if (is_unicode_variant_subtag(*segment)) {
                language_id.variants.append(MUST(String::from_utf8(*segment)));
            } else {
                // Not part of the language identifier: hand back the segment and its separator.
                lexer.retreat(segment->length() + 1);
                state = ParseState::Done;
            }
            break;

        default:
            VERIFY_NOT_REACHED();
        }
    }

    return language_id;
}
|
|
|
|
// Parses the body of a Unicode locale extension. The lexer is expected to be positioned on the
// separator that follows the extension's singleton header.
//
// Returns an empty Optional if a segment cannot be consumed, or if neither an attribute nor a
// keyword was found. Parsing stops at the first segment that is neither an attribute nor a key;
// that segment (and its separator) is handed back to the lexer.
static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#unicode_locale_extensions
    //
    // unicode_locale_extensions = sep [uU] ((sep keyword)+ | (sep attribute)+ (sep keyword)*)
    LocaleExtension locale_extension {};

    enum class ParseState {
        ParsingAttributeOrKeyword,
        ParsingAttribute,
        ParsingKeyword,
        Done,
    };

    auto state = ParseState::ParsingAttributeOrKeyword;

    while (!lexer.is_eof() && (state != ParseState::Done)) {
        auto segment = consume_next_segment(lexer);
        if (!segment.has_value())
            return {};

        if (state == ParseState::ParsingAttributeOrKeyword)
            state = is_key(*segment) ? ParseState::ParsingKeyword : ParseState::ParsingAttribute;

        switch (state) {
        case ParseState::ParsingAttribute:
            if (is_attribute(*segment)) {
                locale_extension.attributes.append(MUST(String::from_utf8(*segment)));
                break;
            }

            // Attributes are exhausted; try the same segment as a keyword key.
            state = ParseState::ParsingKeyword;
            [[fallthrough]];

        case ParseState::ParsingKeyword: {
            // keyword = key (sep type)?
            if (!is_key(*segment)) {
                // Not part of this extension: hand back the segment and its separator.
                lexer.retreat(segment->length() + 1);
                state = ParseState::Done;
                break;
            }

            // Only build the String once the segment is known to be a valid (ASCII) key, so that
            // a rejected segment can never trip the MUST() on malformed UTF-8.
            Keyword keyword { .key = MUST(String::from_utf8(*segment)) };
            Vector<StringView> keyword_values;

            while (true) {
                auto type = consume_next_segment(lexer);

                if (!type.has_value() || !is_single_type(*type)) {
                    if (type.has_value())
                        lexer.retreat(type->length() + 1);
                    break;
                }

                keyword_values.append(*type);
            }

            StringBuilder builder;
            builder.join('-', keyword_values);
            keyword.value = MUST(builder.to_string());

            locale_extension.keywords.append(move(keyword));
            break;
        }

        default:
            VERIFY_NOT_REACHED();
        }
    }

    if (locale_extension.attributes.is_empty() && locale_extension.keywords.is_empty())
        return {};
    return locale_extension;
}
|
|
|
|
// Parses the body of a transformed extension. The lexer is expected to be positioned on the
// separator that follows the extension's singleton header.
//
// Returns an empty Optional if a segment cannot be consumed, the language fails to parse, a field
// key has no values, or neither a language nor a field was found. Parsing stops at the first
// segment that is not a field key; that segment (and its separator) is handed back to the lexer.
static Optional<TransformedExtension> parse_transformed_extension(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#transformed_extensions
    //
    // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) | (sep tfield)+)
    TransformedExtension transformed_extension {};

    enum class ParseState {
        ParsingLanguageOrField,
        ParsingLanguage,
        ParsingField,
        Done,
    };

    auto state = ParseState::ParsingLanguageOrField;

    while (!lexer.is_eof() && (state != ParseState::Done)) {
        auto segment = consume_next_segment(lexer);
        if (!segment.has_value())
            return {};

        if (state == ParseState::ParsingLanguageOrField)
            state = is_unicode_language_subtag(*segment) ? ParseState::ParsingLanguage : ParseState::ParsingField;

        switch (state) {
        case ParseState::ParsingLanguage:
            // Rewind to the start of the segment (the separator stays consumed) so the language
            // parser sees the whole language identifier.
            lexer.retreat(segment->length());

            if (auto language_id = parse_unicode_language_id(lexer); language_id.has_value()) {
                transformed_extension.language = language_id.release_value();
                state = ParseState::ParsingField;
                break;
            }

            return {};

        case ParseState::ParsingField: {
            // tfield = tkey tvalue;
            if (!is_transformed_key(*segment)) {
                // Not part of this extension: hand back the segment and its separator.
                lexer.retreat(segment->length() + 1);
                state = ParseState::Done;
                break;
            }

            // Only build the String once the segment is known to be a valid (ASCII) key, so that
            // a rejected segment can never trip the MUST() on malformed UTF-8.
            TransformedField field { .key = MUST(String::from_utf8(*segment)) };
            Vector<StringView> field_values;

            while (true) {
                auto value = consume_next_segment(lexer);

                if (!value.has_value() || !is_single_transformed_value(*value)) {
                    if (value.has_value())
                        lexer.retreat(value->length() + 1);
                    break;
                }

                field_values.append(*value);
            }

            // A field key must be followed by at least one value.
            if (field_values.is_empty())
                return {};

            StringBuilder builder;
            builder.join('-', field_values);
            field.value = MUST(builder.to_string());

            transformed_extension.fields.append(move(field));
            break;
        }

        default:
            VERIFY_NOT_REACHED();
        }
    }

    if (!transformed_extension.language.has_value() && transformed_extension.fields.is_empty())
        return {};
    return transformed_extension;
}
|
|
|
|
// Parses the values of an "other" extension whose singleton header character is `key`.
//
// Returns an empty Optional if `key` is not ASCII alphanumeric, is 'x' or 'X', or if no valid
// value follows. The first segment that is not a valid value is handed back to the lexer together
// with its separator.
static Optional<OtherExtension> parse_other_extension(char key, GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#other_extensions
    //
    // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
    bool const is_private_use_key = (key == 'x') || (key == 'X');
    if (is_private_use_key || !is_ascii_alphanumeric(key))
        return {};

    Vector<StringView> values;

    for (auto segment = consume_next_segment(lexer); segment.has_value(); segment = consume_next_segment(lexer)) {
        auto const length = segment->length();
        bool const is_valid_value = (length >= 2) && (length <= 8) && all_of(*segment, is_ascii_alphanumeric);

        if (!is_valid_value) {
            lexer.retreat(length + 1);
            break;
        }

        values.append(*segment);
    }

    if (values.is_empty())
        return {};

    StringBuilder joined_values;
    joined_values.join('-', values);

    OtherExtension other_extension { .key = key };
    other_extension.value = MUST(joined_values.to_string());

    return other_extension;
}
|
|
|
|
// Parses a single extension (Unicode locale, transformed, or other) at the lexer's position.
//
// The extension header must be a one-character segment; 'u'/'U' and 't'/'T' select the dedicated
// parsers, and every other character is passed to parse_other_extension(). If no extension can
// be parsed, the lexer is rewound to where it was on entry and an empty Optional is returned.
static Optional<Extension> parse_extension(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#extensions
    //
    // extensions = unicode_locale_extensions | transformed_extensions | other_extensions
    auto const start = lexer.tell();

    auto rewind_and_fail = [&]() -> Optional<Extension> {
        lexer.retreat(lexer.tell() - start);
        return {};
    };

    auto header = consume_next_segment(lexer);
    if (!header.has_value() || (header->length() != 1))
        return rewind_and_fail();

    char const key = (*header)[0];

    if ((key == 'u') || (key == 'U')) {
        if (auto parsed = parse_unicode_locale_extension(lexer); parsed.has_value())
            return Extension { parsed.release_value() };
    } else if ((key == 't') || (key == 'T')) {
        if (auto parsed = parse_transformed_extension(lexer); parsed.has_value())
            return Extension { parsed.release_value() };
    } else {
        if (auto parsed = parse_other_extension(key, lexer); parsed.has_value())
            return Extension { parsed.release_value() };
    }

    return rewind_and_fail();
}
|
|
|
|
// Parses the private use extensions ("-x-...") at the lexer's position.
//
// Returns the list of private use values, or an empty list if the next segment is not an 'x'/'X'
// header followed by at least one valid value. In the latter case the lexer is left where it was
// on entry.
static Vector<String> parse_private_use_extensions(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#pu_extensions
    //
    // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
    auto const start = lexer.tell();

    auto header = consume_next_segment(lexer);
    if (!header.has_value())
        return {};

    bool const is_private_use_header = (header->length() == 1)
        && (((*header)[0] == 'x') || ((*header)[0] == 'X'));

    Vector<String> extensions;

    if (is_private_use_header) {
        for (auto segment = consume_next_segment(lexer); segment.has_value(); segment = consume_next_segment(lexer)) {
            auto const length = segment->length();
            bool const is_valid_value = (length >= 1) && (length <= 8) && all_of(*segment, is_ascii_alphanumeric);

            if (!is_valid_value) {
                // Hand back the offending segment together with its separator.
                lexer.retreat(length + 1);
                break;
            }

            extensions.append(MUST(String::from_utf8(*segment)));
        }
    }

    if (!extensions.is_empty())
        return extensions;

    lexer.retreat(lexer.tell() - start);
    return {};
}
|
|
|
|
// Parses `language` as a Unicode language identifier. The whole string must be consumed by the
// parser; otherwise an empty Optional is returned.
Optional<LanguageID> parse_unicode_language_id(StringView language)
{
    GenericLexer lexer { language };
    auto parsed = parse_unicode_language_id(lexer);

    if (lexer.is_eof())
        return parsed;

    return {};
}
|
|
|
|
// Parses `locale` as a Unicode locale identifier: a language identifier, followed by any number
// of extensions, followed by optional private use extensions. Returns an empty Optional if the
// language identifier fails to parse or if any input is left over afterwards.
Optional<LocaleID> parse_unicode_locale_id(StringView locale)
{
    GenericLexer lexer { locale };

    // https://unicode.org/reports/tr35/#Unicode_locale_identifier
    //
    // unicode_locale_id = unicode_language_id
    //                     extensions*
    //                     pu_extensions?
    auto language_id = parse_unicode_language_id(lexer);
    if (!language_id.has_value())
        return {};

    LocaleID locale_id { language_id.release_value() };

    for (auto extension = parse_extension(lexer); extension.has_value(); extension = parse_extension(lexer))
        locale_id.extensions.append(extension.release_value());

    locale_id.private_use_extensions = parse_private_use_extensions(lexer);

    if (lexer.is_eof())
        return locale_id;

    return {};
}
|
|
|
|
// Canonicalizes `locale` using ICU and returns the resulting locale string.
// Asserts that locale data exists for `locale` and that ICU canonicalization succeeds.
String canonicalize_unicode_locale_id(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    VERIFY(locale_data.has_value());

    UErrorCode status = U_ZERO_ERROR;
    locale_data->locale().canonicalize(status);
    VERIFY(icu_success(status));

    return locale_data->to_string();
}
|
|
|
|
// Canonicalizes the Unicode extension keyword `value` for `key` in place.
//
// A locale holding only the given keyword is built with ICU and canonicalized, and `value` is
// replaced with the keyword value read back from it. Every ICU step is asserted to succeed.
// The trailing bool parameter is unused.
void canonicalize_unicode_extension_values(StringView key, String& value, bool)
{
    UErrorCode status = U_ZERO_ERROR;
    auto const icu_key = icu_string_piece(key);

    icu::LocaleBuilder builder;
    builder.setUnicodeLocaleKeyword(icu_key, icu_string_piece(value));

    auto locale = builder.build(status);
    VERIFY(icu_success(status));

    locale.canonicalize(status);
    VERIFY(icu_success(status));

    auto canonical_value = locale.getUnicodeKeywordValue<StringBuilder>(icu_key, status);
    VERIFY(icu_success(status));

    value = MUST(canonical_value.to_string());
}
|
|
|
|
// Returns the default locale, which is always "en".
StringView default_locale()
{
    constexpr auto fallback_locale = "en"sv;
    return fallback_locale;
}
|
|
|
|
// Returns true when locale_from_string() yields a value for `locale`.
bool is_locale_available(StringView locale)
{
    auto const known_locale = locale_from_string(locale);
    return known_locale.has_value();
}
|
|
|
|
// Converts "narrow", "short" or "long" to the matching Style value.
// Any other input is a programming error and trips VERIFY_NOT_REACHED().
Style style_from_string(StringView style)
{
    struct StyleName {
        StringView name;
        Style value;
    };

    static constexpr StyleName style_names[] = {
        { "narrow"sv, Style::Narrow },
        { "short"sv, Style::Short },
        { "long"sv, Style::Long },
    };

    for (auto const& candidate : style_names) {
        if (style == candidate.name)
            return candidate.value;
    }

    VERIFY_NOT_REACHED();
}
|
|
|
|
// Converts a Style value to "narrow", "short" or "long".
// Any other value is a programming error and trips VERIFY_NOT_REACHED().
StringView style_to_string(Style style)
{
    if (style == Style::Narrow)
        return "narrow"sv;
    if (style == Style::Short)
        return "short"sv;
    if (style == Style::Long)
        return "long"sv;

    VERIFY_NOT_REACHED();
}
|
|
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_keyword_values(StringView) { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_calendars() { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_case_orderings() { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_numeric_orderings() { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_types() { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_hour_cycles() { return {}; }
|
|
ReadonlySpan<StringView> __attribute__((weak)) get_available_number_systems() { return {}; }
|
|
Optional<Locale> __attribute__((weak)) locale_from_string(StringView) { return {}; }
|
|
Optional<ListPatternType> __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; }
|
|
Optional<Key> __attribute__((weak)) key_from_string(StringView) { return {}; }
|
|
Optional<KeywordCalendar> __attribute__((weak)) keyword_ca_from_string(StringView) { return {}; }
|
|
Optional<KeywordCollation> __attribute__((weak)) keyword_co_from_string(StringView) { return {}; }
|
|
Optional<KeywordHours> __attribute__((weak)) keyword_hc_from_string(StringView) { return {}; }
|
|
Optional<KeywordColCaseFirst> __attribute__((weak)) keyword_kf_from_string(StringView) { return {}; }
|
|
Optional<KeywordColNumeric> __attribute__((weak)) keyword_kn_from_string(StringView) { return {}; }
|
|
Optional<KeywordNumbers> __attribute__((weak)) keyword_nu_from_string(StringView) { return {}; }
|
|
Vector<StringView> __attribute__((weak)) get_keywords_for_locale(StringView, StringView) { return {}; }
|
|
Optional<StringView> __attribute__((weak)) get_preferred_keyword_value_for_locale(StringView, StringView) { return {}; }
|
|
|
|
// Returns the ISO currency codes reported by ICU, excluding "LSM".
// Returns an empty list if ICU reports a failure while opening or iterating the enumeration.
Vector<String> available_currencies()
{
    UErrorCode status = U_ZERO_ERROR;

    auto* currencies = ucurr_openISOCurrencies(UCURR_ALL, &status);
    // Close the enumeration on every exit path.
    ScopeGuard guard { [&]() { uenum_close(currencies); } };

    if (icu_failure(status))
        return {};

    Vector<String> result;

    for (;;) {
        i32 length = 0;
        char const* next = uenum_next(currencies, &length, &status);

        if (icu_failure(status))
            return {};
        if (next == nullptr)
            break;

        StringView currency { next, static_cast<size_t>(length) };

        // https://unicode-org.atlassian.net/browse/ICU-21687
        if (currency == "LSM"sv)
            continue;

        result.append(MUST(String::from_utf8(currency)));
    }

    return result;
}
|
|
|
|
Optional<ListPatterns> __attribute__((weak)) get_locale_list_patterns(StringView, StringView, Style) { return {}; }
|
|
Optional<CharacterOrder> __attribute__((weak)) character_order_from_string(StringView) { return {}; }
|
|
StringView __attribute__((weak)) character_order_to_string(CharacterOrder) { return {}; }
|
|
Optional<CharacterOrder> __attribute__((weak)) character_order_for_locale(StringView) { return {}; }
|
|
Optional<LanguageID> __attribute__((weak)) add_likely_subtags(LanguageID const&) { return {}; }
|
|
|
|
// Minimizes `language_id` by removing subtags that add_likely_subtags() would add back.
// Returns an empty Optional if add_likely_subtags() fails for the input.
Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id)
{
    // https://www.unicode.org/reports/tr35/#Likely_Subtags

    // 1. First get max = AddLikelySubtags(inputLocale). If an error is signaled, return it.
    auto maximized = add_likely_subtags(language_id);
    if (!maximized.has_value())
        return {};

    // 2. Remove the variants from max.
    auto variants = move(maximized->variants);

    // Re-attaches the variants removed in step 2 to the language that is about to be returned.
    auto with_variants = [&variants](LanguageID language) {
        language.variants = move(variants);
        return language;
    };

    // 3. Get the components of the max (languagemax, scriptmax, regionmax).
    auto language_max = maximized->language;
    auto script_max = maximized->script;
    auto region_max = maximized->region;

    // 4. Then for trial in {languagemax, languagemax_regionmax, languagemax_scriptmax}:
    //    If AddLikelySubtags(trial) = max, then return trial + variants.
    auto try_candidate = [&](Optional<String> script, Optional<String> region) -> Optional<LanguageID> {
        LanguageID candidate { .language = language_max, .script = move(script), .region = move(region) };

        if (add_likely_subtags(candidate) == maximized)
            return with_variants(move(candidate));
        return {};
    };

    if (auto minimized = try_candidate({}, {}); minimized.has_value())
        return minimized;
    if (auto minimized = try_candidate({}, region_max); minimized.has_value())
        return minimized;
    if (auto minimized = try_candidate(script_max, {}); minimized.has_value())
        return minimized;

    // 5. If you do not get a match, return max + variants.
    return with_variants(maximized.release_value());
}
|
|
|
|
// Serializes the language, script, region and variants (in that order) joined by '-'.
// Subtags that are not set are skipped.
String LanguageID::to_string() const
{
    StringBuilder builder;

    auto append_subtag = [&builder](String const& subtag) {
        if (!builder.is_empty())
            builder.append('-');
        builder.append(subtag);
    };

    if (language.has_value())
        append_subtag(*language);
    if (script.has_value())
        append_subtag(*script);
    if (region.has_value())
        append_subtag(*region);

    for (auto const& variant : variants)
        append_subtag(variant);

    return MUST(builder.to_string());
}
|
|
|
|
// Serializes the locale: the language identifier, then each extension introduced by its
// singleton ("-u", "-t", or "-<key>"), then the private use extensions introduced by "-x".
// Empty subtags are skipped.
String LocaleID::to_string() const
{
    StringBuilder builder;

    auto append_subtag = [&](auto const& subtag) {
        if (!subtag.is_empty()) {
            if (!builder.is_empty())
                builder.append('-');
            builder.append(subtag);
        }
    };

    append_subtag(language_id.to_string());

    for (auto const& extension : extensions) {
        extension.visit(
            [&](LocaleExtension const& unicode_extension) {
                builder.append("-u"sv);

                for (auto const& attribute : unicode_extension.attributes)
                    append_subtag(attribute);

                for (auto const& keyword : unicode_extension.keywords) {
                    append_subtag(keyword.key);
                    append_subtag(keyword.value);
                }
            },
            [&](TransformedExtension const& transformed_extension) {
                builder.append("-t"sv);

                if (transformed_extension.language.has_value())
                    append_subtag(transformed_extension.language->to_string());

                for (auto const& field : transformed_extension.fields) {
                    append_subtag(field.key);
                    append_subtag(field.value);
                }
            },
            [&](OtherExtension const& other_extension) {
                builder.appendff("-{}", other_extension.key);
                append_subtag(other_extension.value);
            });
    }

    if (!private_use_extensions.is_empty()) {
        builder.append("-x"sv);

        for (auto const& private_use_value : private_use_extensions)
            append_subtag(private_use_value);
    }

    return MUST(builder.to_string());
}
|
|
|
|
}
|