LibTextCodec: Add Windows-1250 text decoder

This commit adds Windows-1250 decoding based on the unicode.org
mapping table.
This commit is contained in:
Michal Grich
2024-04-20 16:29:26 +02:00
committed by Andreas Kling
parent b7bd3fd920
commit 7a6d84d036
2 changed files with 31 additions and 0 deletions

View File

@@ -21,6 +21,7 @@ UTF8Decoder s_utf8_decoder;
UTF16BEDecoder s_utf16be_decoder;
UTF16LEDecoder s_utf16le_decoder;
Latin2Decoder s_latin2_decoder; // returned by decoder_for() for "iso-8859-2"
CentralEuropeDecoder s_centraleurope_decoder; // returned by decoder_for() for "windows-1250"
HebrewDecoder s_hebrew_decoder; // returned by decoder_for() for "windows-1255"
CyrillicDecoder s_cyrillic_decoder;
Koi8RDecoder s_koi8r_decoder;
@@ -45,6 +46,8 @@ Optional<Decoder&> decoder_for(StringView a_encoding)
return s_utf16le_decoder;
if (encoding.value().equals_ignoring_ascii_case("iso-8859-2"sv))
return s_latin2_decoder;
if (encoding.value().equals_ignoring_ascii_case("windows-1250"sv))
return s_centraleurope_decoder;
if (encoding.value().equals_ignoring_ascii_case("windows-1255"sv))
return s_hebrew_decoder;
if (encoding.value().equals_ignoring_ascii_case("windows-1251"sv))
@@ -518,6 +521,29 @@ ErrorOr<void> Latin2Decoder::process(StringView input, Function<ErrorOr<void>(u3
return {};
}
// Walks the input one byte at a time and passes one code point per byte to on_code_point.
// Bytes below 0x80 are forwarded unchanged; bytes 0x80-0xFF are translated through the
// lookup table below. Every callback invocation is wrapped in TRY.
ErrorOr<void> CentralEuropeDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
{
    // Code points for bytes 0x80-0xFF, indexed by (byte - 0x80).
    // The slots for 0x81, 0x83, 0x88, 0x90 and 0x98 hold U+FFFD.
    static constexpr Array<u32, 128> upper_half = {
        0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, // 0x80-0x8F
        0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, // 0x90-0x9F
        0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, // 0xA0-0xAF
        0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, // 0xB0-0xBF
        0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, // 0xC0-0xCF
        0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, // 0xD0-0xDF
        0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, // 0xE0-0xEF
        0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 // 0xF0-0xFF
    };

    for (unsigned char byte : input) {
        // The low half is a superset of ASCII and maps to itself; only the high half needs the table.
        u32 const code_point = byte < 0x80 ? static_cast<u32>(byte) : upper_half[byte - 0x80];
        TRY(on_code_point(code_point));
    }

    return {};
}
ErrorOr<void> HebrewDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
{
static constexpr Array<u32, 128> translation_table = {