mirror of
https://github.com/fergalmoran/ladybird.git
synced 2026-01-03 23:25:20 +00:00
LibTextCodec: Implement a Windows-1251 decoder
This encoding (a superset of ASCII that adds the Cyrillic alphabet) is currently the third most used encoding on the web, and because Cyrillic glyphs were recently added by Dmitrii Trifonov, we can now support it as well :^)
This commit is contained in:
committed by
Linus Groh
parent
4b0098e52f
commit
87cabda80d
@@ -51,6 +51,14 @@ HebrewDecoder& hebrew_decoder()
|
||||
return *decoder;
|
||||
}
|
||||
|
||||
// Returns a reference to a single shared CyrillicDecoder.
// The instance is heap-allocated on the first call and reused by every
// later call; this function never deletes it.
CyrillicDecoder& cyrillic_decoder()
{
    static CyrillicDecoder* s_instance = nullptr;
    if (s_instance == nullptr) {
        // First call: create the decoder that all later calls will share.
        s_instance = new CyrillicDecoder;
    }
    return *s_instance;
}
|
||||
|
||||
}
|
||||
|
||||
Decoder* decoder_for(const String& a_encoding)
|
||||
@@ -66,6 +74,8 @@ Decoder* decoder_for(const String& a_encoding)
|
||||
return &latin2_decoder();
|
||||
if (encoding.equals_ignoring_case("windows-1255"))
|
||||
return &hebrew_decoder();
|
||||
if (encoding.equals_ignoring_case("windows-1251"))
|
||||
return &cyrillic_decoder();
|
||||
dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
|
||||
return nullptr;
|
||||
}
|
||||
@@ -304,4 +314,27 @@ String HebrewDecoder::to_utf8(const StringView& input)
|
||||
return builder.to_string();
|
||||
}
|
||||
|
||||
// Converts a byte string in the Windows-1251 encoding to a UTF-8 String.
//
// Bytes below 0x80 are appended unchanged; every other byte is looked up in
// a 128-entry table of code points and appended as that code point.
String CyrillicDecoder::to_utf8(const StringView& input)
{
    // Code points for bytes 0x80..0xFF, indexed by (byte - 0x80), one row per
    // 16 byte values. Index 0x18 (byte 0x98) holds U+FFFD, the Unicode
    // replacement character.
    static constexpr Array<u32, 128> translation_table = {
        0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F,
        0x452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x459, 0x203A, 0x45A, 0x45C, 0x45B, 0x45F,
        0xA0, 0x40E, 0x45E, 0x408, 0xA4, 0x490, 0xA6, 0xA7, 0x401, 0xA9, 0x404, 0xAB, 0xAC, 0xAD, 0xAE, 0x407,
        0xB0, 0xB1, 0x406, 0x456, 0x491, 0xB5, 0xB6, 0xB7, 0x451, 0x2116, 0x454, 0xBB, 0x458, 0x405, 0x455, 0x457,
        0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41A, 0x41B, 0x41C, 0x41D, 0x41E, 0x41F,
        0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42A, 0x42B, 0x42C, 0x42D, 0x42E, 0x42F,
        0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43A, 0x43B, 0x43C, 0x43D, 0x43E, 0x43F,
        0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44A, 0x44B, 0x44C, 0x44D, 0x44E, 0x44F
    };

    // First byte value that is translated through the table instead of being
    // copied through as-is.
    constexpr unsigned char first_table_byte = 0x80;

    StringBuilder builder(input.length());
    for (unsigned char byte : input) {
        if (byte >= first_table_byte) {
            builder.append_code_point(translation_table[byte - first_table_byte]);
            continue;
        }
        // Superset of ASCII: low bytes pass through untouched.
        builder.append(byte);
    }
    return builder.to_string();
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user