Fixed utf-32 detection from BOM

- The utf-32-le BOM was in the wrong byte order.
- Longer BOMs need to be checked first, because the utf-16-le BOM is a prefix of the utf-32-le BOM.
- Added the BOM for utf-8-sig.
2025-12-22 17:28:58 +00:00 · 2024-03-22 07:48:38 +01:00
parent 1ffd583b00
commit 06ae9d7de9
3 changed files with 10 additions and 4 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -949,8 +949,8 @@ class DetectUnicodeEncodingTest(PicardTestCase):
        boms = {
            b'\xff\xfe': 'utf-16-le',
            b'\xfe\xff': 'utf-16-be',
-            b'\00\00\xff\xfe': 'utf-32-le',
-            b'\00\00\xfe\xff': 'utf-32-be',
+            b'\xff\xfe\x00\x00': 'utf-32-le',
+            b'\x00\x00\xfe\xff': 'utf-32-be',
            b'\xef\xbb\xbf': 'utf-8-sig',
            b'': 'utf-8',
            b'\00': 'utf-8',
@@ -970,6 +970,11 @@ class DetectUnicodeEncodingTest(PicardTestCase):
        file_path = get_test_data_path('eac-utf16le.log')
        self.assertEqual(expected_encoding, detect_file_encoding(file_path))

+    def test_detect_file_encoding_eac_utf_32_le(self):
+        expected_encoding = 'utf-32-le'
+        file_path = get_test_data_path('eac-utf32le.log')
+        self.assertEqual(expected_encoding, detect_file_encoding(file_path))
+
    def test_detect_file_encoding_eac_windows_1251(self):
        expected_encoding = 'windows-1251'
        file_path = get_test_data_path('eac-windows1251.log')