diff --git a/picard/util/__init__.py b/picard/util/__init__.py index 6af22fda2..214f8ffef 100644 --- a/picard/util/__init__.py +++ b/picard/util/__init__.py @@ -1167,10 +1167,11 @@ def strxfrm(string): ENCODING_BOMS = { + b'\xff\xfe\x00\x00': 'utf-32-le', + b'\x00\x00\xfe\xff': 'utf-32-be', + b'\xef\xbb\xbf': 'utf-8-sig', b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be', - b'\00\00\xff\xfe': 'utf-32-le', - b'\00\00\xfe\xff': 'utf-32-be', } diff --git a/test/data/eac-utf32le.log b/test/data/eac-utf32le.log new file mode 100644 index 000000000..12df8b62a Binary files /dev/null and b/test/data/eac-utf32le.log differ diff --git a/test/test_utils.py b/test/test_utils.py index f3c997b28..bc93ffd90 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -949,8 +949,8 @@ class DetectUnicodeEncodingTest(PicardTestCase): boms = { b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be', - b'\00\00\xff\xfe': 'utf-32-le', - b'\00\00\xfe\xff': 'utf-32-be', + b'\xff\xfe\x00\x00': 'utf-32-le', + b'\x00\x00\xfe\xff': 'utf-32-be', b'\xef\xbb\xbf': 'utf-8-sig', b'': 'utf-8', b'\00': 'utf-8', @@ -970,6 +970,11 @@ class DetectUnicodeEncodingTest(PicardTestCase): file_path = get_test_data_path('eac-utf16le.log') self.assertEqual(expected_encoding, detect_file_encoding(file_path)) + def test_detect_file_encoding_eac_utf_32_le(self): + expected_encoding = 'utf-32-le' + file_path = get_test_data_path('eac-utf32le.log') + self.assertEqual(expected_encoding, detect_file_encoding(file_path)) + def test_detect_file_encoding_eac_windows_1251(self): expected_encoding = 'windows-1251' file_path = get_test_data_path('eac-windows1251.log')