diff --git a/picard/coverart/image.py b/picard/coverart/image.py index 186eab392..70f737969 100644 --- a/picard/coverart/image.py +++ b/picard/coverart/image.py @@ -273,9 +273,7 @@ class CoverArtImage: metadata.add_unique("coverart_types", cover_type) filename = ScriptParser().eval(filename, metadata) if config.setting["ascii_filenames"]: - if isinstance(filename, str): - filename = unaccent(filename) - filename = replace_non_ascii(filename) + filename = replace_non_ascii(filename, pathsave=True) if not filename: filename = "cover" if not os.path.isabs(filename): diff --git a/picard/file.py b/picard/file.py index 0eec71480..50cb32e5e 100644 --- a/picard/file.py +++ b/picard/file.py @@ -314,9 +314,7 @@ class File(QtCore.QObject, Item): naming_format = naming_format.replace("\t", "").replace("\n", "") filename = ScriptParser().eval(naming_format, metadata, self) if settings["ascii_filenames"]: - if isinstance(filename, str): - filename = unaccent(filename) - filename = replace_non_ascii(filename) + filename = replace_non_ascii(filename, pathsave=True) # replace incompatible characters if settings["windows_compatibility"] or sys.platform == "win32": filename = replace_win32_incompat(filename) diff --git a/picard/util/textencoding.py b/picard/util/textencoding.py index 35b095c65..6d09ec4cf 100644 --- a/picard/util/textencoding.py +++ b/picard/util/textencoding.py @@ -65,6 +65,8 @@ from functools import partial import re import unicodedata +from picard.util import sanitize_filename + ######################### LATIN SIMPLIFICATION ########################### # The translation tables for punctuation and latin combined-characters are taken from # http://unicode.org/repos/cldr/trunk/common/transforms/Latin-ASCII.xml @@ -175,10 +177,11 @@ _simplify_punctuation = { "\u200B": "", # Zero Width Space } _re_simplify_punctuation = _re_any(_simplify_punctuation.keys()) +_pathsave_simplify_punctuation = {k: sanitize_filename(v) for k, v in _simplify_punctuation.items()} - -def unicode_simplify_punctuation(string): - return _re_simplify_punctuation.sub(lambda m: _simplify_punctuation[m.group(0)], string) +def unicode_simplify_punctuation(string, pathsave=False): + punctuation = _pathsave_simplify_punctuation if pathsave else _simplify_punctuation + return _re_simplify_punctuation.sub(lambda m: punctuation[m.group(0)], string) _simplify_combinations = { @@ -407,10 +410,12 @@ _simplify_combinations = { "\u01BE": "ts", # LATIN LETTER TS LIGATION (see http://unicode.org/notes/tn27/) } _re_simplify_combinations = _re_any(_simplify_combinations) +_pathsave_simplify_combinations = {k: sanitize_filename(v) for k, v in _simplify_combinations.items()} -def unicode_simplify_combinations(string): - return _re_simplify_combinations.sub(lambda m: _simplify_combinations[m.group(0)], string) +def unicode_simplify_combinations(string, pathsave=False): + combinations = _pathsave_simplify_combinations if pathsave else _simplify_combinations + return _re_simplify_combinations.sub(lambda m: combinations[m.group(0)], string) def unicode_simplify_accents(string): @@ -428,15 +433,15 @@ def unaccent(string): return unicode_simplify_accents(string) -def replace_non_ascii(string, repl="_"): +def replace_non_ascii(string, repl="_", pathsave=False): """Replace non-ASCII characters from ``string`` by ``repl``.""" - interim = unicode_simplify_combinations(string) + interim = unicode_simplify_combinations(string, pathsave) interim = unicode_simplify_accents(interim) - interim = unicode_simplify_punctuation(interim) + interim = unicode_simplify_punctuation(interim, pathsave) interim = unicode_simplify_compatibility(interim) def error_repl(e, repl="_"): - return(repl, e.start + 1) + return (repl, e.start + 1) codecs.register_error('repl', partial(error_repl, repl=repl)) # Decoding and encoding to allow replacements return interim.encode('ascii', 'repl').decode('ascii') diff --git a/test/test_textencoding.py b/test/test_textencoding.py index d81cc1292..7d511e9ad 100644 --- a/test/test_textencoding.py +++ b/test/test_textencoding.py @@ -128,6 +128,10 @@ class PunctuationTest(unittest.TestCase): self.assertEqual(util.textencoding.unicode_simplify_punctuation(combinations_from), combinations_from) self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii_chars), ascii_chars) + def test_pathsave(self): + self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215', True), '_') + self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215', True), '/\\_') + def test_incorrect(self): pass @@ -141,6 +145,10 @@ class CombinationsTest(unittest.TestCase): self.assertEqual(util.textencoding.unicode_simplify_combinations(punctuation_from), punctuation_from) self.assertEqual(util.textencoding.unicode_simplify_combinations(ascii_chars), ascii_chars) + def test_pathsave(self): + self.assertEqual(util.textencoding.unicode_simplify_combinations('8½', True), '8 1_2') + self.assertEqual(util.textencoding.unicode_simplify_combinations('8/\\½', True), '8/\\ 1_2') + def test_incorrect(self): pass @@ -191,6 +199,9 @@ class ReplaceNonAsciiTest(unittest.TestCase): self.assertEqual(util.textencoding.replace_non_ascii(u"⒈ ⒉ ⒊"), u"1. 2. 3.") # Digit full stop self.assertEqual(util.textencoding.replace_non_ascii(u"123"), u"123") # Fullwidth digits + def test_pathsave(self): + self.assertEqual(util.textencoding.replace_non_ascii('\u2044/8½\\', pathsave=True), '_/8 1_2\\') + def test_incorrect(self): self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš") self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Luk____")