Make filename sanitizing aware of Windows compatibility.

sanitize_filename() now replaces os.sep and, when set, os.altsep instead of
matching a fixed slash/backslash regex, and additionally replaces backslashes
when win_compat is true. script_to_filename() computes win_compat once from
IS_WIN or the "windows_compatibility" setting and passes it to
sanitize_filename() and replace_non_ascii(), which forwards it to
unicode_simplify_combinations() and unicode_simplify_punctuation(). The
precomputed _pathsave_* tables are dropped; replacements are sanitized per
match instead. Tests are added and updated for the new behavior.

NOTE(review): line breaks and indentation in this patch were reconstructed
from the hunk line counts and Python syntax; runs of spaces inside context
lines (e.g. before inline comments) are assumed to follow PEP 8. Confirm
against the target tree, or apply with whitespace differences ignored.

diff --git a/picard/util/__init__.py b/picard/util/__init__.py
index 2c032c436..a1ed56704 100644
--- a/picard/util/__init__.py
+++ b/picard/util/__init__.py
@@ -163,9 +163,13 @@ def strip_non_alnum(string):  # noqa: E302
     return _re_non_alphanum.sub(" ", string).strip()
 
 
-_re_slashes = re.compile(r'[\\/]', re.UNICODE)
-def sanitize_filename(string, repl="_"):  # noqa: E302
-    return _re_slashes.sub(repl, string)
+def sanitize_filename(string, repl="_", win_compat=False):
+    string = string.replace(os.sep, repl)
+    if os.altsep:
+        string = string.replace(os.altsep, repl)
+    if win_compat and os.altsep != '\\':
+        string = string.replace('\\', repl)
+    return string
 
 
 def _reverse_sortname(sortname):
diff --git a/picard/util/scripttofilename.py b/picard/util/scripttofilename.py
index e00c96b71..a9bde0276 100644
--- a/picard/util/scripttofilename.py
+++ b/picard/util/scripttofilename.py
@@ -34,15 +34,17 @@ def script_to_filename(naming_format, metadata, file=None, settings=None):
     if settings is None:
         settings = config.setting
     # make sure every metadata can safely be used in a path name
+    win_compat = IS_WIN or settings["windows_compatibility"]
     meta = Metadata()
     for name in metadata:
-        meta[name] = [sanitize_filename(str(v)) for v in metadata.getall(name)]
+        meta[name] = [sanitize_filename(str(v), win_compat=win_compat)
+                      for v in metadata.getall(name)]
     naming_format = naming_format.replace("\t", "").replace("\n", "")
     filename = ScriptParser().eval(naming_format, meta, file)
     if settings["ascii_filenames"]:
-        filename = replace_non_ascii(filename, pathsave=True)
+        filename = replace_non_ascii(filename, pathsave=True, win_compat=win_compat)
     # replace incompatible characters
-    if settings["windows_compatibility"] or IS_WIN:
+    if win_compat:
         filename = replace_win32_incompat(filename)
     # remove null characters
     filename = filename.replace("\x00", "")
diff --git a/picard/util/textencoding.py b/picard/util/textencoding.py
index fa414de8a..94264e698 100644
--- a/picard/util/textencoding.py
+++ b/picard/util/textencoding.py
@@ -179,12 +179,16 @@ _simplify_punctuation = {
     "\u200B": "",  # Zero Width Space
 }
 _re_simplify_punctuation = _re_any(_simplify_punctuation.keys())
-_pathsave_simplify_punctuation = {k: sanitize_filename(v) for k, v in _simplify_punctuation.items()}
 
 
-def unicode_simplify_punctuation(string, pathsave=False):
-    punctuation = _pathsave_simplify_punctuation if pathsave else _simplify_punctuation
-    return _re_simplify_punctuation.sub(lambda m: punctuation[m.group(0)], string)
+def unicode_simplify_punctuation(string, pathsave=False, win_compat=False):
+    def repl(m):
+        if pathsave:
+            return sanitize_filename(_simplify_punctuation[m.group(0)], win_compat=win_compat)
+        else:
+            return _simplify_punctuation[m.group(0)]
+
+    return _re_simplify_punctuation.sub(repl, string)
 
 
 _simplify_combinations = {
@@ -413,12 +417,16 @@ _simplify_combinations = {
     "\u01BE": "ts",  # LATIN LETTER TS LIGATION (see http://unicode.org/notes/tn27/)
 }
 _re_simplify_combinations = _re_any(_simplify_combinations)
-_pathsave_simplify_combinations = {k: sanitize_filename(v) for k, v in _simplify_combinations.items()}
 
 
-def unicode_simplify_combinations(string, pathsave=False):
-    combinations = _pathsave_simplify_combinations if pathsave else _simplify_combinations
-    return _re_simplify_combinations.sub(lambda m: combinations[m.group(0)], string)
+def unicode_simplify_combinations(string, pathsave=False, win_compat=False):
+    def repl(m):
+        if pathsave:
+            return sanitize_filename(_simplify_combinations[m.group(0)], win_compat=win_compat)
+        else:
+            return _simplify_combinations[m.group(0)]
+
+    return _re_simplify_combinations.sub(repl, string)
 
 
 def unicode_simplify_accents(string):
@@ -436,11 +444,11 @@ def unaccent(string):
     return unicode_simplify_accents(string)
 
 
-def replace_non_ascii(string, repl="_", pathsave=False):
+def replace_non_ascii(string, repl="_", pathsave=False, win_compat=False):
     """Replace non-ASCII characters from ``string`` by ``repl``."""
-    interim = unicode_simplify_combinations(string, pathsave)
+    interim = unicode_simplify_combinations(string, pathsave, win_compat)
     interim = unicode_simplify_accents(interim)
-    interim = unicode_simplify_punctuation(interim, pathsave)
+    interim = unicode_simplify_punctuation(interim, pathsave, win_compat)
     interim = unicode_simplify_compatibility(interim)
 
     def error_repl(e, repl="_"):
diff --git a/test/test_scripttofilename.py b/test/test_scripttofilename.py
index ab2e4ba6a..11ff4a24d 100644
--- a/test/test_scripttofilename.py
+++ b/test/test_scripttofilename.py
@@ -1,3 +1,5 @@
+import unittest
+
 from test.picardtestcase import PicardTestCase
 
 from picard import config
@@ -42,6 +44,12 @@ class ScriptToFilenameTest(PicardTestCase):
         filename = script_to_filename('%artist%/%album%', metadata)
         self.assertEqual('AC_DC/The Album', filename)
 
+    def test_preserve_backslash(self):
+        metadata = Metadata()
+        metadata['artist'] = 'AC\\/DC'
+        filename = script_to_filename('%artist%', metadata)
+        self.assertEqual('AC__DC' if IS_WIN else 'AC\\_DC', filename)
+
     def test_file_metadata(self):
         metadata = Metadata()
         file = File('somepath/somefile.mp3')
@@ -53,25 +61,41 @@ class ScriptToFilenameTest(PicardTestCase):
         metadata['artist'] = 'Die Ärzte'
         settings = config.setting.copy()
         settings['ascii_filenames'] = False
-        filename = script_to_filename('%artist% éöü', metadata, settings=settings)
-        self.assertEqual('Die Ärzte éöü', filename)
+        filename = script_to_filename('%artist% éöü½', metadata, settings=settings)
+        self.assertEqual('Die Ärzte éöü½', filename)
         settings['ascii_filenames'] = True
-        filename = script_to_filename('%artist% éöü', metadata, settings=settings)
-        self.assertEqual('Die Arzte eou', filename)
+        filename = script_to_filename('%artist% éöü½', metadata, settings=settings)
+        self.assertEqual('Die Arzte eou 1_2', filename)
 
     def test_windows_compatibility(self):
         metadata = Metadata()
-        metadata['artist'] = '*:'
+        metadata['artist'] = '\\*:'
         settings = config.setting.copy()
         settings['windows_compatibility'] = False
-        expect_orig = '*:?'
-        expect_compat = '___'
+        expect_orig = '\\*:?'
+        expect_compat = '____'
         filename = script_to_filename('%artist%?', metadata, settings=settings)
         self.assertEqual(expect_compat if IS_WIN else expect_orig, filename)
         settings['windows_compatibility'] = True
         filename = script_to_filename('%artist%?', metadata, settings=settings)
         self.assertEqual(expect_compat, filename)
 
+    @unittest.skipUnless(IS_WIN, "windows test")
+    def test_ascii_win_save(self):
+        self._test_ascii_windows_compatibility()
+
+    def test_ascii_win_compat(self):
+        config.setting['windows_compatibility'] = True
+        self._test_ascii_windows_compatibility()
+
+    def _test_ascii_windows_compatibility(self):
+        metadata = Metadata()
+        metadata['artist'] = '\u2216/\\\u2215'
+        settings = config.setting.copy()
+        settings['ascii_filenames'] = True
+        filename = script_to_filename('%artist%/\u2216\\\\\u2215', metadata, settings=settings)
+        self.assertEqual('____/_\\_', filename)
+
     def test_remove_null_chars(self):
         metadata = Metadata()
         filename = script_to_filename('a\x00b\x00', metadata)
diff --git a/test/test_textencoding.py b/test/test_textencoding.py
index 20aae94da..ebbecad00 100644
--- a/test/test_textencoding.py
+++ b/test/test_textencoding.py
@@ -2,10 +2,9 @@
 from test.picardtestcase import PicardTestCase
 
 from picard import util
+from picard.const.sys import IS_WIN
 
 
-#from picard.util import textencoding
-
 # Set the value to true below to show the coverage of Latin characters
 show_latin2ascii_coverage = False
 
@@ -129,8 +128,12 @@ class PunctuationTest(PicardTestCase):
         self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii_chars), ascii_chars)
 
     def test_pathsave(self):
-        self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215', True), '_')
-        self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215', True), '/\\_')
+        self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215\u2216', True), '__' if IS_WIN else '_\\')
+        self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215\u2216', True), '/\\__' if IS_WIN else '/\\_\\')
+
+    def test_pathsave_win_compat(self):
+        self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215\u2216', True, True), '__')
+        self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215\u2216', True, True), '/\\__')
 
     def test_incorrect(self):
         pass
@@ -200,7 +203,11 @@ class ReplaceNonAsciiTest(PicardTestCase):
         self.assertEqual(util.textencoding.replace_non_ascii(u"123"), u"123")  # Fullwidth digits
 
     def test_pathsave(self):
-        self.assertEqual(util.textencoding.replace_non_ascii('\u2044/8½\\', pathsave=True), '_/8 1_2\\')
+        expected = '__/8 1_2\\' if IS_WIN else '\\_/8 1_2\\'
+        self.assertEqual(util.textencoding.replace_non_ascii('\u2216\u2044/8½\\', pathsave=True), expected)
+
+    def test_win_compat(self):
+        self.assertEqual(util.textencoding.replace_non_ascii('\u2216\u2044/8½\\', pathsave=True, win_compat=True), '__/8 1_2\\')
 
     def test_incorrect(self):
         self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš")
diff --git a/test/test_utils.py b/test/test_utils.py
index b180d4f36..62d840df5 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -63,6 +63,26 @@ class SanitizeDateTest(PicardTestCase):
         self.assertNotEqual(util.sanitize_date("2006.03.02"), "2006-03-02")
 
 
+class SanitizeFilenameTest(PicardTestCase):
+
+    def test_replace_slashes(self):
+        self.assertEqual(util.sanitize_filename("AC/DC"), "AC_DC")
+
+    def test_custom_replacement(self):
+        self.assertEqual(util.sanitize_filename("AC/DC", "|"), "AC|DC")
+
+    def test_win_compat(self):
+        self.assertEqual(util.sanitize_filename("AC\\/DC", win_compat=True), "AC__DC")
+
+    @unittest.skipUnless(IS_WIN, "windows test")
+    def test_replace_backslashes(self):
+        self.assertEqual(util.sanitize_filename("AC\\DC"), "AC_DC")
+
+    @unittest.skipIf(IS_WIN, "non-windows test")
+    def test_keep_backslashes(self):
+        self.assertEqual(util.sanitize_filename("AC\\DC"), "AC\\DC")
+
+
 class TranslateArtistTest(PicardTestCase):
 
     def test_latin(self):