mirror of
https://github.com/fergalmoran/picard.git
synced 2026-05-05 02:26:11 +00:00
PICARD-321: Support backslash in filenames on non-Windows OS
With this change, backslashes can be used in filenames on systems other than Windows. On those systems, backslashes are replaced as before only if Windows compatibility is enabled.
This commit is contained in:
@@ -163,9 +163,13 @@ def strip_non_alnum(string): # noqa: E302
|
||||
return _re_non_alphanum.sub(" ", string).strip()
|
||||
|
||||
|
||||
_re_slashes = re.compile(r'[\\/]', re.UNICODE)
|
||||
def sanitize_filename(string, repl="_"): # noqa: E302
|
||||
return _re_slashes.sub(repl, string)
|
||||
def sanitize_filename(string, repl="_", win_compat=False):
    """Replace path separators in ``string`` by ``repl``.

    ``os.sep`` and, where the platform defines one, ``os.altsep`` are always
    replaced. If ``win_compat`` is true the backslash is replaced as well,
    even on platforms where it is not a path separator.

    :param string: the text to sanitize
    :param repl: the text substituted for each separator
    :param win_compat: also replace backslashes regardless of platform
    :return: ``string`` with all selected separators replaced
    """
    separators = [os.sep]
    if os.altsep:
        separators.append(os.altsep)
    # Where the backslash is already a separator it is os.sep (not
    # os.altsep), so check the collected list to avoid replacing it twice.
    if win_compat and '\\' not in separators:
        separators.append('\\')
    for separator in separators:
        string = string.replace(separator, repl)
    return string
|
||||
|
||||
|
||||
def _reverse_sortname(sortname):
|
||||
|
||||
@@ -34,15 +34,17 @@ def script_to_filename(naming_format, metadata, file=None, settings=None):
|
||||
if settings is None:
|
||||
settings = config.setting
|
||||
# make sure every metadata can safely be used in a path name
|
||||
win_compat = IS_WIN or settings["windows_compatibility"]
|
||||
meta = Metadata()
|
||||
for name in metadata:
|
||||
meta[name] = [sanitize_filename(str(v)) for v in metadata.getall(name)]
|
||||
meta[name] = [sanitize_filename(str(v), win_compat=win_compat)
|
||||
for v in metadata.getall(name)]
|
||||
naming_format = naming_format.replace("\t", "").replace("\n", "")
|
||||
filename = ScriptParser().eval(naming_format, meta, file)
|
||||
if settings["ascii_filenames"]:
|
||||
filename = replace_non_ascii(filename, pathsave=True)
|
||||
filename = replace_non_ascii(filename, pathsave=True, win_compat=win_compat)
|
||||
# replace incompatible characters
|
||||
if settings["windows_compatibility"] or IS_WIN:
|
||||
if win_compat:
|
||||
filename = replace_win32_incompat(filename)
|
||||
# remove null characters
|
||||
filename = filename.replace("\x00", "")
|
||||
|
||||
@@ -179,12 +179,16 @@ _simplify_punctuation = {
|
||||
"\u200B": "", # Zero Width Space
|
||||
}
|
||||
_re_simplify_punctuation = _re_any(_simplify_punctuation.keys())
|
||||
_pathsave_simplify_punctuation = {k: sanitize_filename(v) for k, v in _simplify_punctuation.items()}
|
||||
|
||||
|
||||
def unicode_simplify_punctuation(string, pathsave=False):
|
||||
punctuation = _pathsave_simplify_punctuation if pathsave else _simplify_punctuation
|
||||
return _re_simplify_punctuation.sub(lambda m: punctuation[m.group(0)], string)
|
||||
def unicode_simplify_punctuation(string, pathsave=False, win_compat=False):
    """Substitute every match of ``_re_simplify_punctuation`` in ``string``.

    Each match is replaced by its entry in ``_simplify_punctuation``. If
    ``pathsave`` is true the replacement text is additionally passed through
    ``sanitize_filename``, with ``win_compat`` forwarded to it.
    """
    def _substitute(match):
        replacement = _simplify_punctuation[match.group(0)]
        if not pathsave:
            return replacement
        return sanitize_filename(replacement, win_compat=win_compat)

    return _re_simplify_punctuation.sub(_substitute, string)
|
||||
|
||||
|
||||
_simplify_combinations = {
|
||||
@@ -413,12 +417,16 @@ _simplify_combinations = {
|
||||
"\u01BE": "ts", # LATIN LETTER TS LIGATION (see http://unicode.org/notes/tn27/)
|
||||
}
|
||||
_re_simplify_combinations = _re_any(_simplify_combinations)
|
||||
_pathsave_simplify_combinations = {k: sanitize_filename(v) for k, v in _simplify_combinations.items()}
|
||||
|
||||
|
||||
def unicode_simplify_combinations(string, pathsave=False):
|
||||
combinations = _pathsave_simplify_combinations if pathsave else _simplify_combinations
|
||||
return _re_simplify_combinations.sub(lambda m: combinations[m.group(0)], string)
|
||||
def unicode_simplify_combinations(string, pathsave=False, win_compat=False):
    """Substitute every match of ``_re_simplify_combinations`` in ``string``.

    Each match is replaced by its entry in ``_simplify_combinations``. If
    ``pathsave`` is true the replacement text is additionally passed through
    ``sanitize_filename``, with ``win_compat`` forwarded to it.
    """
    def _substitute(match):
        replacement = _simplify_combinations[match.group(0)]
        if not pathsave:
            return replacement
        return sanitize_filename(replacement, win_compat=win_compat)

    return _re_simplify_combinations.sub(_substitute, string)
|
||||
|
||||
|
||||
def unicode_simplify_accents(string):
|
||||
@@ -436,11 +444,11 @@ def unaccent(string):
|
||||
return unicode_simplify_accents(string)
|
||||
|
||||
|
||||
def replace_non_ascii(string, repl="_", pathsave=False):
|
||||
def replace_non_ascii(string, repl="_", pathsave=False, win_compat=False):
|
||||
"""Replace non-ASCII characters from ``string`` by ``repl``."""
|
||||
interim = unicode_simplify_combinations(string, pathsave)
|
||||
interim = unicode_simplify_combinations(string, pathsave, win_compat)
|
||||
interim = unicode_simplify_accents(interim)
|
||||
interim = unicode_simplify_punctuation(interim, pathsave)
|
||||
interim = unicode_simplify_punctuation(interim, pathsave, win_compat)
|
||||
interim = unicode_simplify_compatibility(interim)
|
||||
|
||||
def error_repl(e, repl="_"):
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import unittest
|
||||
|
||||
from test.picardtestcase import PicardTestCase
|
||||
|
||||
from picard import config
|
||||
@@ -42,6 +44,12 @@ class ScriptToFilenameTest(PicardTestCase):
|
||||
filename = script_to_filename('%artist%/%album%', metadata)
|
||||
self.assertEqual('AC_DC/The Album', filename)
|
||||
|
||||
def test_preserve_backslash(self):
    # The slash is always replaced; the backslash is expected to survive
    # unless running on Windows.
    metadata = Metadata()
    metadata['artist'] = 'AC\\/DC'
    expected = 'AC__DC' if IS_WIN else 'AC\\_DC'
    self.assertEqual(expected, script_to_filename('%artist%', metadata))
|
||||
|
||||
def test_file_metadata(self):
|
||||
metadata = Metadata()
|
||||
file = File('somepath/somefile.mp3')
|
||||
@@ -53,25 +61,41 @@ class ScriptToFilenameTest(PicardTestCase):
|
||||
metadata['artist'] = 'Die Ärzte'
|
||||
settings = config.setting.copy()
|
||||
settings['ascii_filenames'] = False
|
||||
filename = script_to_filename('%artist% éöü', metadata, settings=settings)
|
||||
self.assertEqual('Die Ärzte éöü', filename)
|
||||
filename = script_to_filename('%artist% éöü½', metadata, settings=settings)
|
||||
self.assertEqual('Die Ärzte éöü½', filename)
|
||||
settings['ascii_filenames'] = True
|
||||
filename = script_to_filename('%artist% éöü', metadata, settings=settings)
|
||||
self.assertEqual('Die Arzte eou', filename)
|
||||
filename = script_to_filename('%artist% éöü½', metadata, settings=settings)
|
||||
self.assertEqual('Die Arzte eou 1_2', filename)
|
||||
|
||||
def test_windows_compatibility(self):
|
||||
metadata = Metadata()
|
||||
metadata['artist'] = '*:'
|
||||
metadata['artist'] = '\\*:'
|
||||
settings = config.setting.copy()
|
||||
settings['windows_compatibility'] = False
|
||||
expect_orig = '*:?'
|
||||
expect_compat = '___'
|
||||
expect_orig = '\\*:?'
|
||||
expect_compat = '____'
|
||||
filename = script_to_filename('%artist%?', metadata, settings=settings)
|
||||
self.assertEqual(expect_compat if IS_WIN else expect_orig, filename)
|
||||
settings['windows_compatibility'] = True
|
||||
filename = script_to_filename('%artist%?', metadata, settings=settings)
|
||||
self.assertEqual(expect_compat, filename)
|
||||
|
||||
@unittest.skipUnless(IS_WIN, "windows test")
def test_ascii_win_save(self):
    # Windows only: run the shared ASCII check without touching the
    # windows_compatibility setting.
    self._test_ascii_windows_compatibility()
|
||||
|
||||
def test_ascii_win_compat(self):
    # Turn Windows compatibility on in the global settings, then run the
    # shared ASCII check (which works on a copy of those settings).
    config.setting['windows_compatibility'] = True
    self._test_ascii_windows_compatibility()
|
||||
|
||||
def _test_ascii_windows_compatibility(self):
    # Shared helper: with ascii_filenames enabled, check the file name built
    # from slash and backslash look-alikes in both the tag value and the
    # naming format.
    settings = config.setting.copy()
    settings['ascii_filenames'] = True
    metadata = Metadata()
    metadata['artist'] = '\u2216/\\\u2215'
    naming_format = '%artist%/\u2216\\\\\u2215'
    filename = script_to_filename(naming_format, metadata, settings=settings)
    self.assertEqual('____/_\\_', filename)
|
||||
|
||||
def test_remove_null_chars(self):
|
||||
metadata = Metadata()
|
||||
filename = script_to_filename('a\x00b\x00', metadata)
|
||||
|
||||
@@ -2,10 +2,9 @@
|
||||
from test.picardtestcase import PicardTestCase
|
||||
|
||||
from picard import util
|
||||
from picard.const.sys import IS_WIN
|
||||
|
||||
|
||||
#from picard.util import textencoding
|
||||
|
||||
# Set the value to true below to show the coverage of Latin characters
|
||||
show_latin2ascii_coverage = False
|
||||
|
||||
@@ -129,8 +128,12 @@ class PunctuationTest(PicardTestCase):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii_chars), ascii_chars)
|
||||
|
||||
def test_pathsave(self):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215', True), '_')
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215', True), '/\\_')
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215\u2216', True), '__' if IS_WIN else '_\\')
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215\u2216', True), '/\\__' if IS_WIN else '/\\_\\')
|
||||
|
||||
def test_pathsave_win_compat(self):
    # Positional arguments are (string, pathsave, win_compat).
    simplify = util.textencoding.unicode_simplify_punctuation
    cases = (
        ('\u2215\u2216', '__'),
        ('/\\\u2215\u2216', '/\\__'),
    )
    for text, expected in cases:
        self.assertEqual(simplify(text, True, True), expected)
|
||||
|
||||
def test_incorrect(self):
|
||||
pass
|
||||
@@ -200,7 +203,11 @@ class ReplaceNonAsciiTest(PicardTestCase):
|
||||
self.assertEqual(util.textencoding.replace_non_ascii(u"123"), u"123") # Fullwidth digits
|
||||
|
||||
def test_pathsave(self):
|
||||
self.assertEqual(util.textencoding.replace_non_ascii('\u2044/8½\\', pathsave=True), '_/8 1_2\\')
|
||||
expected = '__/8 1_2\\' if IS_WIN else '\\_/8 1_2\\'
|
||||
self.assertEqual(util.textencoding.replace_non_ascii('\u2216\u2044/8½\\', pathsave=True), expected)
|
||||
|
||||
def test_win_compat(self):
    # Same input as the pathsave test, but with win_compat requested
    # explicitly.
    result = util.textencoding.replace_non_ascii(
        '\u2216\u2044/8½\\', pathsave=True, win_compat=True)
    self.assertEqual(result, '__/8 1_2\\')
|
||||
|
||||
def test_incorrect(self):
|
||||
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš")
|
||||
|
||||
@@ -63,6 +63,26 @@ class SanitizeDateTest(PicardTestCase):
|
||||
self.assertNotEqual(util.sanitize_date("2006.03.02"), "2006-03-02")
|
||||
|
||||
|
||||
class SanitizeFilenameTest(PicardTestCase):
    # Exercises util.sanitize_filename: slash replacement, a custom
    # replacement string, the win_compat flag and the platform-dependent
    # handling of backslashes.

    def test_replace_slashes(self):
        sanitized = util.sanitize_filename("AC/DC")
        self.assertEqual(sanitized, "AC_DC")

    def test_custom_replacement(self):
        sanitized = util.sanitize_filename("AC/DC", "|")
        self.assertEqual(sanitized, "AC|DC")

    def test_win_compat(self):
        sanitized = util.sanitize_filename("AC\\/DC", win_compat=True)
        self.assertEqual(sanitized, "AC__DC")

    @unittest.skipUnless(IS_WIN, "windows test")
    def test_replace_backslashes(self):
        sanitized = util.sanitize_filename("AC\\DC")
        self.assertEqual(sanitized, "AC_DC")

    @unittest.skipIf(IS_WIN, "non-windows test")
    def test_keep_backslashes(self):
        sanitized = util.sanitize_filename("AC\\DC")
        self.assertEqual(sanitized, "AC\\DC")
|
||||
|
||||
|
||||
class TranslateArtistTest(PicardTestCase):
|
||||
|
||||
def test_latin(self):
|
||||
|
||||
Reference in New Issue
Block a user