mirror of
https://github.com/fergalmoran/picard.git
synced 2026-01-07 17:14:55 +00:00
PICARD-803: ASCII replacements can cause folder creation
This commit is contained in:
committed by
Philipp Wolfer
parent
319156beb5
commit
2fbe8f97a3
@@ -273,9 +273,7 @@ class CoverArtImage:
|
||||
metadata.add_unique("coverart_types", cover_type)
|
||||
filename = ScriptParser().eval(filename, metadata)
|
||||
if config.setting["ascii_filenames"]:
|
||||
if isinstance(filename, str):
|
||||
filename = unaccent(filename)
|
||||
filename = replace_non_ascii(filename)
|
||||
filename = replace_non_ascii(filename, pathsave=True)
|
||||
if not filename:
|
||||
filename = "cover"
|
||||
if not os.path.isabs(filename):
|
||||
|
||||
@@ -314,9 +314,7 @@ class File(QtCore.QObject, Item):
|
||||
naming_format = naming_format.replace("\t", "").replace("\n", "")
|
||||
filename = ScriptParser().eval(naming_format, metadata, self)
|
||||
if settings["ascii_filenames"]:
|
||||
if isinstance(filename, str):
|
||||
filename = unaccent(filename)
|
||||
filename = replace_non_ascii(filename)
|
||||
filename = replace_non_ascii(filename, pathsave=True)
|
||||
# replace incompatible characters
|
||||
if settings["windows_compatibility"] or sys.platform == "win32":
|
||||
filename = replace_win32_incompat(filename)
|
||||
|
||||
@@ -65,6 +65,8 @@ from functools import partial
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
from picard.util import sanitize_filename
|
||||
|
||||
######################### LATIN SIMPLIFICATION ###########################
|
||||
# The translation tables for punctuation and Latin combined characters are taken from
|
||||
# http://unicode.org/repos/cldr/trunk/common/transforms/Latin-ASCII.xml
|
||||
@@ -175,10 +177,11 @@ _simplify_punctuation = {
|
||||
"\u200B": "", # Zero Width Space
|
||||
}
|
||||
_re_simplify_punctuation = _re_any(_simplify_punctuation.keys())
|
||||
_pathsave_simplify_punctuation = {k: sanitize_filename(v) for k, v in _simplify_punctuation.items()}
|
||||
|
||||
|
||||
def unicode_simplify_punctuation(string):
|
||||
return _re_simplify_punctuation.sub(lambda m: _simplify_punctuation[m.group(0)], string)
|
||||
def unicode_simplify_punctuation(string, pathsave=False):
|
||||
punctuation = _pathsave_simplify_punctuation if pathsave else _simplify_punctuation
|
||||
return _re_simplify_punctuation.sub(lambda m: punctuation[m.group(0)], string)
|
||||
|
||||
|
||||
_simplify_combinations = {
|
||||
@@ -407,10 +410,12 @@ _simplify_combinations = {
|
||||
"\u01BE": "ts", # LATIN LETTER TS LIGATION (see http://unicode.org/notes/tn27/)
|
||||
}
|
||||
_re_simplify_combinations = _re_any(_simplify_combinations)
|
||||
_pathsave_simplify_combinations = {k: sanitize_filename(v) for k, v in _simplify_combinations.items()}
|
||||
|
||||
|
||||
def unicode_simplify_combinations(string):
|
||||
return _re_simplify_combinations.sub(lambda m: _simplify_combinations[m.group(0)], string)
|
||||
def unicode_simplify_combinations(string, pathsave=False):
|
||||
combinations = _pathsave_simplify_combinations if pathsave else _simplify_combinations
|
||||
return _re_simplify_combinations.sub(lambda m: combinations[m.group(0)], string)
|
||||
|
||||
|
||||
def unicode_simplify_accents(string):
|
||||
@@ -428,15 +433,15 @@ def unaccent(string):
|
||||
return unicode_simplify_accents(string)
|
||||
|
||||
|
||||
def replace_non_ascii(string, repl="_"):
|
||||
def replace_non_ascii(string, repl="_", pathsave=False):
|
||||
"""Replace non-ASCII characters from ``string`` by ``repl``."""
|
||||
interim = unicode_simplify_combinations(string)
|
||||
interim = unicode_simplify_combinations(string, pathsave)
|
||||
interim = unicode_simplify_accents(interim)
|
||||
interim = unicode_simplify_punctuation(interim)
|
||||
interim = unicode_simplify_punctuation(interim, pathsave)
|
||||
interim = unicode_simplify_compatibility(interim)
|
||||
|
||||
def error_repl(e, repl="_"):
|
||||
return(repl, e.start + 1)
|
||||
return (repl, e.start + 1)
|
||||
codecs.register_error('repl', partial(error_repl, repl=repl))
|
||||
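# Encode to ASCII using the 'repl' error handler registered above (each
# unencodable character is substituted by ``repl``), then decode back to str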
|
||||
return interim.encode('ascii', 'repl').decode('ascii')
|
||||
|
||||
@@ -128,6 +128,10 @@ class PunctuationTest(unittest.TestCase):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation(combinations_from), combinations_from)
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii_chars), ascii_chars)
|
||||
|
||||
def test_pathsave(self):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215', True), '_')
|
||||
self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215', True), '/\\_')
|
||||
|
||||
def test_incorrect(self):
|
||||
pass
|
||||
|
||||
@@ -141,6 +145,10 @@ class CombinationsTest(unittest.TestCase):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_combinations(punctuation_from), punctuation_from)
|
||||
self.assertEqual(util.textencoding.unicode_simplify_combinations(ascii_chars), ascii_chars)
|
||||
|
||||
def test_pathsave(self):
|
||||
self.assertEqual(util.textencoding.unicode_simplify_combinations('8½', True), '8 1_2')
|
||||
self.assertEqual(util.textencoding.unicode_simplify_combinations('8/\\½', True), '8/\\ 1_2')
|
||||
|
||||
def test_incorrect(self):
|
||||
pass
|
||||
|
||||
@@ -191,6 +199,9 @@ class ReplaceNonAsciiTest(unittest.TestCase):
|
||||
self.assertEqual(util.textencoding.replace_non_ascii(u"⒈ ⒉ ⒊"), u"1. 2. 3.") # Digit full stop
|
||||
self.assertEqual(util.textencoding.replace_non_ascii(u"１２３"), u"123") # Fullwidth digits
|
||||
|
||||
def test_pathsave(self):
|
||||
self.assertEqual(util.textencoding.replace_non_ascii('\u2044/8½\\', pathsave=True), '_/8 1_2\\')
|
||||
|
||||
def test_incorrect(self):
|
||||
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš")
|
||||
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Luk____")
|
||||
|
||||
Reference in New Issue
Block a user