PICARD-803: ASCII replacements can cause folder creation

This commit is contained in:
Philipp Wolfer
2018-08-27 13:38:32 +02:00
committed by Philipp Wolfer
parent 319156beb5
commit 2fbe8f97a3
4 changed files with 27 additions and 15 deletions

View File

@@ -273,9 +273,7 @@ class CoverArtImage:
metadata.add_unique("coverart_types", cover_type)
filename = ScriptParser().eval(filename, metadata)
if config.setting["ascii_filenames"]:
if isinstance(filename, str):
filename = unaccent(filename)
filename = replace_non_ascii(filename)
filename = replace_non_ascii(filename, pathsave=True)
if not filename:
filename = "cover"
if not os.path.isabs(filename):

View File

@@ -314,9 +314,7 @@ class File(QtCore.QObject, Item):
naming_format = naming_format.replace("\t", "").replace("\n", "")
filename = ScriptParser().eval(naming_format, metadata, self)
if settings["ascii_filenames"]:
if isinstance(filename, str):
filename = unaccent(filename)
filename = replace_non_ascii(filename)
filename = replace_non_ascii(filename, pathsave=True)
# replace incompatible characters
if settings["windows_compatibility"] or sys.platform == "win32":
filename = replace_win32_incompat(filename)

View File

@@ -65,6 +65,8 @@ from functools import partial
import re
import unicodedata
from picard.util import sanitize_filename
######################### LATIN SIMPLIFICATION ###########################
# The translation tables for punctuation and latin combined-characters are taken from
# http://unicode.org/repos/cldr/trunk/common/transforms/Latin-ASCII.xml
@@ -175,10 +177,11 @@ _simplify_punctuation = {
"\u200B": "", # Zero Width Space
}
_re_simplify_punctuation = _re_any(_simplify_punctuation.keys())
_pathsave_simplify_punctuation = {k: sanitize_filename(v) for k, v in _simplify_punctuation.items()}
def unicode_simplify_punctuation(string):
return _re_simplify_punctuation.sub(lambda m: _simplify_punctuation[m.group(0)], string)
def unicode_simplify_punctuation(string, pathsave=False):
punctuation = _pathsave_simplify_punctuation if pathsave else _simplify_punctuation
return _re_simplify_punctuation.sub(lambda m: punctuation[m.group(0)], string)
_simplify_combinations = {
@@ -407,10 +410,12 @@ _simplify_combinations = {
"\u01BE": "ts", # LATIN LETTER TS LIGATION (see http://unicode.org/notes/tn27/)
}
_re_simplify_combinations = _re_any(_simplify_combinations)
_pathsave_simplify_combinations = {k: sanitize_filename(v) for k, v in _simplify_combinations.items()}
def unicode_simplify_combinations(string):
return _re_simplify_combinations.sub(lambda m: _simplify_combinations[m.group(0)], string)
def unicode_simplify_combinations(string, pathsave=False):
combinations = _pathsave_simplify_combinations if pathsave else _simplify_combinations
return _re_simplify_combinations.sub(lambda m: combinations[m.group(0)], string)
def unicode_simplify_accents(string):
@@ -428,15 +433,15 @@ def unaccent(string):
return unicode_simplify_accents(string)
def replace_non_ascii(string, repl="_"):
def replace_non_ascii(string, repl="_", pathsave=False):
"""Replace non-ASCII characters from ``string`` by ``repl``."""
interim = unicode_simplify_combinations(string)
interim = unicode_simplify_combinations(string, pathsave)
interim = unicode_simplify_accents(interim)
interim = unicode_simplify_punctuation(interim)
interim = unicode_simplify_punctuation(interim, pathsave)
interim = unicode_simplify_compatibility(interim)
def error_repl(e, repl="_"):
return(repl, e.start + 1)
return (repl, e.start + 1)
codecs.register_error('repl', partial(error_repl, repl=repl))
# Decoding and encoding to allow replacements
return interim.encode('ascii', 'repl').decode('ascii')

View File

@@ -128,6 +128,10 @@ class PunctuationTest(unittest.TestCase):
self.assertEqual(util.textencoding.unicode_simplify_punctuation(combinations_from), combinations_from)
self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii_chars), ascii_chars)
def test_pathsave(self):
self.assertEqual(util.textencoding.unicode_simplify_punctuation('\u2215', True), '_')
self.assertEqual(util.textencoding.unicode_simplify_punctuation('/\\\u2215', True), '/\\_')
def test_incorrect(self):
pass
@@ -141,6 +145,10 @@ class CombinationsTest(unittest.TestCase):
self.assertEqual(util.textencoding.unicode_simplify_combinations(punctuation_from), punctuation_from)
self.assertEqual(util.textencoding.unicode_simplify_combinations(ascii_chars), ascii_chars)
def test_pathsave(self):
self.assertEqual(util.textencoding.unicode_simplify_combinations('8½', True), '8 1_2')
self.assertEqual(util.textencoding.unicode_simplify_combinations('8/\\½', True), '8/\\ 1_2')
def test_incorrect(self):
pass
@@ -191,6 +199,9 @@ class ReplaceNonAsciiTest(unittest.TestCase):
self.assertEqual(util.textencoding.replace_non_ascii(u"⒈ ⒉ ⒊"), u"1. 2. 3.") # Digit full stop
self.assertEqual(util.textencoding.replace_non_ascii(u"１２３"), u"123") # Fullwidth digits
def test_pathsave(self):
self.assertEqual(util.textencoding.replace_non_ascii('\u2044/8½\\', pathsave=True), '_/8 1_2\\')
def test_incorrect(self):
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš")
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Luk____")