PICARD-1568: Improve parsing of track numbers from filenames

Avoid false positives: only recognize a few common track-number patterns instead of falling back to any number found in the filename.
This commit is contained in:
Philipp Wolfer
2021-04-19 10:36:23 +02:00
parent f590cedf97
commit 19d973c36d
2 changed files with 18 additions and 18 deletions

View File

@@ -380,11 +380,12 @@ def iter_unique(seq):
# order is important
_tracknum_regexps = (
# search for explicit track number (prefix "track")
r"track[\s_-]*(?:no|nr)?[\s_-]*(\d+)",
# search for 2-digit number at start of string
r"^(\d{2})\D",
r"track[\s_-]*(?:(?:no|nr)\.?)?[\s_-]*(?P<number>\d+)",
# search for 2-digit number at start of string (additional leading zeroes are allowed)
r"^(?P<number>0*\d{2})(?:\.)[^0-9,]", # "99. ", but not "99.02"
r"^(?P<number>0*\d{2})[^0-9,.]",
# search for 2-digit number at end of string
r"\D(\d{2})$",
r"[^0-9,.](?P<number>0*\d{2})$",
)
@@ -396,16 +397,9 @@ def tracknum_from_filename(base_filename):
for r in _tracknum_regexps:
match = re.search(r, filename, re.I)
if match:
n = int(match.group(1))
n = int(match.group('number'))
if n > 0:
return n
# find all numbers between 1 and 99
# 4-digit or more numbers are very unlikely to be a track number
# smaller number is preferred in any case
numbers = sorted([int(n) for n in re.findall(r'\d+', filename) if
0 < int(n) <= 99])
if numbers:
return numbers[0]
return None

View File

@@ -443,18 +443,23 @@ class TracknumFromFilenameTest(PicardTestCase):
(42, '42. Foo.mp3'),
(None, '20000 Feet.mp3'),
(242, 'track no 242.mp3'),
(77, 'Track no. 77 .mp3'),
(242, 'track-242.mp3'),
(242, 'track nr 242.mp3'),
(242, 'track_242.mp3'),
# (None, '30,000 Pounds of Bananas.mp3'),
# (None, 'Dalas 1 PM.mp3'),
# (None, "Don't Stop the 80's.mp3"),
(None, '30,000 Pounds of Bananas.mp3'),
(None, 'Dalas 1 PM.mp3'),
(None, "Don't Stop the 80's.mp3"),
(None, 'Symphony no. 5 in D minor.mp3'),
(None, 'Song 2.mp3'),
# (None, '99 Luftballons.mp3'),
# (None, 'Symphony no. 5 in D minor.mp3'),
(7, '99 Luftballons Track 7.mp3'),
(None, 'Margin 0.001.mp3'),
(None, '99.99 Foo.mp3'),
)
for expected, filename in tests:
tracknumber = tracknum_from_filename(filename)
self.assertEqual(expected, tracknumber)
self.assertEqual(expected, tracknumber, filename)
class TracknumAndTitleFromFilenameTest(PicardTestCase):
@@ -465,8 +470,9 @@ class TracknumAndTitleFromFilenameTest(PicardTestCase):
(('1', 'Track 0001'), 'Track 0001.mp3'),
(('99', 'Foo'), '99 Foo.mp3'),
(('42', 'Foo'), '0000042 Foo.mp3'),
(('2', 'Foo'), '0000002 Foo.mp3'),
((None, '20000 Feet'), '20000 Feet.mp3'),
# ((None, '20,000 Feet'), '20,000 Feet.mp3'),
((None, '20,000 Feet'), '20,000 Feet.mp3'),
)
for expected, filename in tests:
result = tracknum_and_title_from_filename(filename)