PICARD-2935: apply genre filters and threshold before selecting minimal usage

This commit is contained in:
Philipp Wolfer
2024-06-25 16:24:43 +02:00
parent e73c260092
commit dbe1254a2f
3 changed files with 31 additions and 15 deletions

View File

@@ -110,10 +110,17 @@ class TagGenreFilter:
                 return True
         return False
 
-    def filter(self, counter):
-        for name, count in counter:
+    def filter(self, counter: Counter, minusage=0) -> Counter:
+        result = Counter()
+        for name, count in counter.items():
             if not self.skip(name):
-                yield (name, count)
+                result[name] = count
+        topcount = result.most_common(1)[0][1]
+        for name, count in counter.items():
+            percent = 100 * count // topcount
+            if percent < minusage:
+                del result[name]
+        return result
 
     def format_errors(self):
         fmt = _("Error line %(lineno)d: %(error)s")
@@ -318,18 +325,13 @@ class Track(FileListItem):
         if not genres:
             return []
 
-        # Find most common genres
-        most_common_genres = genres.most_common(limit)
-        topcount = most_common_genres[0][1]
-
         # Filter by name and usage
         genres_filter = TagGenreFilter(filters)
-        genres_list = []
-        for name, count in genres_filter.filter(most_common_genres):
-            percent = 100 * count // topcount
-            if percent < minusage:
-                break
-            genres_list.append(name.title())
+        genres = genres_filter.filter(genres, minusage=minusage)
+
+        # Find most common genres
+        most_common_genres = genres.most_common(limit)
+        genres_list = [name.title() for name, _count in most_common_genres]
         genres_list.sort()
 
         # And generate the genre metadata tag

View File

@@ -20,6 +20,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
+from collections import Counter
 
 from test.picardtestcase import PicardTestCase
 
@@ -168,5 +169,12 @@ class TagGenreFilterTest(PicardTestCase):
 
     def test_filter_method(self):
         tag_filter = TagGenreFilter("-a*")
-        result = list(tag_filter.filter([("ax", 1), ("bx", 2), ("ay", 3), ("by", 4)]))
-        self.assertEqual([('bx', 2), ('by', 4)], result)
+        genres = Counter(ax=1, bx=2, ay=3, by=4)
+        result = tag_filter.filter(genres)
+        self.assertEqual([('bx', 2), ('by', 4)], list(result.items()))
+
+    def test_filter_method_minusage(self):
+        tag_filter = TagGenreFilter("-a*")
+        genres = Counter(ax=4, bx=5, ay=20, by=10, bz=4)
+        result = tag_filter.filter(genres, minusage=50)
+        self.assertEqual([('bx', 5), ('by', 10)], list(result.items()))

View File

@@ -62,6 +62,12 @@ class TrackGenres2MetadataTest(PicardTestCase):
         ret = Track._genres_to_metadata(genres, limit=0)
         self.assertEqual(ret, [])
 
+    def test_limit_after_filter(self):
+        genres = Counter(rock=5, blues=7, pop=1, psychedelic=3)
+        filters = '-rock'
+        ret = Track._genres_to_metadata(genres, limit=3, filters=filters)
+        self.assertEqual(ret, ['Blues', 'Pop', 'Psychedelic'])
+
     def test_minusage(self):
         genres = Counter(pop=6, rock=7, blues=2)
         ret = Track._genres_to_metadata(genres, minusage=10)