diff --git a/picard/cluster.py b/picard/cluster.py index 81f2b5c1a..7d30dd76a 100644 --- a/picard/cluster.py +++ b/picard/cluster.py @@ -420,21 +420,25 @@ class ClusterList(list, Item): class FileCluster: def __init__(self): self.files = [] - self.artists = defaultdict(lambda: 0) - self.titles = defaultdict(lambda: 0) + self._artist_counts = defaultdict(lambda: 0) + self._artists = defaultdict(lambda: defaultdict(lambda: 0)) + self._titles = defaultdict(lambda: 0) def add(self, album, artist, file): self.files.append(file) - self.artists[artist] += 1 - self.titles[album] += 1 + self._artist_counts[tokenize(artist)] += 1 + self._artists[tokenize(artist)][artist] += 1 + self._titles[album] += 1 @property def artist(self): - return max(self.artists.items(), key=itemgetter(1))[0] + tokenized_artist = max(self._artist_counts.items(), key=itemgetter(1))[0] + return max(self._artists[tokenized_artist].items(), key=itemgetter(1))[0] @property def title(self): - return max(self.titles.items(), key=itemgetter(1))[0] + # Find the most common title + return max(self._titles.items(), key=itemgetter(1))[0] _re_non_alphanum = re.compile(r'\W', re.UNICODE) diff --git a/test/test_clustering.py b/test/test_clustering.py index 84e3f84ad..cf41c7634 100644 --- a/test/test_clustering.py +++ b/test/test_clustering.py @@ -24,6 +24,7 @@ from test.picardtestcase import PicardTestCase from picard.cluster import ( Cluster, + FileCluster, tokenize, ) from picard.file import File @@ -120,14 +121,43 @@ class ClusterTest(PicardTestCase): clusters = list(Cluster.cluster(files)) self.assertEqual(0, len(clusters)) - # def test_common_artist_name(self): - # files = [ - # self._create_file('cluster 1', 'artist 1'), - # self._create_file('cluster 1', 'artist 2'), - # self._create_file('cluster 1', 'artist2'), - # self._create_file('cluster 1', 'artist 1'), - # self._create_file('cluster 1', 'artist 2'), - # ] - # clusters = list(Cluster.cluster(files)) - # self.assertEqual(1, len(clusters)) - # self.assertClusterEqual('cluster 1', 'artist 2', files, clusters[0]) + def test_common_artist_name(self): + files = [ + self._create_file('cluster 1', 'artist 1'), + self._create_file('cluster 1', 'artist 2'), + self._create_file('cluster 1', 'artist2'), + self._create_file('cluster 1', 'artist 1'), + self._create_file('cluster 1', 'artist 2'), + ] + clusters = list(Cluster.cluster(files)) + self.assertEqual(1, len(clusters)) + self.assertClusterEqual('cluster 1', 'artist 2', files, clusters[0]) + + +class FileClusterTest(PicardTestCase): + + def test_single(self): + file = File('foo') + fc = FileCluster() + fc.add('album 1', 'artist 1', file) + self.assertEqual('album 1', fc.title) + self.assertEqual('artist 1', fc.artist) + self.assertEqual([file], fc.files) + + def test_multi(self): + files = [ + File('foo1'), + File('foo2'), + File('foo3'), + File('foo4'), + File('foo5'), + ] + fc = FileCluster() + fc.add('album 1', 'artist1', files[0]) + fc.add('Album 1', 'artist 2', files[1]) + fc.add('album\t1', 'Artist 1', files[2]) + fc.add('Album 1', 'Artist 2', files[3]) + fc.add('album 2', 'Artist 1', files[4]) + self.assertEqual('Album 1', fc.title) + self.assertEqual('Artist 1', fc.artist) + self.assertEqual(files, fc.files)