PICARD-2368: Use totalalbumtracks for cluster matching

This commit is contained in:
Philipp Wolfer
2021-12-18 13:14:12 +01:00
parent 8ed8075cea
commit e32e116d5e
4 changed files with 104 additions and 65 deletions

View File

@@ -85,7 +85,7 @@ class FileList(QtCore.QObject, FileListItem):
def iterfiles(self, save=False):
    """Yield each file held in ``self.files``, in stored order.

    The ``save`` argument is accepted but not used by this implementation.
    """
    for contained_file in self.files:
        yield contained_file
def update(self):
def update(self, signal=True):
pass
@property
@@ -99,7 +99,7 @@ class Cluster(FileList):
comparison_weights = {
'album': 17,
'albumartist': 6,
'totaltracks': 5,
'totalalbumtracks': 5,
'releasetype': 10,
'releasecountry': 2,
'format': 2,
@@ -152,7 +152,7 @@ class Cluster(FileList):
file.metadata_images_changed.connect(self.update_metadata_images)
added_files = sorted(added_files, key=attrgetter('discnumber', 'tracknumber', 'base_filename'))
self.files.extend(added_files)
self.metadata['totaltracks'] = len(self.files)
self.update(signal=False)
if self.can_show_coverart:
add_metadata_images(self, added_files)
self.item.add_files(added_files)
@@ -166,7 +166,7 @@ class Cluster(FileList):
self.tagger.window.set_processing(True)
self.metadata.length -= file.metadata.length
self.files.remove(file)
self.metadata['totaltracks'] = len(self.files)
self.update(signal=False)
self.item.remove_file(file)
if self.can_show_coverart:
file.metadata_images_changed.disconnect(self.update_metadata_images)
@@ -177,8 +177,9 @@ class Cluster(FileList):
if not self.special and self.get_num_files() == 0:
self.tagger.remove_cluster(self)
def update(self):
if self.item:
def update(self, signal=True):
self.metadata['~totalalbumtracks'] = self.metadata['totaltracks'] = len(self.files)
if signal and self.item:
self.item.update()
def get_num_files(self):

View File

@@ -145,6 +145,10 @@ def weights_from_preferred_formats(parts, release, preferred_formats, weight):
parts.append((score, weight))
def trackcount_score(actual, expected):
    """Rate how well a track count matches an expected track count.

    Returns 1.0 when both counts are equal, 0.3 when ``actual`` is lower
    than ``expected``, and 0.0 when ``actual`` exceeds ``expected``.
    """
    if actual > expected:
        return 0.0
    if actual < expected:
        return 0.3
    return 1.0
class Metadata(MutableMapping):
"""List of metadata items with dict-like access."""
@@ -241,7 +245,7 @@ class Metadata(MutableMapping):
def compare_to_release_parts(self, release, weights):
parts = []
if "album" in self:
if "album" in self and "album" in weights:
b = release['title']
parts.append((similarity2(self["album"], b), weights["album"]))
@@ -250,66 +254,79 @@ class Metadata(MutableMapping):
b = artist_credit_from_node(release['artist-credit'])[0]
parts.append((similarity2(a, b), weights["albumartist"]))
try:
a = int(self["totaltracks"])
if 'media' in release:
score = 0.0
for media in release['media']:
b = media.get('track-count', 0)
score = max(score, 0.0 if a > b else 0.3 if a < b else 1.0)
if score == 1.0:
break
else:
if "totaltracks" in weights:
try:
a = int(self["totaltracks"])
if 'media' in release:
score = 0.0
for media in release['media']:
b = media.get('track-count', 0)
score = max(score, trackcount_score(a, b))
if score == 1.0:
break
else:
b = release['track-count']
score = trackcount_score(a, b)
parts.append((score, weights["totaltracks"]))
except (ValueError, KeyError):
pass
if "totalalbumtracks" in weights:
try:
a = int(self["~totalalbumtracks"] or self["totaltracks"])
b = release['track-count']
score = 0.0 if a > b else 0.3 if a < b else 1.0
parts.append((score, weights["totaltracks"]))
except (ValueError, KeyError):
pass
score = trackcount_score(a, b)
parts.append((score, weights["totalalbumtracks"]))
except (ValueError, KeyError):
pass
# Date Logic
date_match_factor = 0.0
if "date" in release and release['date'] != '':
release_date = release['date']
if "date" in self:
metadata_date = self['date']
if release_date == metadata_date:
# release has a date and it matches what our metadata had exactly.
date_match_factor = self.__date_match_factors['exact']
if "date" in weights:
if "date" in release and release['date'] != '':
release_date = release['date']
if "date" in self:
metadata_date = self['date']
if release_date == metadata_date:
# release has a date and it matches what our metadata had exactly.
date_match_factor = self.__date_match_factors['exact']
else:
release_year = extract_year_from_date(release_date)
if release_year is not None:
metadata_year = extract_year_from_date(metadata_date)
if metadata_year is not None:
if release_year == metadata_year:
# release has a date and it matches what our metadata had for year exactly.
date_match_factor = self.__date_match_factors['year']
elif abs(release_year - metadata_year) <= 2:
# release has a date and it matches what our metadata had closely (year +/- 2).
date_match_factor = self.__date_match_factors['close_year']
else:
# release has a date but it does not match ours (all else equal,
# it's better to have an unknown date than a wrong date, since
# the unknown could actually be correct)
date_match_factor = self.__date_match_factors['differed']
else:
release_year = extract_year_from_date(release_date)
if release_year is not None:
metadata_year = extract_year_from_date(metadata_date)
if metadata_year is not None:
if release_year == metadata_year:
# release has a date and it matches what our metadata had for year exactly.
date_match_factor = self.__date_match_factors['year']
elif abs(release_year - metadata_year) <= 2:
# release has a date and it matches what our metadata had closely (year +/- 2).
date_match_factor = self.__date_match_factors['close_year']
else:
# release has a date but it does not match ours (all else equal,
# it's better to have an unknown date than a wrong date, since
# the unknown could actually be correct)
date_match_factor = self.__date_match_factors['differed']
# release has a date but we don't have one (all else equal, we prefer
# tracks that have non-blank date values)
date_match_factor = self.__date_match_factors['exists_vs_null']
else:
# release has a date but we don't have one (all else equal, we prefer
# tracks that have non-blank date values)
date_match_factor = self.__date_match_factors['exists_vs_null']
else:
# release has no date (all else equal, we don't prefer this
# release since its date is missing)
date_match_factor = self.__date_match_factors['no_release_date']
# release has no date (all else equal, we don't prefer this
# release since its date is missing)
date_match_factor = self.__date_match_factors['no_release_date']
parts.append((date_match_factor, weights['date']))
parts.append((date_match_factor, weights['date']))
config = get_config()
weights_from_preferred_countries(parts, release,
config.setting["preferred_release_countries"],
weights["releasecountry"])
if "releasecountry" in weights:
weights_from_preferred_countries(parts, release,
config.setting["preferred_release_countries"],
weights["releasecountry"])
weights_from_preferred_formats(parts, release,
config.setting["preferred_release_formats"],
weights["format"])
if "format" in weights:
weights_from_preferred_formats(parts, release,
config.setting["preferred_release_formats"],
weights["format"])
if "releasetype" in weights:
weights_from_release_type_scores(parts, release,

View File

@@ -3,6 +3,7 @@
"packaging": null,
"disambiguation": "初回生産限定盤",
"barcode": "4547366518764",
"track-count": 7,
"media": [
{
"track-offset": 0,

View File

@@ -41,6 +41,7 @@ from picard.metadata import (
MULTI_VALUED_JOINER,
Metadata,
MultiMetadataProxy,
trackcount_score,
weights_from_preferred_countries,
weights_from_preferred_formats,
weights_from_release_type_scores,
@@ -570,22 +571,41 @@ class CommonTests:
match = metadata.compare_to_release(release, Cluster.comparison_weights)
self.assertEqual(sim, match.similarity)
def test_compare_to_release_parts_match_totaltracks(self):
def test_compare_to_release_parts_totaltracks(self):
release = load_test_json('release_multidisc.json')
metadata = Metadata()
weights = {
"totaltracks": 30,
"album": 1,
"date": 1,
"format": 1,
"releasecountry": 1,
}
weights = {"totaltracks": 30}
release_to_metadata(release, metadata)
for totaltracks, sim in ((4, 1.0), (3, 1.0), (2, 0.3), (5, 0.0)):
metadata['totaltracks'] = totaltracks
parts = metadata.compare_to_release_parts(release, weights)
self.assertIn((sim, 30), parts)
def test_compare_to_release_parts_totalalbumtracks(self):
    # With only the "totalalbumtracks" weight given, each value of the
    # ~totalalbumtracks tag must produce the paired score at weight 30.
    weight = 30
    weights = {"totalalbumtracks": weight}
    release = load_test_json('release_multidisc.json')
    metadata = Metadata()
    release_to_metadata(release, metadata)
    expected_scores = {7: 1.0, 6: 0.3, 8: 0.0}
    for track_count, expected_score in expected_scores.items():
        metadata['~totalalbumtracks'] = track_count
        parts = metadata.compare_to_release_parts(release, weights)
        self.assertIn((expected_score, weight), parts)
def test_compare_to_release_parts_totalalbumtracks_totaltracks_fallback(self):
    # Same expectations as the ~totalalbumtracks test, but the count is
    # supplied through the plain "totaltracks" tag instead.
    weight = 30
    weights = {"totalalbumtracks": weight}
    release = load_test_json('release_multidisc.json')
    metadata = Metadata()
    release_to_metadata(release, metadata)
    expected_scores = {7: 1.0, 6: 0.3, 8: 0.0}
    for track_count, expected_score in expected_scores.items():
        metadata['totaltracks'] = track_count
        parts = metadata.compare_to_release_parts(release, weights)
        self.assertIn((expected_score, weight), parts)
def test_trackcount_score(self):
    # Each row is (expected score, actual count, expected count):
    # equal counts, one track too many, one track too few.
    cases = (
        (1.0, 5, 5),
        (0.0, 6, 5),
        (0.3, 4, 5),
    )
    for expected_score, actual, expected in cases:
        self.assertEqual(expected_score, trackcount_score(actual, expected))
def test_weights_from_release_type_scores(self):
release = load_test_json('release.json')
parts = []