mirror of
https://github.com/fergalmoran/picard.git
synced 2026-02-17 21:24:00 +00:00
PICARD-1972: Allow initializing Metadata with another Metadata object. If calling Metadata(other_metadata), make a full copy and don't just treat other_metadata as a dict. Apart from making the API more intuitive, this fixes issues with track metadata not being fully preserved when attaching files to tracks.
626 lines
22 KiB
Python
626 lines
22 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Picard, the next-generation MusicBrainz tagger
|
|
#
|
|
# Copyright (C) 2006-2008, 2011 Lukáš Lalinský
|
|
# Copyright (C) 2009, 2015, 2018-2020 Philipp Wolfer
|
|
# Copyright (C) 2011-2014 Michael Wiencek
|
|
# Copyright (C) 2012 Chad Wilson
|
|
# Copyright (C) 2012 Johannes Weißl
|
|
# Copyright (C) 2012-2014, 2018 Wieland Hoffmann
|
|
# Copyright (C) 2013-2014, 2016, 2018-2020 Laurent Monin
|
|
# Copyright (C) 2013-2014, 2017 Sophist-UK
|
|
# Copyright (C) 2016 Rahul Raturi
|
|
# Copyright (C) 2016-2017 Sambhav Kothari
|
|
# Copyright (C) 2017-2018 Antonio Larrosa
|
|
# Copyright (C) 2018 Vishal Choudhary
|
|
# Copyright (C) 2018 Xincognito10
|
|
# Copyright (C) 2020 Ray Bouchard
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
|
from collections import namedtuple
|
|
from collections.abc import (
|
|
Iterable,
|
|
MutableMapping,
|
|
)
|
|
from functools import partial
|
|
|
|
from PyQt5.QtCore import QObject
|
|
|
|
from picard import config
|
|
from picard.mbjson import (
|
|
artist_credit_from_node,
|
|
get_score,
|
|
)
|
|
from picard.plugin import (
|
|
PluginFunctions,
|
|
PluginPriority,
|
|
)
|
|
from picard.similarity import similarity2
|
|
from picard.util import (
|
|
extract_year_from_date,
|
|
linear_combination_of_weights,
|
|
)
|
|
from picard.util.imagelist import ImageList
|
|
from picard.util.tags import PRESERVED_TAGS
|
|
|
|
|
|
MULTI_VALUED_JOINER = '; '
|
|
|
|
# lengths difference over this number of milliseconds will give a score of 0.0
|
|
# equal lengths will give a score of 1.0
|
|
# example
|
|
# a b score
|
|
# 20000 0 0.333333333333
|
|
# 20000 10000 0.666666666667
|
|
# 20000 20000 1.0
|
|
# 20000 30000 0.666666666667
|
|
# 20000 40000 0.333333333333
|
|
# 20000 50000 0.0
|
|
LENGTH_SCORE_THRES_MS = 30000
|
|
|
|
SimMatchTrack = namedtuple('SimMatchTrack', 'similarity releasegroup release track')
|
|
SimMatchRelease = namedtuple('SimMatchRelease', 'similarity release')
|
|
|
|
|
|
def weights_from_release_type_scores(parts, release, release_type_scores,
|
|
weight_release_type=1):
|
|
# This function generates a score that determines how likely this release will be selected in a lookup.
|
|
# The score goes from 0 to 1 with 1 being the most likely to be chosen and 0 the least likely
|
|
# This score is based on the preferences of release-types found in this release
|
|
# This algorithm works by taking the scores of the primary type (and secondary if found) and averages them
|
|
# If no types are found, it is set to the score of the 'Other' type or 0.5 if 'Other' doesnt exist
|
|
# It appends (score, weight_release_type) to passed parts list
|
|
|
|
# if our preference is zero for the release_type, force to never return this recording
|
|
# by using a large zero weight. This means it only gets picked if there are no others at all.
|
|
skip_release = False
|
|
|
|
type_scores = dict(release_type_scores)
|
|
score = 0.0
|
|
if 'release-group' in release and 'primary-type' in release['release-group']:
|
|
types_found = [release['release-group']['primary-type']]
|
|
if 'secondary-types' in release['release-group']:
|
|
types_found += release['release-group']['secondary-types']
|
|
other_score = type_scores.get('Other', 0.5)
|
|
for release_type in types_found:
|
|
type_score = type_scores.get(release_type, other_score)
|
|
if type_score == 0:
|
|
skip_release = True
|
|
score += type_score
|
|
score /= len(types_found)
|
|
|
|
if skip_release:
|
|
parts.append((0, 9999))
|
|
else:
|
|
parts.append((score, weight_release_type))
|
|
|
|
|
|
def weights_from_preferred_countries(parts, release,
|
|
preferred_countries,
|
|
weight):
|
|
total_countries = len(preferred_countries)
|
|
if total_countries:
|
|
score = 0.0
|
|
if "country" in release:
|
|
try:
|
|
i = preferred_countries.index(release['country'])
|
|
score = float(total_countries - i) / float(total_countries)
|
|
except ValueError:
|
|
pass
|
|
parts.append((score, weight))
|
|
|
|
|
|
def weights_from_preferred_formats(parts, release, preferred_formats, weight):
|
|
total_formats = len(preferred_formats)
|
|
if total_formats and 'media' in release:
|
|
score = 0.0
|
|
subtotal = 0
|
|
for medium in release['media']:
|
|
if "format" in medium:
|
|
try:
|
|
i = preferred_formats.index(medium['format'])
|
|
score += float(total_formats - i) / float(total_formats)
|
|
except ValueError:
|
|
pass
|
|
subtotal += 1
|
|
if subtotal > 0:
|
|
score /= subtotal
|
|
parts.append((score, weight))
|
|
|
|
|
|
class Metadata(MutableMapping):
|
|
|
|
"""List of metadata items with dict-like access."""
|
|
|
|
__weights = [
|
|
('title', 22),
|
|
('artist', 6),
|
|
('album', 12),
|
|
('tracknumber', 6),
|
|
('totaltracks', 5),
|
|
('discnumber', 5),
|
|
('totaldiscs', 4),
|
|
]
|
|
|
|
__date_match_factors = {
|
|
'exact': 1.00,
|
|
'year': 0.95,
|
|
'close_year': 0.85,
|
|
'exists_vs_null': 0.65,
|
|
'no_release_date': 0.25,
|
|
'differed': 0.0
|
|
}
|
|
|
|
multi_valued_joiner = MULTI_VALUED_JOINER
|
|
|
|
def __init__(self, *args, deleted_tags=None, images=None, length=None, **kwargs):
|
|
self._store = dict()
|
|
self.deleted_tags = set()
|
|
self.length = 0
|
|
self.images = ImageList()
|
|
self.has_common_images = True
|
|
|
|
if args or kwargs:
|
|
self.update(*args, **kwargs)
|
|
if images is not None:
|
|
for image in images:
|
|
self.images.append(image)
|
|
if deleted_tags is not None:
|
|
for tag in deleted_tags:
|
|
del self[tag]
|
|
if length is not None:
|
|
self.length = int(length)
|
|
|
|
def __bool__(self):
|
|
return bool(len(self))
|
|
|
|
def __len__(self):
|
|
return len(self._store) + len(self.images)
|
|
|
|
@staticmethod
|
|
def length_score(a, b):
|
|
return (1.0 - min(abs(a - b),
|
|
LENGTH_SCORE_THRES_MS) / float(LENGTH_SCORE_THRES_MS))
|
|
|
|
def compare(self, other, ignored=None):
|
|
parts = []
|
|
if ignored is None:
|
|
ignored = []
|
|
|
|
if self.length and other.length and '~length' not in ignored:
|
|
score = self.length_score(self.length, other.length)
|
|
parts.append((score, 8))
|
|
|
|
for name, weight in self.__weights:
|
|
if name in ignored:
|
|
continue
|
|
a = self[name]
|
|
b = other[name]
|
|
if a and b:
|
|
if name in ('tracknumber', 'totaltracks', 'discnumber', 'totaldiscs'):
|
|
try:
|
|
ia = int(a)
|
|
ib = int(b)
|
|
except ValueError:
|
|
ia = a
|
|
ib = b
|
|
score = 1.0 - (int(ia != ib))
|
|
else:
|
|
score = similarity2(a, b)
|
|
parts.append((score, weight))
|
|
elif (a and name in other.deleted_tags
|
|
or b and name in self.deleted_tags):
|
|
parts.append((0, weight))
|
|
return linear_combination_of_weights(parts)
|
|
|
|
def compare_to_release(self, release, weights):
|
|
"""
|
|
Compare metadata to a MusicBrainz release. Produces a probability as a
|
|
linear combination of weights that the metadata matches a certain album.
|
|
"""
|
|
parts = self.compare_to_release_parts(release, weights)
|
|
sim = linear_combination_of_weights(parts) * get_score(release)
|
|
return SimMatchRelease(similarity=sim, release=release)
|
|
|
|
def compare_to_release_parts(self, release, weights):
|
|
parts = []
|
|
if "album" in self:
|
|
b = release['title']
|
|
parts.append((similarity2(self["album"], b), weights["album"]))
|
|
|
|
if "albumartist" in self and "albumartist" in weights:
|
|
a = self["albumartist"]
|
|
b = artist_credit_from_node(release['artist-credit'])[0]
|
|
parts.append((similarity2(a, b), weights["albumartist"]))
|
|
|
|
try:
|
|
a = int(self["totaltracks"])
|
|
b = release['track-count']
|
|
score = 0.0 if a > b else 0.3 if a < b else 1.0
|
|
parts.append((score, weights["totaltracks"]))
|
|
except (ValueError, KeyError):
|
|
pass
|
|
|
|
# Date Logic
|
|
date_match_factor = 0.0
|
|
if "date" in release and release['date'] != '':
|
|
release_date = release['date']
|
|
if "date" in self:
|
|
metadata_date = self['date']
|
|
if release_date == metadata_date:
|
|
# release has a date and it matches what our metadata had exactly.
|
|
date_match_factor = self.__date_match_factors['exact']
|
|
else:
|
|
release_year = extract_year_from_date(release_date)
|
|
if release_year is not None:
|
|
metadata_year = extract_year_from_date(metadata_date)
|
|
if metadata_year is not None:
|
|
if release_year == metadata_year:
|
|
# release has a date and it matches what our metadata had for year exactly.
|
|
date_match_factor = self.__date_match_factors['year']
|
|
elif abs(release_year - metadata_year) <= 2:
|
|
# release has a date and it matches what our metadata had closely (year +/- 2).
|
|
date_match_factor = self.__date_match_factors['close_year']
|
|
else:
|
|
# release has a date but it does not match ours (all else equal,
|
|
# its better to have an unknown date than a wrong date, since
|
|
# the unknown could actually be correct)
|
|
date_match_factor = self.__date_match_factors['differed']
|
|
else:
|
|
# release has a date but we don't have one (all else equal, we prefer
|
|
# tracks that have non-blank date values)
|
|
date_match_factor = self.__date_match_factors['exists_vs_null']
|
|
else:
|
|
# release has a no date (all else equal, we don't prefer this
|
|
# release since its date is missing)
|
|
date_match_factor = self.__date_match_factors['no_release_date']
|
|
|
|
parts.append((date_match_factor, weights['date']))
|
|
|
|
weights_from_preferred_countries(parts, release,
|
|
config.setting["preferred_release_countries"],
|
|
weights["releasecountry"])
|
|
|
|
weights_from_preferred_formats(parts, release,
|
|
config.setting["preferred_release_formats"],
|
|
weights["format"])
|
|
|
|
if "releasetype" in weights:
|
|
weights_from_release_type_scores(parts, release,
|
|
config.setting["release_type_scores"],
|
|
weights["releasetype"])
|
|
|
|
rg = QObject.tagger.get_release_group_by_id(release['release-group']['id'])
|
|
if release['id'] in rg.loaded_albums:
|
|
parts.append((1.0, 6))
|
|
|
|
return parts
|
|
|
|
def compare_to_track(self, track, weights):
|
|
parts = []
|
|
|
|
if 'title' in self:
|
|
a = self['title']
|
|
b = track.get('title', '')
|
|
parts.append((similarity2(a, b), weights["title"]))
|
|
|
|
if 'artist' in self:
|
|
a = self['artist']
|
|
artist_credits = track.get('artist-credit', [])
|
|
b = artist_credit_from_node(artist_credits)[0]
|
|
parts.append((similarity2(a, b), weights["artist"]))
|
|
|
|
a = self.length
|
|
if a > 0 and 'length' in track:
|
|
b = track['length']
|
|
score = self.length_score(a, b)
|
|
parts.append((score, weights["length"]))
|
|
|
|
releases = []
|
|
if "releases" in track:
|
|
releases = track['releases']
|
|
|
|
search_score = get_score(track)
|
|
if not releases:
|
|
sim = linear_combination_of_weights(parts) * search_score
|
|
return SimMatchTrack(similarity=sim, releasegroup=None, release=None, track=track)
|
|
|
|
if 'isvideo' in weights:
|
|
metadata_is_video = self['~video'] == '1'
|
|
track_is_video = track.get('video', False)
|
|
score = 1 if metadata_is_video == track_is_video else 0
|
|
parts.append((score, weights['isvideo']))
|
|
|
|
result = SimMatchTrack(similarity=-1, releasegroup=None, release=None, track=None)
|
|
for release in releases:
|
|
release_parts = self.compare_to_release_parts(release, weights)
|
|
sim = linear_combination_of_weights(parts + release_parts) * search_score
|
|
if sim > result.similarity:
|
|
rg = release['release-group'] if "release-group" in release else None
|
|
result = SimMatchTrack(similarity=sim, releasegroup=rg, release=release, track=track)
|
|
return result
|
|
|
|
def copy(self, other, copy_images=True):
|
|
self.clear()
|
|
self._update_from_metadata(other, copy_images)
|
|
|
|
def update(self, *args, **kwargs):
|
|
one_arg = len(args) == 1
|
|
if one_arg and (isinstance(args[0], self.__class__) or isinstance(args[0], MultiMetadataProxy)):
|
|
self._update_from_metadata(args[0])
|
|
elif one_arg and isinstance(args[0], MutableMapping):
|
|
# update from MutableMapping (ie. dict)
|
|
for k, v in args[0].items():
|
|
self[k] = v
|
|
elif args or kwargs:
|
|
# update from a dict-like constructor parameters
|
|
for k, v in dict(*args, **kwargs).items():
|
|
self[k] = v
|
|
else:
|
|
# no argument, raise TypeError to mimic dict.update()
|
|
raise TypeError("descriptor 'update' of '%s' object needs an argument" % self.__class__.__name__)
|
|
|
|
def diff(self, other):
|
|
"""Returns a new Metadata object with only the tags that changed in self compared to other"""
|
|
m = Metadata()
|
|
for tag, values in self.rawitems():
|
|
other_values = other.getall(tag)
|
|
if other_values != values:
|
|
m[tag] = values
|
|
m.deleted_tags = self.deleted_tags - other.deleted_tags
|
|
return m
|
|
|
|
def _update_from_metadata(self, other, copy_images=True):
|
|
for k, v in other.rawitems():
|
|
self.set(k, v[:])
|
|
|
|
for tag in other.deleted_tags:
|
|
del self[tag]
|
|
|
|
if copy_images and other.images:
|
|
self.images = other.images.copy()
|
|
if other.length:
|
|
self.length = other.length
|
|
|
|
def clear(self):
|
|
self._store.clear()
|
|
self.images = ImageList()
|
|
self.length = 0
|
|
self.clear_deleted()
|
|
|
|
def clear_deleted(self):
|
|
self.deleted_tags = set()
|
|
|
|
@staticmethod
|
|
def normalize_tag(name):
|
|
return name.rstrip(':')
|
|
|
|
def getall(self, name):
|
|
return self._store.get(self.normalize_tag(name), [])
|
|
|
|
def getraw(self, name):
|
|
return self._store[self.normalize_tag(name)]
|
|
|
|
def get(self, key, default=None):
|
|
values = self._store.get(self.normalize_tag(key), None)
|
|
if values:
|
|
return self.multi_valued_joiner.join(values)
|
|
else:
|
|
return default
|
|
|
|
def __getitem__(self, name):
|
|
return self.get(name, '')
|
|
|
|
def set(self, name, values):
|
|
name = self.normalize_tag(name)
|
|
if isinstance(values, str) or not isinstance(values, Iterable):
|
|
values = [values]
|
|
values = [str(value) for value in values if value or value == 0]
|
|
if values:
|
|
self._store[name] = values
|
|
self.deleted_tags.discard(name)
|
|
elif name in self._store:
|
|
del self[name]
|
|
|
|
def __setitem__(self, name, values):
|
|
self.set(name, values)
|
|
|
|
def __contains__(self, name):
|
|
return self._store.__contains__(self.normalize_tag(name))
|
|
|
|
def __delitem__(self, name):
|
|
name = self.normalize_tag(name)
|
|
try:
|
|
del self._store[name]
|
|
except KeyError:
|
|
pass
|
|
finally:
|
|
self.deleted_tags.add(name)
|
|
|
|
def add(self, name, value):
|
|
if value or value == 0:
|
|
name = self.normalize_tag(name)
|
|
self._store.setdefault(name, []).append(str(value))
|
|
self.deleted_tags.discard(name)
|
|
|
|
def add_unique(self, name, value):
|
|
name = self.normalize_tag(name)
|
|
if value not in self.getall(name):
|
|
self.add(name, value)
|
|
|
|
def delete(self, name):
|
|
"""Deprecated: use del directly"""
|
|
del self[self.normalize_tag(name)]
|
|
|
|
def unset(self, name):
|
|
"""Removes a tag from the metadata, but does not mark it for deletion.
|
|
|
|
Args:
|
|
name: name of the tag to unset
|
|
"""
|
|
name = self.normalize_tag(name)
|
|
try:
|
|
del self._store[name]
|
|
except KeyError:
|
|
pass
|
|
|
|
def __iter__(self):
|
|
return iter(self._store)
|
|
|
|
def items(self):
|
|
for name, values in self._store.items():
|
|
for value in values:
|
|
yield name, value
|
|
|
|
def rawitems(self):
|
|
"""Returns the metadata items.
|
|
|
|
>>> m.rawitems()
|
|
[("key1", ["value1", "value2"]), ("key2", ["value3"])]
|
|
"""
|
|
return self._store.items()
|
|
|
|
def apply_func(self, func):
|
|
for name, values in list(self.rawitems()):
|
|
if name not in PRESERVED_TAGS:
|
|
self[name] = [func(value) for value in values]
|
|
|
|
def strip_whitespace(self):
|
|
"""Strip leading/trailing whitespace.
|
|
|
|
>>> m = Metadata()
|
|
>>> m["foo"] = " bar "
|
|
>>> m["foo"]
|
|
" bar "
|
|
>>> m.strip_whitespace()
|
|
>>> m["foo"]
|
|
"bar"
|
|
"""
|
|
self.apply_func(str.strip)
|
|
|
|
def __repr__(self):
|
|
return "%s(%r, deleted_tags=%r, length=%r, images=%r)" % (self.__class__.__name__, self._store, self.deleted_tags, self.length, self.images)
|
|
|
|
def __str__(self):
|
|
return ("store: %r\ndeleted: %r\nimages: %r\nlength: %r" % (self._store, self.deleted_tags, [str(img) for img in self.images], self.length))
|
|
|
|
|
|
class MultiMetadataProxy:
|
|
"""
|
|
Wraps a writable Metadata object together with another
|
|
readonly Metadata object.
|
|
|
|
Changes are written to the writable object, while values are
|
|
read from both the writable and the readonly object (with the writable
|
|
object taking precedence). The use case is to provide access to Metadata
|
|
values without making them part of the actual Metadata. E.g. allow track
|
|
metadata to use file specific metadata, without making it actually part
|
|
of the track.
|
|
"""
|
|
|
|
WRITE_METHODS = [
|
|
'add_unique',
|
|
'add',
|
|
'apply_func',
|
|
'clear_deleted',
|
|
'clear',
|
|
'copy',
|
|
'delete',
|
|
'set',
|
|
'strip_whitespace',
|
|
'unset',
|
|
'update',
|
|
]
|
|
|
|
def __init__(self, metadata, *readonly_metadata):
|
|
self.metadata = metadata
|
|
self.combined_metadata = Metadata()
|
|
for m in reversed(readonly_metadata):
|
|
self.combined_metadata.update(m)
|
|
self.combined_metadata.update(metadata)
|
|
|
|
def __getattr__(self, name):
|
|
if name in self.WRITE_METHODS:
|
|
return partial(self.__write, name)
|
|
else:
|
|
attribute = self.combined_metadata.__getattribute__(name)
|
|
if callable(attribute):
|
|
return partial(self.__read, name)
|
|
else:
|
|
return attribute
|
|
|
|
def __setattr__(self, name, value):
|
|
if name in ('metadata', 'combined_metadata'):
|
|
super().__setattr__(name, value)
|
|
else:
|
|
self.metadata.__setattr__(name, value)
|
|
self.combined_metadata.__setattr__(name, value)
|
|
|
|
def __write(self, name, *args, **kwargs):
|
|
func1 = self.metadata.__getattribute__(name)
|
|
func2 = self.combined_metadata.__getattribute__(name)
|
|
func1(*args, **kwargs)
|
|
return func2(*args, **kwargs)
|
|
|
|
def __read(self, name, *args, **kwargs):
|
|
func = self.combined_metadata.__getattribute__(name)
|
|
return func(*args, **kwargs)
|
|
|
|
def __getitem__(self, name):
|
|
return self.__read('__getitem__', name)
|
|
|
|
def __setitem__(self, name, values):
|
|
return self.__write('__setitem__', name, values)
|
|
|
|
def __delitem__(self, name):
|
|
return self.__write('__delitem__', name)
|
|
|
|
def __iter__(self):
|
|
return self.__read('__iter__')
|
|
|
|
def __len__(self):
|
|
return self.__read('__len__')
|
|
|
|
def __contains__(self, name):
|
|
return self.__read('__contains__', name)
|
|
|
|
def __repr__(self):
|
|
return self.__read('__repr__')
|
|
|
|
|
|
_album_metadata_processors = PluginFunctions(label='album_metadata_processors')
|
|
_track_metadata_processors = PluginFunctions(label='track_metadata_processors')
|
|
|
|
|
|
def register_album_metadata_processor(function, priority=PluginPriority.NORMAL):
|
|
"""Registers new album-level metadata processor."""
|
|
_album_metadata_processors.register(function.__module__, function, priority)
|
|
|
|
|
|
def register_track_metadata_processor(function, priority=PluginPriority.NORMAL):
|
|
"""Registers new track-level metadata processor."""
|
|
_track_metadata_processors.register(function.__module__, function, priority)
|
|
|
|
|
|
def run_album_metadata_processors(album_object, metadata, release):
|
|
_album_metadata_processors.run(album_object, metadata, release)
|
|
|
|
|
|
def run_track_metadata_processors(album_object, metadata, track, release=None):
|
|
_track_metadata_processors.run(album_object, metadata, track, release)
|