diff --git a/picard/album.py b/picard/album.py index c0b77e0b5..38d406821 100644 --- a/picard/album.py +++ b/picard/album.py @@ -159,7 +159,7 @@ class Album(DataObject): sim = file.orig_metadata.compare(track.metadata) if sim > bestMatch[0]: bestMatch = sim, track - + if bestMatch[1]: file.move_to_track(bestMatch[1]) diff --git a/picard/metadata.py b/picard/metadata.py index 2b617cbfb..376d7c5ed 100644 --- a/picard/metadata.py +++ b/picard/metadata.py @@ -41,14 +41,11 @@ class Metadata(QtCore.QObject): parts = [] tags = { - "musicbrainz_trackid": 10, - "musicbrainz_artistid": 10, - "musicbrainz_albumid": 10, "~#length": 16, - "title": 14, - "artist": 8, - "album": 10, - "tracknumber": 12, + "title": 20, + "artist": 6, + "album": 12, + "tracknumber": 5, } identical = [ @@ -60,25 +57,25 @@ class Metadata(QtCore.QObject): "discnumber", "totaldiscs", ] - - for tag in self.keys(): - if tag not in tags and not tag.startswith("~"): - tags[tag] = 1 - - for tag in other.keys(): - if tag not in tags and not tag.startswith("~"): - tags[tag] = 1 - + + #for tag in self.keys(): + # if tag not in tags and not tag.startswith("~"): + # tags[tag] = 1 + + #for tag in other.keys(): + # if tag not in tags and not tag.startswith("~"): + # tags[tag] = 1 + for tag, weight in tags.items(): if self[tag] and other[tag]: if tag in identical: sim = 1.0 - abs(cmp(self[tag], other[tag])) - elif tag in ["~#length"]: + elif tag == "~#length": sim = 1.0 - min(abs(self[tag] - other[tag]), 30000) / 30000.0 else: sim = similarity(self[tag], other[tag]) parts.append((sim, weight)) - + total = reduce(lambda x, y: x + y[1], parts, 0.0) return reduce(lambda x, y: x + y[0] * y[1] / total, parts, 0.0) diff --git a/picard/similarity.py b/picard/similarity.py index 776267808..c56e395d6 100644 --- a/picard/similarity.py +++ b/picard/similarity.py @@ -19,45 +19,14 @@ import math import re - - -def distance(a,b): - """Calculates the Levenshtein distance between a and b.""" - - n, m = len(a), len(b) - if n > m: - # Make sure n <= m, to use O(min(n,m)) space - a,b = b,a - n,m = m,n - - current = range(n+1) - for i in range(1,m+1): - previous, current = current, [i]+[0]*n - for j in range(1,n+1): - add, delete = previous[j]+1, current[j-1]+1 - change = previous[j-1] - if a[j-1] != b[i-1]: - change = change + 1 - current[j] = min(add, delete, change) - - return current[n] - - -def boost(sim): - sim2 = sim - sim = min(1, (math.exp(sim) - 1) / (math.e - 1.2)) - sim = math.pow(sim, 0.8) - sim = max(sim2, sim) - return sim +from difflib import SequenceMatcher +from picard.util import unaccent def raw_similarity(a, b): """Calculates raw similarity of strings ``a`` and ``b``.""" - if not a or not b: - return 0.0 - sim = 1 - distance(a, b) * 1.0 / max(len(a), len(b)) - return boost(sim) - + d = SequenceMatcher(None, a, b).ratio() + return d _split_re = re.compile("\W", re.UNICODE) _stop_words = ["the", "--", "in", "of", "a", "feat"] @@ -73,19 +42,23 @@ _replace_words = { "disc 8": "CD8", } -def similarity(a1, b1): - """Calculates "smart" similarity of strings ``a`` and ``b``.""" - a2 = a1 - b2 = b1 +def normalize(string): for w, r in _replace_words.items(): - a2 = a2.replace(w, r) - b2 = b2.replace(w, r) - def flt(a): - def flt(a): - return a not in _stop_words and len(a) > 1 - return u" ".join(filter(flt, _split_re.split(a.lower()))) - a2 = flt(a2) - b2 = flt(b2) + string = string.replace(w, r) + string = string.lower() + string = " ".join(filter(lambda a: a not in _stop_words and len(a) > 1, + _split_re.split(string))) + string = unaccent(string) + return string + +def similarity(a1, b1): + return raw_similarity(a1, b1) + """Calculates "smart" similarity of strings ``a`` and ``b``.""" + a2 = normalize(a1) + if a2: + b2 = normalize(b1) + else: + b2 = "" sim1 = raw_similarity(a1, b1) if a2 or b2: sim2 = raw_similarity(a2, b2) diff --git a/picard/tagger.py b/picard/tagger.py index bccff13ec..5064f8e80 100644 --- a/picard/tagger.py +++ b/picard/tagger.py @@ -96,20 +96,38 @@ class Tagger(QtGui.QApplication, ComponentManager, Component): self.connect(self.window, QtCore.SIGNAL("addDirectory"), self.onAddDirectory) self.connect(self.worker, QtCore.SIGNAL("statusBarMessage(const QString &)"), self.window.setStatusBarMessage) self.connect(self.window, QtCore.SIGNAL("file_updated(int)"), QtCore.SIGNAL("file_updated(int)")) - + self.worker.start() self.browserIntegration.start() - + + def match_files_to_album(self, files, album): + matches = [] + for file in files: + for track in album.tracks: + sim = track.metadata.compare(file.orig_metadata) + matches.append((sim, file, track)) + matches.sort(reverse=True) + matched = [] + for sim, file, track in matches: + if sim <= 0.5: + continue + if file in matched: + continue + if track.linked_file and track.linked_file.similarity > sim: + continue + file.move_to_track(track) + matched.append(file) + def exit(self): self.browserIntegration.stop() self.worker.stop() - + def run(self): self.window.show() res = self.exec_() self.exit() return res - + def setup_gettext(self, localeDir): """Setup locales, load translations, install gettext functions.""" if sys.platform == "win32": diff --git a/picard/ui/itemviews.py b/picard/ui/itemviews.py index cd311753c..bf342f46b 100644 --- a/picard/ui/itemviews.py +++ b/picard/ui/itemviews.py @@ -147,7 +147,7 @@ class BaseTreeView(QtGui.QTreeWidget): mimeData.setData("application/picard.file-list", "\n".join(file_ids)) print "\n".join(file_ids) return mimeData - + def dropFiles(self, files, target): # File -> Track if isinstance(target, Track): @@ -164,8 +164,7 @@ class BaseTreeView(QtGui.QTreeWidget): file.move_to_cluster(target.cluster) # File -> Album elif isinstance(target, Album): - for file in files: - target.matchFile(file) + self.tagger.match_files_to_album(files, target) def dropAlbums(self, albums, target): # Album -> Cluster @@ -289,7 +288,7 @@ class FileTreeView(BaseTreeView): file.lock_for_read() try: - metadata = file.metadata + metadata = file.orig_metadata item.setText(0, metadata["title"]) item.setText(1, format_time(metadata.get("~#length", 0))) item.setText(2, metadata["artist"]) diff --git a/picard/ui/mainwindow.py b/picard/ui/mainwindow.py index 42c699bdf..7154fc251 100644 --- a/picard/ui/mainwindow.py +++ b/picard/ui/mainwindow.py @@ -410,7 +410,7 @@ class MainWindow(QtGui.QMainWindow): if obj.linked_file: orig_metadata = obj.linked_file.orig_metadata metadata = obj.linked_file.metadata - statusBar = obj.linked_file.filename + statusBar = "%s (%d%%)" % (obj.linked_file.filename, obj.linked_file.similarity * 100) file = obj.linked_file else: orig_metadata = obj.metadata