mirror of
https://github.com/fergalmoran/picard.git
synced 2026-01-06 16:44:06 +00:00
* Replace custom Levenshtein distance implementation with difflib.SequenceMatcher
* Better files->album matching.
This commit is contained in:
@@ -159,7 +159,7 @@ class Album(DataObject):
|
||||
sim = file.orig_metadata.compare(track.metadata)
|
||||
if sim > bestMatch[0]:
|
||||
bestMatch = sim, track
|
||||
|
||||
|
||||
if bestMatch[1]:
|
||||
file.move_to_track(bestMatch[1])
|
||||
|
||||
|
||||
@@ -41,14 +41,11 @@ class Metadata(QtCore.QObject):
|
||||
parts = []
|
||||
|
||||
tags = {
|
||||
"musicbrainz_trackid": 10,
|
||||
"musicbrainz_artistid": 10,
|
||||
"musicbrainz_albumid": 10,
|
||||
"~#length": 16,
|
||||
"title": 14,
|
||||
"artist": 8,
|
||||
"album": 10,
|
||||
"tracknumber": 12,
|
||||
"title": 20,
|
||||
"artist": 6,
|
||||
"album": 12,
|
||||
"tracknumber": 5,
|
||||
}
|
||||
|
||||
identical = [
|
||||
@@ -60,25 +57,25 @@ class Metadata(QtCore.QObject):
|
||||
"discnumber",
|
||||
"totaldiscs",
|
||||
]
|
||||
|
||||
for tag in self.keys():
|
||||
if tag not in tags and not tag.startswith("~"):
|
||||
tags[tag] = 1
|
||||
|
||||
for tag in other.keys():
|
||||
if tag not in tags and not tag.startswith("~"):
|
||||
tags[tag] = 1
|
||||
|
||||
|
||||
#for tag in self.keys():
|
||||
# if tag not in tags and not tag.startswith("~"):
|
||||
# tags[tag] = 1
|
||||
|
||||
#for tag in other.keys():
|
||||
# if tag not in tags and not tag.startswith("~"):
|
||||
# tags[tag] = 1
|
||||
|
||||
for tag, weight in tags.items():
|
||||
if self[tag] and other[tag]:
|
||||
if tag in identical:
|
||||
sim = 1.0 - abs(cmp(self[tag], other[tag]))
|
||||
elif tag in ["~#length"]:
|
||||
elif tag == "~#length":
|
||||
sim = 1.0 - min(abs(self[tag] - other[tag]), 30000) / 30000.0
|
||||
else:
|
||||
sim = similarity(self[tag], other[tag])
|
||||
parts.append((sim, weight))
|
||||
|
||||
|
||||
total = reduce(lambda x, y: x + y[1], parts, 0.0)
|
||||
return reduce(lambda x, y: x + y[0] * y[1] / total, parts, 0.0)
|
||||
|
||||
|
||||
@@ -19,45 +19,14 @@
|
||||
|
||||
import math
|
||||
import re
|
||||
|
||||
|
||||
def distance(a, b):
    """Calculates the Levenshtein distance between a and b.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``a`` into ``b``.

    ``a`` and ``b`` may be any indexable sequences supporting ``len()``
    whose elements can be compared with ``!=`` (typically strings).
    Returns a non-negative integer; 0 means the sequences are equal.
    """
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n

    # Row 0 of the DP table: turning the empty prefix into a[:j] costs j.
    # An explicit list keeps the row type the same on every iteration.
    current = list(range(n + 1))
    for i in range(1, m + 1):
        # Only the previous row is needed to compute the current one.
        previous, current = current, [i] + [0] * n
        for j in range(1, n + 1):
            add, delete = previous[j] + 1, current[j - 1] + 1
            change = previous[j - 1]
            if a[j - 1] != b[i - 1]:
                change = change + 1
            current[j] = min(add, delete, change)

    return current[n]
|
||||
|
||||
|
||||
def boost(sim):
    """Raises a similarity score along an exponential curve.

    The score is mapped through ``(e**sim - 1) / (e - 1.2)``, capped at 1,
    and then raised to the power 0.8.  The result is never lower than the
    input, because the larger of the input and the boosted value is
    returned.

    ``sim`` must be non-negative: for a negative value the intermediate
    result is negative and ``math.pow`` with a fractional exponent raises
    ``ValueError``.
    """
    boosted = min(1, (math.exp(sim) - 1) / (math.e - 1.2))
    boosted = math.pow(boosted, 0.8)
    # Never return less than the caller passed in.
    return max(sim, boosted)
|
||||
from difflib import SequenceMatcher
|
||||
from picard.util import unaccent
|
||||
|
||||
|
||||
def raw_similarity(a, b):
    """Calculates raw similarity of strings ``a`` and ``b``.

    Returns a float between 0.0 and 1.0.  If either argument is empty
    (or otherwise false) the result is 0.0; otherwise it is the
    ``difflib.SequenceMatcher`` ratio of the two strings, which is 1.0
    for identical input.
    """
    if not a or not b:
        return 0.0
    return SequenceMatcher(None, a, b).ratio()
|
||||
|
||||
_split_re = re.compile("\W", re.UNICODE)
|
||||
_stop_words = ["the", "--", "in", "of", "a", "feat"]
|
||||
@@ -73,19 +42,23 @@ _replace_words = {
|
||||
"disc 8": "CD8",
|
||||
}
|
||||
|
||||
def similarity(a1, b1):
|
||||
"""Calculates "smart" similarity of strings ``a`` and ``b``."""
|
||||
a2 = a1
|
||||
b2 = b1
|
||||
def normalize(string):
|
||||
for w, r in _replace_words.items():
|
||||
a2 = a2.replace(w, r)
|
||||
b2 = b2.replace(w, r)
|
||||
def flt(a):
|
||||
def flt(a):
|
||||
return a not in _stop_words and len(a) > 1
|
||||
return u" ".join(filter(flt, _split_re.split(a.lower())))
|
||||
a2 = flt(a2)
|
||||
b2 = flt(b2)
|
||||
string = string.replace(w, r)
|
||||
string = string.lower()
|
||||
string = " ".join(filter(lambda a: a not in _stop_words and len(a) > 1,
|
||||
_split_re.split(string)))
|
||||
string = unaccent(string)
|
||||
return string
|
||||
|
||||
def similarity(a1, b1):
|
||||
return raw_similarity(a1, b1)
|
||||
"""Calculates "smart" similarity of strings ``a`` and ``b``."""
|
||||
a2 = normalize(a1)
|
||||
if a2:
|
||||
b2 = normalize(b1)
|
||||
else:
|
||||
b2 = ""
|
||||
sim1 = raw_similarity(a1, b1)
|
||||
if a2 or b2:
|
||||
sim2 = raw_similarity(a2, b2)
|
||||
|
||||
@@ -96,20 +96,38 @@ class Tagger(QtGui.QApplication, ComponentManager, Component):
|
||||
self.connect(self.window, QtCore.SIGNAL("addDirectory"), self.onAddDirectory)
|
||||
self.connect(self.worker, QtCore.SIGNAL("statusBarMessage(const QString &)"), self.window.setStatusBarMessage)
|
||||
self.connect(self.window, QtCore.SIGNAL("file_updated(int)"), QtCore.SIGNAL("file_updated(int)"))
|
||||
|
||||
|
||||
self.worker.start()
|
||||
self.browserIntegration.start()
|
||||
|
||||
|
||||
def match_files_to_album(self, files, album):
    """Moves each file to the best matching track of ``album``.

    Every (file, track) pair is scored with
    ``track.metadata.compare(file.orig_metadata)``.  Pairs are then
    processed from the highest score to the lowest:

    * pairs scoring 0.5 or less are ignored,
    * a file is moved at most once,
    * a track that already has a linked file with a strictly higher
      similarity is left alone.

    Files that match no track well enough are not moved.
    """
    matches = []
    for f in files:
        for track in album.tracks:
            sim = track.metadata.compare(f.orig_metadata)
            matches.append((sim, f, track))

    # Sort on the score only.  Sorting the whole tuples would fall back to
    # comparing the file/track objects whenever two scores are equal, which
    # is arbitrary at best and an error for objects that define no ordering.
    # The sort is stable, so ties keep their original order.
    matches.sort(key=lambda match: match[0], reverse=True)

    matched = []
    for sim, f, track in matches:
        if sim <= 0.5:
            continue
        if f in matched:
            continue
        if track.linked_file and track.linked_file.similarity > sim:
            continue
        f.move_to_track(track)
        matched.append(f)
|
||||
|
||||
def exit(self):
    """Stops the browser integration and then the worker."""
    self.browserIntegration.stop()
    self.worker.stop()
|
||||
|
||||
|
||||
def run(self):
    """Shows the main window, runs the event loop and shuts down.

    Returns whatever ``self.exec_()`` returns.  ``self.exit()`` is called
    afterwards in all cases, including when the event loop raises, so the
    services stopped by ``exit()`` are not left running.
    """
    self.window.show()
    try:
        return self.exec_()
    finally:
        self.exit()
|
||||
|
||||
|
||||
def setup_gettext(self, localeDir):
|
||||
"""Setup locales, load translations, install gettext functions."""
|
||||
if sys.platform == "win32":
|
||||
|
||||
@@ -147,7 +147,7 @@ class BaseTreeView(QtGui.QTreeWidget):
|
||||
mimeData.setData("application/picard.file-list", "\n".join(file_ids))
|
||||
print "\n".join(file_ids)
|
||||
return mimeData
|
||||
|
||||
|
||||
def dropFiles(self, files, target):
|
||||
# File -> Track
|
||||
if isinstance(target, Track):
|
||||
@@ -164,8 +164,7 @@ class BaseTreeView(QtGui.QTreeWidget):
|
||||
file.move_to_cluster(target.cluster)
|
||||
# File -> Album
|
||||
elif isinstance(target, Album):
|
||||
for file in files:
|
||||
target.matchFile(file)
|
||||
self.tagger.match_files_to_album(files, target)
|
||||
|
||||
def dropAlbums(self, albums, target):
|
||||
# Album -> Cluster
|
||||
@@ -289,7 +288,7 @@ class FileTreeView(BaseTreeView):
|
||||
|
||||
file.lock_for_read()
|
||||
try:
|
||||
metadata = file.metadata
|
||||
metadata = file.orig_metadata
|
||||
item.setText(0, metadata["title"])
|
||||
item.setText(1, format_time(metadata.get("~#length", 0)))
|
||||
item.setText(2, metadata["artist"])
|
||||
|
||||
@@ -410,7 +410,7 @@ class MainWindow(QtGui.QMainWindow):
|
||||
if obj.linked_file:
|
||||
orig_metadata = obj.linked_file.orig_metadata
|
||||
metadata = obj.linked_file.metadata
|
||||
statusBar = obj.linked_file.filename
|
||||
statusBar = "%s (%d%%)" % (obj.linked_file.filename, obj.linked_file.similarity * 100)
|
||||
file = obj.linked_file
|
||||
else:
|
||||
orig_metadata = obj.metadata
|
||||
|
||||
Reference in New Issue
Block a user