From ac9989fdea444c68bfb01883d14aa29a619befd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sun, 15 Oct 2006 16:14:31 +0200 Subject: [PATCH] Lowercase strings before calculating similarity. --- picard/album.py | 1 - picard/similarity.py | 29 ++++++++++++++--------------- picard/tagger.py | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/picard/album.py b/picard/album.py index 124d6c9f7..3bd5e34d4 100644 --- a/picard/album.py +++ b/picard/album.py @@ -172,7 +172,6 @@ class Album(DataObject): artist_id = self.metadata["musicbrainz_artistid"] artist_name = self.metadata["artist"] artist_sortname = self.metadata["artist_sortname"] - print artist_name tr = Track(extractUuid(track.id), track.title, Artist(artist_id, artist_name), self) tr.duration = track.duration or 0 diff --git a/picard/similarity.py b/picard/similarity.py index 55c430a01..be6c36da5 100644 --- a/picard/similarity.py +++ b/picard/similarity.py @@ -19,7 +19,7 @@ import math import re -from picard.util import unaccent +from picard.util import unaccent, strip_non_alnum from picard.util.astrcmp import astrcmp @@ -38,27 +38,26 @@ _replace_words = { } def normalize(string): - for w, r in _replace_words.items(): - string = string.replace(w, r) - string = string.lower() - string = " ".join(filter(lambda a: a not in _stop_words and len(a) > 1, - _split_re.split(string))) - string = unaccent(string) + string = strip_non_alnum(string.lower()) + #string = " ".join(filter(lambda a: a not in _stop_words and len(a) > 1, + # _split_re.split(string))) + #string = unaccent(string) return string def similarity(a1, b1): - return astrcmp(a1, b1) +# return astrcmp(a1, b1) """Calculates "smart" similarity of strings ``a`` and ``b``.""" a2 = normalize(a1) if a2: b2 = normalize(b1) else: b2 = "" - sim1 = raw_similarity(a1, b1) - if a2 or b2: - sim2 = raw_similarity(a2, b2) - sim = sim1 * 0.1 + sim2 * 0.9 - else: - sim = sim1 - return sim + return astrcmp(a2, b2) + #sim1 = astrcmp(a1, b1) + #if a2 or b2: + # sim2 = astrcmp(a2, b2) + # sim = sim1 * 0.1 + sim2 * 0.9 + #else: + # sim = sim1 + #return sim diff --git a/picard/tagger.py b/picard/tagger.py index 48489e321..74d831560 100644 --- a/picard/tagger.py +++ b/picard/tagger.py @@ -138,7 +138,7 @@ class Tagger(QtGui.QApplication, ComponentManager, Component): matches.sort(reverse=True) matched = [] for sim, file, track in matches: - if sim <= 0.5: + if sim <= 0.3: continue if file in matched: continue