From 355c0a2ab0b9383aef0488850897cf3526b306d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sat, 8 Apr 2017 13:05:36 +0200 Subject: [PATCH 1/2] Add pure Python fallback for astrcmp --- picard/util/{astrcmp.c => _astrcmp.c} | 4 +-- picard/util/astrcmp.py | 36 +++++++++++++++++++++++++++ setup.py | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) rename picard/util/{astrcmp.c => _astrcmp.c} (99%) create mode 100644 picard/util/astrcmp.py diff --git a/picard/util/astrcmp.c b/picard/util/_astrcmp.c similarity index 99% rename from picard/util/astrcmp.c rename to picard/util/_astrcmp.c index 95641b4dc..7b8dd1803 100644 --- a/picard/util/astrcmp.c +++ b/picard/util/_astrcmp.c @@ -180,7 +180,7 @@ static struct PyModuleDef AstrcmpModule = }; PyMODINIT_FUNC -PyInit_astrcmp(void) +PyInit__astrcmp(void) { return PyModule_Create(&AstrcmpModule); -} \ No newline at end of file +} diff --git a/picard/util/astrcmp.py b/picard/util/astrcmp.py new file mode 100644 index 000000000..ff07cae6a --- /dev/null +++ b/picard/util/astrcmp.py @@ -0,0 +1,36 @@ +# http://hetland.org/coding/python/levenshtein.py + +# This is a straightforward implementation of a well-known algorithm, and thus +# probably shouldn't be covered by copyright to begin with. But in case it is, +# the author (Magnus Lie Hetland) has, to the extent possible under law, +# dedicated all copyright and related and neighboring rights to this software +# to the public domain worldwide, by distributing it under the CC0 license, +# version 1.0. This software is distributed without any warranty. For more +# information, see https://creativecommons.org/publicdomain/zero/1.0/ + + +def astrcmp_py(a,b): + "Calculates the Levenshtein distance between a and b." 
+ n, m = len(a), len(b) + if n > m: + # Make sure n <= m, to use O(min(n,m)) space + a,b = b,a + n,m = m,n + + current = range(n+1) + for i in range(1,m+1): + previous, current = current, [i]+[0]*n + for j in range(1,n+1): + add, delete = previous[j]+1, current[j-1]+1 + change = previous[j-1] + if a[j-1] != b[i-1]: + change = change + 1 + current[j] = min(add, delete, change) + + return 1.0 - float(current[n]) / max(m, n) + + +try: + from picard.util._astrcmp import astrcmp +except ImportError: + astrcmp = astrcmp_py diff --git a/setup.py b/setup.py index 2687c8818..a537e1975 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ from distutils.dist import Distribution from distutils.spawn import find_executable ext_modules = [ - Extension('picard.util.astrcmp', sources=['picard/util/astrcmp.c']), + Extension('picard.util._astrcmp', sources=['picard/util/_astrcmp.c']), ] py2app_exclude_modules = [ From bf7934d8489d145c7faf986872f523c2efeb12ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sat, 8 Apr 2017 15:28:24 +0200 Subject: [PATCH 2/2] Add astrcmp tests --- picard/util/astrcmp.py | 16 ++++++++++------ test/test_util_astrcmp.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 test/test_util_astrcmp.py diff --git a/picard/util/astrcmp.py b/picard/util/astrcmp.py index ff07cae6a..6822bc58f 100644 --- a/picard/util/astrcmp.py +++ b/picard/util/astrcmp.py @@ -14,23 +14,27 @@ def astrcmp_py(a,b): n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space - a,b = b,a - n,m = m,n + a, b = b, a + n, m = m, n + + if n == 0 or m == 0.0: + return 0.0 current = range(n+1) - for i in range(1,m+1): + for i in range(1, m+1): previous, current = current, [i]+[0]*n - for j in range(1,n+1): + for j in range(1, n+1): add, delete = previous[j]+1, current[j-1]+1 change = previous[j-1] if a[j-1] != b[i-1]: change = change + 1 current[j] = min(add, delete, change) - return 1.0 - 
float(current[n]) / max(m, n) + return 1.0 - current[n] / max(m, n) try: - from picard.util._astrcmp import astrcmp + from picard.util._astrcmp import astrcmp as astrcmp_c + astrcmp = astrcmp_c except ImportError: astrcmp = astrcmp_py diff --git a/test/test_util_astrcmp.py b/test/test_util_astrcmp.py new file mode 100644 index 000000000..5198f5ac2 --- /dev/null +++ b/test/test_util_astrcmp.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +import os +import os.path +import unittest +from picard.util.astrcmp import astrcmp_c, astrcmp_py + + +class AstrcmpBase(object): + func = None + + def test_astrcmp(self): + astrcmp = self.__class__.func + self.assertAlmostEqual(0.0, astrcmp(u"", u"")) + self.assertAlmostEqual(0.0, astrcmp(u"a", u"")) + self.assertAlmostEqual(0.0, astrcmp(u"", u"a")) + self.assertAlmostEqual(1.0, astrcmp(u"a", u"a")) + self.assertAlmostEqual(0.0, astrcmp(u"a", u"b")) + self.assertAlmostEqual(0.0, astrcmp(u"ab", u"ba")) + self.assertAlmostEqual(0.7083333333333333, astrcmp(u"The Great Gig in the Sky", u"Great Gig In The sky")) + + +class AstrcmpCTest(AstrcmpBase, unittest.TestCase): + func = astrcmp_c + + +class AstrcmpPyTest(AstrcmpBase, unittest.TestCase): + func = astrcmp_py