Merge pull request #689 from lalinsky/py-astrcmp

Add pure Python fallback for astrcmp
This commit is contained in:
Lukáš Lalinský
2017-04-08 15:39:29 +02:00
committed by GitHub
4 changed files with 71 additions and 3 deletions

View File

@@ -180,7 +180,7 @@ static struct PyModuleDef AstrcmpModule =
};
PyMODINIT_FUNC
PyInit_astrcmp(void)
PyInit__astrcmp(void)
{
return PyModule_Create(&AstrcmpModule);
}
}

40
picard/util/astrcmp.py Normal file
View File

@@ -0,0 +1,40 @@
# http://hetland.org/coding/python/levenshtein.py
# This is a straightforward implementation of a well-known algorithm, and thus
# probably shouldn't be covered by copyright to begin with. But in case it is,
# the author (Magnus Lie Hetland) has, to the extent possible under law,
# dedicated all copyright and related and neighboring rights to this software
# to the public domain worldwide, by distributing it under the CC0 license,
# version 1.0. This software is distributed without any warranty. For more
# information, see <http://creativecommons.org/publicdomain/zero/1.0>
def astrcmp_py(a, b):
    """Return a similarity score for two strings based on edit distance.

    The score is ``1.0 - distance / max(len(a), len(b))``, where ``distance``
    is the Levenshtein edit distance (insertions, deletions and
    substitutions each cost 1). Identical non-empty strings score 1.0.

    If either string is empty the function returns 0.0, including when both
    are empty.

    :param a: first sequence (anything supporting ``len`` and indexing)
    :param b: second sequence
    :return: similarity as a float
    """
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n
    # After the swap n <= m, so checking the shorter length covers both.
    if n == 0:
        return 0.0
    # Only two rows of the distance matrix are kept at any time.
    current = list(range(n + 1))
    for i in range(1, m + 1):
        previous, current = current, [i] + [0] * n
        for j in range(1, n + 1):
            add = previous[j] + 1
            delete = current[j - 1] + 1
            change = previous[j - 1]
            if a[j - 1] != b[i - 1]:
                change += 1
            current[j] = min(add, delete, change)
    # m is the longer length here; force float division so the result does
    # not depend on the interpreter's integer-division semantics.
    return 1.0 - float(current[n]) / m
# Prefer the compiled implementation when the extension module can be
# imported; otherwise fall back to the pure Python one defined above.
try:
    from picard.util._astrcmp import astrcmp as astrcmp_c
except ImportError:
    # Keep the name defined so that importing ``astrcmp_c`` from this module
    # does not itself raise ImportError when the extension is missing.
    astrcmp_c = None
    astrcmp = astrcmp_py
else:
    astrcmp = astrcmp_c

View File

@@ -35,7 +35,7 @@ from distutils.dist import Distribution
from distutils.spawn import find_executable
ext_modules = [
Extension('picard.util.astrcmp', sources=['picard/util/astrcmp.c']),
Extension('picard.util._astrcmp', sources=['picard/util/_astrcmp.c']),
]
py2app_exclude_modules = [

28
test/test_util_astrcmp.py Normal file
View File

@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
import os
import os.path
import unittest
from picard.util.astrcmp import astrcmp_c, astrcmp_py
class AstrcmpBase(object):
    """Shared astrcmp test cases.

    Concrete test classes set ``func`` to the implementation to exercise.
    """

    # Implementation under test; overridden by subclasses.
    func = None

    def test_astrcmp(self):
        """Check the expected score for a fixed list of string pairs."""
        # Fetched from the class rather than the instance, so a plain Python
        # function is called with just the two strings.
        compare = type(self).func
        cases = (
            (0.0, u"", u""),
            (0.0, u"a", u""),
            (0.0, u"", u"a"),
            (1.0, u"a", u"a"),
            (0.0, u"a", u"b"),
            (0.0, u"ab", u"ba"),
            (0.7083333333333333, u"The Great Gig in the Sky", u"Great Gig In The sky"),
        )
        for expected, left, right in cases:
            self.assertAlmostEqual(expected, compare(left, right))
class AstrcmpCTest(AstrcmpBase, unittest.TestCase):
    """Run the shared astrcmp cases against ``astrcmp_c``."""

    func = astrcmp_c
class AstrcmpPyTest(AstrcmpBase, unittest.TestCase):
    """Run the shared astrcmp cases against ``astrcmp_py``."""

    func = astrcmp_py