Fix typo: threshhold -> threshold

This commit is contained in:
Laurent Monin
2021-11-21 19:16:47 +01:00
parent 86de73d213
commit 02ca1e641b
2 changed files with 11 additions and 11 deletions

View File

@@ -51,17 +51,17 @@ SCRIPT_WEIGHTING_FACTORS = {
}
def detect_script_weighted(string_to_check, threshhold=0.0):
def detect_script_weighted(string_to_check, threshold=0.0):
"""Provide a dictionary of the unicode scripts found in the supplied string that meet
or exceed the specified weighting threshhold based on the number of characters matching
or exceed the specified weighting threshold based on the number of characters matching
the script as a weighted percentage of the number of characters matching all scripts.
Args:
string_to_check (str): The unicode string to check
threshhold (float, optional): Minimum threshhold to include in the results. Defaults to 0.
threshold (float, optional): Minimum threshold to include in the results. Defaults to 0.
Returns:
dict: Dictionary of the scripts represented in the string with their threshhold values.
dict: Dictionary of the scripts represented in the string with their weighted values.
"""
scripts = {}
total_weighting = 0
@@ -74,21 +74,21 @@ def detect_script_weighted(string_to_check, threshhold=0.0):
# Normalize weightings to a float between 0 and 1 inclusive.
for key in scripts:
scripts[key] /= total_weighting
return dict(filter(lambda item: item[1] >= threshhold, scripts.items()))
return dict(filter(lambda item: item[1] >= threshold, scripts.items()))
def list_script_weighted(string_to_check, threshhold=0.0):
def list_script_weighted(string_to_check, threshold=0.0):
"""Provide a list of the unicode scripts found in the supplied string that meet
or exceed the specified weighting threshhold based on the number of characters
or exceed the specified weighting threshold based on the number of characters
matching the script as a weighted percentage of the number of characters matching
all scripts. The list is sorted in descending order of weighted values.
Args:
string_to_check (str): The unicode string to check
threshhold (float, optional): Minimum threshhold to include in the results. Defaults to 0.
threshold (float, optional): Minimum threshold to include in the results. Defaults to 0.
Returns:
list: List of the scripts represented in the string sorted in descending order of weighted values.
"""
weighted_dict = detect_script_weighted(string_to_check, threshhold)
weighted_dict = detect_script_weighted(string_to_check, threshold)
return sorted(weighted_dict, key=weighted_dict.get, reverse=True)

View File

@@ -35,7 +35,7 @@ class WeightedScriptDetectionTest(PicardTestCase):
self.assertAlmostEqual(scripts['CYRILLIC'], 0.518, 3)
self.assertAlmostEqual(scripts['GREEK'], 0.287, 3)
scripts = detect_script_weighted("Latin, кириллический, Ελληνική", threshhold=0.5)
scripts = detect_script_weighted("Latin, кириллический, Ελληνική", threshold=0.5)
script_keys = list(scripts.keys())
self.assertEqual(script_keys, ["CYRILLIC"])
@@ -82,5 +82,5 @@ class ListScriptWeightedTest(PicardTestCase):
scripts = list_script_weighted("Cyrillic, кириллический, 汉字")
self.assertEqual(scripts, ['CYRILLIC', 'LATIN', 'CJK'])
scripts = list_script_weighted("Cyrillic, кириллический, 汉字", threshhold=0.3)
scripts = list_script_weighted("Cyrillic, кириллический, 汉字", threshold=0.3)
self.assertEqual(scripts, ['CYRILLIC', 'LATIN'])