From 203933ff9f9620f19fbba24c585da52c12fc09fd Mon Sep 17 00:00:00 2001 From: Laurent Monin Date: Thu, 3 Feb 2022 10:31:54 +0100 Subject: [PATCH 1/2] Add few tests for translate_artist_names_script_exception --- test/data/ws_data/artist_arabic.json | 90 ++++++++++++++++++++++++++++ test/test_mbjson.py | 56 +++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 test/data/ws_data/artist_arabic.json diff --git a/test/data/ws_data/artist_arabic.json b/test/data/ws_data/artist_arabic.json new file mode 100644 index 000000000..8eb5d5aff --- /dev/null +++ b/test/data/ws_data/artist_arabic.json @@ -0,0 +1,90 @@ +{ + "area": { + "type-id": null, + "id": "8e0551f2-95c2-3cc0-a0a9-f2d344f10667", + "name": "Egypt", + "disambiguation": "", + "sort-name": "Egypt", + "iso-3166-1-codes": [ + "EG" + ], + "type": null + }, + "begin-area": { + "type-id": null, + "disambiguation": "", + "id": "cf82cb78-741a-46e8-8448-13b824261ca0", + "name": "Asw\u0101n", + "sort-name": "Asw\u0101n", + "iso-3166-2-codes": [ + "EG-ASN" + ], + "type": null + }, + "country": "EG", + "isnis": [ + "0000000081697225" + ], + "sort-name": "Mounir, Mohamed", + "end-area": null, + "life-span": { + "begin": "1954-10-10", + "ended": false, + "end": null + }, + "begin_area": { + "type-id": null, + "disambiguation": "", + "id": "cf82cb78-741a-46e8-8448-13b824261ca0", + "name": "Asw\u0101n", + "sort-name": "Asw\u0101n", + "iso-3166-2-codes": [ + "EG-ASN" + ], + "type": null + }, + "disambiguation": "", + "gender": "Male", + "type-id": "b6e035f4-3ce9-331c-97df-83397230b0df", + "type": "Person", + "aliases": [ + { + "type": null, + "primary": null, + "name": "Mohamed Moneer", + "end": null, + "ended": false, + "sort-name": "Mohamed Moneer", + "locale": null, + "begin": null, + "type-id": null + }, + { + "ended": false, + "end": null, + "type": "Artist name", + "name": "Mohamed Mounir", + "primary": true, + "type-id": "894afba6-2816-3c24-8072-eadb66bd04bc", + "begin": null, + "locale": "en", + "sort-name": "Mounir, Mohamad" + }, + { + "type-id": null, + "begin": null, + "locale": null, + "sort-name": "Mohamed Mounir", + "ended": false, + "end": null, + "primary": null, + "type": null, + "name": "Mohamed Mounir" + } + ], + "id": "5235052b-7fa0-498b-accf-26b9e7767da7", + "ipis": [], + "name": "\u0645\u062d\u0645\u062f \u0645\u0646\u064a\u0631", + "gender-id": "36d3d30a-839d-3eda-8cb3-29be4384e4a9", + "end_area": null +} diff --git a/test/test_mbjson.py b/test/test_mbjson.py index af275c20c..41be22782 100644 --- a/test/test_mbjson.py +++ b/test/test_mbjson.py @@ -458,6 +458,23 @@ class ArtistTranslationTest(MBJSONTest): (artist_name, artist_sort_name) = _translate_artist_node(self.json_doc) self.assertEqual(artist_name, 'Ed Sheeran (en_CA)') + def test_locale_specific_match_first_exc(self): + settings = { + "standardize_tracks": False, + "standardize_artists": False, + "standardize_releases": False, + "translate_artist_names": True, + "translate_artist_names_script_exception": True, + "script_exceptions": [("LATIN", 0)], + "standardize_instruments": True, + "release_ars": True, + "preferred_release_countries": [], + "artist_locales": ['en_CA', 'en'], + } + self.set_config_values(settings) + (artist_name, artist_sort_name) = _translate_artist_node(self.json_doc) + self.assertEqual(artist_name, 'Ed Sheeran') + def test_locale_specific_match_second(self): settings = { "standardize_tracks": False, @@ -507,6 +524,45 @@ class ArtistTranslationTest(MBJSONTest): self.assertEqual(artist_name, 'Ed Sheeran') +class ArtistTranslationArabicExceptionsTest(MBJSONTest): + + filename = 'artist_arabic.json' + + def test_locale_specific_match_first_exc1(self): + settings = { + "standardize_tracks": False, + "standardize_artists": False, + "standardize_releases": False, + "translate_artist_names": True, + "translate_artist_names_script_exception": True, + "script_exceptions": [("LATIN", 0)], + "standardize_instruments": True, + "release_ars": True, + "preferred_release_countries": [], + "artist_locales": ['en_CA', 'en'], + } + self.set_config_values(settings) + (artist_name, artist_sort_name) = _translate_artist_node(self.json_doc) + self.assertEqual(artist_name, 'Mohamed Mounir') + + def test_locale_specific_match_first_exc2(self): + settings = { + "standardize_tracks": False, + "standardize_artists": False, + "standardize_releases": False, + "translate_artist_names": True, + "translate_artist_names_script_exception": True, + "script_exceptions": [("ARABIC", 0)], + "standardize_instruments": True, + "release_ars": True, + "preferred_release_countries": [], + "artist_locales": ['en_CA', 'en'], + } + self.set_config_values(settings) + (artist_name, artist_sort_name) = _translate_artist_node(self.json_doc) + self.assertEqual(artist_name, 'محمد منير') + + class ReleaseGroupTest(MBJSONTest): filename = 'release_group.json' From 310c2baed0a1620271ae7e95f3710db0880120ce Mon Sep 17 00:00:00 2001 From: Laurent Monin Date: Thu, 3 Feb 2022 10:32:43 +0100 Subject: [PATCH 2/2] _translate_artist_node(): shorten long lines a bit --- picard/mbjson.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/picard/mbjson.py b/picard/mbjson.py index dbac9bc5e..12ba27d55 100644 --- a/picard/mbjson.py +++ b/picard/mbjson.py @@ -207,18 +207,23 @@ def _translate_artist_node(node, config=None): detected_scripts = detect_script_weighted(node["name"]) if detected_scripts: log_text += "; ".join( - list("{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100) for scr_id in detected_scripts) + "{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100) + for scr_id in detected_scripts ) else: log_text += "None" log.debug(log_text) if detected_scripts: - if config.setting["script_exceptions"]: + script_exceptions = config.setting["script_exceptions"] + if script_exceptions: log_text = " found in selected scripts: " + "; ".join( - list("{0} ({1}%)".format(scr[0], scr[1]) for scr in config.setting["script_exceptions"]) + "{0} ({1}%)".format(scr[0], scr[1]) + for scr in script_exceptions ) - for script_id, script_weighting in config.setting["script_exceptions"]: - if script_id in detected_scripts and detected_scripts[script_id] >= script_weighting / 100: + for script_id, script_weighting in script_exceptions: + if script_id not in detected_scripts: + continue + if detected_scripts[script_id] >= script_weighting / 100: log.debug("Match" + log_text) return node['name'], node['sort-name'] log.debug("No match" + log_text)