Merge pull request #2058 from zas/translate_artist_node_cleanup

Translate artist node cleanup
This commit is contained in:
Philipp Wolfer
2022-02-03 17:04:54 +01:00
committed by GitHub
3 changed files with 156 additions and 5 deletions

View File

@@ -207,18 +207,23 @@ def _translate_artist_node(node, config=None):
detected_scripts = detect_script_weighted(node["name"])
if detected_scripts:
log_text += "; ".join(
list("{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100) for scr_id in detected_scripts)
"{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100)
for scr_id in detected_scripts
)
else:
log_text += "None"
log.debug(log_text)
if detected_scripts:
if config.setting["script_exceptions"]:
script_exceptions = config.setting["script_exceptions"]
if script_exceptions:
log_text = " found in selected scripts: " + "; ".join(
list("{0} ({1}%)".format(scr[0], scr[1]) for scr in config.setting["script_exceptions"])
"{0} ({1}%)".format(scr[0], scr[1])
for scr in script_exceptions
)
for script_id, script_weighting in config.setting["script_exceptions"]:
if script_id in detected_scripts and detected_scripts[script_id] >= script_weighting / 100:
for script_id, script_weighting in script_exceptions:
if script_id not in detected_scripts:
continue
if detected_scripts[script_id] >= script_weighting / 100:
log.debug("Match" + log_text)
return node['name'], node['sort-name']
log.debug("No match" + log_text)

View File

@@ -0,0 +1,90 @@
{
"area": {
"type-id": null,
"id": "8e0551f2-95c2-3cc0-a0a9-f2d344f10667",
"name": "Egypt",
"disambiguation": "",
"sort-name": "Egypt",
"iso-3166-1-codes": [
"EG"
],
"type": null
},
"begin-area": {
"type-id": null,
"disambiguation": "",
"id": "cf82cb78-741a-46e8-8448-13b824261ca0",
"name": "Asw\u0101n",
"sort-name": "Asw\u0101n",
"iso-3166-2-codes": [
"EG-ASN"
],
"type": null
},
"country": "EG",
"isnis": [
"0000000081697225"
],
"sort-name": "Mounir, Mohamed",
"end-area": null,
"life-span": {
"begin": "1954-10-10",
"ended": false,
"end": null
},
"begin_area": {
"type-id": null,
"disambiguation": "",
"id": "cf82cb78-741a-46e8-8448-13b824261ca0",
"name": "Asw\u0101n",
"sort-name": "Asw\u0101n",
"iso-3166-2-codes": [
"EG-ASN"
],
"type": null
},
"disambiguation": "",
"gender": "Male",
"type-id": "b6e035f4-3ce9-331c-97df-83397230b0df",
"type": "Person",
"aliases": [
{
"type": null,
"primary": null,
"name": "Mohamed Moneer",
"end": null,
"ended": false,
"sort-name": "Mohamed Moneer",
"locale": null,
"begin": null,
"type-id": null
},
{
"ended": false,
"end": null,
"type": "Artist name",
"name": "Mohamed Mounir",
"primary": true,
"type-id": "894afba6-2816-3c24-8072-eadb66bd04bc",
"begin": null,
"locale": "en",
"sort-name": "Mounir, Mohamad"
},
{
"type-id": null,
"begin": null,
"locale": null,
"sort-name": "Mohamed Mounir",
"ended": false,
"end": null,
"primary": null,
"type": null,
"name": "Mohamed Mounir"
}
],
"id": "5235052b-7fa0-498b-accf-26b9e7767da7",
"ipis": [],
"name": "\u0645\u062d\u0645\u062f \u0645\u0646\u064a\u0631",
"gender-id": "36d3d30a-839d-3eda-8cb3-29be4384e4a9",
"end_area": null
}

View File

@@ -458,6 +458,23 @@ class ArtistTranslationTest(MBJSONTest):
(artist_name, artist_sort_name) = _translate_artist_node(self.json_doc)
self.assertEqual(artist_name, 'Ed Sheeran (en_CA)')
def test_locale_specific_match_first_exc(self):
    """Translate with a LATIN script exception (weighting 0) configured.

    The expected artist name is the plain 'Ed Sheeran'.
    """
    self.set_config_values({
        "standardize_tracks": False,
        "standardize_artists": False,
        "standardize_releases": False,
        "translate_artist_names": True,
        "translate_artist_names_script_exception": True,
        "script_exceptions": [("LATIN", 0)],
        "standardize_instruments": True,
        "release_ars": True,
        "preferred_release_countries": [],
        "artist_locales": ['en_CA', 'en'],
    })
    # Only the name is checked here; the sort name is unpacked and ignored.
    translated_name, _sort_name = _translate_artist_node(self.json_doc)
    self.assertEqual(translated_name, 'Ed Sheeran')
def test_locale_specific_match_second(self):
settings = {
"standardize_tracks": False,
@@ -507,6 +524,45 @@ class ArtistTranslationTest(MBJSONTest):
self.assertEqual(artist_name, 'Ed Sheeran')
class ArtistTranslationArabicExceptionsTest(MBJSONTest):
    """Exercise script exceptions of artist-name translation.

    Both tests use the same settings and differ only in which script is
    listed under "script_exceptions".
    NOTE(review): presumably MBJSONTest loads ``filename`` into
    ``self.json_doc`` -- confirm against the base class.
    """

    filename = 'artist_arabic.json'

    def _translate_with_script_exceptions(self, script_exceptions):
        """Configure the shared settings and translate ``self.json_doc``.

        ``script_exceptions`` is stored as-is under the "script_exceptions"
        setting; the tests pass a list of ``(script_id, weighting)`` pairs.
        Returns the result of ``_translate_artist_node`` unchanged.

        The leading underscore keeps unittest from collecting this helper
        as a test of its own.
        """
        settings = {
            "standardize_tracks": False,
            "standardize_artists": False,
            "standardize_releases": False,
            "translate_artist_names": True,
            "translate_artist_names_script_exception": True,
            "script_exceptions": script_exceptions,
            "standardize_instruments": True,
            "release_ars": True,
            "preferred_release_countries": [],
            "artist_locales": ['en_CA', 'en'],
        }
        self.set_config_values(settings)
        return _translate_artist_node(self.json_doc)

    def test_locale_specific_match_first_exc1(self):
        """With only LATIN excepted, the translated name is expected."""
        artist_name, _ = self._translate_with_script_exceptions([("LATIN", 0)])
        self.assertEqual(artist_name, 'Mohamed Mounir')

    def test_locale_specific_match_first_exc2(self):
        """With ARABIC excepted, the original Arabic name is expected."""
        artist_name, _ = self._translate_with_script_exceptions([("ARABIC", 0)])
        self.assertEqual(artist_name, 'محمد منير')
class ReleaseGroupTest(MBJSONTest):
filename = 'release_group.json'