mirror of
https://github.com/fergalmoran/picard.git
synced 2026-01-03 15:13:57 +00:00
Merge pull request #2058 from zas/translate_artist_node_cleanup
Translate artist node cleanup
This commit is contained in:
@@ -207,18 +207,23 @@ def _translate_artist_node(node, config=None):
|
||||
detected_scripts = detect_script_weighted(node["name"])
|
||||
if detected_scripts:
|
||||
log_text += "; ".join(
|
||||
list("{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100) for scr_id in detected_scripts)
|
||||
"{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100)
|
||||
for scr_id in detected_scripts
|
||||
)
|
||||
else:
|
||||
log_text += "None"
|
||||
log.debug(log_text)
|
||||
if detected_scripts:
|
||||
if config.setting["script_exceptions"]:
|
||||
script_exceptions = config.setting["script_exceptions"]
|
||||
if script_exceptions:
|
||||
log_text = " found in selected scripts: " + "; ".join(
|
||||
list("{0} ({1}%)".format(scr[0], scr[1]) for scr in config.setting["script_exceptions"])
|
||||
"{0} ({1}%)".format(scr[0], scr[1])
|
||||
for scr in script_exceptions
|
||||
)
|
||||
for script_id, script_weighting in config.setting["script_exceptions"]:
|
||||
if script_id in detected_scripts and detected_scripts[script_id] >= script_weighting / 100:
|
||||
for script_id, script_weighting in script_exceptions:
|
||||
if script_id not in detected_scripts:
|
||||
continue
|
||||
if detected_scripts[script_id] >= script_weighting / 100:
|
||||
log.debug("Match" + log_text)
|
||||
return node['name'], node['sort-name']
|
||||
log.debug("No match" + log_text)
|
||||
|
||||
90
test/data/ws_data/artist_arabic.json
Normal file
90
test/data/ws_data/artist_arabic.json
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"area": {
|
||||
"type-id": null,
|
||||
"id": "8e0551f2-95c2-3cc0-a0a9-f2d344f10667",
|
||||
"name": "Egypt",
|
||||
"disambiguation": "",
|
||||
"sort-name": "Egypt",
|
||||
"iso-3166-1-codes": [
|
||||
"EG"
|
||||
],
|
||||
"type": null
|
||||
},
|
||||
"begin-area": {
|
||||
"type-id": null,
|
||||
"disambiguation": "",
|
||||
"id": "cf82cb78-741a-46e8-8448-13b824261ca0",
|
||||
"name": "Asw\u0101n",
|
||||
"sort-name": "Asw\u0101n",
|
||||
"iso-3166-2-codes": [
|
||||
"EG-ASN"
|
||||
],
|
||||
"type": null
|
||||
},
|
||||
"country": "EG",
|
||||
"isnis": [
|
||||
"0000000081697225"
|
||||
],
|
||||
"sort-name": "Mounir, Mohamed",
|
||||
"end-area": null,
|
||||
"life-span": {
|
||||
"begin": "1954-10-10",
|
||||
"ended": false,
|
||||
"end": null
|
||||
},
|
||||
"begin_area": {
|
||||
"type-id": null,
|
||||
"disambiguation": "",
|
||||
"id": "cf82cb78-741a-46e8-8448-13b824261ca0",
|
||||
"name": "Asw\u0101n",
|
||||
"sort-name": "Asw\u0101n",
|
||||
"iso-3166-2-codes": [
|
||||
"EG-ASN"
|
||||
],
|
||||
"type": null
|
||||
},
|
||||
"disambiguation": "",
|
||||
"gender": "Male",
|
||||
"type-id": "b6e035f4-3ce9-331c-97df-83397230b0df",
|
||||
"type": "Person",
|
||||
"aliases": [
|
||||
{
|
||||
"type": null,
|
||||
"primary": null,
|
||||
"name": "Mohamed Moneer",
|
||||
"end": null,
|
||||
"ended": false,
|
||||
"sort-name": "Mohamed Moneer",
|
||||
"locale": null,
|
||||
"begin": null,
|
||||
"type-id": null
|
||||
},
|
||||
{
|
||||
"ended": false,
|
||||
"end": null,
|
||||
"type": "Artist name",
|
||||
"name": "Mohamed Mounir",
|
||||
"primary": true,
|
||||
"type-id": "894afba6-2816-3c24-8072-eadb66bd04bc",
|
||||
"begin": null,
|
||||
"locale": "en",
|
||||
"sort-name": "Mounir, Mohamad"
|
||||
},
|
||||
{
|
||||
"type-id": null,
|
||||
"begin": null,
|
||||
"locale": null,
|
||||
"sort-name": "Mohamed Mounir",
|
||||
"ended": false,
|
||||
"end": null,
|
||||
"primary": null,
|
||||
"type": null,
|
||||
"name": "Mohamed Mounir"
|
||||
}
|
||||
],
|
||||
"id": "5235052b-7fa0-498b-accf-26b9e7767da7",
|
||||
"ipis": [],
|
||||
"name": "\u0645\u062d\u0645\u062f \u0645\u0646\u064a\u0631",
|
||||
"gender-id": "36d3d30a-839d-3eda-8cb3-29be4384e4a9",
|
||||
"end_area": null
|
||||
}
|
||||
@@ -458,6 +458,23 @@ class ArtistTranslationTest(MBJSONTest):
|
||||
(artist_name, artist_sort_name) = _translate_artist_node(self.json_doc)
|
||||
self.assertEqual(artist_name, 'Ed Sheeran (en_CA)')
|
||||
|
||||
def test_locale_specific_match_first_exc(self):
|
||||
settings = {
|
||||
"standardize_tracks": False,
|
||||
"standardize_artists": False,
|
||||
"standardize_releases": False,
|
||||
"translate_artist_names": True,
|
||||
"translate_artist_names_script_exception": True,
|
||||
"script_exceptions": [("LATIN", 0)],
|
||||
"standardize_instruments": True,
|
||||
"release_ars": True,
|
||||
"preferred_release_countries": [],
|
||||
"artist_locales": ['en_CA', 'en'],
|
||||
}
|
||||
self.set_config_values(settings)
|
||||
(artist_name, artist_sort_name) = _translate_artist_node(self.json_doc)
|
||||
self.assertEqual(artist_name, 'Ed Sheeran')
|
||||
|
||||
def test_locale_specific_match_second(self):
|
||||
settings = {
|
||||
"standardize_tracks": False,
|
||||
@@ -507,6 +524,45 @@ class ArtistTranslationTest(MBJSONTest):
|
||||
self.assertEqual(artist_name, 'Ed Sheeran')
|
||||
|
||||
|
||||
class ArtistTranslationArabicExceptionsTest(MBJSONTest):
|
||||
|
||||
filename = 'artist_arabic.json'
|
||||
|
||||
def test_locale_specific_match_first_exc1(self):
|
||||
settings = {
|
||||
"standardize_tracks": False,
|
||||
"standardize_artists": False,
|
||||
"standardize_releases": False,
|
||||
"translate_artist_names": True,
|
||||
"translate_artist_names_script_exception": True,
|
||||
"script_exceptions": [("LATIN", 0)],
|
||||
"standardize_instruments": True,
|
||||
"release_ars": True,
|
||||
"preferred_release_countries": [],
|
||||
"artist_locales": ['en_CA', 'en'],
|
||||
}
|
||||
self.set_config_values(settings)
|
||||
(artist_name, artist_sort_name) = _translate_artist_node(self.json_doc)
|
||||
self.assertEqual(artist_name, 'Mohamed Mounir')
|
||||
|
||||
def test_locale_specific_match_first_exc2(self):
|
||||
settings = {
|
||||
"standardize_tracks": False,
|
||||
"standardize_artists": False,
|
||||
"standardize_releases": False,
|
||||
"translate_artist_names": True,
|
||||
"translate_artist_names_script_exception": True,
|
||||
"script_exceptions": [("ARABIC", 0)],
|
||||
"standardize_instruments": True,
|
||||
"release_ars": True,
|
||||
"preferred_release_countries": [],
|
||||
"artist_locales": ['en_CA', 'en'],
|
||||
}
|
||||
self.set_config_values(settings)
|
||||
(artist_name, artist_sort_name) = _translate_artist_node(self.json_doc)
|
||||
self.assertEqual(artist_name, 'محمد منير')
|
||||
|
||||
|
||||
class ReleaseGroupTest(MBJSONTest):
|
||||
|
||||
filename = 'release_group.json'
|
||||
|
||||
Reference in New Issue
Block a user