diff --git a/picard/const/__init__.py b/picard/const/__init__.py index 7ca14dbff..cec1be93a 100644 --- a/picard/const/__init__.py +++ b/picard/const/__init__.py @@ -95,6 +95,11 @@ PICARD_URLS = { # Various Artists MBID VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' +# Artist alias types +ALIAS_TYPE_ARTIST_NAME_ID = '894afba6-2816-3c24-8072-eadb66bd04bc' +ALIAS_TYPE_LEGAL_NAME_ID = 'd4dcd0c0-b341-3612-a332-c0ce797b25cf' +ALIAS_TYPE_SEARCH_HINT_ID = '1937e404-b981-3cb7-8151-4c86ebfc8d8e' + # Special purpose track titles SILENCE_TRACK_TITLE = '[silence]' DATA_TRACK_TITLE = '[data track]' diff --git a/picard/mbjson.py b/picard/mbjson.py index f21d35dfa..d6378633d 100644 --- a/picard/mbjson.py +++ b/picard/mbjson.py @@ -30,7 +30,11 @@ from collections import namedtuple from picard import log from picard.config import get_config -from picard.const import RELEASE_FORMATS +from picard.const import ( + ALIAS_TYPE_ARTIST_NAME_ID, + ALIAS_TYPE_LEGAL_NAME_ID, + RELEASE_FORMATS, +) from picard.util import ( format_time, linear_combination_of_weights, @@ -40,7 +44,7 @@ from picard.util import ( from picard.util.script_detector_weighted import detect_script_weighted -_artist_rel_types = { +_ARTIST_REL_TYPES = { 'arranger': 'arranger', 'audio': 'engineer', 'chorus master': 'performer:chorus master', @@ -106,35 +110,27 @@ _RELEASE_GROUP_TO_METADATA = { 'title': '~releasegroup', } - -_REPLACE_MAP = {} -_PREFIX_ATTRS = ['guest', 'additional', 'minor', 'solo'] +_PREFIX_ATTRS = {'guest', 'additional', 'minor', 'solo'} _BLANK_SPECIAL_RELTYPES = {'vocal': 'vocals'} -def _transform_attribute(attr, attr_credits): - if attr in attr_credits: - return attr_credits[attr] - else: - return _REPLACE_MAP.get(attr, attr) - - def _parse_attributes(attrs, reltype, attr_credits): prefixes = [] nouns = [] for attr in attrs: - attr = _transform_attribute(attr, attr_credits) + if attr in attr_credits: + attr = attr_credits[attr] if attr in _PREFIX_ATTRS: prefixes.append(attr) else: nouns.append(attr) - prefix = " ".join(prefixes) if len(nouns) > 1: result = "%s and %s" % (", ".join(nouns[:-1]), nouns[-1:][0]) elif len(nouns) == 1: result = nouns[0] else: result = _BLANK_SPECIAL_RELTYPES.get(reltype, "") + prefix = " ".join(prefixes) return " ".join([prefix, result]).strip() @@ -169,7 +165,7 @@ def _relations_to_metadata_target_type_artist(relation, m, context): return else: try: - name = _artist_rel_types[reltype] + name = _ARTIST_REL_TYPES[reltype] except KeyError: return if context.instrumental and name == 'lyricist': @@ -241,6 +237,46 @@ def _relations_to_metadata(relations, m, instrumental=False, config=None, entity _RELATIONS_TO_METADATA_TARGET_TYPE_FUNC[relation['target-type']](relation, m, context) +def _locales_from_aliases(aliases): + def check_higher_score(locale_dict, locale, score): + return locale not in locale_dict or score > locale_dict[locale][0] + + full_locales = {} + root_locales = {} + for alias in aliases: + if not alias['primary']: + continue + if 'locale' not in alias: + continue + full_locale = alias['locale'] + root_locale = full_locale.split('_')[0] + full_parts = [] + root_parts = [] + score = 0.8 + full_parts.append((score, 5)) + if '_' in full_locale: + score = 0.4 + root_parts.append((score, 5)) + if alias['type-id'] == ALIAS_TYPE_ARTIST_NAME_ID: + score = 0.8 + elif alias['type-id'] == ALIAS_TYPE_LEGAL_NAME_ID: + score = 0.5 + else: + # as 2014/09/19, only Artist or Legal names should have the + # Primary flag + score = 0.0 + full_parts.append((score, 5)) + root_parts.append((score, 5)) + comb = linear_combination_of_weights(full_parts) + if check_higher_score(full_locales, full_locale, comb): + full_locales[full_locale] = (comb, (alias['name'], alias['sort-name'])) + comb = linear_combination_of_weights(root_parts) + if check_higher_score(root_locales, root_locale, comb): + root_locales[root_locale] = (comb, (alias['name'], alias['sort-name'])) + + return full_locales, root_locales + + def _translate_artist_node(node, config=None): config = config or get_config() translated_name, sort_name = None, None @@ -273,43 +309,9 @@ def _translate_artist_node(node, config=None): else: log.warning("No scripts selected for translation exception match check.") - def check_higher_score(locale_dict, locale, score): - return locale not in locale_dict or score > locale_dict[locale][0] - # Prepare dictionaries of available locale aliases - full_locales = {} - root_locales = {} if 'aliases' in node: - for alias in node['aliases']: - if not alias['primary']: - continue - if 'locale' not in alias: - continue - full_locale = alias['locale'] - root_locale = full_locale.split('_')[0] - full_parts = [] - root_parts = [] - score = 0.8 - full_parts.append((score, 5)) - if '_' in full_locale: - score = 0.4 - root_parts.append((score, 5)) - if alias['type'] == "Artist name": - score = 0.8 - elif alias['type'] == "Legal Name": - score = 0.5 - else: - # as 2014/09/19, only Artist or Legal names should have the - # Primary flag - score = 0.0 - full_parts.append((score, 5)) - root_parts.append((score, 5)) - comb = linear_combination_of_weights(full_parts) - if check_higher_score(full_locales, full_locale, comb): - full_locales[full_locale] = (comb, (alias['name'], alias['sort-name'])) - comb = linear_combination_of_weights(root_parts) - if check_higher_score(root_locales, root_locale, comb): - root_locales[root_locale] = (comb, (alias['name'], alias['sort-name'])) + full_locales, root_locales = _locales_from_aliases(node['aliases']) # First pass to match full locale if available for locale in config.setting['artist_locales']: diff --git a/test/data/ws_data/artist_ended.json b/test/data/ws_data/artist_ended.json new file mode 100644 index 000000000..8e8fb8c73 --- /dev/null +++ b/test/data/ws_data/artist_ended.json @@ -0,0 +1,79 @@ +{ + "name": "Serge Gainsbourg", + "sort-name": "Gainsbourg, Serge", + "isnis": [ + "0000000115935851" + ], + "life-span": { + "ended": true, + "begin": "1928-04-02", + "end": "1991-03-02" + }, + "id": "b21ef19b-c6aa-4775-90d3-3cc3e067ce6d", + "end_area": { + "sort-name": "Paris", + "name": "Paris", + "iso-3166-2-codes": [ + "FR-75" + ], + "type-id": null, + "disambiguation": "", + "id": "dc10c22b-e510-4006-8b7f-fecb4f36436e", + "type": null + }, + "begin-area": { + "disambiguation": "", + "type": null, + "id": "dc10c22b-e510-4006-8b7f-fecb4f36436e", + "name": "Paris", + "iso-3166-2-codes": [ + "FR-75" + ], + "type-id": null, + "sort-name": "Paris" + }, + "type-id": "b6e035f4-3ce9-331c-97df-83397230b0df", + "area": { + "iso-3166-1-codes": [ + "FR" + ], + "disambiguation": "", + "id": "08310658-51eb-3801-80de-5a0739207115", + "type": null, + "sort-name": "France", + "name": "France", + "type-id": null + }, + "country": "FR", + "ipis": [ + "00011123948", + "00011935702", + "00012741616" + ], + "gender": "Male", + "gender-id": "36d3d30a-839d-3eda-8cb3-29be4384e4a9", + "type": "Person", + "disambiguation": "", + "end-area": { + "sort-name": "Paris", + "name": "Paris", + "iso-3166-2-codes": [ + "FR-75" + ], + "type-id": null, + "disambiguation": "", + "id": "dc10c22b-e510-4006-8b7f-fecb4f36436e", + "type": null + }, + "begin_area": { + "disambiguation": "", + "type": null, + "id": "dc10c22b-e510-4006-8b7f-fecb4f36436e", + "name": "Paris", + "iso-3166-2-codes": [ + "FR-75" + ], + "type-id": null, + "sort-name": "Paris" + } +} diff --git a/test/data/ws_data/media.json b/test/data/ws_data/media.json deleted file mode 100644 index d9c87dfb1..000000000 --- a/test/data/ws_data/media.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "format-id": "3e9080b0-5e6c-34ab-bd15-f526b6306a64", - "track-count": 10, - "position": 1, - "title": "", - "track-offset": 0, - "format": "12\" Vinyl", - "tracks": [] -} diff --git a/test/data/ws_data/release_5medias.json b/test/data/ws_data/release_5medias.json new file mode 100644 index 000000000..103d9e3df --- /dev/null +++ b/test/data/ws_data/release_5medias.json @@ -0,0 +1,197 @@ +{ + "status-id": "4e304316-386d-3409-af2e-78857eec5cfe", + "id": "f17a0f30-8eb1-4322-b54e-fb71edb78d7c", + "quality": "normal", + "date": "2011-07-11", + "label-info": [ + { + "catalog-number": "50999 029435 2 7", + "label": { + "label-code": 542, + "type-id": "7aaa37fe-2def-3476-b359-80245850062d", + "type": "Original Production", + "sort-name": "EMI", + "name": "EMI", + "id": "c029628b-6633-439e-bcee-ed02e8a338f7", + "disambiguation": "EMI Records, since 1972", + "aliases": [ + { + "type-id": null, + "locale": null, + "primary": null, + "begin": null, + "end": null, + "name": "EMI", + "sort-name": "EMI", + "type": null, + "ended": false + }, + { + "type": null, + "sort-name": "EMI 100", + "name": "EMI 100", + "ended": false, + "begin": null, + "end": null, + "type-id": null, + "locale": null, + "primary": null + }, + { + "ended": false, + "type": "Search hint", + "name": "EMI Recorded Music Australia Pty Ltd", + "sort-name": "EMI Recorded Music Australia Pty Ltd", + "end": null, + "begin": null, + "primary": null, + "type-id": "829662f2-a781-3ec8-8b46-fbcea6196f81", + "locale": null + }, + { + "begin": null, + "end": null, + "locale": null, + "type-id": null, + "primary": null, + "type": null, + "sort-name": "EMI Records (UK)", + "name": "EMI Records (UK)", + "ended": false + }, + { + "begin": null, + "end": null, + "locale": null, + "type-id": null, + "primary": null, + "name": "EMI UK", + "sort-name": "EMI UK", + "type": null, + "ended": false + } + ] + } + } + ], + "cover-art-archive": { + "count": 88, + "back": true, + "darkened": false, + "front": true, + "artwork": true + }, + "packaging-id": "815b7785-8284-3926-8f04-e48bc6c4d102", + "country": "XE", + "status": "Official", + "packaging": "Other", + "artist-credit": [ + { + "joinphrase": "", + "artist": { + "aliases": [], + "type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b", + "id": "83d91898-7763-47d7-b03b-b92132375c47", + "disambiguation": "", + "type": "Group", + "name": "Pink Floyd", + "sort-name": "Pink Floyd" + }, + "name": "Pink Floyd" + } + ], + "disambiguation": "Immersion box set", + "aliases": [], + "asin": "B004ZNAUVW", + "text-representation": { + "language": "eng", + "script": "Latn" + }, + "media": [ + { + "position": 1, + "track-count": 5, + "format-id": "9712d52a-4509-3d4b-a1a2-67c88c643e31", + "title": "The Original Album", + "discs": [ + { + "sectors": 199410, + "offsets": [ + 150, + 61109, + 94976, + 118065, + 143171 + ], + "offset-count": 5, + "id": "tNSQ3K59B8ZkSb19P__Jet6B.sk-" + } + ], + "format": "CD" + }, + { + "track-count": 6, + "format-id": "9712d52a-4509-3d4b-a1a2-67c88c643e31", + "position": 2, + "format": "CD", + "title": "Unreleased Audio Tracks", + "discs": [ + { + "id": "6NksHllhjO74WpVDabBDhj3P0qk-", + "offset-count": 6, + "offsets": [ + 150, + 91851, + 148493, + 230435, + 240674, + 273050 + ], + "sectors": 301068 + } + ] + }, + { + "format": "DVD-Video", + "discs": [], + "title": "Wish You Were Here Multi\u2010Channel Audio Mixes", + "format-id": "bb71fd58-ff93-32b4-a201-4ad1b2a80e5f", + "track-count": 15, + "position": 3 + }, + { + "format": "DVD-Video", + "discs": [], + "title": "Audio\u2010Visual Material", + "format-id": "bb71fd58-ff93-32b4-a201-4ad1b2a80e5f", + "track-count": 4, + "position": 4 + }, + { + "discs": [], + "title": "High Resolution Audio and Audio\u2010Visual Material", + "format": "Blu-ray", + "position": 5, + "format-id": "c693c05b-3316-3d69-afc2-5e2bc455bffc", + "track-count": 19 + } + ], + "title": "Wish You Were Here", + "barcode": "5099902943527", + "release-events": [ + { + "date": "2011-07-11", + "area": { + "disambiguation": "", + "id": "89a675c2-3e37-3518-b83c-418bad59a85a", + "type-id": null, + "iso-3166-1-codes": [ + "XE" + ], + "sort-name": "Europe", + "name": "Europe", + "type": null + } + } + ] +} diff --git a/test/test_mbjson.py b/test/test_mbjson.py index 6fb61f545..eefc641ee 100644 --- a/test/test_mbjson.py +++ b/test/test_mbjson.py @@ -33,8 +33,16 @@ from test.picardtestcase import ( from picard import config from picard.album import Album +from picard.const import ( + ALIAS_TYPE_ARTIST_NAME_ID, + ALIAS_TYPE_LEGAL_NAME_ID, + ALIAS_TYPE_SEARCH_HINT_ID, +) from picard.mbjson import ( + _locales_from_aliases, _node_skip_empty_iter, + _parse_attributes, + _relations_to_metadata_target_type_url, _translate_artist_node, artist_to_metadata, countries_from_node, @@ -415,14 +423,27 @@ class NullTrackTest(MBJSONTest): class MediaTest(MBJSONTest): - filename = 'media.json' + filename = 'release_5medias.json' - def test_track(self): + def test_media_formats_from_node_multi(self): + formats = media_formats_from_node(self.json_doc['media']) + self.assertEqual('2×CD + 2×DVD-Video + Blu-ray', formats) + + def test_medium_to_metadata_0(self): m = Metadata() - medium_to_metadata(self.json_doc, m) + medium_to_metadata(self.json_doc['media'][0], m) self.assertEqual(m['discnumber'], '1') - self.assertEqual(m['media'], '12" Vinyl') - self.assertEqual(m['totaltracks'], '10') + self.assertEqual(m['media'], 'CD') + self.assertEqual(m['totaltracks'], '5') + self.assertEqual(m['discsubtitle'], 'The Original Album') + + def test_medium_to_metadata_4(self): + m = Metadata() + medium_to_metadata(self.json_doc['media'][4], m) + self.assertEqual(m['discnumber'], '5') + self.assertEqual(m['media'], 'Blu-ray') + self.assertEqual(m['totaltracks'], '19') + self.assertEqual(m['discsubtitle'], 'High Resolution Audio and Audio‐Visual Material') class MediaPregapTest(MBJSONTest): @@ -447,22 +468,6 @@ class NullMediaTest(MBJSONTest): self.assertEqual(m, {}) -class ArtistTest(MBJSONTest): - - filename = 'artist.json' - - def test_artist(self): - m = Metadata() - artist_to_metadata(self.json_doc, m) - self.assertEqual(m['area'], 'United Kingdom') - self.assertEqual(m['beginarea'], 'Hebden Bridge') - self.assertEqual(m['begindate'], '1991-02-17') - self.assertEqual(m['gender'], 'Male') - self.assertEqual(m['musicbrainz_artistid'], 'b8a7c51f-362c-4dcb-a259-bc6e0095f0a6') - self.assertEqual(m['name'], 'Ed Sheeran') - self.assertEqual(m['type'], 'Person') - - class NullArtistTest(MBJSONTest): filename = 'artist_null.json' @@ -473,6 +478,24 @@ class NullArtistTest(MBJSONTest): self.assertEqual(m, {}) +class ArtistEndedTest(MBJSONTest): + + filename = 'artist_ended.json' + + def test_artist_ended(self): + m = Metadata() + artist_to_metadata(self.json_doc, m) + self.assertEqual(m['area'], 'France') + self.assertEqual(m['beginarea'], 'Paris') + self.assertEqual(m['begindate'], '1928-04-02') + self.assertEqual(m['endarea'], 'Paris') + self.assertEqual(m['enddate'], '1991-03-02') + self.assertEqual(m['gender'], 'Male') + self.assertEqual(m['musicbrainz_artistid'], 'b21ef19b-c6aa-4775-90d3-3cc3e067ce6d') + self.assertEqual(m['name'], 'Serge Gainsbourg') + self.assertEqual(m['type'], 'Person') + + class ArtistTranslationTest(MBJSONTest): filename = 'artist.json' @@ -598,6 +621,100 @@ class ArtistTranslationArabicExceptionsTest(MBJSONTest): self.assertEqual(artist_name, 'محمد منير') +class TestAliasesLocales(PicardTestCase): + + def setUp(self): + self.maxDiff = None + + self.aliases = [ + { + "name": "Shearan", + "sort-name": "Shearan", + "primary": None, + "locale": None, + "type-id": ALIAS_TYPE_SEARCH_HINT_ID, + }, + { + "primary": True, + "name": "Ed Sheeran (en)", + "sort-name": "Sheeran, Ed", + "type-id": ALIAS_TYPE_ARTIST_NAME_ID, + "locale": "en", + }, + { + "primary": True, + "name": "Ed Sheeran (en_CA)", + "sort-name": "Sheeran, Ed", + "type-id": ALIAS_TYPE_ARTIST_NAME_ID, + "locale": "en_CA", + }, + ] + + def test_1(self): + expect_full = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed')), 'en_CA': (0.8, ('Ed Sheeran (en_CA)', 'Sheeran, Ed'))} + expect_root = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + def test_2(self): + self.aliases[2]['type-id'] = ALIAS_TYPE_LEGAL_NAME_ID + + expect_full = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed')), 'en_CA': (0.65, ('Ed Sheeran (en_CA)', 'Sheeran, Ed'))} + expect_root = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + def test_3(self): + self.aliases[0]['primary'] = True + del self.aliases[0]['locale'] + + expect_full = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed')), 'en_CA': (0.8, ('Ed Sheeran (en_CA)', 'Sheeran, Ed'))} + expect_root = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + def test_4(self): + self.aliases[2]['type-id'] = ALIAS_TYPE_SEARCH_HINT_ID + + expect_full = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed')), 'en_CA': (0.4, ('Ed Sheeran (en_CA)', 'Sheeran, Ed'))} + expect_root = {'en': (0.8, ('Ed Sheeran (en)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + def test_5(self): + self.aliases[1]['locale'] = 'en_US' + self.aliases[1]['name'] = 'Ed Sheeran (en_US)' + + expect_full = {'en_US': (0.8, ('Ed Sheeran (en_US)', 'Sheeran, Ed')), 'en_CA': (0.8, ('Ed Sheeran (en_CA)', 'Sheeran, Ed'))} + expect_root = {'en': (0.6, ('Ed Sheeran (en_US)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + def test_6(self): + self.aliases[2]['locale'] = 'en' + self.aliases[2]['name'] = 'Ed Sheeran (en2)' + self.aliases[2]['type-id'] = ALIAS_TYPE_ARTIST_NAME_ID + self.aliases[1]['type-id'] = ALIAS_TYPE_LEGAL_NAME_ID + self.aliases[1]['name'] = 'Ed Sheeran (en1)' + + expect_full = {'en': (0.8, ('Ed Sheeran (en2)', 'Sheeran, Ed'))} + expect_root = {'en': (0.8, ('Ed Sheeran (en2)', 'Sheeran, Ed'))} + + full_locales, root_locales = _locales_from_aliases(self.aliases) + self.assertDictEqual(expect_full, full_locales) + self.assertDictEqual(expect_root, root_locales) + + class ReleaseGroupTest(MBJSONTest): filename = 'release_group.json' @@ -717,3 +834,70 @@ class GetScoreTest(PicardTestCase): def test_get_score_no_score(self): self.assertEqual(1.0, get_score({})) + + +class ParseAttributeTest(PicardTestCase): + + def test_1(self): + attrs, reltype, attr_credits = ('guest', 'keyboard'), 'instrument', {'keyboard': 'keyboards'} + result = _parse_attributes(attrs, reltype, attr_credits) + expected = 'guest keyboards' + self.assertEqual(expected, result) + + def test_2(self): + attrs, reltype, attr_credits = (), 'vocal', {} + result = _parse_attributes(attrs, reltype, attr_credits) + expected = 'vocals' + self.assertEqual(expected, result) + + def test_3(self): + attrs, reltype, attr_credits = ('guitar', 'keyboard'), 'instrument', {'keyboard': 'keyboards', 'guitar': 'weird guitar'} + result = _parse_attributes(attrs, reltype, attr_credits) + expected = 'weird guitar and keyboards' + self.assertEqual(expected, result) + + +class RelationsToMetadataTargetTypeUrlTest(PicardTestCase): + def test_invalid_asin_url(self): + m = Metadata() + relation = { + 'type': 'amazon asin', + 'url': { + 'resource': 'http://www.amazon.com/dp/020530902x', + } + } + _relations_to_metadata_target_type_url(relation, m, None) + self.assertEqual('', m['asin']) + + def test_has_asin_already(self): + m = Metadata({'asin': 'ASIN'}) + relation = { + 'type': 'amazon asin', + 'url': { + 'resource': 'http://www.amazon.com/dp/020530902X', + } + } + _relations_to_metadata_target_type_url(relation, m, None) + self.assertEqual('ASIN', m['asin']) + + def test_valid_asin_url(self): + m = Metadata() + relation = { + 'type': 'amazon asin', + 'url': { + 'resource': 'http://www.amazon.com/dp/020530902X', + } + } + _relations_to_metadata_target_type_url(relation, m, None) + self.assertEqual('020530902X', m['asin']) + + def test_license_url(self): + m = Metadata() + relation = { + 'type': 'license', + 'url': { + 'resource': 'https://URL.LICENSE', + } + } + _relations_to_metadata_target_type_url(relation, m, None) + self.assertEqual('https://URL.LICENSE', m['license'])