Merge pull request #2219 from zas/mbjson_tidy3

Tidy up mbjson (pass 3)
This commit is contained in:
Philipp Wolfer
2023-06-02 18:02:14 +02:00
committed by GitHub
6 changed files with 538 additions and 80 deletions

View File

@@ -95,6 +95,11 @@ PICARD_URLS = {
# Various Artists MBID
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
# Artist alias types
ALIAS_TYPE_ARTIST_NAME_ID = '894afba6-2816-3c24-8072-eadb66bd04bc'
ALIAS_TYPE_LEGAL_NAME_ID = 'd4dcd0c0-b341-3612-a332-c0ce797b25cf'
ALIAS_TYPE_SEARCH_HINT_ID = '1937e404-b981-3cb7-8151-4c86ebfc8d8e'
# Special purpose track titles
SILENCE_TRACK_TITLE = '[silence]'
DATA_TRACK_TITLE = '[data track]'

View File

@@ -30,7 +30,11 @@ from collections import namedtuple
from picard import log
from picard.config import get_config
from picard.const import RELEASE_FORMATS
from picard.const import (
ALIAS_TYPE_ARTIST_NAME_ID,
ALIAS_TYPE_LEGAL_NAME_ID,
RELEASE_FORMATS,
)
from picard.util import (
format_time,
linear_combination_of_weights,
@@ -40,7 +44,7 @@ from picard.util import (
from picard.util.script_detector_weighted import detect_script_weighted
_artist_rel_types = {
_ARTIST_REL_TYPES = {
'arranger': 'arranger',
'audio': 'engineer',
'chorus master': 'performer:chorus master',
@@ -106,35 +110,27 @@ _RELEASE_GROUP_TO_METADATA = {
'title': '~releasegroup',
}
_REPLACE_MAP = {}
_PREFIX_ATTRS = ['guest', 'additional', 'minor', 'solo']
_PREFIX_ATTRS = {'guest', 'additional', 'minor', 'solo'}
_BLANK_SPECIAL_RELTYPES = {'vocal': 'vocals'}
def _transform_attribute(attr, attr_credits):
    """Return the name to display for a relationship attribute.

    An explicit entry in attr_credits wins; otherwise the substitution from
    _REPLACE_MAP is used, and the attribute itself when neither applies.
    """
    if attr not in attr_credits:
        return _REPLACE_MAP.get(attr, attr)
    return attr_credits[attr]
def _parse_attributes(attrs, reltype, attr_credits):
    """Build the display text for the attributes of a relationship.

    Args:
        attrs: iterable of attribute names attached to the relationship.
        reltype: relationship type; only used to look up a fallback text in
            _BLANK_SPECIAL_RELTYPES when there is no noun attribute.
        attr_credits: mapping of attribute name to its credited replacement.

    Returns:
        The prefix attributes (members of _PREFIX_ATTRS) joined by spaces,
        followed by the remaining attributes formatted as "a, b and c".
        An empty string when there is nothing to show.
    """
    prefixes = []
    nouns = []
    for attr in attrs:
        # Apply the credit exactly once. Crediting twice would remap a
        # credited name that happens to also be a key of attr_credits.
        if attr in attr_credits:
            attr = attr_credits[attr]
        if attr in _PREFIX_ATTRS:
            prefixes.append(attr)
        else:
            nouns.append(attr)
    if len(nouns) > 1:
        result = "%s and %s" % (", ".join(nouns[:-1]), nouns[-1])
    elif nouns:
        result = nouns[0]
    else:
        result = _BLANK_SPECIAL_RELTYPES.get(reltype, "")
    prefix = " ".join(prefixes)
    # strip() removes the separator when either half is empty.
    return " ".join([prefix, result]).strip()
@@ -169,7 +165,7 @@ def _relations_to_metadata_target_type_artist(relation, m, context):
return
else:
try:
name = _artist_rel_types[reltype]
name = _ARTIST_REL_TYPES[reltype]
except KeyError:
return
if context.instrumental and name == 'lyricist':
@@ -241,6 +237,46 @@ def _relations_to_metadata(relations, m, instrumental=False, config=None, entity
_RELATIONS_TO_METADATA_TARGET_TYPE_FUNC[relation['target-type']](relation, m, context)
def _locales_from_aliases(aliases):
    """Pick the best primary alias for each locale.

    Only aliases flagged as primary and carrying a non-empty locale are
    considered. Each one is scored from two equally weighted parts (locale
    match and alias type), combined with linear_combination_of_weights().

    Args:
        aliases: iterable of alias dicts; 'name' and 'sort-name' are
            required on every alias that is kept, while 'primary', 'locale'
            and 'type-id' may be missing or None.

    Returns:
        A (full_locales, root_locales) tuple of dicts. full_locales is keyed
        by the complete locale (e.g. 'en_CA'), root_locales by the part
        before the first underscore (e.g. 'en'). Values are
        (score, (name, sort_name)) for the highest scoring alias; on equal
        scores the first alias seen is kept.
    """
    def check_higher_score(locale_dict, locale, score):
        return locale not in locale_dict or score > locale_dict[locale][0]

    full_locales = {}
    root_locales = {}
    for alias in aliases:
        # .get(): the flag may be absent or null on non-primary aliases.
        if not alias.get('primary'):
            continue
        # A null or empty locale cannot be matched; skipping it also avoids
        # calling split() on None.
        full_locale = alias.get('locale')
        if not full_locale:
            continue
        root_locale = full_locale.split('_')[0]

        # A regional locale is a weaker match for its root language.
        root_locale_score = 0.4 if '_' in full_locale else 0.8

        type_id = alias.get('type-id')
        if type_id == ALIAS_TYPE_ARTIST_NAME_ID:
            type_score = 0.8
        elif type_id == ALIAS_TYPE_LEGAL_NAME_ID:
            type_score = 0.5
        else:
            # as of 2014/09/19, only Artist or Legal names should have the
            # Primary flag
            type_score = 0.0

        names = (alias['name'], alias['sort-name'])

        comb = linear_combination_of_weights([(0.8, 5), (type_score, 5)])
        if check_higher_score(full_locales, full_locale, comb):
            full_locales[full_locale] = (comb, names)

        comb = linear_combination_of_weights([(root_locale_score, 5), (type_score, 5)])
        if check_higher_score(root_locales, root_locale, comb):
            root_locales[root_locale] = (comb, names)

    return full_locales, root_locales
def _translate_artist_node(node, config=None):
config = config or get_config()
translated_name, sort_name = None, None
@@ -273,43 +309,9 @@ def _translate_artist_node(node, config=None):
else:
log.warning("No scripts selected for translation exception match check.")
def check_higher_score(locale_dict, locale, score):
return locale not in locale_dict or score > locale_dict[locale][0]
# Prepare dictionaries of available locale aliases
full_locales = {}
root_locales = {}
if 'aliases' in node:
for alias in node['aliases']:
if not alias['primary']:
continue
if 'locale' not in alias:
continue
full_locale = alias['locale']
root_locale = full_locale.split('_')[0]
full_parts = []
root_parts = []
score = 0.8
full_parts.append((score, 5))
if '_' in full_locale:
score = 0.4
root_parts.append((score, 5))
if alias['type'] == "Artist name":
score = 0.8
elif alias['type'] == "Legal Name":
score = 0.5
else:
# as of 2014/09/19, only Artist or Legal names should have the
# Primary flag
score = 0.0
full_parts.append((score, 5))
root_parts.append((score, 5))
comb = linear_combination_of_weights(full_parts)
if check_higher_score(full_locales, full_locale, comb):
full_locales[full_locale] = (comb, (alias['name'], alias['sort-name']))
comb = linear_combination_of_weights(root_parts)
if check_higher_score(root_locales, root_locale, comb):
root_locales[root_locale] = (comb, (alias['name'], alias['sort-name']))
full_locales, root_locales = _locales_from_aliases(node['aliases'])
# First pass to match full locale if available
for locale in config.setting['artist_locales']:

View File

@@ -0,0 +1,79 @@
{
"name": "Serge Gainsbourg",
"sort-name": "Gainsbourg, Serge",
"isnis": [
"0000000115935851"
],
"life-span": {
"ended": true,
"begin": "1928-04-02",
"end": "1991-03-02"
},
"id": "b21ef19b-c6aa-4775-90d3-3cc3e067ce6d",
"end_area": {
"sort-name": "Paris",
"name": "Paris",
"iso-3166-2-codes": [
"FR-75"
],
"type-id": null,
"disambiguation": "",
"id": "dc10c22b-e510-4006-8b7f-fecb4f36436e",
"type": null
},
"begin-area": {
"disambiguation": "",
"type": null,
"id": "dc10c22b-e510-4006-8b7f-fecb4f36436e",
"name": "Paris",
"iso-3166-2-codes": [
"FR-75"
],
"type-id": null,
"sort-name": "Paris"
},
"type-id": "b6e035f4-3ce9-331c-97df-83397230b0df",
"area": {
"iso-3166-1-codes": [
"FR"
],
"disambiguation": "",
"id": "08310658-51eb-3801-80de-5a0739207115",
"type": null,
"sort-name": "France",
"name": "France",
"type-id": null
},
"country": "FR",
"ipis": [
"00011123948",
"00011935702",
"00012741616"
],
"gender": "Male",
"gender-id": "36d3d30a-839d-3eda-8cb3-29be4384e4a9",
"type": "Person",
"disambiguation": "",
"end-area": {
"sort-name": "Paris",
"name": "Paris",
"iso-3166-2-codes": [
"FR-75"
],
"type-id": null,
"disambiguation": "",
"id": "dc10c22b-e510-4006-8b7f-fecb4f36436e",
"type": null
},
"begin_area": {
"disambiguation": "",
"type": null,
"id": "dc10c22b-e510-4006-8b7f-fecb4f36436e",
"name": "Paris",
"iso-3166-2-codes": [
"FR-75"
],
"type-id": null,
"sort-name": "Paris"
}
}

View File

@@ -1,9 +0,0 @@
{
"format-id": "3e9080b0-5e6c-34ab-bd15-f526b6306a64",
"track-count": 10,
"position": 1,
"title": "",
"track-offset": 0,
"format": "12\" Vinyl",
"tracks": []
}

View File

@@ -0,0 +1,197 @@
{
"status-id": "4e304316-386d-3409-af2e-78857eec5cfe",
"id": "f17a0f30-8eb1-4322-b54e-fb71edb78d7c",
"quality": "normal",
"date": "2011-07-11",
"label-info": [
{
"catalog-number": "50999 029435 2 7",
"label": {
"label-code": 542,
"type-id": "7aaa37fe-2def-3476-b359-80245850062d",
"type": "Original Production",
"sort-name": "EMI",
"name": "EMI",
"id": "c029628b-6633-439e-bcee-ed02e8a338f7",
"disambiguation": "EMI Records, since 1972",
"aliases": [
{
"type-id": null,
"locale": null,
"primary": null,
"begin": null,
"end": null,
"name": "EMI",
"sort-name": "EMI",
"type": null,
"ended": false
},
{
"type": null,
"sort-name": "EMI 100",
"name": "EMI 100",
"ended": false,
"begin": null,
"end": null,
"type-id": null,
"locale": null,
"primary": null
},
{
"ended": false,
"type": "Search hint",
"name": "EMI Recorded Music Australia Pty Ltd",
"sort-name": "EMI Recorded Music Australia Pty Ltd",
"end": null,
"begin": null,
"primary": null,
"type-id": "829662f2-a781-3ec8-8b46-fbcea6196f81",
"locale": null
},
{
"begin": null,
"end": null,
"locale": null,
"type-id": null,
"primary": null,
"type": null,
"sort-name": "EMI Records (UK)",
"name": "EMI Records (UK)",
"ended": false
},
{
"begin": null,
"end": null,
"locale": null,
"type-id": null,
"primary": null,
"name": "EMI UK",
"sort-name": "EMI UK",
"type": null,
"ended": false
}
]
}
}
],
"cover-art-archive": {
"count": 88,
"back": true,
"darkened": false,
"front": true,
"artwork": true
},
"packaging-id": "815b7785-8284-3926-8f04-e48bc6c4d102",
"country": "XE",
"status": "Official",
"packaging": "Other",
"artist-credit": [
{
"joinphrase": "",
"artist": {
"aliases": [],
"type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b",
"id": "83d91898-7763-47d7-b03b-b92132375c47",
"disambiguation": "",
"type": "Group",
"name": "Pink Floyd",
"sort-name": "Pink Floyd"
},
"name": "Pink Floyd"
}
],
"disambiguation": "Immersion box set",
"aliases": [],
"asin": "B004ZNAUVW",
"text-representation": {
"language": "eng",
"script": "Latn"
},
"media": [
{
"position": 1,
"track-count": 5,
"format-id": "9712d52a-4509-3d4b-a1a2-67c88c643e31",
"title": "The Original Album",
"discs": [
{
"sectors": 199410,
"offsets": [
150,
61109,
94976,
118065,
143171
],
"offset-count": 5,
"id": "tNSQ3K59B8ZkSb19P__Jet6B.sk-"
}
],
"format": "CD"
},
{
"track-count": 6,
"format-id": "9712d52a-4509-3d4b-a1a2-67c88c643e31",
"position": 2,
"format": "CD",
"title": "Unreleased Audio Tracks",
"discs": [
{
"id": "6NksHllhjO74WpVDabBDhj3P0qk-",
"offset-count": 6,
"offsets": [
150,
91851,
148493,
230435,
240674,
273050
],
"sectors": 301068
}
]
},
{
"format": "DVD-Video",
"discs": [],
"title": "Wish You Were Here Multi\u2010Channel Audio Mixes",
"format-id": "bb71fd58-ff93-32b4-a201-4ad1b2a80e5f",
"track-count": 15,
"position": 3
},
{
"format": "DVD-Video",
"discs": [],
"title": "Audio\u2010Visual Material",
"format-id": "bb71fd58-ff93-32b4-a201-4ad1b2a80e5f",
"track-count": 4,
"position": 4
},
{
"discs": [],
"title": "High Resolution Audio and Audio\u2010Visual Material",
"format": "Blu-ray",
"position": 5,
"format-id": "c693c05b-3316-3d69-afc2-5e2bc455bffc",
"track-count": 19
}
],
"title": "Wish You Were Here",
"barcode": "5099902943527",
"release-events": [
{
"date": "2011-07-11",
"area": {
"disambiguation": "",
"id": "89a675c2-3e37-3518-b83c-418bad59a85a",
"type-id": null,
"iso-3166-1-codes": [
"XE"
],
"sort-name": "Europe",
"name": "Europe",
"type": null
}
}
]
}

View File

@@ -33,8 +33,16 @@ from test.picardtestcase import (
from picard import config
from picard.album import Album
from picard.const import (
ALIAS_TYPE_ARTIST_NAME_ID,
ALIAS_TYPE_LEGAL_NAME_ID,
ALIAS_TYPE_SEARCH_HINT_ID,
)
from picard.mbjson import (
_locales_from_aliases,
_node_skip_empty_iter,
_parse_attributes,
_relations_to_metadata_target_type_url,
_translate_artist_node,
artist_to_metadata,
countries_from_node,
@@ -415,14 +423,27 @@ class NullTrackTest(MBJSONTest):
class MediaTest(MBJSONTest):
    """Media handling checked against the five-medium release fixture."""

    filename = 'release_5medias.json'

    def test_media_formats_from_node_multi(self):
        # Identical formats are grouped and counted.
        formats = media_formats_from_node(self.json_doc['media'])
        self.assertEqual('2×CD + 2×DVD-Video + Blu-ray', formats)

    def test_medium_to_metadata_0(self):
        m = Metadata()
        medium_to_metadata(self.json_doc['media'][0], m)
        self.assertEqual(m['discnumber'], '1')
        self.assertEqual(m['media'], 'CD')
        self.assertEqual(m['totaltracks'], '5')
        self.assertEqual(m['discsubtitle'], 'The Original Album')

    def test_medium_to_metadata_4(self):
        m = Metadata()
        medium_to_metadata(self.json_doc['media'][4], m)
        self.assertEqual(m['discnumber'], '5')
        self.assertEqual(m['media'], 'Blu-ray')
        self.assertEqual(m['totaltracks'], '19')
        # The fixture title uses U+2010 (HYPHEN), not an ASCII '-'; the
        # escape keeps that visible and safe from copy/paste loss.
        self.assertEqual(m['discsubtitle'], 'High Resolution Audio and Audio\u2010Visual Material')
class MediaPregapTest(MBJSONTest):
@@ -447,22 +468,6 @@ class NullMediaTest(MBJSONTest):
self.assertEqual(m, {})
class ArtistTest(MBJSONTest):
    """Checks the tags artist_to_metadata() sets for the 'artist.json' document."""

    filename = 'artist.json'

    def test_artist(self):
        metadata = Metadata()
        artist_to_metadata(self.json_doc, metadata)
        # Checked in order; the first mismatch fails the test.
        expected_tags = (
            ('area', 'United Kingdom'),
            ('beginarea', 'Hebden Bridge'),
            ('begindate', '1991-02-17'),
            ('gender', 'Male'),
            ('musicbrainz_artistid', 'b8a7c51f-362c-4dcb-a259-bc6e0095f0a6'),
            ('name', 'Ed Sheeran'),
            ('type', 'Person'),
        )
        for tag, value in expected_tags:
            self.assertEqual(metadata[tag], value)
class NullArtistTest(MBJSONTest):
filename = 'artist_null.json'
@@ -473,6 +478,24 @@ class NullArtistTest(MBJSONTest):
self.assertEqual(m, {})
class ArtistEndedTest(MBJSONTest):
    """Checks artist_to_metadata() for an artist whose life span has ended."""

    filename = 'artist_ended.json'

    def test_artist_ended(self):
        metadata = Metadata()
        artist_to_metadata(self.json_doc, metadata)
        # Checked in order; the first mismatch fails the test.
        expected_tags = (
            ('area', 'France'),
            ('beginarea', 'Paris'),
            ('begindate', '1928-04-02'),
            ('endarea', 'Paris'),
            ('enddate', '1991-03-02'),
            ('gender', 'Male'),
            ('musicbrainz_artistid', 'b21ef19b-c6aa-4775-90d3-3cc3e067ce6d'),
            ('name', 'Serge Gainsbourg'),
            ('type', 'Person'),
        )
        for tag, value in expected_tags:
            self.assertEqual(metadata[tag], value)
class ArtistTranslationTest(MBJSONTest):
filename = 'artist.json'
@@ -598,6 +621,100 @@ class ArtistTranslationArabicExceptionsTest(MBJSONTest):
self.assertEqual(artist_name, 'محمد منير')
class TestAliasesLocales(PicardTestCase):
    """Exercises _locales_from_aliases() on a small alias list.

    Each test tweaks the aliases built in setUp() and then compares both
    returned dictionaries with the expected ones.
    """

    def setUp(self):
        self.maxDiff = None
        search_hint = {
            "name": "Shearan",
            "sort-name": "Shearan",
            "primary": None,
            "locale": None,
            "type-id": ALIAS_TYPE_SEARCH_HINT_ID,
        }
        english = {
            "primary": True,
            "name": "Ed Sheeran (en)",
            "sort-name": "Sheeran, Ed",
            "type-id": ALIAS_TYPE_ARTIST_NAME_ID,
            "locale": "en",
        }
        canadian_english = {
            "primary": True,
            "name": "Ed Sheeran (en_CA)",
            "sort-name": "Sheeran, Ed",
            "type-id": ALIAS_TYPE_ARTIST_NAME_ID,
            "locale": "en_CA",
        }
        self.aliases = [search_hint, english, canadian_english]

    @staticmethod
    def _entry(score, name):
        # Expected dictionary value: (score, (name, sort-name)).
        return (score, (name, 'Sheeran, Ed'))

    def _assert_locales(self, expect_full, expect_root):
        # Run the function on the current aliases and compare both results.
        full_locales, root_locales = _locales_from_aliases(self.aliases)
        self.assertDictEqual(expect_full, full_locales)
        self.assertDictEqual(expect_root, root_locales)

    def test_1(self):
        self._assert_locales(
            expect_full={
                'en': self._entry(0.8, 'Ed Sheeran (en)'),
                'en_CA': self._entry(0.8, 'Ed Sheeran (en_CA)'),
            },
            expect_root={'en': self._entry(0.8, 'Ed Sheeran (en)')},
        )

    def test_2(self):
        # A legal name scores lower than an artist name.
        self.aliases[2]['type-id'] = ALIAS_TYPE_LEGAL_NAME_ID
        self._assert_locales(
            expect_full={
                'en': self._entry(0.8, 'Ed Sheeran (en)'),
                'en_CA': self._entry(0.65, 'Ed Sheeran (en_CA)'),
            },
            expect_root={'en': self._entry(0.8, 'Ed Sheeran (en)')},
        )

    def test_3(self):
        # A primary alias without any 'locale' key is ignored.
        self.aliases[0]['primary'] = True
        del self.aliases[0]['locale']
        self._assert_locales(
            expect_full={
                'en': self._entry(0.8, 'Ed Sheeran (en)'),
                'en_CA': self._entry(0.8, 'Ed Sheeran (en_CA)'),
            },
            expect_root={'en': self._entry(0.8, 'Ed Sheeran (en)')},
        )

    def test_4(self):
        # Any other alias type contributes nothing to the score.
        self.aliases[2]['type-id'] = ALIAS_TYPE_SEARCH_HINT_ID
        self._assert_locales(
            expect_full={
                'en': self._entry(0.8, 'Ed Sheeran (en)'),
                'en_CA': self._entry(0.4, 'Ed Sheeran (en_CA)'),
            },
            expect_root={'en': self._entry(0.8, 'Ed Sheeran (en)')},
        )

    def test_5(self):
        # Only regional locales: the root entry keeps the first one seen.
        self.aliases[1]['locale'] = 'en_US'
        self.aliases[1]['name'] = 'Ed Sheeran (en_US)'
        self._assert_locales(
            expect_full={
                'en_US': self._entry(0.8, 'Ed Sheeran (en_US)'),
                'en_CA': self._entry(0.8, 'Ed Sheeran (en_CA)'),
            },
            expect_root={'en': self._entry(0.6, 'Ed Sheeran (en_US)')},
        )

    def test_6(self):
        # Same locale twice: the higher scoring artist name replaces the
        # legal name that was seen first.
        self.aliases[1]['type-id'] = ALIAS_TYPE_LEGAL_NAME_ID
        self.aliases[1]['name'] = 'Ed Sheeran (en1)'
        self.aliases[2]['locale'] = 'en'
        self.aliases[2]['name'] = 'Ed Sheeran (en2)'
        self.aliases[2]['type-id'] = ALIAS_TYPE_ARTIST_NAME_ID
        self._assert_locales(
            expect_full={'en': self._entry(0.8, 'Ed Sheeran (en2)')},
            expect_root={'en': self._entry(0.8, 'Ed Sheeran (en2)')},
        )
class ReleaseGroupTest(MBJSONTest):
filename = 'release_group.json'
@@ -717,3 +834,70 @@ class GetScoreTest(PicardTestCase):
def test_get_score_no_score(self):
self.assertEqual(1.0, get_score({}))
class ParseAttributeTest(PicardTestCase):
    """Checks the text _parse_attributes() builds from relationship attributes."""

    def test_1(self):
        # The prefix attribute stays in front; the noun uses its credit.
        credits = {'keyboard': 'keyboards'}
        self.assertEqual(
            'guest keyboards',
            _parse_attributes(('guest', 'keyboard'), 'instrument', credits),
        )

    def test_2(self):
        # No attributes at all: the 'vocal' type falls back to 'vocals'.
        self.assertEqual('vocals', _parse_attributes((), 'vocal', {}))

    def test_3(self):
        # Two credited nouns are joined with 'and'.
        credits = {'keyboard': 'keyboards', 'guitar': 'weird guitar'}
        self.assertEqual(
            'weird guitar and keyboards',
            _parse_attributes(('guitar', 'keyboard'), 'instrument', credits),
        )
class RelationsToMetadataTargetTypeUrlTest(PicardTestCase):
    """Checks the tags set by _relations_to_metadata_target_type_url()."""

    @staticmethod
    def _url_relation(reltype, resource):
        # Minimal URL relation, limited to the keys these tests provide.
        return {
            'type': reltype,
            'url': {
                'resource': resource,
            }
        }

    def test_invalid_asin_url(self):
        # The identifier ends with a lowercase 'x'; no ASIN is expected.
        metadata = Metadata()
        relation = self._url_relation('amazon asin', 'http://www.amazon.com/dp/020530902x')
        _relations_to_metadata_target_type_url(relation, metadata, None)
        self.assertEqual('', metadata['asin'])

    def test_has_asin_already(self):
        # An ASIN that is already set is left untouched.
        metadata = Metadata({'asin': 'ASIN'})
        relation = self._url_relation('amazon asin', 'http://www.amazon.com/dp/020530902X')
        _relations_to_metadata_target_type_url(relation, metadata, None)
        self.assertEqual('ASIN', metadata['asin'])

    def test_valid_asin_url(self):
        metadata = Metadata()
        relation = self._url_relation('amazon asin', 'http://www.amazon.com/dp/020530902X')
        _relations_to_metadata_target_type_url(relation, metadata, None)
        self.assertEqual('020530902X', metadata['asin'])

    def test_license_url(self):
        # The license URL is stored as given.
        metadata = Metadata()
        relation = self._url_relation('license', 'https://URL.LICENSE')
        _relations_to_metadata_target_type_url(relation, metadata, None)
        self.assertEqual('https://URL.LICENSE', metadata['license'])