Files
picard/picard/mbjson.py

599 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
#
# Picard, the next-generation MusicBrainz tagger
#
# Copyright (C) 2017 David Mandelberg
# Copyright (C) 2017-2018 Sambhav Kothari
# Copyright (C) 2017-2021 Laurent Monin
# Copyright (C) 2018-2021 Philipp Wolfer
# Copyright (C) 2019 Michael Wiencek
# Copyright (C) 2020 David Kellner
# Copyright (C) 2020 dukeyin
# Copyright (C) 2021 Bob Swift
# Copyright (C) 2021 Vladislav Karbovskii
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from picard import log
from picard.config import get_config
from picard.const import RELEASE_FORMATS
from picard.util import (
format_time,
linear_combination_of_weights,
parse_amazon_url,
translate_from_sortname,
)
from picard.util.script_detector_weighted import detect_script_weighted
_artist_rel_types = {
"arranger": "arranger",
"audio": "engineer",
"chorus master": "performer:chorus master",
"composer": "composer",
"concertmaster": "performer:concertmaster",
"conductor": "conductor",
"engineer": "engineer",
"instrument arranger": "arranger",
"librettist": "lyricist",
"live sound": "engineer",
"lyricist": "lyricist",
# "mastering": "engineer",
"mix-DJ": "djmixer",
"mix": "mixer",
"orchestrator": "arranger",
"performing orchestra": "performer:orchestra",
"producer": "producer",
# "recording": "engineer",
"remixer": "remixer",
"sound": "engineer",
"video director": "director",
"vocal arranger": "arranger",
"writer": "writer",
}
_TRACK_TO_METADATA = {
'number': '~musicbrainz_tracknumber',
'position': 'tracknumber',
'title': 'title',
}
_MEDIUM_TO_METADATA = {
'format': 'media',
'position': 'discnumber',
'title': 'discsubtitle',
'track-count': 'totaltracks',
}
_RECORDING_TO_METADATA = {
'disambiguation': '~recordingcomment',
'first-release-date': '~recording_firstreleasedate',
'title': 'title',
}
_RELEASE_TO_METADATA = {
'annotation': '~releaseannotation',
'asin': 'asin',
'barcode': 'barcode',
'country': 'releasecountry',
'date': 'date',
'disambiguation': '~releasecomment',
'title': 'album',
}
_ARTIST_TO_METADATA = {
'gender': 'gender',
'name': 'name',
'type': 'type',
}
_RELEASE_GROUP_TO_METADATA = {
'disambiguation': '~releasegroupcomment',
'first-release-date': '~releasegroup_firstreleasedate',
'title': '~releasegroup',
}
_REPLACE_MAP = {}
_PREFIX_ATTRS = ['guest', 'additional', 'minor', 'solo']
_BLANK_SPECIAL_RELTYPES = {'vocal': 'vocals'}
def _transform_attribute(attr, attr_credits):
if attr in attr_credits:
return attr_credits[attr]
else:
return _REPLACE_MAP.get(attr, attr)
def _parse_attributes(attrs, reltype, attr_credits):
prefixes = []
nouns = []
for attr in attrs:
attr = _transform_attribute(attr, attr_credits)
if attr in _PREFIX_ATTRS:
prefixes.append(attr)
else:
nouns.append(attr)
prefix = ' '.join(prefixes)
if len(nouns) > 1:
result = '%s and %s' % (', '.join(nouns[:-1]), nouns[-1:][0])
elif len(nouns) == 1:
result = nouns[0]
else:
result = _BLANK_SPECIAL_RELTYPES.get(reltype, '')
return ' '.join([prefix, result]).strip()
def _relations_to_metadata(relations, m):
config = get_config()
use_credited_as = not config.setting['standardize_artists']
use_instrument_credits = not config.setting['standardize_instruments']
for relation in relations:
if relation['target-type'] == 'artist':
artist = relation['artist']
value, valuesort = _translate_artist_node(artist)
has_translation = (value != artist['name'])
if not has_translation and use_credited_as and 'target-credit' in relation:
credited_as = relation['target-credit']
if credited_as:
value = credited_as
reltype = relation['type']
attribs = []
if 'attributes' in relation:
attribs = [a for a in relation['attributes']]
if reltype in {'vocal', 'instrument', 'performer'}:
if use_instrument_credits:
attr_credits = relation.get('attribute-credits', {})
else:
attr_credits = {}
name = 'performer:' + _parse_attributes(attribs, reltype, attr_credits)
elif reltype == 'mix-DJ' and len(attribs) > 0:
if not hasattr(m, "_djmix_ars"):
m._djmix_ars = {}
for attr in attribs:
m._djmix_ars.setdefault(attr.split()[1], []).append(value)
continue
else:
try:
name = _artist_rel_types[reltype]
except KeyError:
continue
if value not in m[name]:
m.add(name, value)
if name == 'composer' and valuesort not in m['composersort']:
m.add('composersort', valuesort)
elif relation['target-type'] == 'work':
if relation['type'] == 'performance':
performance_to_metadata(relation, m)
work_to_metadata(relation['work'], m)
elif relation['target-type'] == 'url':
if relation['type'] == 'amazon asin' and 'asin' not in m:
amz = parse_amazon_url(relation['url']['resource'])
if amz is not None:
m['asin'] = amz['asin']
elif relation['type'] == 'license':
url = relation['url']['resource']
m.add('license', url)
def _translate_artist_node(node):
config = get_config()
transl, translsort = None, None
if config.setting['translate_artist_names']:
if config.setting['translate_artist_names_script_exception']:
log_text = 'Script alpha characters found in "{0}": '.format(node["name"],)
detected_scripts = detect_script_weighted(node["name"])
if detected_scripts:
log_text += "; ".join(
list("{0} ({1:.1f}%)".format(scr_id, detected_scripts[scr_id] * 100) for scr_id in detected_scripts)
)
else:
log_text += "None"
log.debug(log_text)
if detected_scripts:
if config.setting["script_exceptions"]:
log_text = " found in selected scripts: " + "; ".join(
list("{0} ({1}%)".format(scr[0], scr[1]) for scr in config.setting["script_exceptions"])
)
for script_id, script_weighting in config.setting["script_exceptions"]:
if script_id in detected_scripts and detected_scripts[script_id] >= script_weighting / 100:
log.debug("Match" + log_text)
return node['name'], node['sort-name']
log.debug("No match" + log_text)
else:
log.warning("No scripts selected for translation exception match check.")
def check_higher_score(locale_dict, locale, score):
return locale not in locale_dict or score > locale_dict[locale][0]
# Prepare dictionaries of available locale aliases
full_locales = {}
root_locales = {}
if "aliases" in node:
for alias in node['aliases']:
if not alias["primary"]:
continue
if "locale" not in alias:
continue
full_locale = alias['locale']
root_locale = full_locale.split("_")[0]
full_parts = []
root_parts = []
score = 0.8
full_parts.append((score, 5))
if '_' in full_locale:
score = 0.4
root_parts.append((score, 5))
if alias["type"] == "Artist name":
score = 0.8
elif alias["type"] == "Legal Name":
score = 0.5
else:
# as 2014/09/19, only Artist or Legal names should have the
# Primary flag
score = 0.0
full_parts.append((score, 5))
root_parts.append((score, 5))
comb = linear_combination_of_weights(full_parts)
if check_higher_score(full_locales, full_locale, comb):
full_locales[full_locale] = (comb, (alias['name'], alias["sort-name"]))
comb = linear_combination_of_weights(root_parts)
if check_higher_score(root_locales, root_locale, comb):
root_locales[root_locale] = (comb, (alias['name'], alias["sort-name"]))
# First pass to match full locale if available
for locale in config.setting["artist_locales"]:
if locale in full_locales:
return full_locales[locale][1]
# Second pass to match root locale if available
for locale in config.setting["artist_locales"]:
lang = locale.split("_")[0]
if lang in root_locales:
return root_locales[lang][1]
# No matches found in available alias locales
translsort = node['sort-name']
transl = translate_from_sortname(node['name'] or "", translsort)
else:
transl, translsort = node['name'], node['sort-name']
return (transl, translsort)
def artist_credit_from_node(node):
artist = ""
artistsort = ""
artists = []
artistssort = []
config = get_config()
use_credited_as = not config.setting["standardize_artists"]
for artist_info in node:
a = artist_info['artist']
translated, translated_sort = _translate_artist_node(a)
has_translation = (translated != a['name'])
if has_translation:
name = translated
elif use_credited_as and 'name' in artist_info:
name = artist_info['name']
else:
name = a['name']
artist += name
artistsort += translated_sort or ""
artists.append(name)
artistssort.append(translated_sort)
if 'joinphrase' in artist_info:
artist += artist_info['joinphrase'] or ""
artistsort += artist_info['joinphrase'] or ""
return (artist, artistsort, artists, artistssort)
def artist_credit_to_metadata(node, m, release=False):
ids = [n['artist']['id'] for n in node]
artist, artistsort, artists, artistssort = artist_credit_from_node(node)
if release:
m["musicbrainz_albumartistid"] = ids
m["albumartist"] = artist
m["albumartistsort"] = artistsort
m["~albumartists"] = artists
m["~albumartists_sort"] = artistssort
else:
m["musicbrainz_artistid"] = ids
m["artist"] = artist
m["artistsort"] = artistsort
m["artists"] = artists
m["~artists_sort"] = artistssort
def _release_event_iter(node):
if "release-events" in node:
yield from node['release-events']
def _country_from_release_event(release_event):
try:
return release_event['area']['iso-3166-1-codes'][0]
# TypeError in case object is None
except (KeyError, IndexError, TypeError):
pass
return None
def countries_from_node(node):
countries = []
for release_event in _release_event_iter(node):
country_code = _country_from_release_event(release_event)
if country_code:
countries.append(country_code)
return countries
def release_dates_and_countries_from_node(node):
dates = []
countries = []
for release_event in _release_event_iter(node):
dates.append(release_event['date'] or '')
country_code = _country_from_release_event(release_event)
if country_code:
countries.append(country_code)
return dates, countries
def label_info_from_node(node):
labels = []
catalog_numbers = []
for label_info in node:
if 'label' in label_info and label_info['label'] and 'name' in label_info['label']:
label = label_info['label']['name']
if label and label not in labels:
labels.append(label)
if 'catalog-number' in label_info:
cat_num = label_info['catalog-number']
if cat_num and cat_num not in catalog_numbers:
catalog_numbers.append(cat_num)
return (labels, catalog_numbers)
def media_formats_from_node(node):
formats_count = {}
formats_order = []
for medium in node:
text = medium.get('format', "(unknown)") or "(unknown)"
if text in formats_count:
formats_count[text] += 1
else:
formats_count[text] = 1
formats_order.append(text)
formats = []
for medium_format in formats_order:
count = formats_count[medium_format]
medium_format = RELEASE_FORMATS.get(medium_format, medium_format)
if count > 1:
medium_format = str(count) + "×" + medium_format
formats.append(medium_format)
return " + ".join(formats)
def _node_skip_empty_iter(node):
for key, value in node.items():
if value or value == 0:
yield key, value
def track_to_metadata(node, track):
m = track.metadata
recording_to_metadata(node['recording'], m, track)
m.add_unique('musicbrainz_trackid', node['id'])
# overwrite with data we have on the track
for key, value in _node_skip_empty_iter(node):
if key in _TRACK_TO_METADATA:
m[_TRACK_TO_METADATA[key]] = value
elif key == 'length' and value:
m.length = value
elif key == 'artist-credit':
artist_credit_to_metadata(value, m)
if m.length:
m['~length'] = format_time(m.length)
def recording_to_metadata(node, m, track=None):
m.length = 0
m.add_unique('musicbrainz_recordingid', node['id'])
for key, value in _node_skip_empty_iter(node):
if key in _RECORDING_TO_METADATA:
m[_RECORDING_TO_METADATA[key]] = value
elif key == 'user-rating':
m['~rating'] = value['value']
elif key == 'length':
m.length = value
elif key == 'artist-credit':
artist_credit_to_metadata(value, m)
# set tags from artists
if track:
for credit in value:
artist = credit['artist']
artist_obj = track.append_track_artist(artist['id'])
add_genres_from_node(artist, artist_obj)
elif key == 'relations':
_relations_to_metadata(value, m)
elif track and key in {'genres', 'tags'}:
add_genres(value, track)
elif track and key in {'user-genres', 'user-tags'}:
add_user_genres(value, track)
elif key == 'isrcs':
add_isrcs_to_metadata(value, m)
elif key == 'video' and value:
m['~video'] = '1'
if m['title']:
m['~recordingtitle'] = m['title']
if m.length:
m['~length'] = format_time(m.length)
if 'instrumental' in m.getall('~performance_attributes'):
m.unset('lyricist')
m['language'] = 'zxx'
def performance_to_metadata(relation, m):
if 'attributes' in relation:
for attribute in relation['attributes']:
m.add_unique("~performance_attributes", attribute)
def work_to_metadata(work, m):
m.add_unique("musicbrainz_workid", work['id'])
if 'languages' in work:
for language in work['languages']:
m.add_unique("language", language)
elif 'language' in work:
m.add_unique("language", work['language'])
if 'title' in work:
m.add_unique("work", work['title'])
if 'disambiguation' in work:
m.add_unique("~workcomment", work['disambiguation'])
if 'relations' in work:
_relations_to_metadata(work['relations'], m)
def medium_to_metadata(node, m):
for key, value in _node_skip_empty_iter(node):
if key in _MEDIUM_TO_METADATA:
m[_MEDIUM_TO_METADATA[key]] = value
def artist_to_metadata(node, m):
"""Make meatadata dict from a JSON 'artist' node."""
m.add_unique("musicbrainz_artistid", node['id'])
for key, value in _node_skip_empty_iter(node):
if key in _ARTIST_TO_METADATA:
m[_ARTIST_TO_METADATA[key]] = value
elif key == "area":
m["area"] = value['name']
elif key == "life-span":
if "begin" in value:
m["begindate"] = value['begin']
if "ended" in value:
ended = value['ended']
if ended and "end" in value:
m["enddate"] = value['end']
elif key == "begin-area":
m["beginarea"] = value['name']
elif key == "end-area":
m["endarea"] = value['name']
def release_to_metadata(node, m, album=None):
"""Make metadata dict from a JSON 'release' node."""
config = get_config()
m.add_unique('musicbrainz_albumid', node['id'])
for key, value in _node_skip_empty_iter(node):
if key in _RELEASE_TO_METADATA:
m[_RELEASE_TO_METADATA[key]] = value
elif key == 'status':
m['releasestatus'] = value.lower()
elif key == 'artist-credit':
artist_credit_to_metadata(value, m, release=True)
# set tags from artists
if album is not None:
for credit in value:
artist = credit['artist']
artist_obj = album.append_album_artist(artist['id'])
add_genres_from_node(artist, artist_obj)
elif key == 'relations' and config.setting['release_ars']:
_relations_to_metadata(value, m)
elif key == 'label-info':
m['label'], m['catalognumber'] = label_info_from_node(value)
elif key == 'text-representation':
if 'language' in value:
m['~releaselanguage'] = value['language']
if 'script' in value:
m['script'] = value['script']
m['~releasecountries'] = release_countries = countries_from_node(node)
# The MB web service returns the first release country in the country tag.
# If the user has configured preferred release countries, use the first one
# if it is one in the complete list of release countries.
for country in config.setting["preferred_release_countries"]:
if country in release_countries:
m['releasecountry'] = country
break
add_genres_from_node(node, album)
def release_group_to_metadata(node, m, release_group=None):
"""Make metadata dict from a JSON 'release-group' node taken from inside a 'release' node."""
m.add_unique('musicbrainz_releasegroupid', node['id'])
for key, value in _node_skip_empty_iter(node):
if key in _RELEASE_GROUP_TO_METADATA:
m[_RELEASE_GROUP_TO_METADATA[key]] = value
elif key == 'primary-type':
m['~primaryreleasetype'] = value.lower()
elif key == 'secondary-types':
add_secondary_release_types(value, m)
add_genres_from_node(node, release_group)
if m['~releasegroup_firstreleasedate']:
m['originaldate'] = m['~releasegroup_firstreleasedate']
m['originalyear'] = m['originaldate'][:4]
m['releasetype'] = m.getall('~primaryreleasetype') + m.getall('~secondaryreleasetype')
def add_secondary_release_types(node, m):
for secondary_type in node:
m.add_unique('~secondaryreleasetype', secondary_type.lower())
def add_genres_from_node(node, obj):
if obj is None:
return
if 'genres' in node:
add_genres(node['genres'], obj)
if 'tags' in node:
add_genres(node['tags'], obj)
if 'user-genres' in node:
add_user_genres(node['user-genres'], obj)
if 'user-tags' in node:
add_user_genres(node['user-tags'], obj)
def add_genres(node, obj):
for tag in node:
key = tag['name']
count = tag['count']
if key:
obj.add_genre(key, count)
def add_user_genres(node, obj):
for tag in node:
key = tag['name']
if key:
obj.add_genre(key, 1)
def add_isrcs_to_metadata(node, metadata):
for isrc in node:
metadata.add('isrc', isrc)
def get_score(node):
"""Returns the score attribute for a node.
The score is expected to be an integer between 0 and 100, it is returned as
a value between 0.0 and 1.0. If there is no score attribute or it has an
invalid value 1.0 will be returned.
"""
try:
return int(node.get('score', 100)) / 100
except (TypeError, ValueError):
return 1.0