PICARD-2584: If AcoustID provides not metadata load the MB recording

This resolves problem with AcoustID scan finding no matches or only
standalone-recordings.
This commit is contained in:
Philipp Wolfer
2023-12-21 22:20:59 +01:00
parent 990a79c8f3
commit c5b0ddf05f
4 changed files with 197 additions and 44 deletions

View File

@@ -111,10 +111,11 @@ class AcoustIDClient(QtCore.QObject):
try:
status = document['status']
if status == 'ok':
resolver = RecordingResolver(self._acoustid_api.webservice)
resolver.resolve(
resolver = RecordingResolver(
self._acoustid_api.webservice,
document,
partial(self._on_recording_resolve_finish, task, document, http))
callback=partial(self._on_recording_resolve_finish, task, document, http))
resolver.resolve()
else:
mparms = {
'error': document['error']['message'],
@@ -134,17 +135,18 @@ class AcoustIDClient(QtCore.QObject):
task.next_func(doc, http, e)
def _on_recording_resolve_finish(self, task, document, http, result=None, error=None):
recording_list = document['recordings'] = result
document['recordings'] = recording_list = result
if not recording_list:
results = document.get('results')
if results:
# Set AcoustID in tags if there was no matching recording
task.file.metadata['acoustid_id'] = results[0]['id']
acoustid = results[0].get('id')
task.file.metadata['acoustid_id'] = acoustid
task.file.update()
log.debug(
"AcoustID: Found no matching recordings for '%s',"
" setting acoustid_id tag to %r",
task.file.filename, results[0]['id']
task.file.filename, acoustid
)
else:
log.debug(

View File

@@ -144,13 +144,5 @@ def parse_recording(recording):
return recording_mb
def max_source_count(recordings):
"""Given a list of recordings return the highest number of sources.
This ignores recordings without metadata.
"""
sources = [
r.get('sources', 1)
for r in recordings
if r.get('title')
]
return max(sources + [1])
def recording_has_metadata(recording):
return 'id' in recording and recording.get('title') is not None

View File

@@ -18,38 +18,118 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from collections import (
defaultdict,
deque,
namedtuple,
)
from typing import (
Dict,
List,
)
from PyQt6.QtNetwork import QNetworkReply
from picard.acoustid.json_helpers import (
max_source_count,
parse_recording,
recording_has_metadata,
)
from picard.webservice import WebService
from picard.webservice.api_helpers import MBAPIHelper
class Recording:
recording: dict
result_score: float
sources: int
def __init__(self, recording, result_score=1.0, sources=1):
self.recording = recording
self.result_score = result_score
self.sources = sources
IncompleteRecording = namedtuple('Recording', 'mbid acoustid result_score sources')
class RecordingResolver:
"""Given an AcoustID lookup result returns a list of MB recordings.
The recordings are either directly taken from the AcoustID result or, if the
results return only the MBID without metadata, loaded via the MB web service.
"""
def __init__(self, ws: WebService) -> None:
self.mbapi = MBAPIHelper(ws)
_recording_map: Dict[str, Dict[str, Recording]]
def resolve(self, doc: dict, callback: callable) -> None:
recording_map = {}
results = doc.get('results') or []
def __init__(self, ws: WebService, doc: dict, callback: callable) -> None:
self._mbapi = MBAPIHelper(ws)
self._doc = doc
self._callback = callback
self._recording_map = defaultdict(dict)
self._missing_metadata = deque()
def resolve(self) -> None:
results = self._doc.get('results') or []
for result in results:
recordings = result.get('recordings') or []
max_sources = max_source_count(recordings)
result_score = get_score(result)
acoustid = result.get('id')
for recording in recordings:
parsed_recording = parse_recording(recording)
if parsed_recording is not None:
# Calculate a score based on result score and sources for this
# recording relative to other recordings in this result
score = min(recording.get('sources', 1) / max_sources, 1.0) * 100
parsed_recording['score'] = score * result_score
parsed_recording['acoustid'] = result.get('id')
recording_map[parsed_recording['id']] = parsed_recording
sources = recording.get('sources', 1)
if recording_has_metadata(recording):
self._recording_map[acoustid][recording['id']] = Recording(
recording=parse_recording(recording),
result_score=result_score,
sources=sources,
)
else:
self._missing_metadata.append(IncompleteRecording(
mbid=recording.get('id'),
acoustid=acoustid,
result_score=result_score,
sources=sources,
))
# TODO: Load recording details for recordings without metadata
callback(recording_map.values())
if self._missing_metadata:
self._load_recordings()
else:
self._send_results()
def _load_recordings(self):
if not self._missing_metadata:
self._send_results()
return
self._mbapi.get_track_by_id(
self._missing_metadata[0].mbid,
self._recording_request_finished,
inc=['release-groups', 'releases'],
)
def _recording_request_finished(self, mb_recording, http, error):
recording = self._missing_metadata.popleft()
if error:
if error == QNetworkReply.NetworkError.ContentNotFoundError:
# Recording does not exist, ignore and move on
self._load_recordings()
else:
self._send_results(error)
return
mbid = mb_recording.get('id')
recording_dict = self._recording_map[recording.acoustid]
if mbid:
if mbid not in recording_dict:
recording_dict[mbid] = Recording(
recording=mb_recording,
result_score=recording.result_score,
sources=recording.sources,
)
else:
recording_dict[mbid].sources += recording.sources
self._load_recordings()
def _send_results(self, error=None):
self._callback(list(parse_recording_map(self._recording_map)), error)
def get_score(node):
@@ -57,3 +137,28 @@ def get_score(node):
return float(node.get('score', 1.0))
except (TypeError, ValueError):
return 1.0
def parse_recording_map(recording_map: Dict[str, Dict[str, Recording]]):
for acoustid, recordings in recording_map.items():
recording_list = recordings.values()
max_sources = max_source_count(recording_list)
for recording in recording_list:
parsed_recording = recording.recording
if parsed_recording is not None:
# Calculate a score based on result score and sources for this
# recording relative to other recordings in this result
score = min(recording.sources / max_sources, 1.0) * 100
parsed_recording['score'] = score * recording.result_score
parsed_recording['acoustid'] = acoustid
parsed_recording['sources'] = recording.sources
yield parsed_recording
def max_source_count(recordings: List[Recording]):
"""Given a list of recordings return the highest number of sources.
This ignores recordings without metadata.
"""
sources = {r.sources for r in recordings}
sources.add(1)
return max(sources)

View File

@@ -31,8 +31,13 @@ import os
from test.picardtestcase import PicardTestCase
from picard.acoustid.json_helpers import (
max_source_count,
parse_recording,
recording_has_metadata,
)
from picard.acoustid.recordings import (
Recording,
max_source_count,
parse_recording_map,
)
from picard.mbjson import recording_to_metadata
from picard.metadata import Metadata
@@ -100,17 +105,66 @@ class NullRecordingTest(AcoustIDTest):
self.assertEqual(m, {})
class MaxSourceCountTest(AcoustIDTest):
filename = 'acoustid_no_metadata.json'
class MaxSourceCountTest(PicardTestCase):
def test_max_source_count(self):
c = max_source_count(self.json_doc['results'][0]['recordings'])
self.assertEqual(13, c)
recordings = (
Recording({}, sources=5),
Recording({}, sources=13),
Recording({}, sources=12),
)
self.assertEqual(13, max_source_count(recordings))
def test_max_source_count_no_recordings(self):
c = max_source_count([])
self.assertEqual(1, c)
self.assertEqual(1, max_source_count([]))
def test_max_source_count_no_value(self):
c = max_source_count([{'title': 'foo', 'sources': 42}, {'title': 'foo'}])
self.assertEqual(42, c)
def test_max_source_count_smaller_1(self):
recordings = (
Recording({}, sources=0),
)
self.assertEqual(1, max_source_count(recordings))
class ParseRecordingMapTest(PicardTestCase):
def test_parse_recording_map(self):
recordings = {
"cb127606-8e3d-4dcf-9902-69c7a9b59bc9": {
"d12e0535-3b2f-4987-8b03-63d85ef57fdd": Recording({
"id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd",
}),
"e8a313ee-b723-4b48-9395-6c8e026fc9e0": Recording(
{"id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0"},
sources=4,
result_score=0.5,
),
}
}
expected = [
{
"id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd",
"acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9",
"score": 25,
"sources": 1,
},
{
"id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0",
"acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9",
"score": 50,
"sources": 4,
}
]
self.assertEqual(expected, list(parse_recording_map(recordings)))
class RecordingHasMetadataTest(PicardTestCase):
filename = 'acoustid_no_metadata.json'
def test_recording_has_metadata(self):
recording = {
'id': '8855169b-b148-4896-9ed3-a1065dc60cf4',
'sources': 94,
}
self.assertFalse(recording_has_metadata(recording))
recording['title'] = 'Foo'
self.assertTrue(recording_has_metadata(recording))
del recording['id']
self.assertFalse(recording_has_metadata(recording))