PICARD-2584: If AcoustID provides not metadata load the MB recording

This resolves problem with AcoustID scan finding no matches or only standalone-recordings.
2025-12-26 11:18:20 +00:00 · 2023-12-21 22:20:59 +01:00
parent 990a79c8f3
commit c5b0ddf05f
4 changed files with 197 additions and 44 deletions
--- a/picard/acoustid/init.py
+++ b/picard/acoustid/init.py
@@ -111,10 +111,11 @@ class AcoustIDClient(QtCore.QObject):
            try:
                status = document['status']
                if status == 'ok':
-                    resolver = RecordingResolver(self._acoustid_api.webservice)
-                    resolver.resolve(
+                    resolver = RecordingResolver(
+                        self._acoustid_api.webservice,
                        document,
-                        partial(self._on_recording_resolve_finish, task, document, http))
+                        callback=partial(self._on_recording_resolve_finish, task, document, http))
+                    resolver.resolve()
                else:
                    mparms = {
                        'error': document['error']['message'],
@@ -134,17 +135,18 @@ class AcoustIDClient(QtCore.QObject):
                task.next_func(doc, http, e)

    def _on_recording_resolve_finish(self, task, document, http, result=None, error=None):
-        recording_list = document['recordings'] = result
+        document['recordings'] = recording_list = result
        if not recording_list:
            results = document.get('results')
            if results:
                # Set AcoustID in tags if there was no matching recording
-                task.file.metadata['acoustid_id'] = results[0]['id']
+                acoustid = results[0].get('id')
+                task.file.metadata['acoustid_id'] = acoustid
                task.file.update()
                log.debug(
                    "AcoustID: Found no matching recordings for '%s',"
                    " setting acoustid_id tag to %r",
-                    task.file.filename, results[0]['id']
+                    task.file.filename, acoustid
                )
        else:
            log.debug(
--- a/picard/acoustid/json_helpers.py
+++ b/picard/acoustid/json_helpers.py
@@ -144,13 +144,5 @@ def parse_recording(recording):
    return recording_mb


-def max_source_count(recordings):
-    """Given a list of recordings return the highest number of sources.
-    This ignores recordings without metadata.
-    """
-    sources = [
-        r.get('sources', 1)
-        for r in recordings
-        if r.get('title')
-    ]
-    return max(sources + [1])
+def recording_has_metadata(recording):
+    return 'id' in recording and recording.get('title') is not None
--- a/picard/acoustid/recordings.py
+++ b/picard/acoustid/recordings.py
@@ -18,38 +18,118 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

+from collections import (
+    defaultdict,
+    deque,
+    namedtuple,
+)
+from typing import (
+    Dict,
+    List,
+)
+
+from PyQt6.QtNetwork import QNetworkReply
+
 from picard.acoustid.json_helpers import (
-    max_source_count,
    parse_recording,
+    recording_has_metadata,
 )
 from picard.webservice import WebService
 from picard.webservice.api_helpers import MBAPIHelper


+class Recording:
+    recording: dict
+    result_score: float
+    sources: int
+
+    def __init__(self, recording, result_score=1.0, sources=1):
+        self.recording = recording
+        self.result_score = result_score
+        self.sources = sources
+
+
+IncompleteRecording = namedtuple('Recording', 'mbid acoustid result_score sources')
+
+
 class RecordingResolver:
+    """Given an AcoustID lookup result returns a list of MB recordings.
+    The recordings are either directly taken from the AcoustID result or, if the
+    results return only the MBID without metadata, loaded via the MB web service.
+    """

-    def __init__(self, ws: WebService) -> None:
-        self.mbapi = MBAPIHelper(ws)
+    _recording_map: Dict[str, Dict[str, Recording]]

-    def resolve(self, doc: dict, callback: callable) -> None:
-        recording_map = {}
-        results = doc.get('results') or []
+    def __init__(self, ws: WebService, doc: dict, callback: callable) -> None:
+        self._mbapi = MBAPIHelper(ws)
+        self._doc = doc
+        self._callback = callback
+        self._recording_map = defaultdict(dict)
+        self._missing_metadata = deque()
+
+    def resolve(self) -> None:
+        results = self._doc.get('results') or []
        for result in results:
            recordings = result.get('recordings') or []
-            max_sources = max_source_count(recordings)
            result_score = get_score(result)
+            acoustid = result.get('id')
            for recording in recordings:
-                parsed_recording = parse_recording(recording)
-                if parsed_recording is not None:
-                    # Calculate a score based on result score and sources for this
-                    # recording relative to other recordings in this result
-                    score = min(recording.get('sources', 1) / max_sources, 1.0) * 100
-                    parsed_recording['score'] = score * result_score
-                    parsed_recording['acoustid'] = result.get('id')
-                    recording_map[parsed_recording['id']] = parsed_recording
+                sources = recording.get('sources', 1)
+                if recording_has_metadata(recording):
+                    self._recording_map[acoustid][recording['id']] = Recording(
+                        recording=parse_recording(recording),
+                        result_score=result_score,
+                        sources=sources,
+                    )
+                else:
+                    self._missing_metadata.append(IncompleteRecording(
+                        mbid=recording.get('id'),
+                        acoustid=acoustid,
+                        result_score=result_score,
+                        sources=sources,
+                    ))

-        # TODO: Load recording details for recordings without metadata
-        callback(recording_map.values())
+        if self._missing_metadata:
+            self._load_recordings()
+        else:
+            self._send_results()
+
+    def _load_recordings(self):
+        if not self._missing_metadata:
+            self._send_results()
+            return
+
+        self._mbapi.get_track_by_id(
+            self._missing_metadata[0].mbid,
+            self._recording_request_finished,
+            inc=['release-groups', 'releases'],
+        )
+
+    def _recording_request_finished(self, mb_recording, http, error):
+        recording = self._missing_metadata.popleft()
+        if error:
+            if error == QNetworkReply.NetworkError.ContentNotFoundError:
+                # Recording does not exist, ignore and move on
+                self._load_recordings()
+            else:
+                self._send_results(error)
+            return
+
+        mbid = mb_recording.get('id')
+        recording_dict = self._recording_map[recording.acoustid]
+        if mbid:
+            if mbid not in recording_dict:
+                recording_dict[mbid] = Recording(
+                    recording=mb_recording,
+                    result_score=recording.result_score,
+                    sources=recording.sources,
+                )
+            else:
+                recording_dict[mbid].sources += recording.sources
+        self._load_recordings()
+
+    def _send_results(self, error=None):
+        self._callback(list(parse_recording_map(self._recording_map)), error)


 def get_score(node):
@@ -57,3 +137,28 @@ def get_score(node):
        return float(node.get('score', 1.0))
    except (TypeError, ValueError):
        return 1.0
+
+
+def parse_recording_map(recording_map: Dict[str, Dict[str, Recording]]):
+    for acoustid, recordings in recording_map.items():
+        recording_list = recordings.values()
+        max_sources = max_source_count(recording_list)
+        for recording in recording_list:
+            parsed_recording = recording.recording
+            if parsed_recording is not None:
+                # Calculate a score based on result score and sources for this
+                # recording relative to other recordings in this result
+                score = min(recording.sources / max_sources, 1.0) * 100
+                parsed_recording['score'] = score * recording.result_score
+                parsed_recording['acoustid'] = acoustid
+                parsed_recording['sources'] = recording.sources
+            yield parsed_recording
+
+
+def max_source_count(recordings: List[Recording]):
+    """Given a list of recordings return the highest number of sources.
+    This ignores recordings without metadata.
+    """
+    sources = {r.sources for r in recordings}
+    sources.add(1)
+    return max(sources)
--- a/test/test_acoustid.py
+++ b/test/test_acoustid.py
@@ -31,8 +31,13 @@ import os
 from test.picardtestcase import PicardTestCase

 from picard.acoustid.json_helpers import (
-    max_source_count,
    parse_recording,
+    recording_has_metadata,
+)
+from picard.acoustid.recordings import (
+    Recording,
+    max_source_count,
+    parse_recording_map,
 )
 from picard.mbjson import recording_to_metadata
 from picard.metadata import Metadata
@@ -100,17 +105,66 @@ class NullRecordingTest(AcoustIDTest):
        self.assertEqual(m, {})


-class MaxSourceCountTest(AcoustIDTest):
-    filename = 'acoustid_no_metadata.json'
-
+class MaxSourceCountTest(PicardTestCase):
    def test_max_source_count(self):
-        c = max_source_count(self.json_doc['results'][0]['recordings'])
-        self.assertEqual(13, c)
+        recordings = (
+            Recording({}, sources=5),
+            Recording({}, sources=13),
+            Recording({}, sources=12),
+        )
+        self.assertEqual(13, max_source_count(recordings))

    def test_max_source_count_no_recordings(self):
-        c = max_source_count([])
-        self.assertEqual(1, c)
+        self.assertEqual(1, max_source_count([]))

-    def test_max_source_count_no_value(self):
-        c = max_source_count([{'title': 'foo', 'sources': 42}, {'title': 'foo'}])
-        self.assertEqual(42, c)
+    def test_max_source_count_smaller_1(self):
+        recordings = (
+            Recording({}, sources=0),
+        )
+        self.assertEqual(1, max_source_count(recordings))
+
+
+class ParseRecordingMapTest(PicardTestCase):
+    def test_parse_recording_map(self):
+        recordings = {
+            "cb127606-8e3d-4dcf-9902-69c7a9b59bc9": {
+                "d12e0535-3b2f-4987-8b03-63d85ef57fdd": Recording({
+                    "id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd",
+                }),
+                "e8a313ee-b723-4b48-9395-6c8e026fc9e0": Recording(
+                    {"id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0"},
+                    sources=4,
+                    result_score=0.5,
+                ),
+            }
+        }
+        expected = [
+            {
+                "id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd",
+                "acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9",
+                "score": 25,
+                "sources": 1,
+            },
+            {
+                "id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0",
+                "acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9",
+                "score": 50,
+                "sources": 4,
+            }
+        ]
+        self.assertEqual(expected, list(parse_recording_map(recordings)))
+
+
+class RecordingHasMetadataTest(PicardTestCase):
+    filename = 'acoustid_no_metadata.json'
+
+    def test_recording_has_metadata(self):
+        recording = {
+            'id': '8855169b-b148-4896-9ed3-a1065dc60cf4',
+            'sources': 94,
+        }
+        self.assertFalse(recording_has_metadata(recording))
+        recording['title'] = 'Foo'
+        self.assertTrue(recording_has_metadata(recording))
+        del recording['id']
+        self.assertFalse(recording_has_metadata(recording))