diff --git a/picard/acoustid/__init__.py b/picard/acoustid/__init__.py index 5f468e260..246463397 100644 --- a/picard/acoustid/__init__.py +++ b/picard/acoustid/__init__.py @@ -111,10 +111,11 @@ class AcoustIDClient(QtCore.QObject): try: status = document['status'] if status == 'ok': - resolver = RecordingResolver(self._acoustid_api.webservice) - resolver.resolve( + resolver = RecordingResolver( + self._acoustid_api.webservice, document, - partial(self._on_recording_resolve_finish, task, document, http)) + callback=partial(self._on_recording_resolve_finish, task, document, http)) + resolver.resolve() else: mparms = { 'error': document['error']['message'], @@ -134,17 +135,18 @@ class AcoustIDClient(QtCore.QObject): task.next_func(doc, http, e) def _on_recording_resolve_finish(self, task, document, http, result=None, error=None): - recording_list = document['recordings'] = result + document['recordings'] = recording_list = result if not recording_list: results = document.get('results') if results: # Set AcoustID in tags if there was no matching recording - task.file.metadata['acoustid_id'] = results[0]['id'] + acoustid = results[0].get('id') + task.file.metadata['acoustid_id'] = acoustid task.file.update() log.debug( "AcoustID: Found no matching recordings for '%s'," " setting acoustid_id tag to %r", - task.file.filename, results[0]['id'] + task.file.filename, acoustid ) else: log.debug( diff --git a/picard/acoustid/json_helpers.py b/picard/acoustid/json_helpers.py index d4ef99cd9..d9bbeb5f7 100644 --- a/picard/acoustid/json_helpers.py +++ b/picard/acoustid/json_helpers.py @@ -144,13 +144,5 @@ def parse_recording(recording): return recording_mb -def max_source_count(recordings): - """Given a list of recordings return the highest number of sources. - This ignores recordings without metadata. - """ - sources = [ - r.get('sources', 1) - for r in recordings - if r.get('title') - ] - return max(sources + [1]) +def recording_has_metadata(recording): + return 'id' in recording and recording.get('title') is not None diff --git a/picard/acoustid/recordings.py b/picard/acoustid/recordings.py index a34c62027..159594a8d 100644 --- a/picard/acoustid/recordings.py +++ b/picard/acoustid/recordings.py @@ -18,38 +18,118 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from collections import ( + defaultdict, + deque, + namedtuple, +) +from typing import ( + Dict, + List, +) + +from PyQt6.QtNetwork import QNetworkReply + from picard.acoustid.json_helpers import ( - max_source_count, parse_recording, + recording_has_metadata, ) from picard.webservice import WebService from picard.webservice.api_helpers import MBAPIHelper +class Recording: + recording: dict + result_score: float + sources: int + + def __init__(self, recording, result_score=1.0, sources=1): + self.recording = recording + self.result_score = result_score + self.sources = sources + + +IncompleteRecording = namedtuple('Recording', 'mbid acoustid result_score sources') + + class RecordingResolver: + """Given an AcoustID lookup result returns a list of MB recordings. + The recordings are either directly taken from the AcoustID result or, if the + results return only the MBID without metadata, loaded via the MB web service. + """ - def __init__(self, ws: WebService) -> None: - self.mbapi = MBAPIHelper(ws) + _recording_map: Dict[str, Dict[str, Recording]] - def resolve(self, doc: dict, callback: callable) -> None: - recording_map = {} - results = doc.get('results') or [] + def __init__(self, ws: WebService, doc: dict, callback: callable) -> None: + self._mbapi = MBAPIHelper(ws) + self._doc = doc + self._callback = callback + self._recording_map = defaultdict(dict) + self._missing_metadata = deque() + + def resolve(self) -> None: + results = self._doc.get('results') or [] for result in results: recordings = result.get('recordings') or [] - max_sources = max_source_count(recordings) result_score = get_score(result) + acoustid = result.get('id') for recording in recordings: - parsed_recording = parse_recording(recording) - if parsed_recording is not None: - # Calculate a score based on result score and sources for this - # recording relative to other recordings in this result - score = min(recording.get('sources', 1) / max_sources, 1.0) * 100 - parsed_recording['score'] = score * result_score - parsed_recording['acoustid'] = result.get('id') - recording_map[parsed_recording['id']] = parsed_recording + sources = recording.get('sources', 1) + if recording_has_metadata(recording): + self._recording_map[acoustid][recording['id']] = Recording( + recording=parse_recording(recording), + result_score=result_score, + sources=sources, + ) + else: + self._missing_metadata.append(IncompleteRecording( + mbid=recording.get('id'), + acoustid=acoustid, + result_score=result_score, + sources=sources, + )) - # TODO: Load recording details for recordings without metadata - callback(recording_map.values()) + if self._missing_metadata: + self._load_recordings() + else: + self._send_results() + + def _load_recordings(self): + if not self._missing_metadata: + self._send_results() + return + + self._mbapi.get_track_by_id( + self._missing_metadata[0].mbid, + self._recording_request_finished, + inc=['release-groups', 'releases'], + ) + + def _recording_request_finished(self, mb_recording, http, error): + recording = self._missing_metadata.popleft() + if error: + if error == QNetworkReply.NetworkError.ContentNotFoundError: + # Recording does not exist, ignore and move on + self._load_recordings() + else: + self._send_results(error) + return + + mbid = mb_recording.get('id') + recording_dict = self._recording_map[recording.acoustid] + if mbid: + if mbid not in recording_dict: + recording_dict[mbid] = Recording( + recording=mb_recording, + result_score=recording.result_score, + sources=recording.sources, + ) + else: + recording_dict[mbid].sources += recording.sources + self._load_recordings() + + def _send_results(self, error=None): + self._callback(list(parse_recording_map(self._recording_map)), error) def get_score(node): @@ -57,3 +137,28 @@ def get_score(node): return float(node.get('score', 1.0)) except (TypeError, ValueError): return 1.0 + + +def parse_recording_map(recording_map: Dict[str, Dict[str, Recording]]): + for acoustid, recordings in recording_map.items(): + recording_list = recordings.values() + max_sources = max_source_count(recording_list) + for recording in recording_list: + parsed_recording = recording.recording + if parsed_recording is not None: + # Calculate a score based on result score and sources for this + # recording relative to other recordings in this result + score = min(recording.sources / max_sources, 1.0) * 100 + parsed_recording['score'] = score * recording.result_score + parsed_recording['acoustid'] = acoustid + parsed_recording['sources'] = recording.sources + yield parsed_recording + + +def max_source_count(recordings: List[Recording]): + """Given a list of recordings return the highest number of sources. + This ignores recordings without metadata. + """ + sources = {r.sources for r in recordings} + sources.add(1) + return max(sources) diff --git a/test/test_acoustid.py b/test/test_acoustid.py index 5124d57a6..cea2c4b31 100644 --- a/test/test_acoustid.py +++ b/test/test_acoustid.py @@ -31,8 +31,13 @@ import os from test.picardtestcase import PicardTestCase from picard.acoustid.json_helpers import ( - max_source_count, parse_recording, + recording_has_metadata, +) +from picard.acoustid.recordings import ( + Recording, + max_source_count, + parse_recording_map, ) from picard.mbjson import recording_to_metadata from picard.metadata import Metadata @@ -100,17 +105,66 @@ class NullRecordingTest(AcoustIDTest): self.assertEqual(m, {}) -class MaxSourceCountTest(AcoustIDTest): - filename = 'acoustid_no_metadata.json' - +class MaxSourceCountTest(PicardTestCase): def test_max_source_count(self): - c = max_source_count(self.json_doc['results'][0]['recordings']) - self.assertEqual(13, c) + recordings = ( + Recording({}, sources=5), + Recording({}, sources=13), + Recording({}, sources=12), + ) + self.assertEqual(13, max_source_count(recordings)) def test_max_source_count_no_recordings(self): - c = max_source_count([]) - self.assertEqual(1, c) + self.assertEqual(1, max_source_count([])) - def test_max_source_count_no_value(self): - c = max_source_count([{'title': 'foo', 'sources': 42}, {'title': 'foo'}]) - self.assertEqual(42, c) + def test_max_source_count_smaller_1(self): + recordings = ( + Recording({}, sources=0), + ) + self.assertEqual(1, max_source_count(recordings)) + + +class ParseRecordingMapTest(PicardTestCase): + def test_parse_recording_map(self): + recordings = { + "cb127606-8e3d-4dcf-9902-69c7a9b59bc9": { + "d12e0535-3b2f-4987-8b03-63d85ef57fdd": Recording({ + "id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd", + }), + "e8a313ee-b723-4b48-9395-6c8e026fc9e0": Recording( + {"id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0"}, + sources=4, + result_score=0.5, + ), + } + } + expected = [ + { + "id": "d12e0535-3b2f-4987-8b03-63d85ef57fdd", + "acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9", + "score": 25, + "sources": 1, + }, + { + "id": "e8a313ee-b723-4b48-9395-6c8e026fc9e0", + "acoustid": "cb127606-8e3d-4dcf-9902-69c7a9b59bc9", + "score": 50, + "sources": 4, + } + ] + self.assertEqual(expected, list(parse_recording_map(recordings))) + + +class RecordingHasMetadataTest(PicardTestCase): + filename = 'acoustid_no_metadata.json' + + def test_recording_has_metadata(self): + recording = { + 'id': '8855169b-b148-4896-9ed3-a1065dc60cf4', + 'sources': 94, + } + self.assertFalse(recording_has_metadata(recording)) + recording['title'] = 'Foo' + self.assertTrue(recording_has_metadata(recording)) + del recording['id'] + self.assertFalse(recording_has_metadata(recording))