From 66b63968cdfaad223b6b258fd867a222bdc3651a Mon Sep 17 00:00:00 2001 From: Laurent Monin Date: Thu, 18 Nov 2021 16:44:42 +0100 Subject: [PATCH 1/2] Introduce new class ABExtractor in order to get extractor dynamically - use a cache to prevent useless version & sha calculations - based cache key on stat.st_mtime_ns + path - declare ab_extractor as a property of tagger class - pass tagger to all methods that needs it --- picard/acousticbrainz/__init__.py | 131 ++++++++++++++++++---------- picard/tagger.py | 8 +- picard/ui/mainwindow.py | 3 +- picard/ui/options/acousticbrainz.py | 2 - test/test_acousticbrainz.py | 15 ++-- 5 files changed, 97 insertions(+), 62 deletions(-) diff --git a/picard/acousticbrainz/__init__.py b/picard/acousticbrainz/__init__.py index fd26bc6ae..8c0aa1ec4 100644 --- a/picard/acousticbrainz/__init__.py +++ b/picard/acousticbrainz/__init__.py @@ -21,7 +21,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - +from collections import ( + defaultdict, + namedtuple, +) from concurrent.futures import Future from functools import partial import json @@ -48,63 +51,97 @@ from picard.util.thread import run_task from picard.webservice import ratecontrol -_acousticbrainz_extractor = None -_acousticbrainz_extractor_sha = None ratecontrol.set_minimum_delay((ACOUSTICBRAINZ_HOST, ACOUSTICBRAINZ_PORT), 1000) +ABExtractorProperties = namedtuple('ABExtractorProperties', ('path', 'version', 'sha', 'mtime_ns')) -def get_extractor(config=None): - if not config: - config = get_config() - extractor_path = config.setting["acousticbrainz_extractor"] - if not extractor_path: - extractor_path = find_extractor() - else: - extractor_path = find_executable(extractor_path) - return extractor_path + +class ABExtractor: + + def __init__(self): + self.cache = defaultdict(lambda: None) + + def get(self, config=None): + if not config: + config = get_config() + if not config.setting["use_acousticbrainz"]: + log.debug('ABExtractor: AcousticBrainz disabled') + return None + + extractor_path = config.setting["acousticbrainz_extractor"] + if not extractor_path: + extractor_path = find_extractor() + else: + extractor_path = find_executable(extractor_path) + if not extractor_path: + log.debug('ABExtractor: cannot find a path to extractor binary') + return None + + try: + statinfo = os.stat(extractor_path) + mtime_ns = statinfo.st_mtime_ns + except OSError as exc: + log.warning('ABExtractor: cannot stat extractor: %s', exc) + return None + + # check if we have this in cache already + cached = self.cache[(extractor_path, mtime_ns)] + if cached is not None: + log.debug('ABExtractor: cached: %r', cached) + return cached + + # create a new cache entry + try: + version = check_extractor_version(extractor_path) + if version: + sha = precompute_extractor_sha(extractor_path) + result = ABExtractorProperties( + path=extractor_path, + version=version, + sha=sha, + mtime_ns=mtime_ns + ) + self.cache[(result.path, result.mtime_ns)] = result + log.debug('ABExtractor: caching: %r', result) + return result + else: + raise Exception("check_extractor_version(%r) returned None" % extractor_path) + except Exception as exc: + log.warning('ABExtractor: failed to get version or sha: %s', exc) + return None + + def path(self, config=None): + result = self.get(config) + return result.path if result else None + + def version(self, config=None): + result = self.get(config) + return result.version if result else None + + def sha(self, config=None): + result = self.get(config) + return result.sha if result else None + + def available(self, config=None): + return self.get(config) is not None def find_extractor(): return find_executable(*EXTRACTOR_NAMES) -def ab_available(): - config = get_config() - return config.setting["use_acousticbrainz"] and _acousticbrainz_extractor_sha is not None - - def ab_check_version(extractor): extractor_path = find_executable(extractor) return check_extractor_version(extractor_path) -def ab_setup_extractor(): - global _acousticbrainz_extractor, _acousticbrainz_extractor_sha - _acousticbrainz_extractor = None - _acousticbrainz_extractor_sha = None - config = get_config() - if config.setting["use_acousticbrainz"]: - acousticbrainz_extractor = get_extractor(config) - log.debug("Checking up AcousticBrainz availability") - if acousticbrainz_extractor: - version = check_extractor_version(acousticbrainz_extractor) - if version: - sha = precompute_extractor_sha(acousticbrainz_extractor) - _acousticbrainz_extractor = acousticbrainz_extractor - _acousticbrainz_extractor_sha = sha - log.debug("AcousticBrainz is available: version %s - sha1 %s" % (version, sha)) - return version - log.warning("AcousticBrainz is not available") - return None - - def ab_feature_extraction(tagger, recording_id, input_path, extractor_callback): # Fetch existing features from AB server to check for duplicates before extracting tagger.webservice.get( host=ACOUSTICBRAINZ_HOST, port=ACOUSTICBRAINZ_PORT, path="/%s/low-level" % recording_id, - handler=partial(run_extractor, input_path, extractor_callback), + handler=partial(run_extractor, tagger, input_path, extractor_callback), priority=True, important=False, parse_response_type=None @@ -140,7 +177,7 @@ def ab_extractor_callback(tagger, file, result, error): file.update() -def run_extractor(input_path, extractor_callback, response, reply, error, main_thread=False): +def run_extractor(tagger, input_path, extractor_callback, response, reply, error, main_thread=False): duplicate = False # Check if AcousticBrainz server answered with the json file for the recording id if not error: @@ -159,30 +196,34 @@ def run_extractor(input_path, extractor_callback, response, reply, error, main_t else: if main_thread: # Run extractor on main thread, used for testing - extractor_callback(extractor(input_path), None) + extractor_callback(extractor(tagger, input_path), None) else: # Run extractor on a different thread and call the callback when done - run_task(partial(extractor, input_path), extractor_callback) + run_task(partial(extractor, tagger, input_path), extractor_callback) -def extractor(input_path): +def extractor(tagger, input_path): # Create a temporary file with AcousticBrainz output output_file = NamedTemporaryFile("w", suffix=".json") output_file.close() # close file to ensure other processes can write to it + extractor = tagger.ab_extractor.get() + if not extractor: + return (output_file.name, -1, "no extractor found") + # Call the features extractor and wait for it to finish try: - return_code, stdout, stderr = run_executable(_acousticbrainz_extractor, input_path, output_file.name) + return_code, stdout, stderr = run_executable(extractor.path, input_path, output_file.name) results = (output_file.name, return_code, stdout+stderr) except (FileNotFoundError, PermissionError) as e: - # this can happen if _acousticbrainz_extractor was removed or its permissions changed + # this can happen if AcousticBrainz extractor was removed or its permissions changed return (output_file.name, -1, str(e)) # Add feature extractor sha to the output features file try: with open(output_file.name, "r+", encoding="utf-8") as f: features = json.load(f) - features["metadata"]["version"]["essentia_build_sha"] = _acousticbrainz_extractor_sha + features["metadata"]["version"]["essentia_build_sha"] = extractor.sha f.seek(0) json.dump(features, f, indent=4) except FileNotFoundError: diff --git a/picard/tagger.py b/picard/tagger.py index f359c8f05..4beba698c 100644 --- a/picard/tagger.py +++ b/picard/tagger.py @@ -71,10 +71,9 @@ from picard import ( log, ) from picard.acousticbrainz import ( - ab_available, + ABExtractor, ab_extractor_callback, ab_feature_extraction, - ab_setup_extractor, ) from picard.acoustid.manager import AcoustIDManager from picard.album import ( @@ -259,8 +258,7 @@ class Tagger(QtWidgets.QApplication): self.acoustidmanager = AcoustIDManager(acoustid_api) # Setup AcousticBrainz extraction - if config.setting["use_acousticbrainz"]: - ab_setup_extractor() + self.ab_extractor = ABExtractor() self.enable_menu_icons(config.setting['show_menu_icons']) @@ -864,7 +862,7 @@ class Tagger(QtWidgets.QApplication): def extract_and_submit_acousticbrainz_features(self, objs): """Extract AcousticBrainz features and submit them.""" - if not ab_available(): + if not self.ab_extractor.available(): return for file in iter_files_from_objects(objs): diff --git a/picard/ui/mainwindow.py b/picard/ui/mainwindow.py index 6d5d8b4a0..4d5670077 100644 --- a/picard/ui/mainwindow.py +++ b/picard/ui/mainwindow.py @@ -61,7 +61,6 @@ from picard import ( PICARD_APP_ID, log, ) -from picard.acousticbrainz import ab_available from picard.album import Album from picard.browser import addrelease from picard.cluster import ( @@ -1358,7 +1357,7 @@ class MainWindow(QtWidgets.QMainWindow, PreserveGeometry): # Skip further loops if all values now True. if can_analyze and can_save and can_remove and can_refresh and can_autotag and can_submit and can_extract: break - can_extract = can_extract and ab_available() + can_extract = can_extract and self.tagger.ab_extractor.available() self.remove_action.setEnabled(can_remove) self.save_action.setEnabled(can_save) self.view_info_action.setEnabled(can_view_info) diff --git a/picard/ui/options/acousticbrainz.py b/picard/ui/options/acousticbrainz.py index e59d27053..f9a54da17 100644 --- a/picard/ui/options/acousticbrainz.py +++ b/picard/ui/options/acousticbrainz.py @@ -30,7 +30,6 @@ from PyQt5 import QtWidgets from picard.acousticbrainz import ( ab_check_version, - ab_setup_extractor, find_extractor, ) from picard.config import ( @@ -94,7 +93,6 @@ class AcousticBrainzOptionsPage(OptionsPage): self.tagger.window.update_actions() if enabled: self._config.setting["acousticbrainz_extractor"] = self.ui.acousticbrainz_extractor.text() - ab_setup_extractor() def acousticbrainz_extractor_browse(self): path, _filter = QtWidgets.QFileDialog.getOpenFileName(self, "", self.ui.acousticbrainz_extractor.text()) diff --git a/test/test_acousticbrainz.py b/test/test_acousticbrainz.py index 98b3d2910..268515fdb 100644 --- a/test/test_acousticbrainz.py +++ b/test/test_acousticbrainz.py @@ -30,10 +30,9 @@ from unittest.mock import ( from test.picardtestcase import PicardTestCase from picard.acousticbrainz import ( - ab_available, + ABExtractor, ab_extractor_callback, ab_feature_extraction, - ab_setup_extractor, ) from picard.file import File @@ -57,20 +56,20 @@ class AcousticBrainzSetupTest(PicardTestCase): self.set_config_values(settings) # Try to setup AB - ab_setup_extractor() + ab_extractor = ABExtractor() # Extractor should be found - self.assertTrue(ab_available()) + self.assertTrue(ab_extractor.available()) def test_ab_setup_not_present(self): settings['acousticbrainz_extractor'] = "non_existing_extractor" self.set_config_values(settings) # Try to setup AB - ab_setup_extractor() + ab_extractor = ABExtractor() # Extractor should not be found - self.assertFalse(ab_available()) + self.assertFalse(ab_extractor.available()) class AcousticBrainzFeatureExtractionTest(PicardTestCase): @@ -85,8 +84,8 @@ class AcousticBrainzFeatureExtractionTest(PicardTestCase): settings['acousticbrainz_extractor'] = mock_extractor self.set_config_values(settings) - ab_setup_extractor() - self.assertTrue(ab_available()) + self.tagger.ab_extractor = ABExtractor() + self.assertTrue(self.tagger.ab_extractor.available()) # Load an irrelevant test file self.file = File("./test/data/test.mp3") From e72c552df06028d367961d152645bec5ccb2ce85 Mon Sep 17 00:00:00 2001 From: Laurent Monin Date: Fri, 19 Nov 2021 09:32:09 +0100 Subject: [PATCH 2/2] Re-initialize cache on new entry, keep only one --- picard/acousticbrainz/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/picard/acousticbrainz/__init__.py b/picard/acousticbrainz/__init__.py index 8c0aa1ec4..5c8af296f 100644 --- a/picard/acousticbrainz/__init__.py +++ b/picard/acousticbrainz/__init__.py @@ -59,6 +59,9 @@ ABExtractorProperties = namedtuple('ABExtractorProperties', ('path', 'version', class ABExtractor: def __init__(self): + self._init_cache() + + def _init_cache(self): self.cache = defaultdict(lambda: None) def get(self, config=None): @@ -101,6 +104,8 @@ class ABExtractor: sha=sha, mtime_ns=mtime_ns ) + # clear the cache, we keep only one entry + self._init_cache() self.cache[(result.path, result.mtime_ns)] = result log.debug('ABExtractor: caching: %r', result) return result