From 66b63968cdfaad223b6b258fd867a222bdc3651a Mon Sep 17 00:00:00 2001
From: Laurent Monin <github@norz.org>
Date: Thu, 18 Nov 2021 16:44:42 +0100
Subject: [PATCH 1/2] Introduce new class ABExtractor in order to get extractor
 dynamically

- use a cache to prevent useless version & sha calculations
- based cache key on stat.st_mtime_ns + path
- declare ab_extractor as a property of tagger class
- pass tagger to all methods that needs it
---
 picard/acousticbrainz/__init__.py   | 131 ++++++++++++++++++----------
 picard/tagger.py                    |   8 +-
 picard/ui/mainwindow.py             |   3 +-
 picard/ui/options/acousticbrainz.py |   2 -
 test/test_acousticbrainz.py         |  15 ++--
 5 files changed, 97 insertions(+), 62 deletions(-)

diff --git a/picard/acousticbrainz/__init__.py b/picard/acousticbrainz/__init__.py
index fd26bc6ae..8c0aa1ec4 100644
--- a/picard/acousticbrainz/__init__.py
+++ b/picard/acousticbrainz/__init__.py
@@ -21,7 +21,10 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
-
+from collections import (
+    defaultdict,
+    namedtuple,
+)
 from concurrent.futures import Future
 from functools import partial
 import json
@@ -48,63 +51,97 @@ from picard.util.thread import run_task
 from picard.webservice import ratecontrol
 
 
-_acousticbrainz_extractor = None
-_acousticbrainz_extractor_sha = None
 ratecontrol.set_minimum_delay((ACOUSTICBRAINZ_HOST, ACOUSTICBRAINZ_PORT), 1000)
 
+ABExtractorProperties = namedtuple('ABExtractorProperties', ('path', 'version', 'sha', 'mtime_ns'))
 
-def get_extractor(config=None):
-    if not config:
-        config = get_config()
-    extractor_path = config.setting["acousticbrainz_extractor"]
-    if not extractor_path:
-        extractor_path = find_extractor()
-    else:
-        extractor_path = find_executable(extractor_path)
-    return extractor_path
+
+class ABExtractor:
+
+    def __init__(self):
+        self.cache = defaultdict(lambda: None)
+
+    def get(self, config=None):
+        if not config:
+            config = get_config()
+        if not config.setting["use_acousticbrainz"]:
+            log.debug('ABExtractor: AcousticBrainz disabled')
+            return None
+
+        extractor_path = config.setting["acousticbrainz_extractor"]
+        if not extractor_path:
+            extractor_path = find_extractor()
+        else:
+            extractor_path = find_executable(extractor_path)
+        if not extractor_path:
+            log.debug('ABExtractor: cannot find a path to extractor binary')
+            return None
+
+        try:
+            statinfo = os.stat(extractor_path)
+            mtime_ns = statinfo.st_mtime_ns
+        except OSError as exc:
+            log.warning('ABExtractor: cannot stat extractor: %s', exc)
+            return None
+
+        # check if we have this in cache already
+        cached = self.cache[(extractor_path, mtime_ns)]
+        if cached is not None:
+            log.debug('ABExtractor: cached: %r', cached)
+            return cached
+
+        # create a new cache entry
+        try:
+            version = check_extractor_version(extractor_path)
+            if version:
+                sha = precompute_extractor_sha(extractor_path)
+                result = ABExtractorProperties(
+                    path=extractor_path,
+                    version=version,
+                    sha=sha,
+                    mtime_ns=mtime_ns
+                )
+                self.cache[(result.path, result.mtime_ns)] = result
+                log.debug('ABExtractor: caching: %r', result)
+                return result
+            else:
+                raise Exception("check_extractor_version(%r) returned None" % extractor_path)
+        except Exception as exc:
+            log.warning('ABExtractor: failed to get version or sha: %s', exc)
+        return None
+
+    def path(self, config=None):
+        result = self.get(config)
+        return result.path if result else None
+
+    def version(self, config=None):
+        result = self.get(config)
+        return result.version if result else None
+
+    def sha(self, config=None):
+        result = self.get(config)
+        return result.sha if result else None
+
+    def available(self, config=None):
+        return self.get(config) is not None
 
 
 def find_extractor():
     return find_executable(*EXTRACTOR_NAMES)
 
 
-def ab_available():
-    config = get_config()
-    return config.setting["use_acousticbrainz"] and _acousticbrainz_extractor_sha is not None
-
-
 def ab_check_version(extractor):
     extractor_path = find_executable(extractor)
     return check_extractor_version(extractor_path)
 
 
-def ab_setup_extractor():
-    global _acousticbrainz_extractor, _acousticbrainz_extractor_sha
-    _acousticbrainz_extractor = None
-    _acousticbrainz_extractor_sha = None
-    config = get_config()
-    if config.setting["use_acousticbrainz"]:
-        acousticbrainz_extractor = get_extractor(config)
-        log.debug("Checking up AcousticBrainz availability")
-        if acousticbrainz_extractor:
-            version = check_extractor_version(acousticbrainz_extractor)
-            if version:
-                sha = precompute_extractor_sha(acousticbrainz_extractor)
-                _acousticbrainz_extractor = acousticbrainz_extractor
-                _acousticbrainz_extractor_sha = sha
-                log.debug("AcousticBrainz is available: version %s - sha1 %s" % (version, sha))
-                return version
-        log.warning("AcousticBrainz is not available")
-    return None
-
-
 def ab_feature_extraction(tagger, recording_id, input_path, extractor_callback):
     # Fetch existing features from AB server to check for duplicates before extracting
     tagger.webservice.get(
         host=ACOUSTICBRAINZ_HOST,
         port=ACOUSTICBRAINZ_PORT,
         path="/%s/low-level" % recording_id,
-        handler=partial(run_extractor, input_path, extractor_callback),
+        handler=partial(run_extractor, tagger, input_path, extractor_callback),
         priority=True,
         important=False,
         parse_response_type=None
@@ -140,7 +177,7 @@ def ab_extractor_callback(tagger, file, result, error):
     file.update()
 
 
-def run_extractor(input_path, extractor_callback, response, reply, error, main_thread=False):
+def run_extractor(tagger, input_path, extractor_callback, response, reply, error, main_thread=False):
     duplicate = False
     # Check if AcousticBrainz server answered with the json file for the recording id
     if not error:
@@ -159,30 +196,34 @@ def run_extractor(input_path, extractor_callback, response, reply, error, main_t
     else:
         if main_thread:
             # Run extractor on main thread, used for testing
-            extractor_callback(extractor(input_path), None)
+            extractor_callback(extractor(tagger, input_path), None)
         else:
             # Run extractor on a different thread and call the callback when done
-            run_task(partial(extractor, input_path), extractor_callback)
+            run_task(partial(extractor, tagger, input_path), extractor_callback)
 
 
-def extractor(input_path):
+def extractor(tagger, input_path):
     # Create a temporary file with AcousticBrainz output
     output_file = NamedTemporaryFile("w", suffix=".json")
     output_file.close()  # close file to ensure other processes can write to it
 
+    extractor = tagger.ab_extractor.get()
+    if not extractor:
+        return (output_file.name, -1, "no extractor found")
+
     # Call the features extractor and wait for it to finish
     try:
-        return_code, stdout, stderr = run_executable(_acousticbrainz_extractor, input_path, output_file.name)
+        return_code, stdout, stderr = run_executable(extractor.path, input_path, output_file.name)
         results = (output_file.name, return_code, stdout+stderr)
     except (FileNotFoundError, PermissionError) as e:
-        # this can happen if _acousticbrainz_extractor was removed or its permissions changed
+        # this can happen if AcousticBrainz extractor was removed or its permissions changed
         return (output_file.name, -1, str(e))
 
     # Add feature extractor sha to the output features file
     try:
         with open(output_file.name, "r+", encoding="utf-8") as f:
             features = json.load(f)
-            features["metadata"]["version"]["essentia_build_sha"] = _acousticbrainz_extractor_sha
+            features["metadata"]["version"]["essentia_build_sha"] = extractor.sha
             f.seek(0)
             json.dump(features, f, indent=4)
     except FileNotFoundError:
diff --git a/picard/tagger.py b/picard/tagger.py
index f359c8f05..4beba698c 100644
--- a/picard/tagger.py
+++ b/picard/tagger.py
@@ -71,10 +71,9 @@ from picard import (
     log,
 )
 from picard.acousticbrainz import (
-    ab_available,
+    ABExtractor,
     ab_extractor_callback,
     ab_feature_extraction,
-    ab_setup_extractor,
 )
 from picard.acoustid.manager import AcoustIDManager
 from picard.album import (
@@ -259,8 +258,7 @@ class Tagger(QtWidgets.QApplication):
         self.acoustidmanager = AcoustIDManager(acoustid_api)
 
         # Setup AcousticBrainz extraction
-        if config.setting["use_acousticbrainz"]:
-            ab_setup_extractor()
+        self.ab_extractor = ABExtractor()
 
         self.enable_menu_icons(config.setting['show_menu_icons'])
 
@@ -864,7 +862,7 @@ class Tagger(QtWidgets.QApplication):
 
     def extract_and_submit_acousticbrainz_features(self, objs):
         """Extract AcousticBrainz features and submit them."""
-        if not ab_available():
+        if not self.ab_extractor.available():
             return
 
         for file in iter_files_from_objects(objs):
diff --git a/picard/ui/mainwindow.py b/picard/ui/mainwindow.py
index 6d5d8b4a0..4d5670077 100644
--- a/picard/ui/mainwindow.py
+++ b/picard/ui/mainwindow.py
@@ -61,7 +61,6 @@ from picard import (
     PICARD_APP_ID,
     log,
 )
-from picard.acousticbrainz import ab_available
 from picard.album import Album
 from picard.browser import addrelease
 from picard.cluster import (
@@ -1358,7 +1357,7 @@ class MainWindow(QtWidgets.QMainWindow, PreserveGeometry):
             # Skip further loops if all values now True.
             if can_analyze and can_save and can_remove and can_refresh and can_autotag and can_submit and can_extract:
                 break
-        can_extract = can_extract and ab_available()
+        can_extract = can_extract and self.tagger.ab_extractor.available()
         self.remove_action.setEnabled(can_remove)
         self.save_action.setEnabled(can_save)
         self.view_info_action.setEnabled(can_view_info)
diff --git a/picard/ui/options/acousticbrainz.py b/picard/ui/options/acousticbrainz.py
index e59d27053..f9a54da17 100644
--- a/picard/ui/options/acousticbrainz.py
+++ b/picard/ui/options/acousticbrainz.py
@@ -30,7 +30,6 @@ from PyQt5 import QtWidgets
 
 from picard.acousticbrainz import (
     ab_check_version,
-    ab_setup_extractor,
     find_extractor,
 )
 from picard.config import (
@@ -94,7 +93,6 @@ class AcousticBrainzOptionsPage(OptionsPage):
             self.tagger.window.update_actions()
         if enabled:
             self._config.setting["acousticbrainz_extractor"] = self.ui.acousticbrainz_extractor.text()
-            ab_setup_extractor()
 
     def acousticbrainz_extractor_browse(self):
         path, _filter = QtWidgets.QFileDialog.getOpenFileName(self, "", self.ui.acousticbrainz_extractor.text())
diff --git a/test/test_acousticbrainz.py b/test/test_acousticbrainz.py
index 98b3d2910..268515fdb 100644
--- a/test/test_acousticbrainz.py
+++ b/test/test_acousticbrainz.py
@@ -30,10 +30,9 @@ from unittest.mock import (
 from test.picardtestcase import PicardTestCase
 
 from picard.acousticbrainz import (
-    ab_available,
+    ABExtractor,
     ab_extractor_callback,
     ab_feature_extraction,
-    ab_setup_extractor,
 )
 from picard.file import File
 
@@ -57,20 +56,20 @@ class AcousticBrainzSetupTest(PicardTestCase):
         self.set_config_values(settings)
 
         # Try to setup AB
-        ab_setup_extractor()
+        ab_extractor = ABExtractor()
 
         # Extractor should be found
-        self.assertTrue(ab_available())
+        self.assertTrue(ab_extractor.available())
 
     def test_ab_setup_not_present(self):
         settings['acousticbrainz_extractor'] = "non_existing_extractor"
         self.set_config_values(settings)
 
         # Try to setup AB
-        ab_setup_extractor()
+        ab_extractor = ABExtractor()
 
         # Extractor should not be found
-        self.assertFalse(ab_available())
+        self.assertFalse(ab_extractor.available())
 
 
 class AcousticBrainzFeatureExtractionTest(PicardTestCase):
@@ -85,8 +84,8 @@ class AcousticBrainzFeatureExtractionTest(PicardTestCase):
         settings['acousticbrainz_extractor'] = mock_extractor
         self.set_config_values(settings)
 
-        ab_setup_extractor()
-        self.assertTrue(ab_available())
+        self.tagger.ab_extractor = ABExtractor()
+        self.assertTrue(self.tagger.ab_extractor.available())
 
         # Load an irrelevant test file
         self.file = File("./test/data/test.mp3")

From e72c552df06028d367961d152645bec5ccb2ce85 Mon Sep 17 00:00:00 2001
From: Laurent Monin <github@norz.org>
Date: Fri, 19 Nov 2021 09:32:09 +0100
Subject: [PATCH 2/2] Re-initialize cache on new entry, keep only one

---
 picard/acousticbrainz/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/picard/acousticbrainz/__init__.py b/picard/acousticbrainz/__init__.py
index 8c0aa1ec4..5c8af296f 100644
--- a/picard/acousticbrainz/__init__.py
+++ b/picard/acousticbrainz/__init__.py
@@ -59,6 +59,9 @@ ABExtractorProperties = namedtuple('ABExtractorProperties', ('path', 'version',
 class ABExtractor:
 
     def __init__(self):
+        self._init_cache()
+
+    def _init_cache(self):
         self.cache = defaultdict(lambda: None)
 
     def get(self, config=None):
@@ -101,6 +104,8 @@ class ABExtractor:
                     sha=sha,
                     mtime_ns=mtime_ns
                 )
+                # clear the cache, we keep only one entry
+                self._init_cache()
                 self.cache[(result.path, result.mtime_ns)] = result
                 log.debug('ABExtractor: caching: %r', result)
                 return result