PICARD-2396: Don't submit AcoustID on large duration difference

If the difference between the file's and the recording's duration exceeds a threshold, do not submit this pair to AcoustID.
This commit is contained in:
Philipp Wolfer
2022-01-20 20:52:36 +01:00
parent 2b19341593
commit 11d71bcc8c
3 changed files with 90 additions and 13 deletions

View File

@@ -6,7 +6,7 @@
# Copyright (C) 2017 Sambhav Kothari
# Copyright (C) 2018 Vishal Choudhary
# Copyright (C) 2018, 2020-2021 Laurent Monin
# Copyright (C) 2020 Philipp Wolfer
# Copyright (C) 2020, 2022 Philipp Wolfer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@@ -31,14 +31,19 @@ from picard import log
from picard.util import load_json
# Maximum difference between file duration and MB metadata length.
# If the match is above this threshold the fingerprint will not get submitted.
# Compare also acoustid/const.py in acoustid-server sources
FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS = 30000
class Submission(object):
def __init__(self, fingerprint, duration, orig_recordingid=None, recordingid=None, puid=None):
def __init__(self, fingerprint, duration, recordingid=None, metadata=None):
self.fingerprint = fingerprint
self.duration = duration
self.puid = puid
self.orig_recordingid = orig_recordingid
self.recordingid = recordingid
self.recordingid = self.orig_recordingid = recordingid
self.metadata = metadata
self.attempts = 0
def __len__(self):
@@ -49,6 +54,15 @@ class Submission(object):
# we use 10% here, to be safe
return int((len(self.fingerprint) + len(self.puid)) * 1.1)
@property
def puid(self):
return self.metadata.get('musicip_puid', '') if self.metadata else ''
@property
def valid_duration(self):
return abs(self.duration * 1000 - self.metadata.length) <= FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS
@property
def is_submitted(self):
return not self.recordingid or self.orig_recordingid == self.recordingid
@@ -71,8 +85,9 @@ class AcoustIDManager(QtCore.QObject):
def add(self, file, recordingid):
if not file.acoustid_fingerprint or not file.acoustid_length:
return
puid = file.metadata['musicip_puid']
self._submissions[file] = Submission(file.acoustid_fingerprint, file.acoustid_length, recordingid, recordingid, puid)
metadata = file.metadata
self._submissions[file] = Submission(
file.acoustid_fingerprint, file.acoustid_length, recordingid, metadata)
self._check_unsubmitted()
def update(self, file, recordingid):
@@ -80,6 +95,7 @@ class AcoustIDManager(QtCore.QObject):
if submission is None:
return
submission.recordingid = recordingid
submission.metadata = file.metadata
self._check_unsubmitted()
def remove(self, file):
@@ -90,12 +106,12 @@ class AcoustIDManager(QtCore.QObject):
def is_submitted(self, file):
submission = self._submissions.get(file)
if submission:
return submission.is_submitted()
return submission.is_submitted
return True
def _unsubmitted(self, reset=False):
for file, submission in self._submissions.items():
if not submission.is_submitted():
if not submission.is_submitted and submission.valid_duration:
if reset:
submission.attempts = 0
yield (file, submission)

View File

@@ -3,7 +3,7 @@
# Picard, the next-generation MusicBrainz tagger
#
# Copyright (C) 2020 Laurent Monin
# Copyright (C) 2020 Philipp Wolfer
# Copyright (C) 2020, 2022 Philipp Wolfer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@@ -27,8 +27,13 @@ from unittest.mock import (
from test.picardtestcase import PicardTestCase
from picard.acoustid.manager import AcoustIDManager
from picard.acoustid.manager import (
FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS,
AcoustIDManager,
Submission,
)
from picard.file import File
from picard.metadata import Metadata
def mock_succeed_submission(*args, **kwargs):
@@ -48,6 +53,7 @@ def dummy_file(i):
file = File('foo%d.flac' % i)
file.acoustid_fingerprint = 'Z' * FINGERPRINT_SIZE
file.acoustid_length = 120
file.metadata = Metadata(length=file.acoustid_length * 1000)
return file
@@ -132,3 +138,56 @@ class AcoustIDManagerTest(PicardTestCase):
self.assertEqual(self.mock_api_helper.submit_acoustid_fingerprints.call_count, 8)
for f in files:
self.assertFalse(self.acoustidmanager.is_submitted(f))
class SubmissionTest(PicardTestCase):
def test_init(self):
fingerprint = 'abc'
duration = 42
recordingid = 'rec1'
metadata = Metadata({
'musicip_puid': 'puid1'
})
submission = Submission(fingerprint, duration, recordingid, metadata)
self.assertEqual(fingerprint, submission.fingerprint)
self.assertEqual(duration, submission.duration)
self.assertEqual(recordingid, submission.recordingid)
self.assertEqual(recordingid, submission.orig_recordingid)
self.assertEqual(metadata, submission.metadata)
self.assertEqual(metadata['musicip_puid'], submission.puid)
self.assertEqual(0, submission.attempts)
def test_valid_duration(self):
duration_s = 342
duration_ms = duration_s * 1000
metadata = Metadata()
submission = Submission('abc', duration_s, metadata=metadata)
self.assertFalse(submission.valid_duration)
metadata.length = duration_ms
self.assertTrue(submission.valid_duration)
metadata.length = duration_ms + FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS
self.assertTrue(submission.valid_duration)
metadata.length = duration_ms - FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS
self.assertTrue(submission.valid_duration)
metadata.length = duration_ms + 1 + FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS
self.assertFalse(submission.valid_duration)
metadata.length = duration_ms - 1 - FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF_MS
self.assertFalse(submission.valid_duration)
def test_init_no_metadata(self):
submission = Submission('abc', 42)
self.assertIsNone(submission.metadata)
self.assertEqual('', submission.puid)
def test_is_submitted_no_recording_id(self):
submission = Submission('abc', 42)
self.assertTrue(submission.is_submitted)
submission.recordingid = 'rec1'
self.assertFalse(submission.is_submitted)
def test_is_submitted_move_recording_id(self):
submission = Submission('abc', 42, recordingid='rec1')
self.assertTrue(submission.is_submitted)
submission.recordingid = 'rec2'
self.assertFalse(submission.is_submitted)

View File

@@ -239,9 +239,11 @@ class AcoustdIdAPITest(PicardTestCase):
def test_submissions_to_args(self):
submissions = [
Submission('f1', 1, orig_recordingid='or1', recordingid='r1', puid='p1'),
Submission('f2', 2, orig_recordingid='or2', recordingid='r2', puid='p2'),
Submission('f1', 1, recordingid='r1', metadata={'musicip_puid': 'p1'}),
Submission('f2', 2, recordingid='r2', metadata={'musicip_puid': 'p2'}),
]
submissions[0].orig_recordingid = 'or1'
submissions[1].orig_recordingid = 'or2'
result = self.api._submissions_to_args(submissions)
expected = {
'user': 'apikey',