Merge pull request #975 from phw/PICARD-1339-fixup

PICARD-1339: Improve file removal performance
This commit is contained in:
Philipp Wolfer
2018-10-09 08:50:53 +02:00
committed by GitHub
6 changed files with 411 additions and 59 deletions

View File

@@ -58,7 +58,11 @@ from picard.util import (
format_time,
mbid_validate,
)
from picard.util.imagelist import update_metadata_images
from picard.util.imagelist import (
add_metadata_images,
remove_metadata_images,
update_metadata_images
)
from picard.util.textencoding import asciipunct
from picard.ui.item import Item
@@ -406,14 +410,14 @@ class Album(DataObject, Item):
def _add_file(self, track, file):
self._files += 1
self.update(update_tracks=False)
add_metadata_images(self, [file])
file.metadata_images_changed.connect(self.update_metadata_images)
self.update_metadata_images()
def _remove_file(self, track, file):
self._files -= 1
self.update(update_tracks=False)
file.metadata_images_changed.disconnect(self.update_metadata_images)
self.update_metadata_images()
remove_metadata_images(self, [file])
def match_files(self, files, use_recordingid=True):
"""Match files to tracks on this album, based on metadata similarity or recordingid."""

View File

@@ -37,7 +37,10 @@ from picard.util import (
album_artist_from_path,
format_time,
)
from picard.util.imagelist import update_metadata_images
from picard.util.imagelist import (
remove_metadata_images,
update_metadata_images
)
from picard.ui.item import Item
@@ -114,7 +117,7 @@ class Cluster(QtCore.QObject, Item):
self.item.remove_file(file)
if not self.special and self.get_num_files() == 0:
self.tagger.remove_cluster(self)
self.update_metadata_images()
remove_metadata_images(self, [file])
self._update_related_album()
def update(self):

View File

@@ -62,6 +62,7 @@ class Metadata(dict):
def __init__(self):
super().__init__()
self.images = ImageList()
self.has_common_images = True
self.deleted_tags = set()
self.length = 0

View File

@@ -43,7 +43,11 @@ from picard.script import (
ScriptParser,
enabled_tagger_scripts_texts,
)
from picard.util.imagelist import update_metadata_images
from picard.util.imagelist import (
add_metadata_images,
remove_metadata_images,
update_metadata_images
)
from picard.util.textencoding import asciipunct
from picard.ui.item import Item
@@ -80,8 +84,9 @@ class Track(DataObject, Item):
if file not in self.linked_files:
self.linked_files.append(file)
self.num_linked_files += 1
self.album._add_file(self, file)
self.update_file_metadata(file)
add_metadata_images(self, [file])
self.album._add_file(self, file)
file.metadata_images_changed.connect(self.update_orig_metadata_images)
def update_file_metadata(self, file):
@@ -98,14 +103,14 @@ class Track(DataObject, Item):
self.linked_files.remove(file)
self.num_linked_files -= 1
file.copy_metadata(file.orig_metadata, preserve_deleted=False)
self.album._remove_file(self, file)
file.metadata_images_changed.disconnect(self.update_orig_metadata_images)
self.album._remove_file(self, file)
remove_metadata_images(self, [file])
self.update()
def update(self):
if self.item:
self.item.update()
self.update_orig_metadata_images()
def iterfiles(self, save=False):
for file in self.linked_files:
@@ -246,8 +251,8 @@ class Track(DataObject, Item):
def keep_original_images(self):
for file in self.linked_files:
file.keep_original_images()
self.update_orig_metadata_images()
if self.linked_files:
self.update_orig_metadata_images()
self.metadata.images = self.orig_metadata.images[:]
else:
self.metadata.images = []

View File

@@ -2,6 +2,7 @@
#
# Picard, the next-generation MusicBrainz tagger
# Copyright (C) 2017 Antonio Larrosa <alarrosa@suse.com>
# Copyright (C) 2018 Philipp Wolfer <ph.wolfer@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@@ -35,34 +36,11 @@ class ImageList(list):
return result
def _process_images(state, src_obj):
from picard.track import Track
# Check new images
if state.update_new_metadata:
if state.first_new_obj:
state.new_images = src_obj.metadata.images[:]
state.first_new_obj = False
else:
if state.new_images != src_obj.metadata.images:
state.has_common_new_images = False
state.new_images.extend([image for image in src_obj.metadata.images if image not in state.new_images])
if state.update_orig_metadata and not isinstance(src_obj, Track):
# Check orig images, but not for Tracks (which don't have a useful orig_metadata)
if state.first_orig_obj:
state.orig_images = src_obj.orig_metadata.images[:]
state.first_orig_obj = False
else:
if state.orig_images != src_obj.orig_metadata.images:
state.has_common_orig_images = False
state.orig_images.extend([image for image in src_obj.orig_metadata.images if image not in state.orig_images])
class State:
class ImageListState:
def __init__(self):
self.new_images = ImageList()
self.orig_images = ImageList()
self.new_images = set()
self.orig_images = set()
self.sources = []
self.has_common_new_images = True
self.has_common_orig_images = True
self.first_new_obj = True
@@ -72,36 +50,174 @@ class State:
self.update_orig_metadata = False
# TODO: use functools.singledispatch when py3 is supported
def update_metadata_images(obj):
def _process_images(state, src_obj):
from picard.track import Track
from picard.cluster import Cluster
from picard.album import Album
state = State()
# Check new images
if state.update_new_metadata:
if state.new_images != set(src_obj.metadata.images):
if not state.first_new_obj:
state.has_common_new_images = False
state.new_images = state.new_images.union(src_obj.metadata.images)
if state.first_new_obj:
state.first_new_obj = False
if isinstance(obj, Album):
sources = []
for track in obj.tracks:
sources.append(track)
sources += track.linked_files
sources += obj.unmatched_files.files
state.update_new_metadata = True
state.update_orig_metadata = True
elif isinstance(obj, Track):
sources = obj.linked_files
state.update_orig_metadata = True
elif isinstance(obj, Cluster):
sources = obj.files
state.update_new_metadata = True
if state.update_orig_metadata and not isinstance(src_obj, Track):
# Check orig images, but not for Tracks (which don't have a useful orig_metadata)
if state.orig_images != set(src_obj.orig_metadata.images):
if not state.first_orig_obj:
state.has_common_orig_images = False
state.orig_images = state.orig_images.union(src_obj.orig_metadata.images)
if state.first_orig_obj:
state.first_orig_obj = False
for src_obj in sources:
def _update_state(obj, state):
for src_obj in state.sources:
_process_images(state, src_obj)
if state.update_new_metadata:
obj.metadata.images = state.new_images
obj.metadata.images = ImageList(state.new_images)
obj.metadata.has_common_images = state.has_common_new_images
if state.update_orig_metadata:
obj.orig_metadata.images = state.orig_images
obj.orig_metadata.images = ImageList(state.orig_images)
obj.orig_metadata.has_common_images = state.has_common_orig_images
# TODO: use functools.singledispatch when py3 is supported
def _get_state(obj):
from picard.album import Album
from picard.cluster import Cluster
from picard.track import Track
state = ImageListState()
if isinstance(obj, Album):
for track in obj.tracks:
state.sources.append(track)
state.sources += track.linked_files
state.sources += obj.unmatched_files.files
state.update_new_metadata = True
state.update_orig_metadata = True
elif isinstance(obj, Track):
state.sources = obj.linked_files
state.update_orig_metadata = True
elif isinstance(obj, Cluster):
state.sources = obj.files
state.update_new_metadata = True
return state
def _get_metadata_images(state, sources):
new_images = set()
orig_images = set()
for s in sources:
if state.update_new_metadata:
new_images = new_images.union(s.metadata.images)
if state.update_orig_metadata:
orig_images = orig_images.union(s.orig_metadata.images)
return (new_images, orig_images)
def update_metadata_images(obj):
"""Update the metadata images `obj` based on its children.
Based on the type of `obj` this will update `obj.metadata.images` to
represent the metadata images of all children (`Track` or `File` objects).
This method will iterate over all children and completely rebuild
`obj.metadata.images`. Whenever possible the more specific functions
`add_metadata_images` or `remove_metadata_images` should be used.
Args:
obj: A `Cluster`, `Album` or `Track` object with `metadata` property
"""
_update_state(obj, _get_state(obj))
def _add_images(metadata, added_images):
if not added_images:
return
current_images = set(metadata.images)
if added_images != current_images:
metadata.images = ImageList(current_images.union(added_images))
metadata.has_common_images = False
def add_metadata_images(obj, added_sources):
"""Add the images in the metadata of `added_sources` to the metadata of `obj`.
Args:
obj: A `Cluster`, `Album` or `Track` object with `metadata` property
added_sources: List of child objects (`Track` or `File`) which's metadata images should be added to `obj`
"""
state = _get_state(obj)
(added_new_images, added_orig_images) = _get_metadata_images(state, added_sources)
if state.update_new_metadata:
_add_images(obj.metadata, added_new_images)
if state.update_orig_metadata:
_add_images(obj.orig_metadata, added_orig_images)
def _remove_images(metadata, sources, removed_images):
"""Removes `removed_images` from metadata `images`, but only if they are not included in `sources`.
Args:
metadata: `Metadata` object from which images should be removed
sources: List of source `Metadata` objects
removed_images: Set of `CoverArt` proposed for removal from `metadata`
"""
if not metadata.images or not removed_images:
return
if not sources:
metadata.images = ImageList()
metadata.has_common_images = True
return
current_images = set(metadata.images)
if metadata.has_common_images and current_images == removed_images:
return
common_images = True # True, if all children share the same images
previous_images = None
# Iterate over all sources and check whether the images proposed to be
# removed are used in any sources. Images used in existing sources
# must not be removed.
for source_metadata in sources:
source_images = set(source_metadata.images)
if previous_images and common_images and previous_images != source_images:
common_images = False
previous_images = set(source_metadata.images) # Remember for next iteration
removed_images = removed_images.difference(source_images)
if not removed_images and not common_images:
return # No images left to remove, abort immediatelly
metadata.images = ImageList(current_images.difference(removed_images))
metadata.has_common_images = common_images
def remove_metadata_images(obj, removed_sources):
"""Remove the images in the metadata of `removed_sources` from the metadata of `obj`.
Args:
obj: A `Cluster`, `Album` or `Track` object with `metadata` property
removed_sources: List of child objects (`Track` or `File`) which's metadata images should be removed from `obj`
"""
from picard.track import Track
state = _get_state(obj)
(removed_new_images, removed_orig_images) = _get_metadata_images(state, removed_sources)
if state.update_new_metadata:
sources = [s.metadata for s in state.sources]
_remove_images(obj.metadata, sources, removed_new_images)
if state.update_orig_metadata:
sources = [s.orig_metadata for s in state.sources if not isinstance(s, Track)]
_remove_images(obj.orig_metadata, sources, removed_orig_images)

223
test/test_imagelist.py Normal file
View File

@@ -0,0 +1,223 @@
# -*- coding: utf-8 -*-
import os
import struct
import unittest
from picard.album import Album
from picard.cluster import Cluster
from picard.coverart.image import CoverArtImage
from picard.track import Track
from picard.file import File
from picard.util.imagelist import (
add_metadata_images,
remove_metadata_images,
update_metadata_images
)
def create_fake_png(extra):
"""Creates fake PNG data that satisfies Picard's internal image type detection"""
return b'\x89PNG\x0D\x0A\x1A\x0A' + (b'a' * 4) + b'IHDR' + struct.pack('>LL', 100, 100) + extra
def create_test_files():
test_images = [
CoverArtImage(url='file://file1', data=create_fake_png(b'a')),
CoverArtImage(url='file://file2', data=create_fake_png(b'b')),
]
test_files = [
File('test1.flac'),
File('test2.flac'),
File('test2.flac')
]
test_files[0].metadata.append_image(test_images[0])
test_files[1].metadata.append_image(test_images[1])
test_files[2].metadata.append_image(test_images[1])
test_files[0].orig_metadata.append_image(test_images[0])
test_files[1].orig_metadata.append_image(test_images[1])
test_files[2].orig_metadata.append_image(test_images[1])
return (test_images, test_files)
class UpdateMetadataImagesTest(unittest.TestCase):
def setUp(self):
(self.test_images, self.test_files) = create_test_files()
def test_update_cluster_images(self):
cluster = Cluster('Test')
cluster.files = list(self.test_files)
update_metadata_images(cluster)
self.assertEqual(set(self.test_images), set(cluster.metadata.images))
self.assertFalse(cluster.metadata.has_common_images)
cluster.files.remove(self.test_files[2])
update_metadata_images(cluster)
self.assertEqual(set(self.test_images), set(cluster.metadata.images))
self.assertFalse(cluster.metadata.has_common_images)
cluster.files.remove(self.test_files[0])
update_metadata_images(cluster)
self.assertEqual(set(self.test_images[1:]), set(cluster.metadata.images))
self.assertTrue(cluster.metadata.has_common_images)
cluster.files.append(self.test_files[2])
update_metadata_images(cluster)
self.assertEqual(set(self.test_images[1:]), set(cluster.metadata.images))
self.assertTrue(cluster.metadata.has_common_images)
def test_update_track_images(self):
track = Track('00000000-0000-0000-0000-000000000000')
track.linked_files = list(self.test_files)
update_metadata_images(track)
self.assertEqual(set(self.test_images), set(track.orig_metadata.images))
self.assertFalse(track.orig_metadata.has_common_images)
track.linked_files.remove(self.test_files[2])
update_metadata_images(track)
self.assertEqual(set(self.test_images), set(track.orig_metadata.images))
self.assertFalse(track.orig_metadata.has_common_images)
track.linked_files.remove(self.test_files[0])
update_metadata_images(track)
self.assertEqual(set(self.test_images[1:]), set(track.orig_metadata.images))
self.assertTrue(track.orig_metadata.has_common_images)
track.linked_files.append(self.test_files[2])
update_metadata_images(track)
self.assertEqual(set(self.test_images[1:]), set(track.orig_metadata.images))
self.assertTrue(track.orig_metadata.has_common_images)
def test_update_album_images(self):
album = Album('00000000-0000-0000-0000-000000000000')
track1 = Track('00000000-0000-0000-0000-000000000001')
track1.linked_files.append(self.test_files[0])
track2 = Track('00000000-0000-0000-0000-000000000002')
track2.linked_files.append(self.test_files[1])
album.tracks = [track1, track2]
album.unmatched_files.files.append(self.test_files[2])
update_metadata_images(album)
self.assertEqual(set(self.test_images), set(album.orig_metadata.images))
self.assertFalse(album.orig_metadata.has_common_images)
album.tracks.remove(track2)
update_metadata_images(album)
self.assertEqual(set(self.test_images), set(album.orig_metadata.images))
self.assertFalse(album.orig_metadata.has_common_images)
# album.unmatched_files.files.remove(self.test_files[2])
album.tracks.remove(track1)
update_metadata_images(album)
self.assertEqual(set(self.test_images[1:]), set(album.orig_metadata.images))
self.assertTrue(album.orig_metadata.has_common_images)
album.tracks.append(track2)
update_metadata_images(album)
self.assertEqual(set(self.test_images[1:]), set(album.orig_metadata.images))
self.assertTrue(album.orig_metadata.has_common_images)
class RemoveMetadataImagesTest(unittest.TestCase):
def setUp(self):
(self.test_images, self.test_files) = create_test_files()
def test_remove_from_cluster(self):
cluster = Cluster('Test')
cluster.files = list(self.test_files)
update_metadata_images(cluster)
cluster.files.remove(self.test_files[0])
remove_metadata_images(cluster, [self.test_files[0]])
self.assertEqual(set(self.test_images[1:]), set(cluster.metadata.images))
self.assertTrue(cluster.metadata.has_common_images)
def test_remove_from_cluster_with_common_images(self):
cluster = Cluster('Test')
cluster.files = list(self.test_files[1:])
update_metadata_images(cluster)
cluster.files.remove(self.test_files[1])
remove_metadata_images(cluster, [self.test_files[1]])
self.assertEqual(set(self.test_images[1:]), set(cluster.metadata.images))
self.assertTrue(cluster.metadata.has_common_images)
def test_remove_from_empty_cluster(self):
cluster = Cluster('Test')
cluster.files.append(File('test1.flac'))
update_metadata_images(cluster)
remove_metadata_images(cluster, [cluster.files[0]])
self.assertEqual(set(), set(cluster.metadata.images))
self.assertTrue(cluster.metadata.has_common_images)
def test_remove_from_track(self):
track = Track('00000000-0000-0000-0000-000000000000')
track.linked_files = list(self.test_files)
update_metadata_images(track)
track.linked_files.remove(self.test_files[0])
remove_metadata_images(track, [self.test_files[0]])
self.assertEqual(set(self.test_images[1:]), set(track.orig_metadata.images))
self.assertTrue(track.orig_metadata.has_common_images)
def test_remove_from_track_with_common_images(self):
track = Track('00000000-0000-0000-0000-000000000000')
track.linked_files = list(self.test_files[1:])
update_metadata_images(track)
track.linked_files.remove(self.test_files[1])
remove_metadata_images(track, [self.test_files[1]])
self.assertEqual(set(self.test_images[1:]), set(track.orig_metadata.images))
self.assertTrue(track.orig_metadata.has_common_images)
def test_remove_from_empty_track(self):
track = Track('00000000-0000-0000-0000-000000000000')
track.linked_files.append(File('test1.flac'))
update_metadata_images(track)
remove_metadata_images(track, [track.linked_files[0]])
self.assertEqual(set(), set(track.orig_metadata.images))
self.assertTrue(track.orig_metadata.has_common_images)
def test_remove_from_album(self):
album = Album('00000000-0000-0000-0000-000000000000')
album.unmatched_files.files = list(self.test_files)
update_metadata_images(album)
album.unmatched_files.files.remove(self.test_files[0])
remove_metadata_images(album, [self.test_files[0]])
self.assertEqual(set(self.test_images[1:]), set(album.metadata.images))
self.assertEqual(set(self.test_images[1:]), set(album.orig_metadata.images))
self.assertTrue(album.metadata.has_common_images)
self.assertTrue(album.orig_metadata.has_common_images)
def test_remove_from_album_with_common_images(self):
album = Album('00000000-0000-0000-0000-000000000000')
album.unmatched_files.files = list(self.test_files[1:])
update_metadata_images(album)
album.unmatched_files.files.remove(self.test_files[1])
remove_metadata_images(album, [self.test_files[1]])
self.assertEqual(set(self.test_images[1:]), set(album.metadata.images))
self.assertEqual(set(self.test_images[1:]), set(album.orig_metadata.images))
self.assertTrue(album.metadata.has_common_images)
self.assertTrue(album.orig_metadata.has_common_images)
def test_remove_from_empty_album(self):
album = Album('00000000-0000-0000-0000-000000000000')
album.unmatched_files.files.append(File('test1.flac'))
update_metadata_images(album)
remove_metadata_images(album, [album.unmatched_files.files[0]])
self.assertEqual(set(), set(album.metadata.images))
self.assertEqual(set(), set(album.orig_metadata.images))
self.assertTrue(album.metadata.has_common_images)
self.assertTrue(album.orig_metadata.has_common_images)
class AddMetadataImagesTest(unittest.TestCase):
def setUp(self):
(self.test_images, self.test_files) = create_test_files()
def test_add_to_cluster(self):
cluster = Cluster('Test')
cluster.files = [self.test_files[0]]
update_metadata_images(cluster)
cluster.files += self.test_files[1:]
add_metadata_images(cluster, self.test_files[1:])
self.assertEqual(set(self.test_images), set(cluster.metadata.images))
self.assertFalse(cluster.metadata.has_common_images)