Improve a2 enhanced lrc support

2025-12-22 09:18:18 +00:00 · 2024-03-20 15:42:51 +01:00
parent 3ef7e993fa
commit 2f3ffee03c
4 changed files with 79 additions and 49 deletions
--- a/picard/formats/id3.py
+++ b/picard/formats/id3.py
@@ -64,7 +64,7 @@ from picard.util import (
 )
 from picard.util.tags import (
    parse_comment_tag,
-    parse_lyrics_tag,
+    parse_subtag,
 )


@@ -245,8 +245,10 @@ class ID3File(File):
        'MVIN': re.compile(r'^(?P<movementnumber>\d+)(?:/(?P<movementtotal>\d+))?$')
    }

-    __lrc_line_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\])')
-    __lrc_syllable_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\]|<\d\d:\d\d\.\d\d>)')
+    __lrc_line_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\d\])')
+    __lrc_syllable_re_parse = re.compile(r'(<\d\d:\d\d\.\d\d\d>)')
+    __lrc_syllable_at_the_end_re_parse = re.compile(r'(<\d\d:\d\d\.\d\d\d>$)')
+    __lrc_both_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\d\]|<\d\d:\d\d\.\d\d\d>)')

    def __init__(self, filename):
        super().__init__(filename)
@@ -491,7 +493,7 @@ class ID3File(File):
                for value in values:
                    tags.add(id3.USLT(encoding=encoding, desc=desc, text=value))
            elif name == 'syncedlyrics' or name.startswith('syncedlyrics:'):
-                (lang, desc) = parse_lyrics_tag(name)
+                (lang, desc) = parse_subtag(name)
                for value in values:
                    sylt_lyrics = self._parse_lrc_text(value)
                    # If the text does not contain any timestamps, the tag is not added
@@ -619,7 +621,7 @@ class ID3File(File):
                        if frame.FrameID == 'USLT' and frame.desc == desc:
                            del tags[key]
                elif name == 'syncedlyrics' or name.startswith('syncedlyrics:'):
-                    (lang, desc) = parse_lyrics_tag(name)
+                    (lang, desc) = parse_subtag(name)
                    for key, frame in list(tags.items()):
                        if frame.FrameID == 'SYLT' and frame.desc == desc and frame.lang == lang \
                                and frame.type == 1 and frame.format == 2:
@@ -729,27 +731,77 @@ class ID3File(File):
        return values

    def _parse_sylt_text(self, text):
-        lrc_lyrics = []
-        previous_line_ended = True
-        for lyrics, milliseconds in text:
-            minutes = milliseconds // (60 * 1000)
-            seconds = (milliseconds % (60 * 1000)) // 1000
-            hundredths = (milliseconds % 1000) // 10
-            if previous_line_ended:
-                lrc_lyrics.append(f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]{lyrics}")
-                previous_line_ended = False
-            else:
-                lrc_lyrics.append(f"<{minutes:02d}:{seconds:02d}.{hundredths:02d}>{lyrics}")
+        """
+        Converts SYLT frame content to A2 enhanced LRC text.
+
+        `text` is a sequence of (lyrics, milliseconds) pairs. The result starts
+        with a "[mm:ss.xxx]" line timestamp and puts a "<mm:ss.xxx>" syllable
+        timestamp in front of the lyrics of every pair. Lyrics following a line
+        break inside a pair get an estimated line timestamp.
+        Returns an empty string if `text` is empty.
+        """
+        # zip(*text) below cannot be unpacked into two names without any pair
+        if not text:
+            return ""
+
+        def milliseconds_to_timestamp(ms):
+            minutes = ms // (60 * 1000)
+            seconds = (ms % (60 * 1000)) // 1000
+            remaining_ms = ms % 1000
+            return f"{minutes:02d}:{seconds:02d}.{remaining_ms:03d}"
+
+        syllables, milliseconds = zip(*text)
+        first_timestamp = milliseconds_to_timestamp(milliseconds[0])
+        lrc_lyrics = [f"[{first_timestamp}]"]
+        for i, lyrics in enumerate(syllables):
+            timestamp = milliseconds_to_timestamp(milliseconds[i])
            if '\n' in lyrics:
-                previous_line_ended = True
+                split = lyrics.split('\n')
+                lrc_lyrics.append(f"<{timestamp}>{split[0]}")
+                # There is no timestamp for the start of the following line(s):
+                # estimate it halfway to the next pair, or reuse the time of
+                # this pair if it is the last one.
+                if i + 1 < len(milliseconds):
+                    estimation = (milliseconds[i] + milliseconds[i + 1]) // 2
+                else:
+                    estimation = milliseconds[i]
+                line_timestamp = milliseconds_to_timestamp(estimation)
+                for line in split[1:]:
+                    lrc_lyrics.append(f"\n[{line_timestamp}]{line}")
+            else:
+                lrc_lyrics.append(f"<{timestamp}>{lyrics}")
        return "".join(lrc_lyrics)

    def _parse_lrc_text(self, text):
+        """
+        Converts LRC text to a list of (lyrics, milliseconds) pairs for SYLT.
+
+        Both "[mm:ss.xxx]" line timestamps and "<mm:ss.xxx>" syllable timestamps
+        are recognized. Text in front of the first timestamp is dropped, and an
+        empty list is returned if `text` contains no timestamp at all.
+        """
        sylt_lyrics = []
-        timestamp_and_lyrics = batched(self.__lrc_syllable_re_parse.split(text)[1:], 2)
+
+        # Text with syllable timestamps is treated as a2 enhanced lrc: the line
+        # timestamps of lines starting with one and a syllable timestamp closing
+        # a line are removed, so that only the syllable timestamps are converted
+        if self.__lrc_syllable_re_parse.search(text):
+            lines = []
+            split = text.split("\n")
+            for line in split:
+                if self.__lrc_line_re_parse.match(line):
+                    line = self.__lrc_line_re_parse.sub("", line)
+                line = self.__lrc_syllable_at_the_end_re_parse.sub("", line.rstrip())
+                lines.append(line)
+            text = "\n".join(lines)
+
+        timestamp_and_lyrics = batched(self.__lrc_both_re_parse.split(text)[1:], 2)
        for timestamp, lyrics in timestamp_and_lyrics:
-            minutes, seconds, hundredths = timestamp[1:-1].replace(".", ":").split(':')
-            milliseconds = int(minutes) * 60 * 1000 + int(seconds) * 1000 + int(hundredths) * 10
+            minutes, seconds, ms = timestamp[1:-1].replace(".", ":").split(':')
+            # __lrc_both_re_parse only matches three digit fractions, so `ms` is
+            # already in milliseconds. Integer math avoids the rounding error of
+            # int(float("1.001") * 1000), which evaluates to 1000.
+            milliseconds = int(minutes) * 60 * 1000 + int(seconds) * 1000 + int(ms)
            sylt_lyrics.append((lyrics, milliseconds))
        return sylt_lyrics

--- a/picard/util/tags.py
+++ b/picard/util/tags.py
@@ -199,7 +199,7 @@ def parse_comment_tag(name):  # noqa: E302
    return lang, desc


-def parse_lyrics_tag(name):
+def parse_subtag(name):
    """
    Parses a tag name like "lyrics:XXX:desc", where XXX is the language.
    If language is not set, the colons are still mandatory, and "eng" is
--- a/test/formats/test_id3.py
+++ b/test/formats/test_id3.py
@@ -372,9 +372,10 @@ class CommonId3Tests:

        @skipUnlessTestfile
        def test_syncedlyrics_preserve_language_and_description(self):
-            metadata = Metadata({'syncedlyrics:ita': '[00:00.00]foo3'})
-            metadata.add('syncedlyrics:deu:desc', '[00:00.00]foo1')
-            metadata.add('syncedlyrics::desc', '[00:00.00]foo4')
+            metadata = Metadata({'syncedlyrics': '[00:00.000]<00:00.000>foo1'})
+            metadata.add('syncedlyrics:deu:desc', '[00:00.000]<00:00.000>foo2')
+            metadata.add('syncedlyrics:ita', '[00:00.000]<00:00.000>foo3')
+            metadata.add('syncedlyrics::desc', '[00:00.000]<00:00.000>foo4')
            loaded_metadata = save_and_load_metadata(self.filename, metadata)
            self.assertEqual(metadata['syncedlyrics'], loaded_metadata['syncedlyrics:eng'])
            self.assertEqual(metadata['syncedlyrics:deu:desc'], loaded_metadata['syncedlyrics:deu:desc'])
@@ -383,7 +384,7 @@ class CommonId3Tests:

        @skipUnlessTestfile
        def test_syncedlyrics_delete(self):
-            metadata = Metadata({'syncedlyrics': '[00:00.00]foo3'})
+            metadata = Metadata({'syncedlyrics': '[00:00.000]<00:00.000>foo1'})
            metadata.delete('syncedlyrics:eng')
            save_metadata(self.filename, metadata)
            raw_metadata = load_raw(self.filename)
@@ -777,31 +778,37 @@ class ID3FileTest(PicardTestCase):

    def test_syncedlyrics_converting_to_lrc(self):
+        # Each SYLT input below is paired by position with the LRC text that
+        # _parse_sylt_text is expected to produce for it
        sylt = (
-            [("first", 0), ("bar\n", 500), ("second", 1000), ("bar", 1500)],
-            [("test", 0), ("syl", 10), ("la", 20), ("bles", 30)],
-            [("test lyrics with\n", 0), ("only line time stamps", 5000)])
+            [("Test", 0), ("normal\n", 500), ("behaviour", 1000)],
+            [("Test", 0), ("syl", 10), ("la", 20), ("bles", 30)],
+            [("Test newline\nin the middle", 0), ("of the text", 1000)],
+            [("Test empty lyrics at the end\n", 0), ("", 1000)],
+            [("Test timestamp estimation", 0), ("in the\nlast phrase", 1000)])
        correct_lrc = (
-            "[00:00.00]first<00:00.50>bar\n[00:01.00]second<00:01.50>bar",
-            "[00:00.00]test<00:00.01>syl<00:00.02>la<00:00.03>bles",
-            "[00:00.00]test lyrics with\n[00:05.00]only line time stamps")
-        for sylt, correct_sylt in zip(sylt, correct_lrc):
-            sylt = self.file._parse_sylt_text(sylt)
-            self.assertEqual(sylt, correct_sylt)
+            "[00:00.000]<00:00.000>Test<00:00.500>normal\n[00:00.750]<00:01.000>behaviour",
+            "[00:00.000]<00:00.000>Test<00:00.010>syl<00:00.020>la<00:00.030>bles",
+            "[00:00.000]<00:00.000>Test newline\n[00:00.500]in the middle<00:01.000>of the text",
+            "[00:00.000]<00:00.000>Test empty lyrics at the end\n[00:00.500]<00:01.000>",
+            "[00:00.000]<00:00.000>Test timestamp estimation<00:01.000>in the\n[00:01.000]last phrase")
+        for sylt_text, expected_lrc in zip(sylt, correct_lrc):
+            lrc = self.file._parse_sylt_text(sylt_text)
+            self.assertEqual(lrc, expected_lrc)

    def test_syncedlyrics_converting_to_sylt(self):
        lrc = (
-            "[00:00.00]first<00:00.50>bar\n[00:01.00]second<00:01.50>bar",
-            "[00:00.00]test lyrics with\n[01:00.00]only line time stamps",
-            "[00:00.00]test lyrics with no[00:01.00]new lines",
-            "first invalid[00:00.00]input\nsecond invalid[00:01.00]input",
-            "[00:02.00]test out of order[00:01.00]timestamps",
+            "[00:00.000]<00:00.000>Test<00:00.500>normal\n[00:00.750]<00:01.000>behaviour",
+            "[00:00.000]Test lyrics with\n[01:00.000]only line time stamps",
+            "<00:00.000>Test lyrics with<01:00.000>only syllable time stamps",
+            "[00:00.000]<00:00.000>Test extra<00:00.500>\n[00:00.750]<00:00.750>timestamp<00:01.500>",
+            "Test invalid[00:00.000]input\nTest invalid[00:01.000]input",
            "Test lyrics with no timestamps")
        correct_sylt = (
-            [("first", 0), ("bar\n", 500), ("second", 1000), ("bar", 1500)],
-            [("test lyrics with\n", 0), ("only line time stamps", 60 * 1000)],
-            [("test lyrics with no", 0), ("new lines", 1000)],
-            [("input\nsecond invalid", 0), ("input", 1000)],
-            [("test out of order", 2000), ("timestamps", 1000)],
+            [("Test", 0), ("normal\n", 500), ("behaviour", 1000)],
+            [("Test lyrics with\n", 0), ("only line time stamps", 60 * 1000)],
+            [("Test lyrics with", 0), ("only syllable time stamps", 60 * 1000)],
+            [("Test extra\n", 0), ("timestamp", 750)],
+            [("input\nTest invalid", 0), ("input", 1000)],
            [])
        for lrc, correct_sylt in zip(lrc, correct_sylt):
            sylt = self.file._parse_lrc_text(lrc)
--- a/test/test_util_tags.py
+++ b/test/test_util_tags.py
@@ -25,7 +25,7 @@ from test.picardtestcase import PicardTestCase
 from picard.util.tags import (
    display_tag_name,
    parse_comment_tag,
-    parse_lyrics_tag,
+    parse_subtag,
 )


@@ -42,7 +42,8 @@ class UtilTagsTest(PicardTestCase):
        self.assertEqual(('eng', ''), parse_comment_tag('comment'))

-    def test_parse_lyrics_tag(self):
-        self.assertEqual(('eng', ''), parse_lyrics_tag('lyrics'))
-        self.assertEqual(('XXX', 'foo'), parse_lyrics_tag('lyrics:XXX:foo'))
-        self.assertEqual(('XXX', ''), parse_lyrics_tag('lyrics:XXX'))
-        self.assertEqual(('eng', 'foo'), parse_lyrics_tag('lyrics::foo'))
+    def test_parse_subtag(self):
+        # Expected results are (language, description); language falls back to "eng"
+        self.assertEqual(('eng', ''), parse_subtag('lyrics'))
+        self.assertEqual(('XXX', 'foo'), parse_subtag('lyrics:XXX:foo'))
+        self.assertEqual(('XXX', ''), parse_subtag('lyrics:XXX'))
+        self.assertEqual(('eng', 'foo'), parse_subtag('lyrics::foo'))