diff --git a/picard/formats/id3.py b/picard/formats/id3.py index 88b024ac2..76f07a248 100644 --- a/picard/formats/id3.py +++ b/picard/formats/id3.py @@ -64,7 +64,7 @@ from picard.util import ( ) from picard.util.tags import ( parse_comment_tag, - parse_lyrics_tag, + parse_subtag, ) @@ -245,8 +245,10 @@ class ID3File(File): 'MVIN': re.compile(r'^(?P\d+)(?:/(?P\d+))?$') } - __lrc_line_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\])') - __lrc_syllable_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\]|<\d\d:\d\d\.\d\d>)') + __lrc_line_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\d\])') + __lrc_syllable_re_parse = re.compile(r'(<\d\d:\d\d\.\d\d\d>)') + __lrc_syllable_at_the_end_re_parse = re.compile(r'(<\d\d:\d\d\.\d\d\d>$)') + __lrc_both_re_parse = re.compile(r'(\[\d\d:\d\d\.\d\d\d\]|<\d\d:\d\d\.\d\d\d>)') def __init__(self, filename): super().__init__(filename) @@ -491,7 +493,7 @@ class ID3File(File): for value in values: tags.add(id3.USLT(encoding=encoding, desc=desc, text=value)) elif name == 'syncedlyrics' or name.startswith('syncedlyrics:'): - (lang, desc) = parse_lyrics_tag(name) + (lang, desc) = parse_subtag(name) for value in values: sylt_lyrics = self._parse_lrc_text(value) # If the text does not contain any timestamps, the tag is not added @@ -619,7 +621,7 @@ class ID3File(File): if frame.FrameID == 'USLT' and frame.desc == desc: del tags[key] elif name == 'syncedlyrics' or name.startswith('syncedlyrics:'): - (lang, desc) = parse_lyrics_tag(name) + (lang, desc) = parse_subtag(name) for key, frame in list(tags.items()): if frame.FrameID == 'SYLT' and frame.desc == desc and frame.lang == lang \ and frame.type == 1 and frame.format == 2: @@ -729,27 +731,50 @@ class ID3File(File): return values def _parse_sylt_text(self, text): - lrc_lyrics = [] - previous_line_ended = True - for lyrics, milliseconds in text: - minutes = milliseconds // (60 * 1000) - seconds = (milliseconds % (60 * 1000)) // 1000 - hundredths = (milliseconds % 1000) // 10 - if previous_line_ended: - lrc_lyrics.append(f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]{lyrics}") - previous_line_ended = False - else: - lrc_lyrics.append(f"<{minutes:02d}:{seconds:02d}.{hundredths:02d}>{lyrics}") + + def milliseconds_to_timestamp(ms): + minutes = ms // (60 * 1000) + seconds = (ms % (60 * 1000)) // 1000 + remaining_ms = ms % 1000 + return f"{minutes:02d}:{seconds:02d}.{remaining_ms:03d}" + + lyrics, milliseconds = zip(*text) + first_timestamp = milliseconds_to_timestamp(milliseconds[0]) + lrc_lyrics = [f"[{first_timestamp}]"] + for i, lyrics in enumerate(lyrics): + timestamp = milliseconds_to_timestamp(milliseconds[i]) if '\n' in lyrics: - previous_line_ended = True + split = lyrics.split('\n') + lrc_lyrics.append(f"<{timestamp}>{split[0]}") + for line in split[1:]: + if i + 1 < len(milliseconds): + estimation = (milliseconds[i] + milliseconds[i + 1]) // 2 + else: + estimation = milliseconds[i] + timestamp = milliseconds_to_timestamp(estimation) + lrc_lyrics.append(f"\n[{timestamp}]{line}") + else: + lrc_lyrics.append(f"<{timestamp}>{lyrics}") return "".join(lrc_lyrics) def _parse_lrc_text(self, text): sylt_lyrics = [] - timestamp_and_lyrics = batched(self.__lrc_syllable_re_parse.split(text)[1:], 2) + + # If the text is in a2 enhanced lrc + if self.__lrc_syllable_re_parse.search(text): + lines = [] + split = text.split("\n") + for line in split: + if self.__lrc_line_re_parse.match(line): + line = self.__lrc_line_re_parse.sub("", line) + line = self.__lrc_syllable_at_the_end_re_parse.sub("", line.rstrip()) + lines.append(line) + text = "\n".join(lines) + + timestamp_and_lyrics = batched(self.__lrc_both_re_parse.split(text)[1:], 2) for timestamp, lyrics in timestamp_and_lyrics: - minutes, seconds, hundredths = timestamp[1:-1].replace(".", ":").split(':') - milliseconds = int(minutes) * 60 * 1000 + int(seconds) * 1000 + int(hundredths) * 10 + minutes, seconds, ms = timestamp[1:-1].replace(".", ":").split(':') + milliseconds = int(minutes) * 60 * 1000 + int(float('%s.%s' % (seconds, ms)) * 1000) sylt_lyrics.append((lyrics, milliseconds)) return sylt_lyrics diff --git a/picard/util/tags.py b/picard/util/tags.py index 61825b1ef..030beb339 100644 --- a/picard/util/tags.py +++ b/picard/util/tags.py @@ -199,7 +199,7 @@ def parse_comment_tag(name): # noqa: E302 return lang, desc -def parse_lyrics_tag(name): +def parse_subtag(name): """ Parses a tag name like "lyrics:XXX:desc", where XXX is the language. If language is not set, the colons are still mandatory, and "eng" is diff --git a/test/formats/test_id3.py b/test/formats/test_id3.py index 13870371e..a007161cf 100644 --- a/test/formats/test_id3.py +++ b/test/formats/test_id3.py @@ -372,9 +372,10 @@ class CommonId3Tests: @skipUnlessTestfile def test_syncedlyrics_preserve_language_and_description(self): - metadata = Metadata({'syncedlyrics:ita': '[00:00.00]foo3'}) - metadata.add('syncedlyrics:deu:desc', '[00:00.00]foo1') - metadata.add('syncedlyrics::desc', '[00:00.00]foo4') + metadata = Metadata({'syncedlyrics': '[00:00.000]<00:00.000>foo1'}) + metadata.add('syncedlyrics:deu:desc', '[00:00.000]<00:00.000>foo2') + metadata.add('syncedlyrics:ita', '[00:00.000]<00:00.000>foo3') + metadata.add('syncedlyrics::desc', '[00:00.000]<00:00.000>foo4') loaded_metadata = save_and_load_metadata(self.filename, metadata) self.assertEqual(metadata['syncedlyrics'], loaded_metadata['syncedlyrics:eng']) self.assertEqual(metadata['syncedlyrics:deu:desc'], loaded_metadata['syncedlyrics:deu:desc']) @@ -383,7 +384,7 @@ class CommonId3Tests: @skipUnlessTestfile def test_syncedlyrics_delete(self): - metadata = Metadata({'syncedlyrics': '[00:00.00]foo3'}) + metadata = Metadata({'syncedlyrics': '[00:00.000]<00:00.000>foo1'}) metadata.delete('syncedlyrics:eng') save_metadata(self.filename, metadata) raw_metadata = load_raw(self.filename) @@ -777,31 +778,35 @@ class ID3FileTest(PicardTestCase): def test_syncedlyrics_converting_to_lrc(self): sylt = ( - [("first", 0), ("bar\n", 500), ("second", 1000), ("bar", 1500)], - [("test", 0), ("syl", 10), ("la", 20), ("bles", 30)], - [("test lyrics with\n", 0), ("only line time stamps", 5000)]) + [("Test", 0), ("normal\n", 500), ("behaviour", 1000)], + [("Test", 0), ("syl", 10), ("la", 20), ("bles", 30)], + [("Test newline\nin the middle", 0), ("of the text", 1000)], + [("Test empty lyrics at the end\n", 0), ("", 1000)], + [("Test timestamp estimation", 0), ("in the\nlast phrase", 1000)]) correct_lrc = ( - "[00:00.00]first<00:00.50>bar\n[00:01.00]second<00:01.50>bar", - "[00:00.00]test<00:00.01>syl<00:00.02>la<00:00.03>bles", - "[00:00.00]test lyrics with\n[00:05.00]only line time stamps") - for sylt, correct_sylt in zip(sylt, correct_lrc): - sylt = self.file._parse_sylt_text(sylt) - self.assertEqual(sylt, correct_sylt) + "[00:00.000]<00:00.000>Test<00:00.500>normal\n[00:00.750]<00:01.000>behaviour", + "[00:00.000]<00:00.000>Test<00:00.010>syl<00:00.020>la<00:00.030>bles", + "[00:00.000]<00:00.000>Test newline\n[00:00.500]in the middle<00:01.000>of the text", + "[00:00.000]<00:00.000>Test empty lyrics at the end\n[00:00.500]<00:01.000>", + "[00:00.000]<00:00.000>Test timestamp estimation<00:01.000>in the\n[00:01.000]last phrase") + for sylt, correct_lrc in zip(sylt, correct_lrc): + lrc = self.file._parse_sylt_text(sylt) + self.assertEqual(lrc, correct_lrc) def test_syncedlyrics_converting_to_sylt(self): lrc = ( - "[00:00.00]first<00:00.50>bar\n[00:01.00]second<00:01.50>bar", - "[00:00.00]test lyrics with\n[01:00.00]only line time stamps", - "[00:00.00]test lyrics with no[00:01.00]new lines", - "first invalid[00:00.00]input\nsecond invalid[00:01.00]input", - "[00:02.00]test out of order[00:01.00]timestamps", + "[00:00.000]<00:00.000>Test<00:00.500>normal\n[00:00.750]<00:01.000>behaviour", + "[00:00.000]Test lyrics with\n[01:00.000]only line time stamps", + "<00:00.000>Test lyrics with<01:00.000>only syllable time stamps", + "[00:00.000]<00:00.000>Test extra<00:00.500>\n[00:00.750]<00:00.750>timestamp<00:01.500>", + "Test invalid[00:00.000]input\nTest invalid[00:01.000]input", "Test lyrics with no timestamps") correct_sylt = ( - [("first", 0), ("bar\n", 500), ("second", 1000), ("bar", 1500)], - [("test lyrics with\n", 0), ("only line time stamps", 60 * 1000)], - [("test lyrics with no", 0), ("new lines", 1000)], - [("input\nsecond invalid", 0), ("input", 1000)], - [("test out of order", 2000), ("timestamps", 1000)], + [("Test", 0), ("normal\n", 500), ("behaviour", 1000)], + [("Test lyrics with\n", 0), ("only line time stamps", 60 * 1000)], + [("Test lyrics with", 0), ("only syllable time stamps", 60 * 1000)], + [("Test extra\n", 0), ("timestamp", 750)], + [("input\nTest invalid", 0), ("input", 1000)], []) for lrc, correct_sylt in zip(lrc, correct_sylt): sylt = self.file._parse_lrc_text(lrc) diff --git a/test/test_util_tags.py b/test/test_util_tags.py index 92dbda752..6e455fcf5 100644 --- a/test/test_util_tags.py +++ b/test/test_util_tags.py @@ -25,7 +25,7 @@ from test.picardtestcase import PicardTestCase from picard.util.tags import ( display_tag_name, parse_comment_tag, - parse_lyrics_tag, + parse_subtag, ) @@ -42,7 +42,7 @@ class UtilTagsTest(PicardTestCase): self.assertEqual(('eng', ''), parse_comment_tag('comment')) def test_parse_lyrics_tag(self): - self.assertEqual(('eng', ''), parse_lyrics_tag('lyrics')) - self.assertEqual(('XXX', 'foo'), parse_lyrics_tag('lyrics:XXX:foo')) - self.assertEqual(('XXX', ''), parse_lyrics_tag('lyrics:XXX')) - self.assertEqual(('eng', 'foo'), parse_lyrics_tag('lyrics::foo')) + self.assertEqual(('eng', ''), parse_subtag('lyrics')) + self.assertEqual(('XXX', 'foo'), parse_subtag('lyrics:XXX:foo')) + self.assertEqual(('XXX', ''), parse_subtag('lyrics:XXX')) + self.assertEqual(('eng', 'foo'), parse_subtag('lyrics::foo'))