diff --git a/src/id3.c b/src/id3.c index 9485405..5960f73 100644 --- a/src/id3.c +++ b/src/id3.c @@ -798,24 +798,50 @@ _id3_parse_v2_frame_data(id3info *id3, char const *id, uint32_t size, id3_framet // Read key and uppercase it SV *key = NULL; SV *value = NULL; + AV *array = NULL; + int count = 0; read += _id3_get_utf8_string(id3, &key, size - read, encoding); if (key != NULL && SvPOK(key) && sv_len(key)) { upcase(SvPVX(key)); - // Read value + // Read value(s) if (frametype->fields[2] == ID3_FIELD_TYPE_LATIN1) { // WXXX frames have a latin1 value field regardless of encoding byte encoding = ISO_8859_1; } - read += _id3_get_utf8_string(id3, &value, size - read, encoding); + // ID3v2.4 allows multiple null-separated strings in TXXX value field. + // Loop mirrors the STRINGLIST handler for standard T* frames. + while (read < size) { + if (count++ == 1 && value != NULL) { + array = newAV(); + av_push(array, value); + } + value = NULL; - // (T|W)XXX frames don't support multiple strings separated by nulls, even in v2.4 + read += _id3_get_utf8_string(id3, &value, size - read, encoding); - // Only one tag per unique key value is allowed, that's why there is no array support here - if (value != NULL && SvPOK(value)) { + if (array != NULL && value != NULL && SvPOK(value)) { + // Bug 16452, do not add an empty string + if (sv_len(value) > 0) + av_push(array, value); + } + } + + if (array != NULL) { + if (av_len(array) == 0) { + // Multiple strings but only one non-empty: collapse to scalar + my_hv_store_ent( id3->tags, key, av_shift(array) ); + SvREFCNT_dec(array); + } + else { + my_hv_store_ent( id3->tags, key, newRV_noinc( (SV *)array ) ); + } + array = NULL; + } + else if (value != NULL && SvPOK(value)) { my_hv_store_ent( id3->tags, key, value ); } else { diff --git a/t/generate_txxx_fixtures.py b/t/generate_txxx_fixtures.py new file mode 100644 index 0000000..969d241 --- /dev/null +++ b/t/generate_txxx_fixtures.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +"""Generate MP3 test fixtures for Audio::Scan TXXX multi-value tests. + +Uses a valid MPEG frame extracted from an existing fixture as the audio +payload, then hand-builds ID3v2.4 tags with null-separated TXXX values +so we get exact byte-level control over the multi-value encoding. +""" +import struct, os + +OUTDIR = "/home/martijn/github/Audio-Scan/t/mp3" +# Use an existing valid MP3 as the audio base +BASE_MP3 = "/home/martijn/github/Audio-Scan/t/mp3/no-tags-mp1l3.mp3" + + +def get_audio_payload(): + """Read raw MPEG audio frames from an existing no-tags fixture.""" + with open(BASE_MP3, 'rb') as f: + return f.read() + + +def syncsafe_encode(size): + """Encode an integer as a 4-byte syncsafe integer (ID3v2.4 spec).""" + return ( + ((size & 0x0FE00000) << 3) | + ((size & 0x001FC000) << 2) | + ((size & 0x00003F80) << 1) | + (size & 0x0000007F) + ) + + +def make_txxx_frame(desc, values, encoding=3): + """Build an ID3v2.4 TXXX frame with null-separated values. + encoding: 0=Latin1, 3=UTF-8 + """ + if encoding == 3: + sep = b'\x00' + desc_bytes = desc.encode('utf-8') + b'\x00' + val_bytes = sep.join(v.encode('utf-8') for v in values) + else: + sep = b'\x00' + desc_bytes = desc.encode('latin-1') + b'\x00' + val_bytes = sep.join(v.encode('latin-1') for v in values) + + data = bytes([encoding]) + desc_bytes + val_bytes + size = len(data) + frame = b'TXXX' + struct.pack('>I', syncsafe_encode(size)) + b'\x00\x00' + data + return frame + + +def make_id3v2_tag(frames): + """Wrap frames in an ID3v2.4 tag header.""" + body = b''.join(frames) + size = len(body) + header = b'ID3' + b'\x04\x00' + b'\x00' + struct.pack('>I', syncsafe_encode(size)) + return header + body + + +def write_fixture(name, frames): + tag = make_id3v2_tag(frames) + audio = get_audio_payload() + path = os.path.join(OUTDIR, name) + with open(path, 'wb') as f: + f.write(tag + audio) + print(f"Wrote {path} ({len(tag) + len(audio)} bytes)") + + +# Fixture 1: Two-value TXXX (ALBUMARTISTS) +write_fixture("v2.4-txxx-multivalue.mp3", [ + make_txxx_frame("ALBUMARTISTS", ["Artist1", "Artist2"]), + make_txxx_frame("ARTISTS", ["TrackArtist1", "TrackArtist2"]), +]) + +# Fixture 2: Three-value TXXX +write_fixture("v2.4-txxx-multivalue-3.mp3", [ + make_txxx_frame("ALBUMARTISTS", ["Artist1", "Artist2", "Artist3"]), +]) + +# Fixture 3: Multi-value with empty slot (tagger bug simulation) +write_fixture("v2.4-txxx-multivalue-empty.mp3", [ + make_txxx_frame("ALBUMARTISTS", ["Artist1", "", "Artist3"]), +]) + +# Fixture 4: Single-value TXXX (regression guard) +write_fixture("v2.4-txxx-single.mp3", [ + make_txxx_frame("ALBUMARTISTS", ["OnlyArtist"]), + make_txxx_frame("USER FRAME", ["SingleValue"]), +]) diff --git a/t/mp3.t b/t/mp3.t index ee3a4cc..761fc54 100644 --- a/t/mp3.t +++ b/t/mp3.t @@ -3,7 +3,7 @@ use strict; use Digest::MD5 qw(md5_hex); use File::Spec::Functions; use FindBin (); -use Test::More tests => 401; +use Test::More tests => 413; use Test::Warn; use Audio::Scan; @@ -1374,6 +1374,39 @@ eval { is( $tags->{MP3GAIN_MINMAX}, '123,203', 'bad APE tag MP3GAIN_MINMAX ok' ); } +# Multi-value TXXX frames (ID3v2.4 null-separated values) +{ + my $s = Audio::Scan->scan( _f('v2.4-txxx-multivalue.mp3') ); + my $tags = $s->{tags}; + + is( ref $tags->{ALBUMARTISTS}, 'ARRAY', 'ID3v2.4 multi-value TXXX returns arrayref' ); + is( $tags->{ALBUMARTISTS}->[0], 'Artist1', 'ID3v2.4 multi-value TXXX value 1 ok' ); + is( $tags->{ALBUMARTISTS}->[1], 'Artist2', 'ID3v2.4 multi-value TXXX value 2 ok' ); + is( ref $tags->{ARTISTS}, 'ARRAY', 'ID3v2.4 multi-value TXXX ARTISTS returns arrayref' ); + is( $tags->{ARTISTS}->[0], 'TrackArtist1', 'ID3v2.4 multi-value TXXX ARTISTS value 1 ok' ); + is( $tags->{ARTISTS}->[1], 'TrackArtist2', 'ID3v2.4 multi-value TXXX ARTISTS value 2 ok' ); +} + +# Multi-value TXXX with 3 values +{ + my $s = Audio::Scan->scan( _f('v2.4-txxx-multivalue-3.mp3') ); + my $tags = $s->{tags}; + + is( ref $tags->{ALBUMARTISTS}, 'ARRAY', 'ID3v2.4 3-value TXXX returns arrayref' ); + is( scalar @{$tags->{ALBUMARTISTS}}, 3, 'ID3v2.4 3-value TXXX has 3 elements' ); + is( $tags->{ALBUMARTISTS}->[2], 'Artist3', 'ID3v2.4 3-value TXXX value 3 ok' ); +} + +# Multi-value TXXX with empty slot (tagger bug) +{ + my $s = Audio::Scan->scan( _f('v2.4-txxx-multivalue-empty.mp3') ); + my $tags = $s->{tags}; + + is( ref $tags->{ALBUMARTISTS}, 'ARRAY', 'ID3v2.4 TXXX with empty slot returns arrayref' ); + is( scalar @{$tags->{ALBUMARTISTS}}, 2, 'ID3v2.4 TXXX empty slot is skipped' ); + is( $tags->{ALBUMARTISTS}->[1], 'Artist3', 'ID3v2.4 TXXX value after empty slot ok' ); +} + sub _f { return catfile( $FindBin::Bin, 'mp3', shift ); } diff --git a/t/mp3/v2.4-txxx-multivalue-3.mp3 b/t/mp3/v2.4-txxx-multivalue-3.mp3 new file mode 100644 index 0000000..491ed39 Binary files /dev/null and b/t/mp3/v2.4-txxx-multivalue-3.mp3 differ diff --git a/t/mp3/v2.4-txxx-multivalue-empty.mp3 b/t/mp3/v2.4-txxx-multivalue-empty.mp3 new file mode 100644 index 0000000..ff5c69b Binary files /dev/null and b/t/mp3/v2.4-txxx-multivalue-empty.mp3 differ diff --git a/t/mp3/v2.4-txxx-multivalue.mp3 b/t/mp3/v2.4-txxx-multivalue.mp3 new file mode 100644 index 0000000..da69f60 Binary files /dev/null and b/t/mp3/v2.4-txxx-multivalue.mp3 differ diff --git a/t/mp3/v2.4-txxx-single.mp3 b/t/mp3/v2.4-txxx-single.mp3 new file mode 100644 index 0000000..698c17c Binary files /dev/null and b/t/mp3/v2.4-txxx-single.mp3 differ