Message ID | 20250118182109.2695C19E94D@tsrv.corpit.ru (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | vvfat: refresh writing long filename | expand |
Am 18.01.25 um 18:35 schrieb Michael Tokarev: > In function create_long_filname(), the array name[8 + 3] in > struct direntry_t is used as if it were defined as name[32]. > This is intentional and works. It's nevertheless an out of > bounds array access. To avoid this problem, this patch adds a > struct lfn_direntry_t with multiple name arrays. A directory > entry for a long FAT file name is significantly different from > a directory entry for a regular FAT file name. > > This change makes whole logic dealing with the long filenames > a bit more clear (hopefully). > > Based on ideas by Volker R\ufffd\ufffdmelin. > > Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> > --- > block/vvfat.c | 69 ++++++++++++++++++++++++++++++++------------------- > 1 file changed, 43 insertions(+), 26 deletions(-) > > diff --git a/block/vvfat.c b/block/vvfat.c > index 8ffe8b3b9b..8320733045 100644 > --- a/block/vvfat.c > +++ b/block/vvfat.c > @@ -255,6 +255,17 @@ typedef struct direntry_t { > uint32_t size; > } QEMU_PACKED direntry_t; > > +typedef struct lfn_direntry_t { > + uint8_t sequence; > + uint8_t name01[10]; > + uint8_t attributes; > + uint8_t direntry_type; > + uint8_t sfn_checksum; > + uint8_t name0e[12]; > + uint16_t begin; > + uint8_t name1c[4]; > +} QEMU_PACKED lfn_direntry_t; > + > /* this structure are used to transparently access the files */ > > typedef struct mapping_t { > @@ -399,11 +410,22 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) > > /* direntry functions */ > > +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, > + const gunichar2 *lptr, const gunichar2 *lend) > +{ > + unsigned i; > + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { > + dest[i / 2 + 0] = lptr[i] & 0xff; > + dest[i / 2 + 1] = lptr[i] >> 8; > + } Hi Michael, this is not right. It's necessary to initialize the remaining elements of the name arrays. The rules are: If the file name length in characters is a multiple of 13 you are done. Otherwise the remaining unused LFN direntry name array elements have to be filled with one 0x0000 (a 16 bit 0) and the rest with 0xffff. > + return i; /* return number of elements actually written */ > +} > + > static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) > { > - int number_of_entries, i; > + unsigned number_of_entries, entidx; > + gunichar2 *lptr, *lend; > glong length; > - direntry_t *entry; > > gunichar2 *longname = g_utf8_to_utf16(filename, -1, NULL, &length, NULL); > if (!longname) { > @@ -411,31 +433,26 @@ static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) > return NULL; > } > > - number_of_entries = DIV_ROUND_UP(length * 2, 26); > - > - for(i=0;i<number_of_entries;i++) { > - entry=array_get_next(&(s->directory)); > - entry->attributes=0xf; > - entry->reserved[0]=0; > - entry->begin=0; > - entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); > - } > - for(i=0;i<26*number_of_entries;i++) { > - int offset=(i%26); > - if(offset<10) offset=1+offset; > - else if(offset<22) offset=14+offset-10; > - else offset=28+offset-22; > - entry=array_get(&(s->directory),s->directory.next-1-(i/26)); > - if (i >= 2 * length + 2) { > - entry->name[offset] = 0xff; > - } else if (i % 2 == 0) { > - entry->name[offset] = longname[i / 2] & 0xff; > - } else { > - entry->name[offset] = longname[i / 2] >> 8; > - } > + /* > + * each lfn_direntry holds 13 utf16 chars (26 bytes) of the file name, > + * the name is split into several directory entries. > + */ > + number_of_entries = DIV_ROUND_UP(length, 13); > + > + lend = longname + length; /* pointer past the end of longname */ > + for(entidx = 0, lptr = longname; entidx < number_of_entries; entidx++) { > + lfn_direntry_t *entry = array_get_next(&(s->directory)); > + entry->sequence = (number_of_entries - entidx) | (entidx == 0 ? 0x40 : 0); > + entry->attributes = 0xf; > + entry->direntry_type = 0; > + entry->begin = 0; > + /* each lftn_direntry has 3 pieces to hold parts of long file name */ > + lptr += write_lfn_part(entry->name01, sizeof(entry->name01), lptr, lend); > + lptr += write_lfn_part(entry->name0e, sizeof(entry->name0e), lptr, lend); > + lptr += write_lfn_part(entry->name1c, sizeof(entry->name1c), lptr, lend); I would use ARRAY_SIZE() instead of sizeof(). ARRAY_SIZE() is the number of elements an array can hold. You then don't have to remember the size of an array element. > } > g_free(longname); > - return array_get(&(s->directory),s->directory.next-number_of_entries); > + return array_get(&(s->directory), s->directory.next - number_of_entries); + return array_get(&s->directory, s->directory.next - number_of_entries); The parentheses around s->directory are unnecessary. With best regards, Volker > } > > static char is_free(const direntry_t* direntry) > @@ -731,7 +748,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, > /* calculate anew, because realloc could have taken place */ > entry_long=array_get(&(s->directory),long_index); > while(entry_long<entry && is_long_name(entry_long)) { > - entry_long->reserved[1]=chksum; > + ((lfn_direntry_t *)entry_long)->sfn_checksum = chksum; > entry_long++; > } > }
18.01.2025 23:32, Volker Rümelin wrote: >> +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, >> + const gunichar2 *lptr, const gunichar2 *lend) >> +{ >> + unsigned i; >> + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { >> + dest[i / 2 + 0] = lptr[i] & 0xff; >> + dest[i / 2 + 1] = lptr[i] >> 8; >> + } > > Hi Michael, > > this is not right. It's necessary to initialize the remaining elements > of the name arrays. > The rules are: > If the file name length in characters is a multiple of 13 you are done. > Otherwise the remaining unused LFN direntry name array elements have to > be filled with one 0x0000 (a 16 bit 0) and the rest with 0xffff. I wonder how it worked for me. I tested it with linux and windows guests. Hmm. Well, it's rather trivial to fix this. Lemme do just that.. >> + lptr += write_lfn_part(entry->name1c, sizeof(entry->name1c), lptr, lend); > > I would use ARRAY_SIZE() instead of sizeof(). ARRAY_SIZE() is the number > of elements an array can hold. You then don't have to remember the size > of an array element. sizeof() works fine here :) /mjt
On Sat, 18 Jan 2025, Michael Tokarev wrote: > In function create_long_filname(), the array name[8 + 3] in > struct direntry_t is used as if it were defined as name[32]. > This is intentional and works. It's nevertheless an out of > bounds array access. To avoid this problem, this patch adds a > struct lfn_direntry_t with multiple name arrays. A directory > entry for a long FAT file name is significantly different from > a directory entry for a regular FAT file name. > > This change makes whole logic dealing with the long filenames > a bit more clear (hopefully). > > Based on ideas by Volker R??melin. > > Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> > --- > block/vvfat.c | 69 ++++++++++++++++++++++++++++++++------------------- > 1 file changed, 43 insertions(+), 26 deletions(-) > > diff --git a/block/vvfat.c b/block/vvfat.c > index 8ffe8b3b9b..8320733045 100644 > --- a/block/vvfat.c > +++ b/block/vvfat.c > @@ -255,6 +255,17 @@ typedef struct direntry_t { > uint32_t size; > } QEMU_PACKED direntry_t; > > +typedef struct lfn_direntry_t { > + uint8_t sequence; > + uint8_t name01[10]; > + uint8_t attributes; > + uint8_t direntry_type; > + uint8_t sfn_checksum; > + uint8_t name0e[12]; > + uint16_t begin; > + uint8_t name1c[4]; > +} QEMU_PACKED lfn_direntry_t; > + > /* this structure are used to transparently access the files */ > > typedef struct mapping_t { > @@ -399,11 +410,22 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) > > /* direntry functions */ > > +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, > + const gunichar2 *lptr, const gunichar2 *lend) > +{ > + unsigned i; > + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { > + dest[i / 2 + 0] = lptr[i] & 0xff; > + dest[i / 2 + 1] = lptr[i] >> 8; Why not uint16_t and maybe cpu_to_le (or whatever that's called) if needed? May be simpler than handling it byte by byte. Regards, BALATON Zoltan > + } > + return i; /* return number of elements actually written */ > +} > + > static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) > { > - int number_of_entries, i; > + unsigned number_of_entries, entidx; > + gunichar2 *lptr, *lend; > glong length; > - direntry_t *entry; > > gunichar2 *longname = g_utf8_to_utf16(filename, -1, NULL, &length, NULL); > if (!longname) { > @@ -411,31 +433,26 @@ static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) > return NULL; > } > > - number_of_entries = DIV_ROUND_UP(length * 2, 26); > - > - for(i=0;i<number_of_entries;i++) { > - entry=array_get_next(&(s->directory)); > - entry->attributes=0xf; > - entry->reserved[0]=0; > - entry->begin=0; > - entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); > - } > - for(i=0;i<26*number_of_entries;i++) { > - int offset=(i%26); > - if(offset<10) offset=1+offset; > - else if(offset<22) offset=14+offset-10; > - else offset=28+offset-22; > - entry=array_get(&(s->directory),s->directory.next-1-(i/26)); > - if (i >= 2 * length + 2) { > - entry->name[offset] = 0xff; > - } else if (i % 2 == 0) { > - entry->name[offset] = longname[i / 2] & 0xff; > - } else { > - entry->name[offset] = longname[i / 2] >> 8; > - } > + /* > + * each lfn_direntry holds 13 utf16 chars (26 bytes) of the file name, > + * the name is split into several directory entries. > + */ > + number_of_entries = DIV_ROUND_UP(length, 13); > + > + lend = longname + length; /* pointer past the end of longname */ > + for(entidx = 0, lptr = longname; entidx < number_of_entries; entidx++) { > + lfn_direntry_t *entry = array_get_next(&(s->directory)); > + entry->sequence = (number_of_entries - entidx) | (entidx == 0 ? 0x40 : 0); > + entry->attributes = 0xf; > + entry->direntry_type = 0; > + entry->begin = 0; > + /* each lftn_direntry has 3 pieces to hold parts of long file name */ > + lptr += write_lfn_part(entry->name01, sizeof(entry->name01), lptr, lend); > + lptr += write_lfn_part(entry->name0e, sizeof(entry->name0e), lptr, lend); > + lptr += write_lfn_part(entry->name1c, sizeof(entry->name1c), lptr, lend); > } > g_free(longname); > - return array_get(&(s->directory),s->directory.next-number_of_entries); > + return array_get(&(s->directory), s->directory.next - number_of_entries); > } > > static char is_free(const direntry_t* direntry) > @@ -731,7 +748,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, > /* calculate anew, because realloc could have taken place */ > entry_long=array_get(&(s->directory),long_index); > while(entry_long<entry && is_long_name(entry_long)) { > - entry_long->reserved[1]=chksum; > + ((lfn_direntry_t *)entry_long)->sfn_checksum = chksum; > entry_long++; > } > } >
18.01.2025 23:32, Volker Rümelin wrote: > The rules are: > If the file name length in characters is a multiple of 13 you are done. > Otherwise the remaining unused LFN direntry name array elements have to > be filled with one 0x0000 (a 16 bit 0) and the rest with 0xffff. BTW, are we really okay with a single dirent if the name length is exactly 13 utf16 chars? Don't it need a null terminator? /mjt
18.01.2025 23:54, BALATON Zoltan wrote: >> +typedef struct lfn_direntry_t { >> + uint8_t sequence; >> + uint8_t name01[10]; >> + uint8_t attributes; >> + uint8_t direntry_type; >> + uint8_t sfn_checksum; >> + uint8_t name0e[12]; >> + uint16_t begin; >> + uint8_t name1c[4]; >> +} QEMU_PACKED lfn_direntry_t; >> +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, >> + const gunichar2 *lptr, const gunichar2 *lend) >> +{ >> + unsigned i; >> + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { >> + dest[i / 2 + 0] = lptr[i] & 0xff; >> + dest[i / 2 + 1] = lptr[i] >> 8; > > Why not uint16_t and maybe cpu_to_le (or whatever that's called) if needed? May be simpler than handling it byte by byte. The dest array is unaligned - this is, eg, name01 in the above struct. Will it work to use entry->name01[i] = cpu_to_le16(lptr[i]) here, provided lfn_direntry_t=>name is declared as uint16_t name[5] ? I haven't done programming for quite a while... ;) /mjt
Am 18.01.25 um 21:55 schrieb Michael Tokarev: > 18.01.2025 23:32, Volker Rümelin wrote: > >> The rules are: >> If the file name length in characters is a multiple of 13 you are done. >> Otherwise the remaining unused LFN direntry name array elements have to >> be filled with one 0x0000 (a 16 bit 0) and the rest with 0xffff. > > BTW, are we really okay with a single dirent if the name length is > exactly > 13 utf16 chars? Don't it need a null terminator? > Yes, that's okay. A null terminator isn't allowed in this case. See page 28 at https://msdn.microsoft.com/en-us/windows/hardware/gg463080.aspx With best regards, Volker
On Sun, 19 Jan 2025, Michael Tokarev wrote: > 18.01.2025 23:54, BALATON Zoltan wrote: >>> +typedef struct lfn_direntry_t { >>> + uint8_t sequence; >>> + uint8_t name01[10]; >>> + uint8_t attributes; >>> + uint8_t direntry_type; >>> + uint8_t sfn_checksum; >>> + uint8_t name0e[12]; >>> + uint16_t begin; >>> + uint8_t name1c[4]; >>> +} QEMU_PACKED lfn_direntry_t; > >>> +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, >>> + const gunichar2 *lptr, const gunichar2 >>> *lend) >>> +{ >>> + unsigned i; >>> + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { >>> + dest[i / 2 + 0] = lptr[i] & 0xff; >>> + dest[i / 2 + 1] = lptr[i] >> 8; >> >> Why not uint16_t and maybe cpu_to_le (or whatever that's called) if needed? >> May be simpler than handling it byte by byte. > > The dest array is unaligned - this is, eg, name01 in the above struct. > Will it work to use entry->name01[i] = cpu_to_le16(lptr[i]) here, > provided lfn_direntry_t=>name is declared as uint16_t name[5] ? I think it should work, I don't see why it would not. The compiler should be able to figure out how to handle unaligned data where needed, you should not need to do that by hand. Or I think you'd get a warning if it would not work. Regards, BALATON Zoltan > I haven't done programming for quite a while... ;) > > /mjt > >
19.01.2025 04:07, BALATON Zoltan wrote: > On Sun, 19 Jan 2025, Michael Tokarev wrote: >> 18.01.2025 23:54, BALATON Zoltan wrote: >>>> +typedef struct lfn_direntry_t { >>>> + uint8_t sequence; >>>> + uint8_t name01[10]; >>>> + uint8_t attributes; >>>> + uint8_t direntry_type; >>>> + uint8_t sfn_checksum; >>>> + uint8_t name0e[12]; >>>> + uint16_t begin; >>>> + uint8_t name1c[4]; >>>> +} QEMU_PACKED lfn_direntry_t; >> >>>> +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, >>>> + const gunichar2 *lptr, const gunichar2 *lend) >>>> +{ >>>> + unsigned i; >>>> + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { >>>> + dest[i / 2 + 0] = lptr[i] & 0xff; >>>> + dest[i / 2 + 1] = lptr[i] >> 8; >>> >>> Why not uint16_t and maybe cpu_to_le (or whatever that's called) if needed? May be simpler than handling it byte by byte. >> >> The dest array is unaligned - this is, eg, name01 in the above struct. >> Will it work to use entry->name01[i] = cpu_to_le16(lptr[i]) here, >> provided lfn_direntry_t=>name is declared as uint16_t name[5] ? > > I think it should work, I don't see why it would not. The compiler should be able to figure out how to handle unaligned data where needed, you should > not need to do that by hand. Or I think you'd get a warning if it would not work. Nope, it doesn't work. Because when a (unaligned) pointer to this 2-byte integer is passed to write_lfn_part(), this function doesn't know it is unaligned. The compiler already issues a warning at the point when we get address of a field of a packed structure. In this case, when calling write_lfn_part() with lfn_direntry_t->name01 as an argument (and other names too). And the code actually generates a trap on architectures where this matters, - I tried a simple test program on sparc. Thanks, /mjt
diff --git a/block/vvfat.c b/block/vvfat.c index 8ffe8b3b9b..8320733045 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -255,6 +255,17 @@ typedef struct direntry_t { uint32_t size; } QEMU_PACKED direntry_t; +typedef struct lfn_direntry_t { + uint8_t sequence; + uint8_t name01[10]; + uint8_t attributes; + uint8_t direntry_type; + uint8_t sfn_checksum; + uint8_t name0e[12]; + uint16_t begin; + uint8_t name1c[4]; +} QEMU_PACKED lfn_direntry_t; + /* this structure are used to transparently access the files */ typedef struct mapping_t { @@ -399,11 +410,22 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) /* direntry functions */ +static unsigned write_lfn_part(uint8_t *dest, unsigned dsize, + const gunichar2 *lptr, const gunichar2 *lend) +{ + unsigned i; + for(i = 0; i < dsize / 2 && lptr + i < lend; ++i) { + dest[i / 2 + 0] = lptr[i] & 0xff; + dest[i / 2 + 1] = lptr[i] >> 8; + } + return i; /* return number of elements actually written */ +} + static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) { - int number_of_entries, i; + unsigned number_of_entries, entidx; + gunichar2 *lptr, *lend; glong length; - direntry_t *entry; gunichar2 *longname = g_utf8_to_utf16(filename, -1, NULL, &length, NULL); if (!longname) { @@ -411,31 +433,26 @@ static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) return NULL; } - number_of_entries = DIV_ROUND_UP(length * 2, 26); - - for(i=0;i<number_of_entries;i++) { - entry=array_get_next(&(s->directory)); - entry->attributes=0xf; - entry->reserved[0]=0; - entry->begin=0; - entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); - } - for(i=0;i<26*number_of_entries;i++) { - int offset=(i%26); - if(offset<10) offset=1+offset; - else if(offset<22) offset=14+offset-10; - else offset=28+offset-22; - entry=array_get(&(s->directory),s->directory.next-1-(i/26)); - if (i >= 2 * length + 2) { - entry->name[offset] = 0xff; - } else if (i % 2 == 0) { - entry->name[offset] = longname[i / 2] & 0xff; - } else { - entry->name[offset] = longname[i / 2] >> 8; - } + /* + * each lfn_direntry holds 13 utf16 chars (26 bytes) of the file name, + * the name is split into several directory entries. + */ + number_of_entries = DIV_ROUND_UP(length, 13); + + lend = longname + length; /* pointer past the end of longname */ + for(entidx = 0, lptr = longname; entidx < number_of_entries; entidx++) { + lfn_direntry_t *entry = array_get_next(&(s->directory)); + entry->sequence = (number_of_entries - entidx) | (entidx == 0 ? 0x40 : 0); + entry->attributes = 0xf; + entry->direntry_type = 0; + entry->begin = 0; + /* each lftn_direntry has 3 pieces to hold parts of long file name */ + lptr += write_lfn_part(entry->name01, sizeof(entry->name01), lptr, lend); + lptr += write_lfn_part(entry->name0e, sizeof(entry->name0e), lptr, lend); + lptr += write_lfn_part(entry->name1c, sizeof(entry->name1c), lptr, lend); } g_free(longname); - return array_get(&(s->directory),s->directory.next-number_of_entries); + return array_get(&(s->directory), s->directory.next - number_of_entries); } static char is_free(const direntry_t* direntry) @@ -731,7 +748,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, /* calculate anew, because realloc could have taken place */ entry_long=array_get(&(s->directory),long_index); while(entry_long<entry && is_long_name(entry_long)) { - entry_long->reserved[1]=chksum; + ((lfn_direntry_t *)entry_long)->sfn_checksum = chksum; entry_long++; } }
In function create_long_filname(), the array name[8 + 3] in struct direntry_t is used as if it were defined as name[32]. This is intentional and works. It's nevertheless an out of bounds array access. To avoid this problem, this patch adds a struct lfn_direntry_t with multiple name arrays. A directory entry for a long FAT file name is significantly different from a directory entry for a regular FAT file name. This change makes whole logic dealing with the long filenames a bit more clear (hopefully). Based on ideas by Volker Rümelin. Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> --- block/vvfat.c | 69 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 26 deletions(-)