Message ID | 20210123100345.642933-1-ppandit@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fdc: check drive block device before usage (CVE-2021-20196) | expand |
Hello, Here is a reproducer for this bug found by the OSS-Fuzz fuzzer: cat << EOF | qemu-system-i386 -display none -machine accel=qtest \ -m 512M -machine pc -device floppy,unit=1,id=floppy0,drive=disk0 \ -drive id=disk0,file=null-co://,file.read-zeroes=on,if=none,format=raw \ -qtest stdio outw 0x3f4 0x2500 outb 0x3f5 0x81 outb 0x3f5 0x0 outb 0x3f5 0x0 outb 0x3f5 0x0 outw 0x3f4 0x0 outw 0x3f4 0x4000 outw 0x3f4 0x13 outb 0x3f5 0x1 outw 0x3f2 0x1405 outw 0x3f4 0x0 EOF -Alex On 210123 1533, P J P wrote: > From: Prasad J Pandit <pjp@fedoraproject.org> > > While processing ioport command in 'fdctrl_write_dor', device > controller may select a drive which is not initialised with a > block device. This may result in a NULL pointer dereference. > Add checks to avoid it. > > Fixes: CVE-2021-20196 > Reported-by: Gaoning Pan <pgn@zju.edu.cn> > Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 > Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> > --- > hw/block/fdc.c | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > index 3636874432..13a9470d19 100644 > --- a/hw/block/fdc.c > +++ b/hw/block/fdc.c > @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) > } > } > /* Selected drive */ > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > + } > > fdctrl->dor = value; > } > @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) > uint32_t pos; > > cur_drv = get_cur_drv(fdctrl); > + if (!cur_drv->blk) { > + FLOPPY_DPRINTF("No drive connected\n"); > + return 0; > + } > fdctrl->dsr &= ~FD_DSR_PWRDOWN; > if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { > FLOPPY_DPRINTF("error: controller not ready for reading\n"); > @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > if (pos == FD_SECTOR_LEN - 1 || > fdctrl->data_pos == 
fdctrl->data_len) { > cur_drv = get_cur_drv(fdctrl); > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > + if (cur_drv->blk == NULL > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > BDRV_SECTOR_SIZE, 0) < 0) { > FLOPPY_DPRINTF("error writing sector %d\n", > fd_sector(cur_drv)); > -- > 2.29.2 > >
On 210123 1247, Alexander Bulekov wrote: > Hello, > Here is a reproducer for this bug found by the OSS-Fuzz fuzzer: > > cat << EOF | qemu-system-i386 -display none -machine accel=qtest \ > -m 512M -machine pc -device floppy,unit=1,id=floppy0,drive=disk0 \ > -drive id=disk0,file=null-co://,file.read-zeroes=on,if=none,format=raw \ > -qtest stdio > outw 0x3f4 0x2500 > outb 0x3f5 0x81 > outb 0x3f5 0x0 > outb 0x3f5 0x0 > outb 0x3f5 0x0 > outw 0x3f4 0x0 > outw 0x3f4 0x4000 > outw 0x3f4 0x13 > outb 0x3f5 0x1 > outw 0x3f2 0x1405 > outw 0x3f4 0x0 > EOF > Oops there's a mistake there. Fixed: cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest \ -m 512M -machine pc -device floppy,unit=1,id=floppy0,drive=disk0 \ -drive id=disk0,file=null-co://,file.read-zeroes=on,if=none,format=raw \ -qtest stdio outw 0x3f4 0x2500 outb 0x3f5 0x81 outb 0x3f5 0x0 outb 0x3f5 0x0 outb 0x3f5 0x0 outw 0x3f2 0x14 outw 0x3f4 0x0 outw 0x3f4 0x4000 outw 0x3f4 0x13 outb 0x3f5 0x1 outw 0x3f2 0x1405 outw 0x3f4 0x0 EOF > -Alex > > On 210123 1533, P J P wrote: > > From: Prasad J Pandit <pjp@fedoraproject.org> > > > > While processing ioport command in 'fdctrl_write_dor', device > > controller may select a drive which is not initialised with a > > block device. This may result in a NULL pointer dereference. > > Add checks to avoid it. 
> > > > Fixes: CVE-2021-20196 > > Reported-by: Gaoning Pan <pgn@zju.edu.cn> > > Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 > > Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> > > --- > > hw/block/fdc.c | 11 +++++++++-- > > 1 file changed, 9 insertions(+), 2 deletions(-) > > > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > > index 3636874432..13a9470d19 100644 > > --- a/hw/block/fdc.c > > +++ b/hw/block/fdc.c > > @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) > > } > > } > > /* Selected drive */ > > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > > + } > > > > fdctrl->dor = value; > > } > > @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) > > uint32_t pos; > > > > cur_drv = get_cur_drv(fdctrl); > > + if (!cur_drv->blk) { > > + FLOPPY_DPRINTF("No drive connected\n"); > > + return 0; > > + } > > fdctrl->dsr &= ~FD_DSR_PWRDOWN; > > if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { > > FLOPPY_DPRINTF("error: controller not ready for reading\n"); > > @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > > if (pos == FD_SECTOR_LEN - 1 || > > fdctrl->data_pos == fdctrl->data_len) { > > cur_drv = get_cur_drv(fdctrl); > > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > > + if (cur_drv->blk == NULL > > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > > BDRV_SECTOR_SIZE, 0) < 0) { > > FLOPPY_DPRINTF("error writing sector %d\n", > > fd_sector(cur_drv)); > > -- > > 2.29.2 > > > >
+-- On Sat, 23 Jan 2021, P J P wrote --+ | From: Prasad J Pandit <pjp@fedoraproject.org> | | While processing ioport command in 'fdctrl_write_dor', device | controller may select a drive which is not initialised with a | block device. This may result in a NULL pointer dereference. | Add checks to avoid it. | | Fixes: CVE-2021-20196 | Reported-by: Gaoning Pan <pgn@zju.edu.cn> | Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 | Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> | --- | hw/block/fdc.c | 11 +++++++++-- | 1 file changed, 9 insertions(+), 2 deletions(-) | | diff --git a/hw/block/fdc.c b/hw/block/fdc.c | index 3636874432..13a9470d19 100644 | --- a/hw/block/fdc.c | +++ b/hw/block/fdc.c | @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) | } | } | /* Selected drive */ | - fdctrl->cur_drv = value & FD_DOR_SELMASK; | + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { | + fdctrl->cur_drv = value & FD_DOR_SELMASK; | + } | | fdctrl->dor = value; | } | @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) | uint32_t pos; | | cur_drv = get_cur_drv(fdctrl); | + if (!cur_drv->blk) { | + FLOPPY_DPRINTF("No drive connected\n"); | + return 0; | + } | fdctrl->dsr &= ~FD_DSR_PWRDOWN; | if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { | FLOPPY_DPRINTF("error: controller not ready for reading\n"); | @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) | if (pos == FD_SECTOR_LEN - 1 || | fdctrl->data_pos == fdctrl->data_len) { | cur_drv = get_cur_drv(fdctrl); | - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, | + if (cur_drv->blk == NULL | + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, | BDRV_SECTOR_SIZE, 0) < 0) { | FLOPPY_DPRINTF("error writing sector %d\n", | fd_sector(cur_drv)); | Ping..! -- Prasad J Pandit / Red Hat Product Security Team 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D
On 23/01/2021 11.03, P J P wrote: > From: Prasad J Pandit <pjp@fedoraproject.org> > > While processing ioport command in 'fdctrl_write_dor', device > controller may select a drive which is not initialised with a > block device. This may result in a NULL pointer dereference. > Add checks to avoid it. > > Fixes: CVE-2021-20196 > Reported-by: Gaoning Pan <pgn@zju.edu.cn> > Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 > Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> > --- > hw/block/fdc.c | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > index 3636874432..13a9470d19 100644 > --- a/hw/block/fdc.c > +++ b/hw/block/fdc.c > @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) > } > } > /* Selected drive */ > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > + } > > fdctrl->dor = value; > } > @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) > uint32_t pos; > > cur_drv = get_cur_drv(fdctrl); > + if (!cur_drv->blk) { > + FLOPPY_DPRINTF("No drive connected\n"); > + return 0; > + } > fdctrl->dsr &= ~FD_DSR_PWRDOWN; > if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { > FLOPPY_DPRINTF("error: controller not ready for reading\n"); > @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > if (pos == FD_SECTOR_LEN - 1 || > fdctrl->data_pos == fdctrl->data_len) { > cur_drv = get_cur_drv(fdctrl); > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > + if (cur_drv->blk == NULL > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > BDRV_SECTOR_SIZE, 0) < 0) { > FLOPPY_DPRINTF("error writing sector %d\n", > fd_sector(cur_drv)); > Ping again! Could anybody review / pick this up? Thomas
On 5/14/21 3:23 PM, Thomas Huth wrote: > On 23/01/2021 11.03, P J P wrote: >> From: Prasad J Pandit <pjp@fedoraproject.org> >> >> While processing ioport command in 'fdctrl_write_dor', device >> controller may select a drive which is not initialised with a >> block device. This may result in a NULL pointer dereference. >> Add checks to avoid it. >> >> Fixes: CVE-2021-20196 >> Reported-by: Gaoning Pan <pgn@zju.edu.cn> >> Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 >> Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> >> --- >> hw/block/fdc.c | 11 +++++++++-- >> 1 file changed, 9 insertions(+), 2 deletions(-) >> >> diff --git a/hw/block/fdc.c b/hw/block/fdc.c >> index 3636874432..13a9470d19 100644 >> --- a/hw/block/fdc.c >> +++ b/hw/block/fdc.c >> @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, >> uint32_t value) >> } >> } >> /* Selected drive */ >> - fdctrl->cur_drv = value & FD_DOR_SELMASK; >> + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { >> + fdctrl->cur_drv = value & FD_DOR_SELMASK; >> + } >> fdctrl->dor = value; >> } >> @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) >> uint32_t pos; >> cur_drv = get_cur_drv(fdctrl); >> + if (!cur_drv->blk) { >> + FLOPPY_DPRINTF("No drive connected\n"); >> + return 0; >> + } >> fdctrl->dsr &= ~FD_DSR_PWRDOWN; >> if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { >> FLOPPY_DPRINTF("error: controller not ready for reading\n"); >> @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, >> uint32_t value) >> if (pos == FD_SECTOR_LEN - 1 || >> fdctrl->data_pos == fdctrl->data_len) { >> cur_drv = get_cur_drv(fdctrl); >> - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), >> fdctrl->fifo, >> + if (cur_drv->blk == NULL >> + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), >> fdctrl->fifo, >> BDRV_SECTOR_SIZE, 0) < 0) { >> FLOPPY_DPRINTF("error writing sector %d\n", >> fd_sector(cur_drv)); >> > > Ping again! > > Could anybody review / pick this up? 
> > Thomas > Yep. Not forgotten, despite appearances. Clearing my Python review backlog, then onto FDC/IDE. In the meantime, anything anyone else happens to feel comfortable staging won't upset me any. I don't insist they go through my tree right now.
On 5/14/21 9:26 PM, John Snow wrote: > On 5/14/21 3:23 PM, Thomas Huth wrote: >> On 23/01/2021 11.03, P J P wrote: >>> From: Prasad J Pandit <pjp@fedoraproject.org> >>> >>> While processing ioport command in 'fdctrl_write_dor', device >>> controller may select a drive which is not initialised with a >>> block device. This may result in a NULL pointer dereference. >>> Add checks to avoid it. >>> >>> Fixes: CVE-2021-20196 >>> Reported-by: Gaoning Pan <pgn@zju.edu.cn> >>> Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 >>> Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> >>> --- >>> hw/block/fdc.c | 11 +++++++++-- >>> 1 file changed, 9 insertions(+), 2 deletions(-) >>> >>> diff --git a/hw/block/fdc.c b/hw/block/fdc.c >>> index 3636874432..13a9470d19 100644 >>> --- a/hw/block/fdc.c >>> +++ b/hw/block/fdc.c >>> @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, >>> uint32_t value) >>> } >>> } >>> /* Selected drive */ >>> - fdctrl->cur_drv = value & FD_DOR_SELMASK; >>> + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { >>> + fdctrl->cur_drv = value & FD_DOR_SELMASK; >>> + } >>> fdctrl->dor = value; >>> } >>> @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) >>> uint32_t pos; >>> cur_drv = get_cur_drv(fdctrl); >>> + if (!cur_drv->blk) { >>> + FLOPPY_DPRINTF("No drive connected\n"); >>> + return 0; >>> + } >>> fdctrl->dsr &= ~FD_DSR_PWRDOWN; >>> if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { >>> FLOPPY_DPRINTF("error: controller not ready for reading\n"); >>> @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, >>> uint32_t value) >>> if (pos == FD_SECTOR_LEN - 1 || >>> fdctrl->data_pos == fdctrl->data_len) { >>> cur_drv = get_cur_drv(fdctrl); >>> - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), >>> fdctrl->fifo, >>> + if (cur_drv->blk == NULL >>> + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), >>> fdctrl->fifo, >>> BDRV_SECTOR_SIZE, 0) < 0) { >>> FLOPPY_DPRINTF("error writing sector 
%d\n", >>> fd_sector(cur_drv)); >>> >> >> Ping again! >> >> Could anybody review / pick this up? This patch misses the qtest companion with the reproducer provided by Alexander. > Yep. Not forgotten, despite appearances. Clearing my Python review > backlog, then onto FDC/IDE. Yeah \o/ > > In the meantime, anything anyone else happens to feel comfortable > staging won't upset me any. I don't insist they go through my tree right > now.
+-- On Sat, 15 May 2021, Philippe Mathieu-Daudé wrote --+ | This patch misses the qtest companion with the reproducer | provided by Alexander. Do we need a revised patch[-series] including a qtest, or can it be done at merge time? Thank you. -- - P J P 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D
On 5/17/21 7:12 AM, P J P wrote: > +-- On Sat, 15 May 2021, Philippe Mathieu-Daudé wrote --+ > | This patch misses the qtest companion with the reproducer > | provided by Alexander. > > Do we need a revised patch[-series] including a qtest? OR it can be done at > merge time? > > Thank you. > -- > - P J P > 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D > Unknown, haven't dug into this patch and problem yet. If you have the time to write a qtest reproducer, you can send it separately and I'll pick it up if everything looks correct. Sorry for the FDC/ATA delays. Working on it. (...Maintainers wanted!) --js
On 5/17/21 1:12 PM, P J P wrote: > +-- On Sat, 15 May 2021, Philippe Mathieu-Daudé wrote --+ > | This patch misses the qtest companion with the reproducer > | provided by Alexander. > > Do we need a revised patch[-series] including a qtest? OR it can be done at > merge time? Paolo usually asks for it and doesn't queue a patch without a qtest when a reproducer is available, but since this is a recent CVE it might be treated as an exception. I suppose it depends on the maintainer :)
On 1/23/21 12:52 PM, Alexander Bulekov wrote: > On 210123 1247, Alexander Bulekov wrote: >> Hello, >> Here is a reproducer for this bug found by the OSS-Fuzz fuzzer: >> >> cat << EOF | qemu-system-i386 -display none -machine accel=qtest \ >> -m 512M -machine pc -device floppy,unit=1,id=floppy0,drive=disk0 \ >> -drive id=disk0,file=null-co://,file.read-zeroes=on,if=none,format=raw \ >> -qtest stdio >> outw 0x3f4 0x2500 >> outb 0x3f5 0x81 >> outb 0x3f5 0x0 >> outb 0x3f5 0x0 >> outb 0x3f5 0x0 >> outw 0x3f4 0x0 >> outw 0x3f4 0x4000 >> outw 0x3f4 0x13 >> outb 0x3f5 0x1 >> outw 0x3f2 0x1405 >> outw 0x3f4 0x0 >> EOF >> > Oops there's a mistake there. Fixed: > > cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest \ > -m 512M -machine pc -device floppy,unit=1,id=floppy0,drive=disk0 \ > -drive id=disk0,file=null-co://,file.read-zeroes=on,if=none,format=raw \ > -qtest stdio > outw 0x3f4 0x2500 > outb 0x3f5 0x81 > outb 0x3f5 0x0 > outb 0x3f5 0x0 > outb 0x3f5 0x0 > outw 0x3f2 0x14 > outw 0x3f4 0x0 > outw 0x3f4 0x4000 > outw 0x3f4 0x13 > outb 0x3f5 0x1 > outw 0x3f2 0x1405 > outw 0x3f4 0x0 > EOF > >> -Alex >> This actually still triggers after I apply this patch. It's not the same bug, or the patch isn't sufficient to address this bug. --js
On 1/23/21 5:03 AM, P J P wrote: > From: Prasad J Pandit <pjp@fedoraproject.org> > > While processing ioport command in 'fdctrl_write_dor', device > controller may select a drive which is not initialised with a > block device. This may result in a NULL pointer dereference. > Add checks to avoid it. > > Fixes: CVE-2021-20196 > Reported-by: Gaoning Pan <pgn@zju.edu.cn> > Buglink: https://bugs.launchpad.net/qemu/+bug/1912780 > Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> > --- > hw/block/fdc.c | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/hw/block/fdc.c b/hw/block/fdc.c > index 3636874432..13a9470d19 100644 > --- a/hw/block/fdc.c > +++ b/hw/block/fdc.c > @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) > } > } > /* Selected drive */ > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > + } I don't think this is correct. If you look at get_cur_drv(), it uses the TDR_BOOTSEL bit to change the logical mappings of "drive 0" or "drive 1" to be reversed. You don't check that bit here, so you might be checking the wrong drive. Plus, the TDR bit can change later, so I think you shouldn't actually protect the register write like this. Just delete this bit of code. We ought to protect the drives when we go to use them instead of preventing the registers from getting "the wrong values". > > fdctrl->dor = value; > } > @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) > uint32_t pos; > > cur_drv = get_cur_drv(fdctrl); > + if (!cur_drv->blk) { > + FLOPPY_DPRINTF("No drive connected\n"); > + return 0; > + } This seems fine ... or at least not worse than the other error handling we already have here. (Which seems to be ... basically, none. We just ignore the write and do nothing, which seems wrong. I guess it's better than a crash... 
but I don't have the time to do a proper audit of what this is SUPPOSED to do in this case.) > fdctrl->dsr &= ~FD_DSR_PWRDOWN; > if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { > FLOPPY_DPRINTF("error: controller not ready for reading\n"); > @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) > if (pos == FD_SECTOR_LEN - 1 || > fdctrl->data_pos == fdctrl->data_len) { > cur_drv = get_cur_drv(fdctrl); > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > + if (cur_drv->blk == NULL > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, Seems fine, but if we had a drive for the earlier check, will we really be in a situation where we don't have one now? > BDRV_SECTOR_SIZE, 0) < 0) { > FLOPPY_DPRINTF("error writing sector %d\n", > fd_sector(cur_drv)); > Ignore the bit I sent earlier about the qtest reproducer not correlating to this patch -- it does, I was experiencing an unrelated crash. --js
Hello John, +-- On Mon, 17 May 2021, John Snow wrote --+ | > /* Selected drive */ | > - fdctrl->cur_drv = value & FD_DOR_SELMASK; | > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { | > + fdctrl->cur_drv = value & FD_DOR_SELMASK; | > + } | | I don't think this is correct. If you look at get_cur_drv(), it uses the | TDR_BOOTSEL bit to change the logical mappings of "drive 0" or "drive 1" to be | reversed. You don't check that bit here, so you might be checking the wrong | drive. | | Plus, the TDR bit can change later, so I think you shouldn't actually protect | the register write like this. Just delete this bit of code. We ought to | protect the drives when we go to use them instead of preventing the registers | from getting "the wrong values". * I see. | > cur_drv = get_cur_drv(fdctrl); | > + if (!cur_drv->blk) { | > + FLOPPY_DPRINTF("No drive connected\n"); | > + return 0; | > + } | | This seems fine ... or at least not worse than the other error handling we | already have here. (Which seems to be ... basically, none. We just ignore the | write and do nothing, which seems wrong. I guess it's better than a crash... | but I don't have the time to do a proper audit of what this is SUPPOSED to do | in this case.) | | > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, | > + if (cur_drv->blk == NULL | > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), | > fdctrl->fifo, | | Seems fine, but if we had a drive for the earlier check, will we really be in | a situation where we don't have one now? | | Ignore the bit I sent earlier about the qtest reproducer not correlating to | this patch -- it does, I was experiencing an unrelated crash. * Okay. | On 5/17/21 7:12 AM, P J P wrote: | > Do we need a revised patch[-series] including a qtest? OR it can be done at | > merge time? | | If you have the time to write a qtest reproducer, you can send it separately | and I'll pick it up if everything looks correct. 
* Yes, that seems better, I'll try to create a qtest, but it may take time. * I'll check and revise the patch with above details asap. Thank you. -- - P J P 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D
On 5/18/21 5:01 AM, P J P wrote: > Hello John, > > +-- On Mon, 17 May 2021, John Snow wrote --+ > | > /* Selected drive */ > | > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > | > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > | > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > | > + } > | > | I don't think this is correct. If you look at get_cur_drv(), it uses the > | TDR_BOOTSEL bit to change the logical mappings of "drive 0" or "drive 1" to be > | reversed. You don't check that bit here, so you might be checking the wrong > | drive. > | > | Plus, the TDR bit can change later, so I think you shouldn't actually protect > | the register write like this. Just delete this bit of code. We ought to > | protect the drives when we go to use them instead of preventing the registers > | from getting "the wrong values". > > * I see. > (I know this is extremely backwards from how good code ought to be written where we centralize protecting sane values from becoming object state.) > | > cur_drv = get_cur_drv(fdctrl); > | > + if (!cur_drv->blk) { > | > + FLOPPY_DPRINTF("No drive connected\n"); > | > + return 0; > | > + } > | > | This seems fine ... or at least not worse than the other error handling we > | already have here. (Which seems to be ... basically, none. We just ignore the > | write and do nothing, which seems wrong. I guess it's better than a crash... > | but I don't have the time to do a proper audit of what this is SUPPOSED to do > | in this case.) > | (Which, to be clear, is 100% OK by me for this patch.) > | > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > | > + if (cur_drv->blk == NULL > | > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), > | > fdctrl->fifo, > | > | Seems fine, but if we had a drive for the earlier check, will we really be in > | a situation where we don't have one now? > | > | Ignore the bit I sent earlier about the qtest reproducer not correlating to > | this patch -- it does, I was experiencing an unrelated crash. 
> > * Okay. > > > | On 5/17/21 7:12 AM, P J P wrote: > | > Do we need a revised patch[-series] including a qtest? OR it can be done at > | > merge time? > | > | If you have the time to write a qtest reproducer, you can send it separately > | and I'll pick it up if everything looks correct. > > * Yes, that seems better, I'll try to create a qtest, but it may take time. > Understand. Maybe I can help. The fuzzer reproducer is a great first step, but just needs to be "back-translated" into the logical operations it is performing so that the test code is readable. I started doodling a tracer patch similar to the IDE one I checked in some ages ago to give symbolic names to the registers on read/write, which makes "reading" Alexander's fuzzing reproducers a bit easier. I'll go work on that for a little while. > * I'll check and revise the patch with above details asap. OK; on your own schedule. I will try to leap on the patches as soon as I get them before the FDC code falls out of my head again. If at all possible, I wouldn't mind seeing a series bundled with the other FDC fixes outstanding aggregated together. It will be easier (for me) to make sure I have everything up to date and together. (If it isn't too much hassle for you.) AFAIK there's one for reads and one for writes that are very similar -- they protect against null BLK reads when we do not have a floppy drive present. Thank you, --js > > > Thank you. > -- > - P J P > 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D >
On 5/18/21 5:01 AM, P J P wrote: > Hello John, > > +-- On Mon, 17 May 2021, John Snow wrote --+ > | > /* Selected drive */ > | > - fdctrl->cur_drv = value & FD_DOR_SELMASK; > | > + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { > | > + fdctrl->cur_drv = value & FD_DOR_SELMASK; > | > + } > | > | I don't think this is correct. If you look at get_cur_drv(), it uses the > | TDR_BOOTSEL bit to change the logical mappings of "drive 0" or "drive 1" to be > | reversed. You don't check that bit here, so you might be checking the wrong > | drive. > | > | Plus, the TDR bit can change later, so I think you shouldn't actually protect > | the register write like this. Just delete this bit of code. We ought to > | protect the drives when we go to use them instead of preventing the registers > | from getting "the wrong values". > > * I see. > > | > cur_drv = get_cur_drv(fdctrl); > | > + if (!cur_drv->blk) { > | > + FLOPPY_DPRINTF("No drive connected\n"); > | > + return 0; > | > + } > | > | This seems fine ... or at least not worse than the other error handling we > | already have here. (Which seems to be ... basically, none. We just ignore the > | write and do nothing, which seems wrong. I guess it's better than a crash... > | but I don't have the time to do a proper audit of what this is SUPPOSED to do > | in this case.) > | > | > - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, > | > + if (cur_drv->blk == NULL > | > + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), > | > fdctrl->fifo, > | > | Seems fine, but if we had a drive for the earlier check, will we really be in > | a situation where we don't have one now? > | > | Ignore the bit I sent earlier about the qtest reproducer not correlating to > | this patch -- it does, I was experiencing an unrelated crash. > > * Okay. > > > | On 5/17/21 7:12 AM, P J P wrote: > | > Do we need a revised patch[-series] including a qtest? OR it can be done at > | > merge time? 
> | > | If you have the time to write a qtest reproducer, you can send it separately > | and I'll pick it up if everything looks correct. > > * Yes, that seems better, I'll try to create a qtest, but it may take time. > I minified alexander's reproducer, which uses as few writes and as few bits as possible to achieve the same crash. It makes it easier to see what's going on with the DPRINTF/traces a little better. outb 0x3f2 0x04 outb 0x3f4 0x03 outb 0x3f5 0x25 outb 0x3f5 0x01 outb 0x3f5 0x0 outb 0x3f5 0x0 outb 0x3f5 0x0 outb 0x3f5 0x0 outb 0x3f5 0x00 outb 0x3f5 0x00 outb 0x3f5 0x01 outb 0x3f3 0x04 outb 0x3f5 0x0 Annotated: # fdctrl->cur_drv starts at 0x00 # fdctrl->dor starts at 0x0c (DMA, RESET#) # fdctrl->dsr starts at 0x00 > outb 0x3f2 0x04 fdc_ioport_write write reg 0x02 [DOR] Digital Output Register val 0x04 DOR changed from default after SeaBIOS init from 0x0c to 0x04 DMA GATE# (0x08) set from 1 --> 0 DMA GATE# appears needed to coerce fdc into a "non-dma transfer". +RESET# remains on. Needed to avoid engaging RESET routine. > outb 0x3f4 0x03 fdc_ioport_write write reg 0x04 [DSR] Date Rate Select Register val 0x03 DSR: +DRATE SEL1 DSR: +DRATE SEL0 Needed to prevent "data rate mismatch" error handling by write cmd. The next 9 bytes (all to 0x3f5) set up the write command. 0x25 selects the "Write (BeOS)" command. 0x01 selects drive1. ... 0x01 appears to say that a sector is "1 byte", but oddly enough no other value seems to trigger this crash. Not sure why. Recommend investigating if you have time. Could be transfer length calculation bug. > outb 0x3f3 0x04 fdc_ioport_write write reg 0x03 [TDR] Tape Drive Register val 0x04 TDR: +BOOTSEL This changes the meaning of cur_drv and flips selection (as far as I can tell) back to drive0 instead of the command's programmed drive1. > outb 0x3f5 0x00 fdc_ioport_write write reg 0x05 [FIFO] Data val 0x00 write is attempted on "drv1" which due to BOOTSEL maps back to "drv0", which is undefined. 
This should (I hope) help guide the writing of a more targeted patch and a good qtest case. --js > * I'll check and revise the patch with above details asap. > > > Thank you. > -- > - P J P > 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D > >
Hello John, +-- On Tue, 18 May 2021, John Snow wrote --+ | Annotated: | | # fdctrl->cur_drv starts at 0x00 | # fdctrl->dor starts at 0x0c (DMA, RESET#) | # fdctrl->dsr starts at 0x00 | | > outb 0x3f2 0x04 | fdc_ioport_write write reg 0x02 [DOR] Digital Output Register val 0x04 | DOR changed from default after SeaBIOS init from 0x0c to 0x04 | DMA GATE# (0x08) set from 1 --> 0 | DMA GATE# appears needed to coerce fdc into a "non-dma transfer". | +RESET# remains on. Needed to avoid engaging RESET routine. | | > outb 0x3f4 0x03 | fdc_ioport_write write reg 0x04 [DSR] Date Rate Select Register val 0x03 | DSR: +DRATE SEL1 | DSR: +DRATE SEL0 | Needed to prevent "data rate mismatch" error handling by write cmd. | | The next 9 bytes (all to 0x3f5) set up the write command. | | 0x25 selects the "Write (BeOS)" command. | 0x01 selects drive1. | ... | 0x01 appears to say that a sector is "1 byte", but oddly enough no other value | seems to trigger this crash. Not sure why. Recommend investigating if you have | time. Could be transfer length calculation bug. | | > outb 0x3f3 0x04 | fdc_ioport_write write reg 0x03 [TDR] Tape Drive Register val 0x04 | TDR: +BOOTSEL | This changes the meaning of cur_drv and flips selection (as far as | I can tell) back to drive0 instead of the command's programmed drive1. | | > outb 0x3f5 0x00 | fdc_ioport_write write reg 0x05 [FIFO] Data val 0x00 | write is attempted on "drv1" which due to BOOTSEL maps back to "drv0", | which is undefined. | | This should (I hope) help guide to write a more targeted patch and a good | qtest case. * Cool, thank you so much for these details John, I appreciate it. * I'll go through the 3 fdc issues we've found open and try to fix them together as one series. Thank you. -- - P J P 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D
diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 3636874432..13a9470d19 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -1429,7 +1429,9 @@ static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) } } /* Selected drive */ - fdctrl->cur_drv = value & FD_DOR_SELMASK; + if (fdctrl->drives[value & FD_DOR_SELMASK].blk) { + fdctrl->cur_drv = value & FD_DOR_SELMASK; + } fdctrl->dor = value; } @@ -1894,6 +1896,10 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) uint32_t pos; cur_drv = get_cur_drv(fdctrl); + if (!cur_drv->blk) { + FLOPPY_DPRINTF("No drive connected\n"); + return 0; + } fdctrl->dsr &= ~FD_DSR_PWRDOWN; if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { FLOPPY_DPRINTF("error: controller not ready for reading\n"); @@ -2420,7 +2426,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) if (pos == FD_SECTOR_LEN - 1 || fdctrl->data_pos == fdctrl->data_len) { cur_drv = get_cur_drv(fdctrl); - if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, + if (cur_drv->blk == NULL + || blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, BDRV_SECTOR_SIZE, 0) < 0) { FLOPPY_DPRINTF("error writing sector %d\n", fd_sector(cur_drv));