diff mbox series

[5/7] x86: detect PIT aliasing on ports other than 0x4[0-3]

Message ID 042f76dd-d189-c40a-baec-68ded32aa797@suse.com (mailing list archive)
State New, archived
Headers show
Series x86: Dom0 I/O port access permissions | expand

Commit Message

Jan Beulich May 11, 2023, 12:07 p.m. UTC
... in order to also deny Dom0 access through the alias ports. Without
this it is only giving the impression of denying access to PIT. Unlike
for CMOS/RTC, do detection pretty early, to avoid disturbing normal
operation later on (even if typically we won't use much of the PIT).

Like for CMOS/RTC a fundamental assumption of the probing is that reads
from the probed alias port won't have side effects (beyond such that PIT
reads have anyway) in case it does not alias the PIT's.

At to the port 0x61 accesses: Unlike other accesses we do, this masks
off the top four bits (in addition to the bottom two ones), following
Intel chipset documentation saying that these (read-only) bits should
only be written with zero.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
If Xen was running on top of another instance of itself (in HVM mode,
not PVH, i.e. not as a shim), I'm afraid our vPIT logic would not allow
the "Try to further make sure ..." check to pass in the Xen running on
top: We don't respect the gate bit being clear when handling counter
reads. (There are more unhandled [and unmentioned as being so] aspects
of PIT behavior though, yet it's unclear in how far addressing at least
some of them would be useful.)

Comments

Roger Pau Monné Oct. 26, 2023, 10:25 a.m. UTC | #1
On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
> ... in order to also deny Dom0 access through the alias ports. Without
> this it is only giving the impression of denying access to PIT. Unlike
> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
> operation later on (even if typically we won't use much of the PIT).
> 
> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> from the probed alias port won't have side effects (beyond such that PIT
> reads have anyway) in case it does not alias the PIT's.
> 
> At to the port 0x61 accesses: Unlike other accesses we do, this masks
> off the top four bits (in addition to the bottom two ones), following
> Intel chipset documentation saying that these (read-only) bits should
> only be written with zero.

As said in previous patches, I think this is likely too much risk for
little benefit.  I understand the desire to uniformly deny access to
any ports that allow interaction with devices in use by Xen (or not
allowed to be used by dom0), but there's certainly a risk in
configuring such devices in the way that we do by finding a register
that can be read and written to.

I think if anything this alias detection should have a command line
option in order to disable it.

Do you also have figures if the greatly increased amount of accesses
add a noticeable boot time regression?

We are usually cautious is not performing more accesses than strictly
required.

> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> If Xen was running on top of another instance of itself (in HVM mode,
> not PVH, i.e. not as a shim), I'm afraid our vPIT logic would not allow
> the "Try to further make sure ..." check to pass in the Xen running on
> top: We don't respect the gate bit being clear when handling counter
> reads. (There are more unhandled [and unmentioned as being so] aspects
> of PIT behavior though, yet it's unclear in how far addressing at least
> some of them would be useful.)
> 
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -504,7 +504,11 @@ int __init dom0_setup_permissions(struct
>      }
>  
>      /* Interval Timer (PIT). */
> -    rc |= ioports_deny_access(d, 0x40, 0x43);
> +    for ( offs = 0, i = pit_alias_mask & -pit_alias_mask ?: 4;
> +          offs <= pit_alias_mask; offs += i )
> +        if ( !(offs & ~pit_alias_mask) )
> +            rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs);
> +
>      /* PIT Channel 2 / PC Speaker Control. */
>      rc |= ioports_deny_access(d, 0x61, 0x61);
>  
> --- a/xen/arch/x86/include/asm/setup.h
> +++ b/xen/arch/x86/include/asm/setup.h
> @@ -53,6 +53,7 @@ extern unsigned long highmem_start;
>  #endif
>  
>  extern unsigned int pic_alias_mask;
> +extern unsigned int pit_alias_mask;
>  
>  extern int8_t opt_smt;
>  
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -425,6 +425,69 @@ static struct platform_timesource __init
>      .resume = resume_pit,
>  };
>  
> +unsigned int __initdata pit_alias_mask;
> +
> +static void __init probe_pit_alias(void)
> +{
> +    unsigned int mask = 0x1c;
> +    uint8_t val = 0;
> +
> +    /*
> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
> +     * count is loaded independent of counting being / becoming enabled.  Thus
> +     * we have a 16-bit value fully under our control, to write and then check
> +     * whether we can also read it back unaltered.
> +     */
> +
> +    /* Turn off speaker output and disable channel 2 counting. */
> +    outb(inb(0x61) & 0x0c, 0x61);
> +
> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
> +
> +    do {
> +        uint8_t val2;
> +        unsigned int offs;
> +
> +        outb(val, PIT_CH2);
> +        outb(val ^ 0xff, PIT_CH2);
> +
> +        /* Wait for the Null Count bit to clear. */
> +        do {
> +            /* Latch status. */
> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
> +
> +            /* Try to make sure we're actually having a PIT here. */
> +            val2 = inb(PIT_CH2);
> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
> +                return;
> +        } while ( val2 & (1 << 6) );

We should have some kind of timeout here, just in case...

> +
> +        /*
> +         * Try to further make sure we're actually having a PIT here.
> +         *
> +         * NB: Deliberately |, not ||, as we always want both reads.
> +         */
> +        val2 = inb(PIT_CH2);
> +        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
> +            return;
> +
> +        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
> +        {
> +            if ( !(mask & offs) )
> +                continue;
> +            val2 = inb(PIT_CH2 + offs);
> +            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
> +                mask &= ~offs;
> +        }
> +    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
> +
> +    if ( mask )
> +    {
> +        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
> +        pit_alias_mask = mask;
> +    }

Is it fine to leave counting disabled for channel 2?

Shouldn't we restore the previous gate value?

Thanks, Roger.
Jan Beulich Oct. 26, 2023, 12:31 p.m. UTC | #2
On 26.10.2023 12:25, Roger Pau Monné wrote:
> On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
>> ... in order to also deny Dom0 access through the alias ports. Without
>> this it is only giving the impression of denying access to PIT. Unlike
>> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
>> operation later on (even if typically we won't use much of the PIT).
>>
>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>> from the probed alias port won't have side effects (beyond such that PIT
>> reads have anyway) in case it does not alias the PIT's.
>>
>> At to the port 0x61 accesses: Unlike other accesses we do, this masks
>> off the top four bits (in addition to the bottom two ones), following
>> Intel chipset documentation saying that these (read-only) bits should
>> only be written with zero.
> 
> As said in previous patches, I think this is likely too much risk for
> little benefit.  I understand the desire to uniformly deny access to
> any ports that allow interaction with devices in use by Xen (or not
> allowed to be used by dom0), but there's certainly a risk in
> configuring such devices in the way that we do by finding a register
> that can be read and written to.
> 
> I think if anything this alias detection should have a command line
> option in order to disable it.

Well, we could have command line options (for each of the RTC/CMOS,
PIC, and PIT probing allowing the alias masks to be specified (so we
don't need to probe). A value of 1 would uniformly mean "no probing,
no aliases" (as all three decode the low bit, so aliasing can happen
there). We could further make the default of these variables (yes/no,
no actual mask values of course) controllable by a Kconfig setting.

> Do you also have figures if the greatly increased amount of accesses
> add a noticeable boot time regression?

I didn't measure anything, but nothing's really noticeable, no. And
the number of accesses in all three of CMOS/RTC, PIC, and PIT is of
roughly the same order, I think (and the RTC/CMOS one has been in
for a while).

> We are usually cautious is not performing more accesses than strictly
> required.

Sadly "strictly required" is an uncertain quantity here. The more
checking we do, the less likely that we'd identify a false positive
case of aliasing.

>> --- a/xen/arch/x86/time.c
>> +++ b/xen/arch/x86/time.c
>> @@ -425,6 +425,69 @@ static struct platform_timesource __init
>>      .resume = resume_pit,
>>  };
>>  
>> +unsigned int __initdata pit_alias_mask;
>> +
>> +static void __init probe_pit_alias(void)
>> +{
>> +    unsigned int mask = 0x1c;
>> +    uint8_t val = 0;
>> +
>> +    /*
>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
>> +     * count is loaded independent of counting being / becoming enabled.  Thus
>> +     * we have a 16-bit value fully under our control, to write and then check
>> +     * whether we can also read it back unaltered.
>> +     */
>> +
>> +    /* Turn off speaker output and disable channel 2 counting. */
>> +    outb(inb(0x61) & 0x0c, 0x61);
>> +
>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
>> +
>> +    do {
>> +        uint8_t val2;
>> +        unsigned int offs;
>> +
>> +        outb(val, PIT_CH2);
>> +        outb(val ^ 0xff, PIT_CH2);
>> +
>> +        /* Wait for the Null Count bit to clear. */
>> +        do {
>> +            /* Latch status. */
>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
>> +
>> +            /* Try to make sure we're actually having a PIT here. */
>> +            val2 = inb(PIT_CH2);
>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
>> +                return;
>> +        } while ( val2 & (1 << 6) );
> 
> We should have some kind of timeout here, just in case...

Hmm, I indeed did consider the need for a timeout here. With what
we've done up to here we already assume a functioning PIT, verifying
simply as we go. The issue with truly using some form of timeout is
the determination of how long to wait at most.

>> +
>> +        /*
>> +         * Try to further make sure we're actually having a PIT here.
>> +         *
>> +         * NB: Deliberately |, not ||, as we always want both reads.
>> +         */
>> +        val2 = inb(PIT_CH2);
>> +        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
>> +            return;
>> +
>> +        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
>> +        {
>> +            if ( !(mask & offs) )
>> +                continue;
>> +            val2 = inb(PIT_CH2 + offs);
>> +            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
>> +                mask &= ~offs;
>> +        }
>> +    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
>> +
>> +    if ( mask )
>> +    {
>> +        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
>> +        pit_alias_mask = mask;
>> +    }
> 
> Is it fine to leave counting disabled for channel 2?
> 
> Shouldn't we restore the previous gate value?

See init_pit(), which also doesn't restore anything. The system is under
our control, and we have no other use for channel 2. (I really had
restore logic here initially, but then dropped it for said reason.)

Jan
Roger Pau Monné Oct. 26, 2023, 1:57 p.m. UTC | #3
On Thu, Oct 26, 2023 at 02:31:27PM +0200, Jan Beulich wrote:
> On 26.10.2023 12:25, Roger Pau Monné wrote:
> > On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
> >> ... in order to also deny Dom0 access through the alias ports. Without
> >> this it is only giving the impression of denying access to PIT. Unlike
> >> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
> >> operation later on (even if typically we won't use much of the PIT).
> >>
> >> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> >> from the probed alias port won't have side effects (beyond such that PIT
> >> reads have anyway) in case it does not alias the PIT's.
> >>
> >> At to the port 0x61 accesses: Unlike other accesses we do, this masks
> >> off the top four bits (in addition to the bottom two ones), following
> >> Intel chipset documentation saying that these (read-only) bits should
> >> only be written with zero.
> > 
> > As said in previous patches, I think this is likely too much risk for
> > little benefit.  I understand the desire to uniformly deny access to
> > any ports that allow interaction with devices in use by Xen (or not
> > allowed to be used by dom0), but there's certainly a risk in
> > configuring such devices in the way that we do by finding a register
> > that can be read and written to.
> > 
> > I think if anything this alias detection should have a command line
> > option in order to disable it.
> 
> Well, we could have command line options (for each of the RTC/CMOS,
> PIC, and PIT probing allowing the alias masks to be specified (so we
> don't need to probe). A value of 1 would uniformly mean "no probing,
> no aliases" (as all three decode the low bit, so aliasing can happen
> there). We could further make the default of these variables (yes/no,
> no actual mask values of course) controllable by a Kconfig setting.

If you want to make this more fine grained, or even allow the user to
provide custom masks that's all fine, but there's already
dom0_ioports_disable that allows disabling a list of IO port ranges.

What I would require is a way to avoid all the probing, so that we
could return to the previous behavior.

> >> --- a/xen/arch/x86/time.c
> >> +++ b/xen/arch/x86/time.c
> >> @@ -425,6 +425,69 @@ static struct platform_timesource __init
> >>      .resume = resume_pit,
> >>  };
> >>  
> >> +unsigned int __initdata pit_alias_mask;
> >> +
> >> +static void __init probe_pit_alias(void)
> >> +{
> >> +    unsigned int mask = 0x1c;
> >> +    uint8_t val = 0;
> >> +
> >> +    /*
> >> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
> >> +     * count is loaded independent of counting being / becoming enabled.  Thus
> >> +     * we have a 16-bit value fully under our control, to write and then check
> >> +     * whether we can also read it back unaltered.
> >> +     */
> >> +
> >> +    /* Turn off speaker output and disable channel 2 counting. */
> >> +    outb(inb(0x61) & 0x0c, 0x61);
> >> +
> >> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
> >> +
> >> +    do {
> >> +        uint8_t val2;
> >> +        unsigned int offs;
> >> +
> >> +        outb(val, PIT_CH2);
> >> +        outb(val ^ 0xff, PIT_CH2);
> >> +
> >> +        /* Wait for the Null Count bit to clear. */
> >> +        do {
> >> +            /* Latch status. */
> >> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
> >> +
> >> +            /* Try to make sure we're actually having a PIT here. */
> >> +            val2 = inb(PIT_CH2);
> >> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
> >> +                return;
> >> +        } while ( val2 & (1 << 6) );
> > 
> > We should have some kind of timeout here, just in case...
> 
> Hmm, I indeed did consider the need for a timeout here. With what
> we've done up to here we already assume a functioning PIT, verifying
> simply as we go. The issue with truly using some form of timeout is
> the determination of how long to wait at most.

I would likely make it based on iterations, could you get some figures
on how many iterations it takes for the bit to be clear?

I would think something like 1000 should be enough, but really have no
idea.

> >> +
> >> +        /*
> >> +         * Try to further make sure we're actually having a PIT here.
> >> +         *
> >> +         * NB: Deliberately |, not ||, as we always want both reads.
> >> +         */
> >> +        val2 = inb(PIT_CH2);
> >> +        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
> >> +            return;
> >> +
> >> +        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
> >> +        {
> >> +            if ( !(mask & offs) )
> >> +                continue;
> >> +            val2 = inb(PIT_CH2 + offs);
> >> +            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
> >> +                mask &= ~offs;
> >> +        }
> >> +    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
> >> +
> >> +    if ( mask )
> >> +    {
> >> +        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
> >> +        pit_alias_mask = mask;
> >> +    }
> > 
> > Is it fine to leave counting disabled for channel 2?
> > 
> > Shouldn't we restore the previous gate value?
> 
> See init_pit(), which also doesn't restore anything. The system is under
> our control, and we have no other use for channel 2. (I really had
> restore logic here initially, but then dropped it for said reason.)

It might be used by a PV dom0 (see hwdom_pit_access()), so I wonder
whether we should try to hand off as Xen found it.

Thanks, Roger.
Jan Beulich Oct. 26, 2023, 3:10 p.m. UTC | #4
On 26.10.2023 15:57, Roger Pau Monné wrote:
> On Thu, Oct 26, 2023 at 02:31:27PM +0200, Jan Beulich wrote:
>> On 26.10.2023 12:25, Roger Pau Monné wrote:
>>> On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
>>>> ... in order to also deny Dom0 access through the alias ports. Without
>>>> this it is only giving the impression of denying access to PIT. Unlike
>>>> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
>>>> operation later on (even if typically we won't use much of the PIT).
>>>>
>>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>>>> from the probed alias port won't have side effects (beyond such that PIT
>>>> reads have anyway) in case it does not alias the PIT's.
>>>>
>>>> At to the port 0x61 accesses: Unlike other accesses we do, this masks
>>>> off the top four bits (in addition to the bottom two ones), following
>>>> Intel chipset documentation saying that these (read-only) bits should
>>>> only be written with zero.
>>>
>>> As said in previous patches, I think this is likely too much risk for
>>> little benefit.  I understand the desire to uniformly deny access to
>>> any ports that allow interaction with devices in use by Xen (or not
>>> allowed to be used by dom0), but there's certainly a risk in
>>> configuring such devices in the way that we do by finding a register
>>> that can be read and written to.
>>>
>>> I think if anything this alias detection should have a command line
>>> option in order to disable it.
>>
>> Well, we could have command line options (for each of the RTC/CMOS,
>> PIC, and PIT probing allowing the alias masks to be specified (so we
>> don't need to probe). A value of 1 would uniformly mean "no probing,
>> no aliases" (as all three decode the low bit, so aliasing can happen
>> there). We could further make the default of these variables (yes/no,
>> no actual mask values of course) controllable by a Kconfig setting.
> 
> If you want to make this more fine grained, or even allow the user to
> provide custom masks that's all fine, but there's already
> dom0_ioports_disable that allows disabling a list of IO port ranges.
> 
> What I would require is a way to avoid all the probing, so that we
> could return to the previous behavior.
> 
>>>> --- a/xen/arch/x86/time.c
>>>> +++ b/xen/arch/x86/time.c
>>>> @@ -425,6 +425,69 @@ static struct platform_timesource __init
>>>>      .resume = resume_pit,
>>>>  };
>>>>  
>>>> +unsigned int __initdata pit_alias_mask;
>>>> +
>>>> +static void __init probe_pit_alias(void)
>>>> +{
>>>> +    unsigned int mask = 0x1c;
>>>> +    uint8_t val = 0;
>>>> +
>>>> +    /*
>>>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
>>>> +     * count is loaded independent of counting being / becoming enabled.  Thus
>>>> +     * we have a 16-bit value fully under our control, to write and then check
>>>> +     * whether we can also read it back unaltered.
>>>> +     */
>>>> +
>>>> +    /* Turn off speaker output and disable channel 2 counting. */
>>>> +    outb(inb(0x61) & 0x0c, 0x61);
>>>> +
>>>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
>>>> +
>>>> +    do {
>>>> +        uint8_t val2;
>>>> +        unsigned int offs;
>>>> +
>>>> +        outb(val, PIT_CH2);
>>>> +        outb(val ^ 0xff, PIT_CH2);
>>>> +
>>>> +        /* Wait for the Null Count bit to clear. */
>>>> +        do {
>>>> +            /* Latch status. */
>>>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
>>>> +
>>>> +            /* Try to make sure we're actually having a PIT here. */
>>>> +            val2 = inb(PIT_CH2);
>>>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
>>>> +                return;
>>>> +        } while ( val2 & (1 << 6) );
>>>
>>> We should have some kind of timeout here, just in case...
>>
>> Hmm, I indeed did consider the need for a timeout here. With what
>> we've done up to here we already assume a functioning PIT, verifying
>> simply as we go. The issue with truly using some form of timeout is
>> the determination of how long to wait at most.
> 
> I would likely make it based on iterations, could you get some figures
> on how many iterations it takes for the bit to be clear?
> 
> I would think something like 1000 should be enough, but really have no
> idea.

Except that how long a given number of iterations takes is unknown. 1000
may be enough today or on the systems we test, but may not be tomorrow
or on other peoples' systems. Hence why I'm hesitant ...

Jan
Roger Pau Monné Oct. 26, 2023, 3:13 p.m. UTC | #5
On Thu, Oct 26, 2023 at 05:10:41PM +0200, Jan Beulich wrote:
> On 26.10.2023 15:57, Roger Pau Monné wrote:
> > On Thu, Oct 26, 2023 at 02:31:27PM +0200, Jan Beulich wrote:
> >> On 26.10.2023 12:25, Roger Pau Monné wrote:
> >>> On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
> >>>> ... in order to also deny Dom0 access through the alias ports. Without
> >>>> this it is only giving the impression of denying access to PIT. Unlike
> >>>> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
> >>>> operation later on (even if typically we won't use much of the PIT).
> >>>>
> >>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> >>>> from the probed alias port won't have side effects (beyond such that PIT
> >>>> reads have anyway) in case it does not alias the PIT's.
> >>>>
> >>>> At to the port 0x61 accesses: Unlike other accesses we do, this masks
> >>>> off the top four bits (in addition to the bottom two ones), following
> >>>> Intel chipset documentation saying that these (read-only) bits should
> >>>> only be written with zero.
> >>>
> >>> As said in previous patches, I think this is likely too much risk for
> >>> little benefit.  I understand the desire to uniformly deny access to
> >>> any ports that allow interaction with devices in use by Xen (or not
> >>> allowed to be used by dom0), but there's certainly a risk in
> >>> configuring such devices in the way that we do by finding a register
> >>> that can be read and written to.
> >>>
> >>> I think if anything this alias detection should have a command line
> >>> option in order to disable it.
> >>
> >> Well, we could have command line options (for each of the RTC/CMOS,
> >> PIC, and PIT probing allowing the alias masks to be specified (so we
> >> don't need to probe). A value of 1 would uniformly mean "no probing,
> >> no aliases" (as all three decode the low bit, so aliasing can happen
> >> there). We could further make the default of these variables (yes/no,
> >> no actual mask values of course) controllable by a Kconfig setting.
> > 
> > If you want to make this more fine grained, or even allow the user to
> > provide custom masks that's all fine, but there's already
> > dom0_ioports_disable that allows disabling a list of IO port ranges.
> > 
> > What I would require is a way to avoid all the probing, so that we
> > could return to the previous behavior.
> > 
> >>>> --- a/xen/arch/x86/time.c
> >>>> +++ b/xen/arch/x86/time.c
> >>>> @@ -425,6 +425,69 @@ static struct platform_timesource __init
> >>>>      .resume = resume_pit,
> >>>>  };
> >>>>  
> >>>> +unsigned int __initdata pit_alias_mask;
> >>>> +
> >>>> +static void __init probe_pit_alias(void)
> >>>> +{
> >>>> +    unsigned int mask = 0x1c;
> >>>> +    uint8_t val = 0;
> >>>> +
> >>>> +    /*
> >>>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
> >>>> +     * count is loaded independent of counting being / becoming enabled.  Thus
> >>>> +     * we have a 16-bit value fully under our control, to write and then check
> >>>> +     * whether we can also read it back unaltered.
> >>>> +     */
> >>>> +
> >>>> +    /* Turn off speaker output and disable channel 2 counting. */
> >>>> +    outb(inb(0x61) & 0x0c, 0x61);
> >>>> +
> >>>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
> >>>> +
> >>>> +    do {
> >>>> +        uint8_t val2;
> >>>> +        unsigned int offs;
> >>>> +
> >>>> +        outb(val, PIT_CH2);
> >>>> +        outb(val ^ 0xff, PIT_CH2);
> >>>> +
> >>>> +        /* Wait for the Null Count bit to clear. */
> >>>> +        do {
> >>>> +            /* Latch status. */
> >>>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
> >>>> +
> >>>> +            /* Try to make sure we're actually having a PIT here. */
> >>>> +            val2 = inb(PIT_CH2);
> >>>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
> >>>> +                return;
> >>>> +        } while ( val2 & (1 << 6) );
> >>>
> >>> We should have some kind of timeout here, just in case...
> >>
> >> Hmm, I indeed did consider the need for a timeout here. With what
> >> we've done up to here we already assume a functioning PIT, verifying
> >> simply as we go. The issue with truly using some form of timeout is
> >> the determination of how long to wait at most.
> > 
> > I would likely make it based on iterations, could you get some figures
> > on how many iterations it takes for the bit to be clear?
> > 
> > I would think something like 1000 should be enough, but really have no
> > idea.
> 
> Except that how long a given number of iterations takes is unknown. 1000
> may be enough today or on the systems we test, but may not be tomorrow
> or on other peoples' systems. Hence why I'm hesitant ...

Hm, but getting stuck in a loop here can't be good either.  Let's do
it time wise if you prefer, 1s should be more than enough I would
think.

In any case the worse that could if the timeout is hit is that aliases
are not properly detected, but that's way better than the possibility
of getting stuck in an infinite loop.

Thanks, Roger.
Jan Beulich Oct. 30, 2023, 12:50 p.m. UTC | #6
On 26.10.2023 17:13, Roger Pau Monné wrote:
> On Thu, Oct 26, 2023 at 05:10:41PM +0200, Jan Beulich wrote:
>> On 26.10.2023 15:57, Roger Pau Monné wrote:
>>> On Thu, Oct 26, 2023 at 02:31:27PM +0200, Jan Beulich wrote:
>>>> On 26.10.2023 12:25, Roger Pau Monné wrote:
>>>>> On Thu, May 11, 2023 at 02:07:12PM +0200, Jan Beulich wrote:
>>>>>> ... in order to also deny Dom0 access through the alias ports. Without
>>>>>> this it is only giving the impression of denying access to PIT. Unlike
>>>>>> for CMOS/RTC, do detection pretty early, to avoid disturbing normal
>>>>>> operation later on (even if typically we won't use much of the PIT).
>>>>>>
>>>>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>>>>>> from the probed alias port won't have side effects (beyond such that PIT
>>>>>> reads have anyway) in case it does not alias the PIT's.
>>>>>>
>>>>>> At to the port 0x61 accesses: Unlike other accesses we do, this masks
>>>>>> off the top four bits (in addition to the bottom two ones), following
>>>>>> Intel chipset documentation saying that these (read-only) bits should
>>>>>> only be written with zero.
>>>>>
>>>>> As said in previous patches, I think this is likely too much risk for
>>>>> little benefit.  I understand the desire to uniformly deny access to
>>>>> any ports that allow interaction with devices in use by Xen (or not
>>>>> allowed to be used by dom0), but there's certainly a risk in
>>>>> configuring such devices in the way that we do by finding a register
>>>>> that can be read and written to.
>>>>>
>>>>> I think if anything this alias detection should have a command line
>>>>> option in order to disable it.
>>>>
>>>> Well, we could have command line options (for each of the RTC/CMOS,
>>>> PIC, and PIT probing allowing the alias masks to be specified (so we
>>>> don't need to probe). A value of 1 would uniformly mean "no probing,
>>>> no aliases" (as all three decode the low bit, so aliasing can happen
>>>> there). We could further make the default of these variables (yes/no,
>>>> no actual mask values of course) controllable by a Kconfig setting.
>>>
>>> If you want to make this more fine grained, or even allow the user to
>>> provide custom masks that's all fine, but there's already
>>> dom0_ioports_disable that allows disabling a list of IO port ranges.
>>>
>>> What I would require is a way to avoid all the probing, so that we
>>> could return to the previous behavior.
>>>
>>>>>> --- a/xen/arch/x86/time.c
>>>>>> +++ b/xen/arch/x86/time.c
>>>>>> @@ -425,6 +425,69 @@ static struct platform_timesource __init
>>>>>>      .resume = resume_pit,
>>>>>>  };
>>>>>>  
>>>>>> +unsigned int __initdata pit_alias_mask;
>>>>>> +
>>>>>> +static void __init probe_pit_alias(void)
>>>>>> +{
>>>>>> +    unsigned int mask = 0x1c;
>>>>>> +    uint8_t val = 0;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
>>>>>> +     * count is loaded independent of counting being / becoming enabled.  Thus
>>>>>> +     * we have a 16-bit value fully under our control, to write and then check
>>>>>> +     * whether we can also read it back unaltered.
>>>>>> +     */
>>>>>> +
>>>>>> +    /* Turn off speaker output and disable channel 2 counting. */
>>>>>> +    outb(inb(0x61) & 0x0c, 0x61);
>>>>>> +
>>>>>> +    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
>>>>>> +
>>>>>> +    do {
>>>>>> +        uint8_t val2;
>>>>>> +        unsigned int offs;
>>>>>> +
>>>>>> +        outb(val, PIT_CH2);
>>>>>> +        outb(val ^ 0xff, PIT_CH2);
>>>>>> +
>>>>>> +        /* Wait for the Null Count bit to clear. */
>>>>>> +        do {
>>>>>> +            /* Latch status. */
>>>>>> +            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
>>>>>> +
>>>>>> +            /* Try to make sure we're actually having a PIT here. */
>>>>>> +            val2 = inb(PIT_CH2);
>>>>>> +            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
>>>>>> +                return;
>>>>>> +        } while ( val2 & (1 << 6) );
>>>>>
>>>>> We should have some kind of timeout here, just in case...
>>>>
>>>> Hmm, I indeed did consider the need for a timeout here. With what
>>>> we've done up to here we already assume a functioning PIT, verifying
>>>> simply as we go. The issue with truly using some form of timeout is
>>>> the determination of how long to wait at most.
>>>
>>> I would likely make it based on iterations, could you get some figures
>>> on how many iterations it takes for the bit to be clear?
>>>
>>> I would think something like 1000 should be enough, but really have no
>>> idea.
>>
>> Except that how long a given number of iterations takes is unknown. 1000
>> may be enough today or on the systems we test, but may not be tomorrow
>> or on other peoples' systems. Hence why I'm hesitant ...
> 
> Hm, but getting stuck in a loop here can't be good either.

I certainly understand that. The command line option I've just added in
a prereq patch would allow bypassing the probing, but of course I agree
that we want to avoid hanging here nevertheless (if we can).

>  Let's do
> it time wise if you prefer, 1s should be more than enough I would
> think.

Yet time-wise is also problematic ahead of us having calibrated clocks.
And using the PIT itself (which runs at a known frequency) doesn't look
to be a good idea when we mean to deal with the case of a broken PIT,
or none at all.

Jan
diff mbox series

Patch

--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -504,7 +504,11 @@  int __init dom0_setup_permissions(struct
     }
 
     /* Interval Timer (PIT). */
-    rc |= ioports_deny_access(d, 0x40, 0x43);
+    for ( offs = 0, i = pit_alias_mask & -pit_alias_mask ?: 4;
+          offs <= pit_alias_mask; offs += i )
+        if ( !(offs & ~pit_alias_mask) )
+            rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs);
+
     /* PIT Channel 2 / PC Speaker Control. */
     rc |= ioports_deny_access(d, 0x61, 0x61);
 
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -53,6 +53,7 @@  extern unsigned long highmem_start;
 #endif
 
 extern unsigned int pic_alias_mask;
+extern unsigned int pit_alias_mask;
 
 extern int8_t opt_smt;
 
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -425,6 +425,69 @@  static struct platform_timesource __init
     .resume = resume_pit,
 };
 
+unsigned int __initdata pit_alias_mask;
+
+static void __init probe_pit_alias(void)
+{
+    unsigned int mask = 0x1c;
+    uint8_t val = 0;
+
+    /*
+     * Use channel 2 in mode 0 for probing.  In this mode even a non-initial
+     * count is loaded independent of counting being / becoming enabled.  Thus
+     * we have a 16-bit value fully under our control, to write and then check
+     * whether we can also read it back unaltered.
+     */
+
+    /* Turn off speaker output and disable channel 2 counting. */
+    outb(inb(0x61) & 0x0c, 0x61);
+
+    outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */
+
+    do {
+        uint8_t val2;
+        unsigned int offs;
+
+        outb(val, PIT_CH2);
+        outb(val ^ 0xff, PIT_CH2);
+
+        /* Wait for the Null Count bit to clear. */
+        do {
+            /* Latch status. */
+            outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE);
+
+            /* Try to make sure we're actually having a PIT here. */
+            val2 = inb(PIT_CH2);
+            if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) )
+                return;
+        } while ( val2 & (1 << 6) );
+
+        /*
+         * Try to further make sure we're actually having a PIT here.
+         *
+         * NB: Deliberately |, not ||, as we always want both reads.
+         */
+        val2 = inb(PIT_CH2);
+        if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) )
+            return;
+
+        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
+        {
+            if ( !(mask & offs) )
+                continue;
+            val2 = inb(PIT_CH2 + offs);
+            if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) )
+                mask &= ~offs;
+        }
+    } while ( mask && (val += 0x0b) );  /* Arbitrary uneven number. */
+
+    if ( mask )
+    {
+        dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask);
+        pit_alias_mask = mask;
+    }
+}
+
 /************************************************************
  * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
  */
@@ -2390,6 +2453,8 @@  void __init early_time_init(void)
     }
 
     preinit_pit();
+    probe_pit_alias();
+
     tmp = init_platform_timer();
     plt_tsc.frequency = tmp;