diff mbox series

[4/7] x86: detect PIC aliasing on ports other than 0x[2A][01]

Message ID 27dd8f40-1ea6-1e7e-49c2-31936a17e9d7@suse.com (mailing list archive)
State New, archived
Headers show
Series x86: Dom0 I/O port access permissions | expand

Commit Message

Jan Beulich May 11, 2023, 12:06 p.m. UTC
... in order to also deny Dom0 access through the alias ports. Without
this it is only giving the impression of denying access to both PICs.
Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
operation later on.

Like for CMOS/RTC a fundamental assumption of the probing is that reads
from the probed alias port won't have side effects in case it does not
alias the respective PIC's one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Roger Pau Monné Oct. 26, 2023, 8:34 a.m. UTC | #1
On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
> ... in order to also deny Dom0 access through the alias ports. Without
> this it is only giving the impression of denying access to both PICs.
> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
> operation later on.
> 
> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> from the probed alias port won't have side effects in case it does not
> alias the respective PIC's one.

I'm slightly concerned about this probing.

Also I'm unsure we can fully isolate the hardware domain like this.
Preventing access to the non-aliased ports is IMO helpful for domains
to realize the PIT is not available, but in any case such accesses
shouldn't happen in the first place, as dom0 must be modified to run
in such mode.

> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -479,7 +479,7 @@ static void __init process_dom0_ioports_
>  int __init dom0_setup_permissions(struct domain *d)
>  {
>      unsigned long mfn;
> -    unsigned int i;
> +    unsigned int i, offs;
>      int rc;
>  
>      if ( pv_shim )
> @@ -492,10 +492,17 @@ int __init dom0_setup_permissions(struct
>  
>      /* Modify I/O port access permissions. */
>  
> -    /* Master Interrupt Controller (PIC). */
> -    rc |= ioports_deny_access(d, 0x20, 0x21);
> -    /* Slave Interrupt Controller (PIC). */
> -    rc |= ioports_deny_access(d, 0xA0, 0xA1);
> +    for ( offs = 0, i = pic_alias_mask & -pic_alias_mask ?: 2;
> +          offs <= pic_alias_mask; offs += i )

I'm a bit lost with this, specifically:

i = pic_alias_mask & -pic_alias_mask ?: 2

Which is then used as the increment step in

offs += i

I could see the usage of pic_alias_mask & -pic_alias_mask in order to
find the first offset, but afterwards don't you need to increment at
single bit left shifts in order to test all possibly set bits in
pic_alias_mask?

> +    {
> +        if ( offs & ~pic_alias_mask )
> +            continue;
> +        /* Master Interrupt Controller (PIC). */
> +        rc |= ioports_deny_access(d, 0x20 + offs, 0x21 + offs);
> +        /* Slave Interrupt Controller (PIC). */
> +        rc |= ioports_deny_access(d, 0xA0 + offs, 0xA1 + offs);
> +    }
> +
>      /* Interval Timer (PIT). */
>      rc |= ioports_deny_access(d, 0x40, 0x43);
>      /* PIT Channel 2 / PC Speaker Control. */
> --- a/xen/arch/x86/i8259.c
> +++ b/xen/arch/x86/i8259.c
> @@ -19,6 +19,7 @@
>  #include <xen/delay.h>
>  #include <asm/apic.h>
>  #include <asm/asm_defns.h>
> +#include <asm/setup.h>
>  #include <io_ports.h>
>  #include <irq_vectors.h>
>  
> @@ -332,6 +333,55 @@ void __init make_8259A_irq(unsigned int
>      irq_to_desc(irq)->handler = &i8259A_irq_type;
>  }
>  
> +unsigned int __initdata pic_alias_mask;

Should this be __hwdom_initdata?  I see it gets used in an __init
function, so I guess this all permissions stuff is not really indented
for a late hardware domain to use?

> +
> +static void __init probe_pic_alias(void)
> +{
> +    unsigned int mask = 0x1e;
> +    uint8_t val = 0;
> +
> +    /*
> +     * The only properly r/w register is OCW1.  While keeping the master
> +     * fully masked (thus also masking anything coming through the slave),
> +     * write all possible 256 values to the slave's base port, and check
> +     * whether the same value can then be read back through any of the
> +     * possible alias ports.  Probing just the slave of course builds on the
> +     * assumption that aliasing is identical for master and slave.
> +     */
> +
> +    outb(0xff, 0x21); /* Fully mask master. */
> +
> +    do {
> +        unsigned int offs;
> +
> +        outb(val, 0xa1);
> +
> +        /* Try to make sure we're actually having a PIC here. */
> +        if ( inb(0xa1) != val )
> +        {
> +            mask = 0;
> +            break;
> +        }
> +
> +        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
> +        {
> +            if ( !(mask & offs) )
> +                continue;
> +            if ( inb(0xa1 + offs) != val )
> +                mask &= ~offs;
> +        }
> +    } while ( mask && (val += 0x0d) );  /* Arbitrary uneven number. */
> +
> +    outb(cached_A1, 0xa1); /* Restore slave IRQ mask. */
> +    outb(cached_21, 0x21); /* Restore master IRQ mask. */
> +
> +    if ( mask )
> +    {
> +        dprintk(XENLOG_INFO, "PIC aliasing mask: %02x\n", mask);
> +        pic_alias_mask = mask;
> +    }
> +}
> +
>  static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL};
>  
>  void __init init_IRQ(void)
> @@ -342,6 +392,8 @@ void __init init_IRQ(void)
>  
>      init_8259A(0);
>  
> +    probe_pic_alias();

Could we use 8259A instead of pic in the function name and mask
variable?  Just so that it's consistent with how we refer to the PIC
in other parts of the code.

Thanks, Roger.
Jan Beulich Oct. 26, 2023, 9:03 a.m. UTC | #2
On 26.10.2023 10:34, Roger Pau Monné wrote:
> On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
>> ... in order to also deny Dom0 access through the alias ports. Without
>> this it is only giving the impression of denying access to both PICs.
>> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
>> operation later on.
>>
>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>> from the probed alias port won't have side effects in case it does not
>> alias the respective PIC's one.
> 
> I'm slightly concerned about this probing.
> 
> Also I'm unsure we can fully isolate the hardware domain like this.
> Preventing access to the non-aliased ports is IMO helpful for domains
> to realize the PIT is not available, but in any case such accesses
> shouldn't happen in the first place, as dom0 must be modified to run
> in such mode.

That's true for PV Dom0, but not necessarily for PVH. Plus by denying
access to the aliases we also guard against bugs in Dom0, if some
component thinks there's something else at those ports (as they
indeed were used for other purposes by various vendors).

>> @@ -492,10 +492,17 @@ int __init dom0_setup_permissions(struct
>>  
>>      /* Modify I/O port access permissions. */
>>  
>> -    /* Master Interrupt Controller (PIC). */
>> -    rc |= ioports_deny_access(d, 0x20, 0x21);
>> -    /* Slave Interrupt Controller (PIC). */
>> -    rc |= ioports_deny_access(d, 0xA0, 0xA1);
>> +    for ( offs = 0, i = pic_alias_mask & -pic_alias_mask ?: 2;
>> +          offs <= pic_alias_mask; offs += i )
> 
> I'm a bit lost with this, specifically:
> 
> i = pic_alias_mask & -pic_alias_mask ?: 2
> 
> Which is then used as the increment step in
> 
> offs += i
> 
> I could see the usage of pic_alias_mask & -pic_alias_mask in order to
> find the first offset, but afterwards don't you need to increment at
> single bit left shifts in order to test all possibly set bits in
> pic_alias_mask?

No, the smallest sensible increment is the lowest bit set in
pic_alias_mask. There's specifically no shifting involved here (just
mentioning it because you use the word). E.g. if the aliasing was at
bits 2 and 3 (pic_alias_mask=0x0c), we'd need to deny access to 20/21,
24/25, 28/29, and 2C/2D, i.e. at an increment of 4.

>> --- a/xen/arch/x86/i8259.c
>> +++ b/xen/arch/x86/i8259.c
>> @@ -19,6 +19,7 @@
>>  #include <xen/delay.h>
>>  #include <asm/apic.h>
>>  #include <asm/asm_defns.h>
>> +#include <asm/setup.h>
>>  #include <io_ports.h>
>>  #include <irq_vectors.h>
>>  
>> @@ -332,6 +333,55 @@ void __init make_8259A_irq(unsigned int
>>      irq_to_desc(irq)->handler = &i8259A_irq_type;
>>  }
>>  
>> +unsigned int __initdata pic_alias_mask;
> 
> Should this be __hwdom_initdata?  I see it gets used in an __init
> function, so I guess this all permissions stuff is not really indented
> for a late hardware domain to use?

Late hwdom "inherits" Dom0's permissions (really the permissions of the
two domains are swapped in late_hwdom_init(), leaving Dom0 with no
permissions at all by default).

>> +static void __init probe_pic_alias(void)
>> +{
>> +    unsigned int mask = 0x1e;
>> +    uint8_t val = 0;
>> +
>> +    /*
>> +     * The only properly r/w register is OCW1.  While keeping the master
>> +     * fully masked (thus also masking anything coming through the slave),
>> +     * write all possible 256 values to the slave's base port, and check
>> +     * whether the same value can then be read back through any of the
>> +     * possible alias ports.  Probing just the slave of course builds on the
>> +     * assumption that aliasing is identical for master and slave.
>> +     */
>> +
>> +    outb(0xff, 0x21); /* Fully mask master. */
>> +
>> +    do {
>> +        unsigned int offs;
>> +
>> +        outb(val, 0xa1);
>> +
>> +        /* Try to make sure we're actually having a PIC here. */
>> +        if ( inb(0xa1) != val )
>> +        {
>> +            mask = 0;
>> +            break;
>> +        }
>> +
>> +        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
>> +        {
>> +            if ( !(mask & offs) )
>> +                continue;
>> +            if ( inb(0xa1 + offs) != val )
>> +                mask &= ~offs;
>> +        }
>> +    } while ( mask && (val += 0x0d) );  /* Arbitrary uneven number. */
>> +
>> +    outb(cached_A1, 0xa1); /* Restore slave IRQ mask. */
>> +    outb(cached_21, 0x21); /* Restore master IRQ mask. */
>> +
>> +    if ( mask )
>> +    {
>> +        dprintk(XENLOG_INFO, "PIC aliasing mask: %02x\n", mask);
>> +        pic_alias_mask = mask;
>> +    }
>> +}
>> +
>>  static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL};
>>  
>>  void __init init_IRQ(void)
>> @@ -342,6 +392,8 @@ void __init init_IRQ(void)
>>  
>>      init_8259A(0);
>>  
>> +    probe_pic_alias();
> 
> Could we use 8259A instead of pic in the function name and mask
> variable?  Just so that it's consistent with how we refer to the PIC
> in other parts of the code.

I can certainly switch, no problem. For the variable it would be i8259A then,
though.

Jan
Roger Pau Monné Oct. 26, 2023, 1:24 p.m. UTC | #3
On Thu, Oct 26, 2023 at 11:03:42AM +0200, Jan Beulich wrote:
> On 26.10.2023 10:34, Roger Pau Monné wrote:
> > On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
> >> ... in order to also deny Dom0 access through the alias ports. Without
> >> this it is only giving the impression of denying access to both PICs.
> >> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
> >> operation later on.
> >>
> >> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> >> from the probed alias port won't have side effects in case it does not
> >> alias the respective PIC's one.
> > 
> > I'm slightly concerned about this probing.
> > 
> > Also I'm unsure we can fully isolate the hardware domain like this.
> > Preventing access to the non-aliased ports is IMO helpful for domains
> > to realize the PIT is not available, but in any case such accesses
> > shouldn't happen in the first place, as dom0 must be modified to run
> > in such mode.
> 
> That's true for PV Dom0, but not necessarily for PVH. Plus by denying
> access to the aliases we also guard against bugs in Dom0, if some
> component thinks there's something else at those ports (as they
> indeed were used for other purposes by various vendors).

I think it would be safe to add a command line option to disable the
probing, as we would at least like to avoid it in pvshim mode.  Maybe
ut would be interesting to make it a Kconfig option so that exclusive
pvshim Kconfig can avoid all this?

Otherwise it will just make booting the pvshim slower.

> >> @@ -492,10 +492,17 @@ int __init dom0_setup_permissions(struct
> >>  
> >>      /* Modify I/O port access permissions. */
> >>  
> >> -    /* Master Interrupt Controller (PIC). */
> >> -    rc |= ioports_deny_access(d, 0x20, 0x21);
> >> -    /* Slave Interrupt Controller (PIC). */
> >> -    rc |= ioports_deny_access(d, 0xA0, 0xA1);
> >> +    for ( offs = 0, i = pic_alias_mask & -pic_alias_mask ?: 2;
> >> +          offs <= pic_alias_mask; offs += i )
> > 
> > I'm a bit lost with this, specifically:
> > 
> > i = pic_alias_mask & -pic_alias_mask ?: 2
> > 
> > Which is then used as the increment step in
> > 
> > offs += i
> > 
> > I could see the usage of pic_alias_mask & -pic_alias_mask in order to
> > find the first offset, but afterwards don't you need to increment at
> > single bit left shifts in order to test all possibly set bits in
> > pic_alias_mask?
> 
> No, the smallest sensible increment is the lowest bit set in
> pic_alias_mask. There's specifically no shifting involved here (just
> mentioning it because you use the word). E.g. if the aliasing was at
> bits 2 and 3 (pic_alias_mask=0x0c), we'd need to deny access to 20/21,
> 24/25, 28/29, and 2C/2D, i.e. at an increment of 4.

Right, it took me a bit to realize.

We assume that aliases are based on fused address pins, so for example
we don't explicitly test for an alias at port 0x34, but expect one if
there's an alias at port 0x30 and another one at port 0x24.

Thanks, Roger.
Jan Beulich Oct. 26, 2023, 3:07 p.m. UTC | #4
On 26.10.2023 15:24, Roger Pau Monné wrote:
> On Thu, Oct 26, 2023 at 11:03:42AM +0200, Jan Beulich wrote:
>> On 26.10.2023 10:34, Roger Pau Monné wrote:
>>> On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
>>>> ... in order to also deny Dom0 access through the alias ports. Without
>>>> this it is only giving the impression of denying access to both PICs.
>>>> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
>>>> operation later on.
>>>>
>>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>>>> from the probed alias port won't have side effects in case it does not
>>>> alias the respective PIC's one.
>>>
>>> I'm slightly concerned about this probing.
>>>
>>> Also I'm unsure we can fully isolate the hardware domain like this.
>>> Preventing access to the non-aliased ports is IMO helpful for domains
>>> to realize the PIT is not available, but in any case such accesses
>>> shouldn't happen in the first place, as dom0 must be modified to run
>>> in such mode.
>>
>> That's true for PV Dom0, but not necessarily for PVH. Plus by denying
>> access to the aliases we also guard against bugs in Dom0, if some
>> component thinks there's something else at those ports (as they
>> indeed were used for other purposes by various vendors).
> 
> I think it would be safe to add a command line option to disable the
> probing, as we would at least like to avoid it in pvshim mode.  Maybe
> ut would be interesting to make it a Kconfig option so that exclusive
> pvshim Kconfig can avoid all this?
> 
> Otherwise it will just make booting the pvshim slower.

I've taken note to introduce such an option (not sure yet whether just
cmdline or also Kconfig). Still
- Shouldn't we already be bypassing related init logic in shim mode?
- A Kconfig option interfacing with PV_SHIM_EXCLUSIVE will collide with
  my patch inverting that option's sense (and renaming it), so it would
  be nice to have that sorted/accepted first (see
  https://lists.xen.org/archives/html/xen-devel/2023-03/msg00040.html).

>>>> @@ -492,10 +492,17 @@ int __init dom0_setup_permissions(struct
>>>>  
>>>>      /* Modify I/O port access permissions. */
>>>>  
>>>> -    /* Master Interrupt Controller (PIC). */
>>>> -    rc |= ioports_deny_access(d, 0x20, 0x21);
>>>> -    /* Slave Interrupt Controller (PIC). */
>>>> -    rc |= ioports_deny_access(d, 0xA0, 0xA1);
>>>> +    for ( offs = 0, i = pic_alias_mask & -pic_alias_mask ?: 2;
>>>> +          offs <= pic_alias_mask; offs += i )
>>>
>>> I'm a bit lost with this, specifically:
>>>
>>> i = pic_alias_mask & -pic_alias_mask ?: 2
>>>
>>> Which is then used as the increment step in
>>>
>>> offs += i
>>>
>>> I could see the usage of pic_alias_mask & -pic_alias_mask in order to
>>> find the first offset, but afterwards don't you need to increment at
>>> single bit left shifts in order to test all possibly set bits in
>>> pic_alias_mask?
>>
>> No, the smallest sensible increment is the lowest bit set in
>> pic_alias_mask. There's specifically no shifting involved here (just
>> mentioning it because you use the word). E.g. if the aliasing was at
>> bits 2 and 3 (pic_alias_mask=0x0c), we'd need to deny access to 20/21,
>> 24/25, 28/29, and 2C/2D, i.e. at an increment of 4.
> 
> Right, it took me a bit to realize.
> 
> We assume that aliases are based on fused address pins, so for example
> we don't explicitly test for an alias at port 0x34, but expect one if
> there's an alias at port 0x30 and another one at port 0x24.

Well, I wouldn't have called it "fused pins", but "not decoded address
bits". But yes. The same was already assumed in the CMOS/RTC patch.

Jan
Roger Pau Monné Oct. 26, 2023, 3:19 p.m. UTC | #5
On Thu, Oct 26, 2023 at 05:07:18PM +0200, Jan Beulich wrote:
> On 26.10.2023 15:24, Roger Pau Monné wrote:
> > On Thu, Oct 26, 2023 at 11:03:42AM +0200, Jan Beulich wrote:
> >> On 26.10.2023 10:34, Roger Pau Monné wrote:
> >>> On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
> >>>> ... in order to also deny Dom0 access through the alias ports. Without
> >>>> this it is only giving the impression of denying access to both PICs.
> >>>> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
> >>>> operation later on.
> >>>>
> >>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> >>>> from the probed alias port won't have side effects in case it does not
> >>>> alias the respective PIC's one.
> >>>
> >>> I'm slightly concerned about this probing.
> >>>
> >>> Also I'm unsure we can fully isolate the hardware domain like this.
> >>> Preventing access to the non-aliased ports is IMO helpful for domains
> >>> to realize the PIT is not available, but in any case such accesses
> >>> shouldn't happen in the first place, as dom0 must be modified to run
> >>> in such mode.
> >>
> >> That's true for PV Dom0, but not necessarily for PVH. Plus by denying
> >> access to the aliases we also guard against bugs in Dom0, if some
> >> component thinks there's something else at those ports (as they
> >> indeed were used for other purposes by various vendors).
> > 
> > I think it would be safe to add a command line option to disable the
> > probing, as we would at least like to avoid it in pvshim mode.  Maybe
> > ut would be interesting to make it a Kconfig option so that exclusive
> > pvshim Kconfig can avoid all this?
> > 
> > Otherwise it will just make booting the pvshim slower.
> 
> I've taken note to introduce such an option (not sure yet whether just
> cmdline or also Kconfig). Still
> - Shouldn't we already be bypassing related init logic in shim mode?

Not sure what we bypass in pvshim mode, would be good to double
check.

> - A Kconfig option interfacing with PV_SHIM_EXCLUSIVE will collide with
>   my patch inverting that option's sense (and renaming it), so it would
>   be nice to have that sorted/accepted first (see
>   https://lists.xen.org/archives/html/xen-devel/2023-03/msg00040.html).

It being Andrew the one that made the request, I would like to get his
opinion on it.  UNCONSTRAINED does seem a bit weird.

Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
Kconfig option in the first place, and instead a specific Kconfig
config file?

Maybe it's not possible to achieve the same using just a Kconfig
config file.

Thanks, Roger.
Jan Beulich Oct. 30, 2023, 12:24 p.m. UTC | #6
On 26.10.2023 17:19, Roger Pau Monné wrote:
> On Thu, Oct 26, 2023 at 05:07:18PM +0200, Jan Beulich wrote:
>> On 26.10.2023 15:24, Roger Pau Monné wrote:
>>> On Thu, Oct 26, 2023 at 11:03:42AM +0200, Jan Beulich wrote:
>>>> On 26.10.2023 10:34, Roger Pau Monné wrote:
>>>>> On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
>>>>>> ... in order to also deny Dom0 access through the alias ports. Without
>>>>>> this it is only giving the impression of denying access to both PICs.
>>>>>> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
>>>>>> operation later on.
>>>>>>
>>>>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
>>>>>> from the probed alias port won't have side effects in case it does not
>>>>>> alias the respective PIC's one.
>>>>>
>>>>> I'm slightly concerned about this probing.
>>>>>
>>>>> Also I'm unsure we can fully isolate the hardware domain like this.
>>>>> Preventing access to the non-aliased ports is IMO helpful for domains
>>>>> to realize the PIT is not available, but in any case such accesses
>>>>> shouldn't happen in the first place, as dom0 must be modified to run
>>>>> in such mode.
>>>>
>>>> That's true for PV Dom0, but not necessarily for PVH. Plus by denying
>>>> access to the aliases we also guard against bugs in Dom0, if some
>>>> component thinks there's something else at those ports (as they
>>>> indeed were used for other purposes by various vendors).
>>>
>>> I think it would be safe to add a command line option to disable the
>>> probing, as we would at least like to avoid it in pvshim mode.  Maybe
>>> ut would be interesting to make it a Kconfig option so that exclusive
>>> pvshim Kconfig can avoid all this?
>>>
>>> Otherwise it will just make booting the pvshim slower.
>>
>> I've taken note to introduce such an option (not sure yet whether just
>> cmdline or also Kconfig). Still
>> - Shouldn't we already be bypassing related init logic in shim mode?
> 
> Not sure what we bypass in pvshim mode, would be good to double
> check.
> 
>> - A Kconfig option interfacing with PV_SHIM_EXCLUSIVE will collide with
>>   my patch inverting that option's sense (and renaming it), so it would
>>   be nice to have that sorted/accepted first (see
>>   https://lists.xen.org/archives/html/xen-devel/2023-03/msg00040.html).
> 
> It being Andrew the one that made the request, I would like to get his
> opinion on it.  UNCONSTRAINED does seem a bit weird.

I agree that the name is odd; I couldn't think of any better one (and
this already is the result of 3 or 4 rounds of renaming). I'll be more
than happy to consider other naming suggestions. The main issue with the
present option is, just to re-state it here, that we have grown negative
dependencies on it, which is a problem.

However, meanwhile I've realized that we don't really want to tie anything
here to PV_SHIM_EXCLUSIVE, at least not directly. What we care about is
whether we _actually_ run in shim mode, i.e. also when a full-fledged
hypervisor is in use. My plan is now to have said new command line option,
which - if not specified on the command line - I'd default to !pv_shim.

> Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
> Kconfig option in the first place, and instead a specific Kconfig
> config file?
> 
> Maybe it's not possible to achieve the same using just a Kconfig
> config file.

I'm afraid I don't understand what you mean by "Kconfig config file". It
can't really be just another .../Kconfig file somewhere in the tree, as
it doesn't really matter where an option like this would be defined.

Jan
Roger Pau Monné Oct. 30, 2023, 3:14 p.m. UTC | #7
On Mon, Oct 30, 2023 at 01:24:52PM +0100, Jan Beulich wrote:
> On 26.10.2023 17:19, Roger Pau Monné wrote:
> > On Thu, Oct 26, 2023 at 05:07:18PM +0200, Jan Beulich wrote:
> >> On 26.10.2023 15:24, Roger Pau Monné wrote:
> >>> On Thu, Oct 26, 2023 at 11:03:42AM +0200, Jan Beulich wrote:
> >>>> On 26.10.2023 10:34, Roger Pau Monné wrote:
> >>>>> On Thu, May 11, 2023 at 02:06:46PM +0200, Jan Beulich wrote:
> >>>>>> ... in order to also deny Dom0 access through the alias ports. Without
> >>>>>> this it is only giving the impression of denying access to both PICs.
> >>>>>> Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal
> >>>>>> operation later on.
> >>>>>>
> >>>>>> Like for CMOS/RTC a fundamental assumption of the probing is that reads
> >>>>>> from the probed alias port won't have side effects in case it does not
> >>>>>> alias the respective PIC's one.
> >>>>>
> >>>>> I'm slightly concerned about this probing.
> >>>>>
> >>>>> Also I'm unsure we can fully isolate the hardware domain like this.
> >>>>> Preventing access to the non-aliased ports is IMO helpful for domains
> >>>>> to realize the PIT is not available, but in any case such accesses
> >>>>> shouldn't happen in the first place, as dom0 must be modified to run
> >>>>> in such mode.
> >>>>
> >>>> That's true for PV Dom0, but not necessarily for PVH. Plus by denying
> >>>> access to the aliases we also guard against bugs in Dom0, if some
> >>>> component thinks there's something else at those ports (as they
> >>>> indeed were used for other purposes by various vendors).
> >>>
> >>> I think it would be safe to add a command line option to disable the
> >>> probing, as we would at least like to avoid it in pvshim mode.  Maybe
> >>> ut would be interesting to make it a Kconfig option so that exclusive
> >>> pvshim Kconfig can avoid all this?
> >>>
> >>> Otherwise it will just make booting the pvshim slower.
> >>
> >> I've taken note to introduce such an option (not sure yet whether just
> >> cmdline or also Kconfig). Still
> >> - Shouldn't we already be bypassing related init logic in shim mode?
> > 
> > Not sure what we bypass in pvshim mode, would be good to double
> > check.
> > 
> >> - A Kconfig option interfacing with PV_SHIM_EXCLUSIVE will collide with
> >>   my patch inverting that option's sense (and renaming it), so it would
> >>   be nice to have that sorted/accepted first (see
> >>   https://lists.xen.org/archives/html/xen-devel/2023-03/msg00040.html).
> > 
> > It being Andrew the one that made the request, I would like to get his
> > opinion on it.  UNCONSTRAINED does seem a bit weird.
> 
> I agree that the name is odd; I couldn't think of any better one (and
> this already is the result of 3 or 4 rounds of renaming). I'll be more
> than happy to consider other naming suggestions. The main issue with the
> present option is, just to re-state it here, that we have grown negative
> dependencies on it, which is a problem.
> 
> However, meanwhile I've realized that we don't really want to tie anything
> here to PV_SHIM_EXCLUSIVE, at least not directly. What we care about is
> whether we _actually_ run in shim mode, i.e. also when a full-fledged
> hypervisor is in use. My plan is now to have said new command line option,
> which - if not specified on the command line - I'd default to !pv_shim.

So that tests for aliases are run unless in pv shim mode.

> > Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
> > Kconfig option in the first place, and instead a specific Kconfig
> > config file?
> > 
> > Maybe it's not possible to achieve the same using just a Kconfig
> > config file.
> 
> I'm afraid I don't understand what you mean by "Kconfig config file". It
> can't really be just another .../Kconfig file somewhere in the tree, as
> it doesn't really matter where an option like this would be defined.

No, I was thinking of splitting what PV_SHIM_EXCLUSIVE actually
implies, for example by adding CONFIG_DOMCTL_HYPERCALLL or
CONFIG_PLATFORM_HYPERCALL and re-work the pvshim_defconfig config file
based on those, so that we don't end up with negative relations.

Note sure all usages of PV_SHIM_EXCLUSIVE can be split in such a way,
maybe we would need some compromise.

Thanks, Roger.
Jan Beulich Oct. 30, 2023, 3:19 p.m. UTC | #8
On 30.10.2023 16:14, Roger Pau Monné wrote:
> On Mon, Oct 30, 2023 at 01:24:52PM +0100, Jan Beulich wrote:
>> On 26.10.2023 17:19, Roger Pau Monné wrote:
>>> Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
>>> Kconfig option in the first place, and instead a specific Kconfig
>>> config file?
>>>
>>> Maybe it's not possible to achieve the same using just a Kconfig
>>> config file.
>>
>> I'm afraid I don't understand what you mean by "Kconfig config file". It
>> can't really be just another .../Kconfig file somewhere in the tree, as
>> it doesn't really matter where an option like this would be defined.
> 
> No, I was thinking of splitting what PV_SHIM_EXCLUSIVE actually
> implies, for example by adding CONFIG_DOMCTL_HYPERCALLL or
> CONFIG_PLATFORM_HYPERCALL and re-work the pvshim_defconfig config file
> based on those, so that we don't end up with negative relations.
> 
> Note sure all usages of PV_SHIM_EXCLUSIVE can be split in such a way,
> maybe we would need some compromise.

Wouldn't such a CONFIG_DOMCTL_HYPERCALL then still want to depend on
!PV_SHIM_EXCLUSIVE, which is the kind of dependency we want to avoid?
Aiui the two (splitting and inverting) are largely orthogonal aspects.

Jan
Roger Pau Monné Oct. 30, 2023, 3:23 p.m. UTC | #9
On Mon, Oct 30, 2023 at 04:19:22PM +0100, Jan Beulich wrote:
> On 30.10.2023 16:14, Roger Pau Monné wrote:
> > On Mon, Oct 30, 2023 at 01:24:52PM +0100, Jan Beulich wrote:
> >> On 26.10.2023 17:19, Roger Pau Monné wrote:
> >>> Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
> >>> Kconfig option in the first place, and instead a specific Kconfig
> >>> config file?
> >>>
> >>> Maybe it's not possible to achieve the same using just a Kconfig
> >>> config file.
> >>
> >> I'm afraid I don't understand what you mean by "Kconfig config file". It
> >> can't really be just another .../Kconfig file somewhere in the tree, as
> >> it doesn't really matter where an option like this would be defined.
> > 
> > No, I was thinking of splitting what PV_SHIM_EXCLUSIVE actually
> > implies, for example by adding CONFIG_DOMCTL_HYPERCALLL or
> > CONFIG_PLATFORM_HYPERCALL and re-work the pvshim_defconfig config file
> > based on those, so that we don't end up with negative relations.
> > 
> > Note sure all usages of PV_SHIM_EXCLUSIVE can be split in such a way,
> > maybe we would need some compromise.
> 
> Wouldn't such a CONFIG_DOMCTL_HYPERCALL then still want to depend on
> !PV_SHIM_EXCLUSIVE, which is the kind of dependency we want to avoid?
> Aiui the two (splitting and inverting) are largely orthogonal aspects.

No, CONFIG_DOMCTL_HYPERCALL could be promptless option enabled by
default and disabled by the pvshim_defconfig.

Thanks, Roger.
Jan Beulich Oct. 30, 2023, 3:35 p.m. UTC | #10
On 30.10.2023 16:23, Roger Pau Monné wrote:
> On Mon, Oct 30, 2023 at 04:19:22PM +0100, Jan Beulich wrote:
>> On 30.10.2023 16:14, Roger Pau Monné wrote:
>>> On Mon, Oct 30, 2023 at 01:24:52PM +0100, Jan Beulich wrote:
>>>> On 26.10.2023 17:19, Roger Pau Monné wrote:
>>>>> Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
>>>>> Kconfig option in the first place, and instead a specific Kconfig
>>>>> config file?
>>>>>
>>>>> Maybe it's not possible to achieve the same using just a Kconfig
>>>>> config file.
>>>>
>>>> I'm afraid I don't understand what you mean by "Kconfig config file". It
>>>> can't really be just another .../Kconfig file somewhere in the tree, as
>>>> it doesn't really matter where an option like this would be defined.
>>>
>>> No, I was thinking of splitting what PV_SHIM_EXCLUSIVE actually
>>> implies, for example by adding CONFIG_DOMCTL_HYPERCALLL or
>>> CONFIG_PLATFORM_HYPERCALL and re-work the pvshim_defconfig config file
>>> based on those, so that we don't end up with negative relations.
>>>
>>> Note sure all usages of PV_SHIM_EXCLUSIVE can be split in such a way,
>>> maybe we would need some compromise.
>>
>> Wouldn't such a CONFIG_DOMCTL_HYPERCALL then still want to depend on
>> !PV_SHIM_EXCLUSIVE, which is the kind of dependency we want to avoid?
>> Aiui the two (splitting and inverting) are largely orthogonal aspects.
> 
> No, CONFIG_DOMCTL_HYPERCALL could be promptless option enabled by
> default and disabled by the pvshim_defconfig.

pvshim_defconfig shouldn't play a role here. Anyone configuring a shim
build from scratch should also get a consistent set of settings. When
there's no prompt and default-enabling, I'm also having some difficulty
seeing how pvshim_defconfig could override that then.

Jan
Roger Pau Monné Oct. 30, 2023, 4:25 p.m. UTC | #11
On Mon, Oct 30, 2023 at 04:35:41PM +0100, Jan Beulich wrote:
> On 30.10.2023 16:23, Roger Pau Monné wrote:
> > On Mon, Oct 30, 2023 at 04:19:22PM +0100, Jan Beulich wrote:
> >> On 30.10.2023 16:14, Roger Pau Monné wrote:
> >>> On Mon, Oct 30, 2023 at 01:24:52PM +0100, Jan Beulich wrote:
> >>>> On 26.10.2023 17:19, Roger Pau Monné wrote:
> >>>>> Maybe the issue is that PV_SHIM_EXCLUSIVE shouldn't have been a
> >>>>> Kconfig option in the first place, and instead a specific Kconfig
> >>>>> config file?
> >>>>>
> >>>>> Maybe it's not possible to achieve the same using just a Kconfig
> >>>>> config file.
> >>>>
> >>>> I'm afraid I don't understand what you mean by "Kconfig config file". It
> >>>> can't really be just another .../Kconfig file somewhere in the tree, as
> >>>> it doesn't really matter where an option like this would be defined.
> >>>
> >>> No, I was thinking of splitting what PV_SHIM_EXCLUSIVE actually
> >>> implies, for example by adding CONFIG_DOMCTL_HYPERCALLL or
> >>> CONFIG_PLATFORM_HYPERCALL and re-work the pvshim_defconfig config file
> >>> based on those, so that we don't end up with negative relations.
> >>>
> >>> Note sure all usages of PV_SHIM_EXCLUSIVE can be split in such a way,
> >>> maybe we would need some compromise.
> >>
> >> Wouldn't such a CONFIG_DOMCTL_HYPERCALL then still want to depend on
> >> !PV_SHIM_EXCLUSIVE, which is the kind of dependency we want to avoid?
> >> Aiui the two (splitting and inverting) are largely orthogonal aspects.
> > 
> > No, CONFIG_DOMCTL_HYPERCALL could be promptless option enabled by
> > default and disabled by the pvshim_defconfig.
> 
> pvshim_defconfig shouldn't play a role here. Anyone configuring a shim
> build from scratch should also get a consistent set of settings.

Then we might have to expose those more fine grained options.

My sauggestion was to still allow enabling shim mode from by the user,
but that building a pv-shim exclusive image would require using the
pvshim_defconfig, to simply avoid exposing a bunch of fine grained
options that only make sense for pv-shim exclusive builds.

If that's not sensible, we might consider exposing those more fine
grained options.

Thanks, Roger.
diff mbox series

Patch

--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -479,7 +479,7 @@  static void __init process_dom0_ioports_
 int __init dom0_setup_permissions(struct domain *d)
 {
     unsigned long mfn;
-    unsigned int i;
+    unsigned int i, offs;
     int rc;
 
     if ( pv_shim )
@@ -492,10 +492,17 @@  int __init dom0_setup_permissions(struct
 
     /* Modify I/O port access permissions. */
 
-    /* Master Interrupt Controller (PIC). */
-    rc |= ioports_deny_access(d, 0x20, 0x21);
-    /* Slave Interrupt Controller (PIC). */
-    rc |= ioports_deny_access(d, 0xA0, 0xA1);
+    for ( offs = 0, i = pic_alias_mask & -pic_alias_mask ?: 2;
+          offs <= pic_alias_mask; offs += i )
+    {
+        if ( offs & ~pic_alias_mask )
+            continue;
+        /* Master Interrupt Controller (PIC). */
+        rc |= ioports_deny_access(d, 0x20 + offs, 0x21 + offs);
+        /* Slave Interrupt Controller (PIC). */
+        rc |= ioports_deny_access(d, 0xA0 + offs, 0xA1 + offs);
+    }
+
     /* Interval Timer (PIT). */
     rc |= ioports_deny_access(d, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -19,6 +19,7 @@ 
 #include <xen/delay.h>
 #include <asm/apic.h>
 #include <asm/asm_defns.h>
+#include <asm/setup.h>
 #include <io_ports.h>
 #include <irq_vectors.h>
 
@@ -332,6 +333,55 @@  void __init make_8259A_irq(unsigned int
     irq_to_desc(irq)->handler = &i8259A_irq_type;
 }
 
+unsigned int __initdata pic_alias_mask;
+
+static void __init probe_pic_alias(void)
+{
+    unsigned int mask = 0x1e;
+    uint8_t val = 0;
+
+    /*
+     * The only properly r/w register is OCW1.  While keeping the master
+     * fully masked (thus also masking anything coming through the slave),
+     * write all possible 256 values to the slave's base port, and check
+     * whether the same value can then be read back through any of the
+     * possible alias ports.  Probing just the slave of course builds on the
+     * assumption that aliasing is identical for master and slave.
+     */
+
+    outb(0xff, 0x21); /* Fully mask master. */
+
+    do {
+        unsigned int offs;
+
+        outb(val, 0xa1);
+
+        /* Try to make sure we're actually having a PIC here. */
+        if ( inb(0xa1) != val )
+        {
+            mask = 0;
+            break;
+        }
+
+        for ( offs = mask & -mask; offs <= mask; offs <<= 1 )
+        {
+            if ( !(mask & offs) )
+                continue;
+            if ( inb(0xa1 + offs) != val )
+                mask &= ~offs;
+        }
+    } while ( mask && (val += 0x0d) );  /* Arbitrary uneven number. */
+
+    outb(cached_A1, 0xa1); /* Restore slave IRQ mask. */
+    outb(cached_21, 0x21); /* Restore master IRQ mask. */
+
+    if ( mask )
+    {
+        dprintk(XENLOG_INFO, "PIC aliasing mask: %02x\n", mask);
+        pic_alias_mask = mask;
+    }
+}
+
 static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL};
 
 void __init init_IRQ(void)
@@ -342,6 +392,8 @@  void __init init_IRQ(void)
 
     init_8259A(0);
 
+    probe_pic_alias();
+
     for (irq = 0; platform_legacy_irq(irq); irq++) {
         struct irq_desc *desc = irq_to_desc(irq);
         
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -52,6 +52,8 @@  extern uint8_t kbd_shift_flags;
 extern unsigned long highmem_start;
 #endif
 
+extern unsigned int pic_alias_mask;
+
 extern int8_t opt_smt;
 
 #ifdef CONFIG_SHADOW_PAGING