diff mbox

[v3,08/11] pvh/acpi: Handle ACPI accesses for PVH guests

Message ID 1479762047-29431-9-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Boris Ostrovsky Nov. 21, 2016, 9 p.m. UTC
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
CC: Paul Durrant <paul.durrant@citrix.com>
---
Changes in v3:
* Introduce a mask for pm1a and gpe0 that lists bits that a
  guest can operate on.
* Lots of small changes.

 xen/arch/x86/hvm/ioreq.c         | 87 +++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/domain.h |  6 +++
 2 files changed, 92 insertions(+), 1 deletion(-)

Comments

Paul Durrant Nov. 22, 2016, 2:11 p.m. UTC | #1
> -----Original Message-----
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: 21 November 2016 21:01
> To: xen-devel@lists.xen.org
> Cc: jbeulich@suse.com; Andrew Cooper <Andrew.Cooper3@citrix.com>;
> Wei Liu <wei.liu2@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger
> Pau Monne <roger.pau@citrix.com>; Boris Ostrovsky
> <boris.ostrovsky@oracle.com>; Paul Durrant <Paul.Durrant@citrix.com>
> Subject: [PATCH v3 08/11] pvh/acpi: Handle ACPI accesses for PVH guests
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> CC: Paul Durrant <paul.durrant@citrix.com>
> ---
> Changes in v3:
> * Introduce a mask for pm1a and gpe0 that lists bits that a
>   guest can operate on.
> * Lots of small changes.
> 
>  xen/arch/x86/hvm/ioreq.c         | 87
> +++++++++++++++++++++++++++++++++++++++-
>  xen/include/asm-x86/hvm/domain.h |  6 +++
>  2 files changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
> index 51bb399..4ab0d0a 100644
> --- a/xen/arch/x86/hvm/ioreq.c
> +++ b/xen/arch/x86/hvm/ioreq.c
> @@ -16,6 +16,7 @@
>   * this program; If not, see <http://www.gnu.org/licenses/>.
>   */
> 
> +#include <xen/acpi.h>
>  #include <xen/config.h>
>  #include <xen/ctype.h>
>  #include <xen/init.h>
> @@ -1383,7 +1384,91 @@ static int hvm_access_cf8(
>  static int acpi_ioaccess(
>      int dir, unsigned int port, unsigned int bytes, uint32_t *val)
>  {
> -    return X86EMUL_UNHANDLEABLE;
> +    uint8_t *reg = NULL;
> +    const uint8_t *mask = NULL;
> +    bool is_cpu_map = false;
> +    struct domain *currd = current->domain;
> +    const static uint8_t pm1a_mask[4] =
> {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0,
> +                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0};
> +    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
> +                                         1U << XEN_GPE0_CPUHP_BIT, 0};
> +
> +    BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) ||
> +                 (ACPI_GPE0_BLK_LEN_V1 != 4));
> +
> +    ASSERT(!has_acpi_ff(currd));
> +
> +    switch ( port )
> +    {
> +    case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ...
> +         ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1:
> +        reg = currd->arch.hvm_domain.acpi_io.pm1a;
> +        mask = pm1a_mask;
> +        break;
> +
> +    case ACPI_GPE0_BLK_ADDRESS_V1 ...
> +         ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1:
> +        reg = currd->arch.hvm_domain.acpi_io.gpe;
> +        mask = gpe0_mask;
> +        break;
> +
> +    case XEN_ACPI_CPU_MAP ...
> +         XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1:
> +        is_cpu_map = true;
> +        break;
> +
> +    default:
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    if ( bytes == 0 )
> +        return X86EMUL_OKAY;
> +
> +    if ( dir == IOREQ_READ )
> +    {
> +        if ( is_cpu_map )
> +        {
> +            unsigned int first_byte = port - XEN_ACPI_CPU_MAP;
> +
> +            /*
> +             * Clear bits that we are about to read to in case we
> +             * copy fewer than @bytes.
> +             */
> +            *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff;
> +
> +            if ( ((currd->max_vcpus + 7) / 8) > first_byte )
> +            {
> +                memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte,
> +                       min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte));
> +            }
> +        }
> +        else
> +            memcpy(val, &reg[port & 3], bytes);
> +    }
> +    else
> +    {
> +        unsigned int idx = port & 3;
> +        unsigned int i;
> +        uint8_t *ptr;
> +
> +        if ( is_cpu_map )
> +            /*
> +             * CPU map is only read by DSDT's PRSC method and should never
> +             * be written by a guest.
> +             */
> +            return X86EMUL_UNHANDLEABLE;
> +
> +        ptr = (uint8_t *)val;
> +        for ( i = 0; i < bytes; i++, idx++ )
> +        {
> +            if ( idx < 2 ) /* status, write 1 to clear. */
> +                reg[idx] &= ~(mask[i] & ptr[i]);
> +            else           /* enable */
> +                reg[idx] |= (mask[i] & ptr[i]);
> +        }
> +    }
> +
> +    return X86EMUL_OKAY;
>  }
> 
>  void hvm_ioreq_init(struct domain *d)
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-
> x86/hvm/domain.h
> index f34d784..f492a2b 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -87,6 +87,12 @@ struct hvm_domain {
>      } ioreq_server;
>      struct hvm_ioreq_server *default_ioreq_server;
> 
> +    /* PVH guests */
> +    struct {
> +        uint8_t pm1a[ACPI_PM1A_EVT_BLK_LEN];
> +        uint8_t gpe[ACPI_GPE0_BLK_LEN_V1];
> +    } acpi_io;
> +
>      /* Cached CF8 for guest PCI config cycles */
>      uint32_t                pci_cf8;
> 
> --
> 2.7.4
Jan Beulich Nov. 22, 2016, 3:01 p.m. UTC | #2
>>> On 21.11.16 at 22:00, <boris.ostrovsky@oracle.com> wrote:
> --- a/xen/arch/x86/hvm/ioreq.c
> +++ b/xen/arch/x86/hvm/ioreq.c
> @@ -16,6 +16,7 @@
>   * this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <xen/acpi.h>
>  #include <xen/config.h>
>  #include <xen/ctype.h>
>  #include <xen/init.h>

Please take the opportunity and remove the pointless xen/config.h
inclusion at once.

> @@ -1383,7 +1384,91 @@ static int hvm_access_cf8(
>  static int acpi_ioaccess(
>      int dir, unsigned int port, unsigned int bytes, uint32_t *val)
>  {
> -    return X86EMUL_UNHANDLEABLE;
> +    uint8_t *reg = NULL;
> +    const uint8_t *mask = NULL;
> +    bool is_cpu_map = false;
> +    struct domain *currd = current->domain;

const?

> +    const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0,
> +                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0};
> +    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
> +                                         1U << XEN_GPE0_CPUHP_BIT, 0};

Hmm, funny, in someone else's patch I've recently seen the same.
Can we please stick to the more standard "storage type first"
ordering of declaration elements. After all const modifies the type,
and hence better stays together with it.

And then I'd like to have an explanation (in the commit message)
about the choice of the values for pm1a_mask. Plus you using
uint8_t here is at least odd, considering that this is about registers
consisting of two 16-bit halves. I'm not even certain the spec
permits these to be accessed with other than the specified
granularity.

Or wait - the literal 4-s here look bad too. Perhaps the two should
be combined into a variable of type
typeof(currd->arch.hvm_domain.acpi_io), so values and masks
really match up. Which would still seem to make it desirable for the
parts to be of type uint16_t, if permitted by the spec.

> +    BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) ||
> +                 (ACPI_GPE0_BLK_LEN_V1 != 4));

Please split these into two, so that one of them triggering uniquely
identifies the offender. There's no code being generated for them,
so it doesn't matter how many there are. Perhaps it might even be
worth moving each into its respective case block below.

> +    ASSERT(!has_acpi_ff(currd));
> +
> +    switch ( port )
> +    {
> +    case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ...
> +         ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1:
> +        reg = currd->arch.hvm_domain.acpi_io.pm1a;
> +        mask = pm1a_mask;
> +        break;
> +
> +    case ACPI_GPE0_BLK_ADDRESS_V1 ...
> +         ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1:
> +        reg = currd->arch.hvm_domain.acpi_io.gpe;
> +        mask = gpe0_mask;
> +        break;
> +
> +    case XEN_ACPI_CPU_MAP ...
> +         XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1:
> +        is_cpu_map = true;

In order to make more obvious in the code below that reg and mask
can't be NULL, wouldn't it make sense to ditch this variable and
instead use checks of reg against NULL in the code further down?

> +        break;
> +
> +    default:
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    if ( bytes == 0 )
> +        return X86EMUL_OKAY;

Did you find a check like this in any other I/O port handler? It doesn't
seem to make sense to me.

> +    if ( dir == IOREQ_READ )
> +    {
> +        if ( is_cpu_map )
> +        {
> +            unsigned int first_byte = port - XEN_ACPI_CPU_MAP;
> +
> +            /*
> +             * Clear bits that we are about to read to in case we
> +             * copy fewer than @bytes.
> +             */
> +            *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff;

*val being of type uint32_t I understand neither the ULL suffix nor
the and-ing. How about

            if ( bytes < 4 )
                *val &= ~0U << (bytes * 8);

?

> +            if ( ((currd->max_vcpus + 7) / 8) > first_byte )
> +            {
> +                memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte,
> +                       min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte));
> +            }

Stray braces.

> +        }
> +        else
> +            memcpy(val, &reg[port & 3], bytes);
> +    }
> +    else
> +    {
> +        unsigned int idx = port & 3;
> +        unsigned int i;
> +        uint8_t *ptr;

const

> +        if ( is_cpu_map )
> +            /*
> +             * CPU map is only read by DSDT's PRSC method and should never
> +             * be written by a guest.
> +             */
> +            return X86EMUL_UNHANDLEABLE;
> +
> +        ptr = (uint8_t *)val;
> +        for ( i = 0; i < bytes; i++, idx++ )
> +        {
> +            if ( idx < 2 ) /* status, write 1 to clear. */
> +                reg[idx] &= ~(mask[i] & ptr[i]);
> +            else           /* enable */
> +                reg[idx] |= (mask[i] & ptr[i]);

Don't you mean mask[idx] in both cases?

Jan
Boris Ostrovsky Nov. 22, 2016, 3:30 p.m. UTC | #3
On 11/22/2016 10:01 AM, Jan Beulich wrote:

>
>> +    const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0,
>> +                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0};
>> +    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
>> +                                         1U << XEN_GPE0_CPUHP_BIT, 0};
>
> Hmm, funny, in someone else's patch I've recently seen the same.
> Can we please stick to the more standard "storage type first"
> ordering of declaration elements. After all const modifies the type,
> and hence better stays together with it.
>
> And then I'd like to have an explanation (in the commit message)
> about the choice of the values for pm1a_mask.

Sure (Lock status/enable is required)


> Plus you using
> uint8_t here is at least odd, considering that this is about registers
> consisting of two 16-bit halves. I'm not even certain the spec
> permits these to be accessed with other than the specified
> granularity.


GPE registers can be 1-byte long. And, in fact, that's how ACPICA 
accesses it.

PM1 is indeed 2-byte long. I can make a check in the switch statement 
but I think I should leave the IOREQ_WRITE handling (at the bottom of 
this message) as it is for simplicity.


>
> Or wait - the literal 4-s here look bad too. Perhaps the two should
> be combined into a variable of type
> typeof(currd->arch.hvm_domain.acpi_io), so values and masks
> really match up. Which would still seem to make it desirable for the
> parts to be of type uint16_t, if permitted by the spec.

But I then assign these masks to uint8_t mask. Wouldn't it be better to 
explicitly keep those as byte-size values? Especially given how they are 
used in IOREQ_WRITE case (below).


>> +    else
>> +    {
>> +        unsigned int idx = port & 3;
>> +        unsigned int i;
>> +        uint8_t *ptr;
>
> const
>
>> +        if ( is_cpu_map )
>> +            /*
>> +             * CPU map is only read by DSDT's PRSC method and should never
>> +             * be written by a guest.
>> +             */
>> +            return X86EMUL_UNHANDLEABLE;
>> +
>> +        ptr = (uint8_t *)val;
>> +        for ( i = 0; i < bytes; i++, idx++ )
>> +        {
>> +            if ( idx < 2 ) /* status, write 1 to clear. */
>> +                reg[idx] &= ~(mask[i] & ptr[i]);
>> +            else           /* enable */
>> +                reg[idx] |= (mask[i] & ptr[i]);
>
> Don't you mean mask[idx] in both cases?

Oh, right, of course.

-boris
Jan Beulich Nov. 22, 2016, 4:05 p.m. UTC | #4
>>> On 22.11.16 at 16:30, <boris.ostrovsky@oracle.com> wrote:
> On 11/22/2016 10:01 AM, Jan Beulich wrote:
>>
>>> +    const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 
> 0,
>>> +                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 
> 0};
>>> +    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
>>> +                                         1U << XEN_GPE0_CPUHP_BIT, 0};
>>
>> Hmm, funny, in someone else's patch I've recently seen the same.
>> Can we please stick to the more standard "storage type first"
>> ordering of declaration elements. After all const modifies the type,
>> and hence better stays together with it.
>>
>> And then I'd like to have an explanation (in the commit message)
>> about the choice of the values for pm1a_mask.
> 
> Sure (Lock status/enable is required)

And nothing else is? And there's no other implementation
required for the lock bit?

>> Plus you using
>> uint8_t here is at least odd, considering that this is about registers
>> consisting of two 16-bit halves. I'm not even certain the spec
>> permits these to be accessed with other than the specified
>> granularity.
> 
> 
> GPE registers can be 1-byte long. And, in fact, that's how ACPICA 
> accesses it.
> 
> PM1 is indeed 2-byte long. I can make a check in the switch statement 
> but I think I should leave the IOREQ_WRITE handling (at the bottom of 
> this message) as it is for simplicity.
> 
> 
>> Or wait - the literal 4-s here look bad too. Perhaps the two should
>> be combined into a variable of type
>> typeof(currd->arch.hvm_domain.acpi_io), so values and masks
>> really match up. Which would still seem to make it desirable for the
>> parts to be of type uint16_t, if permitted by the spec.
> 
> But I then assign these masks to uint8_t mask. Wouldn't it be better to 
> explicitly keep those as byte-size values? Especially given how they are 
> used in IOREQ_WRITE case (below).

Well, maybe, namely considering that the GPE and PM1a parts
would otherwise end up different, further complicating the code.

Jan
Boris Ostrovsky Nov. 22, 2016, 4:33 p.m. UTC | #5
On 11/22/2016 11:05 AM, Jan Beulich wrote:
>>>> On 22.11.16 at 16:30, <boris.ostrovsky@oracle.com> wrote:
>> On 11/22/2016 10:01 AM, Jan Beulich wrote:
>>>
>>>> +    const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS,
>> 0,
>>>> +                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE,
>> 0};
>>>> +    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
>>>> +                                         1U << XEN_GPE0_CPUHP_BIT, 0};
>>>
>>> Hmm, funny, in someone else's patch I've recently seen the same.
>>> Can we please stick to the more standard "storage type first"
>>> ordering of declaration elements. After all const modifies the type,
>>> and hence better stays together with it.
>>>
>>> And then I'd like to have an explanation (in the commit message)
>>> about the choice of the values for pm1a_mask.
>>
>> Sure (Lock status/enable is required)
>
> And nothing else is? And there's no other implementation
> required for the lock bit?

The other part is the global lock itself, which is part of the FACS that 
we allocate in build.c

-boris
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index 51bb399..4ab0d0a 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -16,6 +16,7 @@ 
  * this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/acpi.h>
 #include <xen/config.h>
 #include <xen/ctype.h>
 #include <xen/init.h>
@@ -1383,7 +1384,91 @@  static int hvm_access_cf8(
 static int acpi_ioaccess(
     int dir, unsigned int port, unsigned int bytes, uint32_t *val)
 {
-    return X86EMUL_UNHANDLEABLE;
+    uint8_t *reg = NULL;
+    const uint8_t *mask = NULL;
+    bool is_cpu_map = false;
+    struct domain *currd = current->domain;
+    const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0,
+                                         ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0};
+    const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0,
+                                         1U << XEN_GPE0_CPUHP_BIT, 0};
+
+    BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) ||
+                 (ACPI_GPE0_BLK_LEN_V1 != 4));
+
+    ASSERT(!has_acpi_ff(currd));
+
+    switch ( port )
+    {
+    case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ...
+         ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1:
+        reg = currd->arch.hvm_domain.acpi_io.pm1a;
+        mask = pm1a_mask;
+        break;
+
+    case ACPI_GPE0_BLK_ADDRESS_V1 ...
+         ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1:
+        reg = currd->arch.hvm_domain.acpi_io.gpe;
+        mask = gpe0_mask;
+        break;
+
+    case XEN_ACPI_CPU_MAP ...
+         XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1:
+        is_cpu_map = true;
+        break;
+
+    default:
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( bytes == 0 )
+        return X86EMUL_OKAY;
+
+    if ( dir == IOREQ_READ )
+    {
+        if ( is_cpu_map )
+        {
+            unsigned int first_byte = port - XEN_ACPI_CPU_MAP;
+
+            /*
+             * Clear bits that we are about to read into in case we
+             * copy fewer than @bytes.
+             */
+            *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff;
+
+            if ( ((currd->max_vcpus + 7) / 8) > first_byte )
+            {
+                memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte,
+                       min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte));
+            }
+        }
+        else
+            memcpy(val, &reg[port & 3], bytes);
+    }
+    else
+    {
+        unsigned int idx = port & 3;
+        unsigned int i;
+        uint8_t *ptr;
+
+        if ( is_cpu_map )
+            /*
+             * CPU map is only read by DSDT's PRSC method and should never
+             * be written by a guest.
+             */
+            return X86EMUL_UNHANDLEABLE;
+
+        ptr = (uint8_t *)val;
+        for ( i = 0; i < bytes; i++, idx++ )
+        {
+            if ( idx < 2 ) /* status, write 1 to clear. */
+                reg[idx] &= ~(mask[i] & ptr[i]);
+            else           /* enable */
+                reg[idx] |= (mask[i] & ptr[i]);
+        }
+    }
+
+    return X86EMUL_OKAY;
 }
 
 void hvm_ioreq_init(struct domain *d)
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index f34d784..f492a2b 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -87,6 +87,12 @@  struct hvm_domain {
     } ioreq_server;
     struct hvm_ioreq_server *default_ioreq_server;
 
+    /* PVH guests */
+    struct {
+        uint8_t pm1a[ACPI_PM1A_EVT_BLK_LEN];
+        uint8_t gpe[ACPI_GPE0_BLK_LEN_V1];
+    } acpi_io;
+
     /* Cached CF8 for guest PCI config cycles */
     uint32_t                pci_cf8;