Message ID | 1479762047-29431-9-git-send-email-boris.ostrovsky@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> -----Original Message----- > From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com] > Sent: 21 November 2016 21:01 > To: xen-devel@lists.xen.org > Cc: jbeulich@suse.com; Andrew Cooper <Andrew.Cooper3@citrix.com>; > Wei Liu <wei.liu2@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger > Pau Monne <roger.pau@citrix.com>; Boris Ostrovsky > <boris.ostrovsky@oracle.com>; Paul Durrant <Paul.Durrant@citrix.com> > Subject: [PATCH v3 08/11] pvh/acpi: Handle ACPI accesses for PVH guests > > Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Reviewed-by: Paul Durrant <paul.durrant@citrix.com> > --- > CC: Paul Durrant <paul.durrant@citrix.com> > --- > Changes in v3: > * Introduce a mask for pm1a and gpe0 that lists bits that a > guest can operate on. > * Lots of small changes. > > xen/arch/x86/hvm/ioreq.c | 87 > +++++++++++++++++++++++++++++++++++++++- > xen/include/asm-x86/hvm/domain.h | 6 +++ > 2 files changed, 92 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c > index 51bb399..4ab0d0a 100644 > --- a/xen/arch/x86/hvm/ioreq.c > +++ b/xen/arch/x86/hvm/ioreq.c > @@ -16,6 +16,7 @@ > * this program; If not, see <http://www.gnu.org/licenses/>. 
> */ > > +#include <xen/acpi.h> > #include <xen/config.h> > #include <xen/ctype.h> > #include <xen/init.h> > @@ -1383,7 +1384,91 @@ static int hvm_access_cf8( > static int acpi_ioaccess( > int dir, unsigned int port, unsigned int bytes, uint32_t *val) > { > - return X86EMUL_UNHANDLEABLE; > + uint8_t *reg = NULL; > + const uint8_t *mask = NULL; > + bool is_cpu_map = false; > + struct domain *currd = current->domain; > + const static uint8_t pm1a_mask[4] = > {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0, > + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0}; > + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, > + 1U << XEN_GPE0_CPUHP_BIT, 0}; > + > + BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) || > + (ACPI_GPE0_BLK_LEN_V1 != 4)); > + > + ASSERT(!has_acpi_ff(currd)); > + > + switch ( port ) > + { > + case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ... > + ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1: > + reg = currd->arch.hvm_domain.acpi_io.pm1a; > + mask = pm1a_mask; > + break; > + > + case ACPI_GPE0_BLK_ADDRESS_V1 ... > + ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1: > + reg = currd->arch.hvm_domain.acpi_io.gpe; > + mask = gpe0_mask; > + break; > + > + case XEN_ACPI_CPU_MAP ... > + XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1: > + is_cpu_map = true; > + break; > + > + default: > + return X86EMUL_UNHANDLEABLE; > + } > + > + if ( bytes == 0 ) > + return X86EMUL_OKAY; > + > + if ( dir == IOREQ_READ ) > + { > + if ( is_cpu_map ) > + { > + unsigned int first_byte = port - XEN_ACPI_CPU_MAP; > + > + /* > + * Clear bits that we are about to read to in case we > + * copy fewer than @bytes. 
> + */ > + *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff; > + > + if ( ((currd->max_vcpus + 7) / 8) > first_byte ) > + { > + memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte, > + min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte)); > + } > + } > + else > + memcpy(val, &reg[port & 3], bytes); > + } > + else > + { > + unsigned int idx = port & 3; > + unsigned int i; > + uint8_t *ptr; > + > + if ( is_cpu_map ) > + /* > + * CPU map is only read by DSDT's PRSC method and should never > + * be written by a guest. > + */ > + return X86EMUL_UNHANDLEABLE; > + > + ptr = (uint8_t *)val; > + for ( i = 0; i < bytes; i++, idx++ ) > + { > + if ( idx < 2 ) /* status, write 1 to clear. */ > + reg[idx] &= ~(mask[i] & ptr[i]); > + else /* enable */ > + reg[idx] |= (mask[i] & ptr[i]); > + } > + } > + > + return X86EMUL_OKAY; > } > > void hvm_ioreq_init(struct domain *d) > diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm- > x86/hvm/domain.h > index f34d784..f492a2b 100644 > --- a/xen/include/asm-x86/hvm/domain.h > +++ b/xen/include/asm-x86/hvm/domain.h > @@ -87,6 +87,12 @@ struct hvm_domain { > } ioreq_server; > struct hvm_ioreq_server *default_ioreq_server; > > + /* PVH guests */ > + struct { > + uint8_t pm1a[ACPI_PM1A_EVT_BLK_LEN]; > + uint8_t gpe[ACPI_GPE0_BLK_LEN_V1]; > + } acpi_io; > + > /* Cached CF8 for guest PCI config cycles */ > uint32_t pci_cf8; > > -- > 2.7.4
>>> On 21.11.16 at 22:00, <boris.ostrovsky@oracle.com> wrote: > --- a/xen/arch/x86/hvm/ioreq.c > +++ b/xen/arch/x86/hvm/ioreq.c > @@ -16,6 +16,7 @@ > * this program; If not, see <http://www.gnu.org/licenses/>. > */ > > +#include <xen/acpi.h> > #include <xen/config.h> > #include <xen/ctype.h> > #include <xen/init.h> Please take the opportunity and remove the pointless xen/config.h inclusion at once. > @@ -1383,7 +1384,91 @@ static int hvm_access_cf8( > static int acpi_ioaccess( > int dir, unsigned int port, unsigned int bytes, uint32_t *val) > { > - return X86EMUL_UNHANDLEABLE; > + uint8_t *reg = NULL; > + const uint8_t *mask = NULL; > + bool is_cpu_map = false; > + struct domain *currd = current->domain; const? > + const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0, > + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0}; > + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, > + 1U << XEN_GPE0_CPUHP_BIT, 0}; Hmm, funny, in someone else's patch I've recently seen the same. Can we please stick to the more standard "storage type first" ordering of declaration elements. After all const modifies the type, and hence better stays together with it. And then I'd like to have an explanation (in the commit message) about the choice of the values for pm1a_mask. Plus you using uint8_t here is at least odd, considering that this is about registers consisting of two 16-bit halves. I'm not even certain the spec permits these to be accessed with other than the specified granularity. Or wait - the literal 4-s here look bad too. Perhaps the two should be combined into a variable of type typeof(currd->arch.hvm_domain.acpi_io), so values and masks really match up. Which would still seem to make it desirable for the parts to be of type uint16_t, if permitted by the spec. > + BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) || > + (ACPI_GPE0_BLK_LEN_V1 != 4)); Please split these into two, so that one of them triggering uniquely identifies the offender. 
There's no code being generated for them, so it doesn't matter how many there are. Perhaps it might even be worth moving each into its respective case block below. > + ASSERT(!has_acpi_ff(currd)); > + > + switch ( port ) > + { > + case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ... > + ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1: > + reg = currd->arch.hvm_domain.acpi_io.pm1a; > + mask = pm1a_mask; > + break; > + > + case ACPI_GPE0_BLK_ADDRESS_V1 ... > + ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1: > + reg = currd->arch.hvm_domain.acpi_io.gpe; > + mask = gpe0_mask; > + break; > + > + case XEN_ACPI_CPU_MAP ... > + XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1: > + is_cpu_map = true; In order to make more obvious in the code below that reg and mask can't be NULL, wouldn't it make sense to ditch this variable and instead use checks of reg against NULL in the code further down? > + break; > + > + default: > + return X86EMUL_UNHANDLEABLE; > + } > + > + if ( bytes == 0 ) > + return X86EMUL_OKAY; Did you find a check like this in any other I/O port handler? It doesn't seem to make sense to me. > + if ( dir == IOREQ_READ ) > + { > + if ( is_cpu_map ) > + { > + unsigned int first_byte = port - XEN_ACPI_CPU_MAP; > + > + /* > + * Clear bits that we are about to read to in case we > + * copy fewer than @bytes. > + */ > + *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff; *val being of type uint32_t I understand neither the ULL suffix nor the and-ing. How about if ( bytes < 4 ) *val &= ~0U << (bytes * 8); ? > + if ( ((currd->max_vcpus + 7) / 8) > first_byte ) > + { > + memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte, > + min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte)); > + } Stray braces. > + } > + else > + memcpy(val, &reg[port & 3], bytes); > + } > + else > + { > + unsigned int idx = port & 3; > + unsigned int i; > + uint8_t *ptr; const > + if ( is_cpu_map ) > + /* > + * CPU map is only read by DSDT's PRSC method and should never > + * be written by a guest. 
> + */ > + return X86EMUL_UNHANDLEABLE; > + > + ptr = (uint8_t *)val; > + for ( i = 0; i < bytes; i++, idx++ ) > + { > + if ( idx < 2 ) /* status, write 1 to clear. */ > + reg[idx] &= ~(mask[i] & ptr[i]); > + else /* enable */ > + reg[idx] |= (mask[i] & ptr[i]); Don't you mean mask[idx] in both cases? Jan
On 11/22/2016 10:01 AM, Jan Beulich wrote: > >> + const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0, >> + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0}; >> + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, >> + 1U << XEN_GPE0_CPUHP_BIT, 0}; > > Hmm, funny, in someone else's patch I've recently seen the same. > Can we please stick to the more standard "storage type first" > ordering of declaration elements. After all const modifies the type, > and hence better stays together with it. > > And then I'd like to have an explanation (in the commit message) > about the choice of the values for pm1a_mask. Sure (Lock status/enable is required) > Plus you using > uint8_t here is at least odd, considering that this is about registers > consisting of two 16-bit halves. I'm not even certain the spec > permits these to be accessed with other than the specified > granularity. GPE registers can be 1-byte long. And, in fact, that's how ACPICA accesses it. PM1 is indeed 2-byte long. I can make a check in the switch statement but I think I should leave the IOREQ_WRITE handling (at the bottom of this message) as it is for simplicity. > > Or wait - the literal 4-s here look bad too. Perhaps the two should > be combined into a variable of type > typeof(currd->arch.hvm_domain.acpi_io), so values and masks > really match up. Which would still seem to make it desirable for the > parts to be of type uint16_t, if permitted by the spec. But I then assign these masks to uint8_t mask. Wouldn't it be better to explicitly keep those as byte-size values? Especially given how they are used in IOREQ_WRITE case (below). >> + else >> + { >> + unsigned int idx = port & 3; >> + unsigned int i; >> + uint8_t *ptr; > > const > >> + if ( is_cpu_map ) >> + /* >> + * CPU map is only read by DSDT's PRSC method and should never >> + * be written by a guest. 
>> + */ >> + return X86EMUL_UNHANDLEABLE; >> + >> + ptr = (uint8_t *)val; >> + for ( i = 0; i < bytes; i++, idx++ ) >> + { >> + if ( idx < 2 ) /* status, write 1 to clear. */ >> + reg[idx] &= ~(mask[i] & ptr[i]); >> + else /* enable */ >> + reg[idx] |= (mask[i] & ptr[i]); > > Don't you mean mask[idx] in both cases? Oh, right, of course. -boris
>>> On 22.11.16 at 16:30, <boris.ostrovsky@oracle.com> wrote: > On 11/22/2016 10:01 AM, Jan Beulich wrote: >> >>> + const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, > 0, >>> + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, > 0}; >>> + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, >>> + 1U << XEN_GPE0_CPUHP_BIT, 0}; >> >> Hmm, funny, in someone else's patch I've recently seen the same. >> Can we please stick to the more standard "storage type first" >> ordering of declaration elements. After all const modifies the type, >> and hence better stays together with it. >> >> And then I'd like to have an explanation (in the commit message) >> about the choice of the values for pm1a_mask. > > Sure (Lock status/enable is required) And nothing else is? And there's no other implementation required for the lock bit? >> Plus you using >> uint8_t here is at least odd, considering that this is about registers >> consisting of two 16-bit halves. I'm not even certain the spec >> permits these to be accessed with other than the specified >> granularity. > > > GPE registers can be 1-byte long. And, in fact, that's how ACPICA > accesses it. > > PM1 is indeed 2-byte long. I can make a check in the switch statement > but I think I should leave the IOREQ_WRITE handling (at the bottom of > this message) as it is for simplicity. > > >> Or wait - the literal 4-s here look bad too. Perhaps the two should >> be combined into a variable of type >> typeof(currd->arch.hvm_domain.acpi_io), so values and masks >> really match up. Which would still seem to make it desirable for the >> parts to be of type uint16_t, if permitted by the spec. > > But I then assign these masks to uint8_t mask. Wouldn't it be better to > explicitly keep those as byte-size values? Especially given how they are > used in IOREQ_WRITE case (below). Well, maybe, namely considering that the GPE and PM1a parts would otherwise end up different, further complicating the code. Jan
On 11/22/2016 11:05 AM, Jan Beulich wrote: >>>> On 22.11.16 at 16:30, <boris.ostrovsky@oracle.com> wrote: >> On 11/22/2016 10:01 AM, Jan Beulich wrote: >>> >>>> + const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, >> 0, >>>> + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, >> 0}; >>>> + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, >>>> + 1U << XEN_GPE0_CPUHP_BIT, 0}; >>> >>> Hmm, funny, in someone else's patch I've recently seen the same. >>> Can we please stick to the more standard "storage type first" >>> ordering of declaration elements. After all const modifies the type, >>> and hence better stays together with it. >>> >>> And then I'd like to have an explanation (in the commit message) >>> about the choice of the values for pm1a_mask. >> >> Sure (Lock status/enable is required) > > And nothing else is? And there's no other implementation > required for the lock bit? The other part is the global lock itself, which is part of the FACS that we allocate in build.c -boris
diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c index 51bb399..4ab0d0a 100644 --- a/xen/arch/x86/hvm/ioreq.c +++ b/xen/arch/x86/hvm/ioreq.c @@ -16,6 +16,7 @@ * this program; If not, see <http://www.gnu.org/licenses/>. */ +#include <xen/acpi.h> #include <xen/config.h> #include <xen/ctype.h> #include <xen/init.h> @@ -1383,7 +1384,91 @@ static int hvm_access_cf8( static int acpi_ioaccess( int dir, unsigned int port, unsigned int bytes, uint32_t *val) { - return X86EMUL_UNHANDLEABLE; + uint8_t *reg = NULL; + const uint8_t *mask = NULL; + bool is_cpu_map = false; + struct domain *currd = current->domain; + const static uint8_t pm1a_mask[4] = {ACPI_BITMASK_GLOBAL_LOCK_STATUS, 0, + ACPI_BITMASK_GLOBAL_LOCK_ENABLE, 0}; + const static uint8_t gpe0_mask[4] = {1U << XEN_GPE0_CPUHP_BIT, 0, + 1U << XEN_GPE0_CPUHP_BIT, 0}; + + BUILD_BUG_ON((ACPI_PM1A_EVT_BLK_LEN != 4) || + (ACPI_GPE0_BLK_LEN_V1 != 4)); + + ASSERT(!has_acpi_ff(currd)); + + switch ( port ) + { + case ACPI_PM1A_EVT_BLK_ADDRESS_V1 ... + ACPI_PM1A_EVT_BLK_ADDRESS_V1 + ACPI_PM1A_EVT_BLK_LEN - 1: + reg = currd->arch.hvm_domain.acpi_io.pm1a; + mask = pm1a_mask; + break; + + case ACPI_GPE0_BLK_ADDRESS_V1 ... + ACPI_GPE0_BLK_ADDRESS_V1 + ACPI_GPE0_BLK_LEN_V1 - 1: + reg = currd->arch.hvm_domain.acpi_io.gpe; + mask = gpe0_mask; + break; + + case XEN_ACPI_CPU_MAP ... + XEN_ACPI_CPU_MAP + XEN_ACPI_CPU_MAP_LEN - 1: + is_cpu_map = true; + break; + + default: + return X86EMUL_UNHANDLEABLE; + } + + if ( bytes == 0 ) + return X86EMUL_OKAY; + + if ( dir == IOREQ_READ ) + { + if ( is_cpu_map ) + { + unsigned int first_byte = port - XEN_ACPI_CPU_MAP; + + /* + * Clear bits that we are about to read to in case we + * copy fewer than @bytes. 
+ */ + *val &= (~((1ULL << (bytes * 8)) - 1)) & 0xffffffff; + + if ( ((currd->max_vcpus + 7) / 8) > first_byte ) + { + memcpy(val, (uint8_t *)currd->avail_vcpus + first_byte, + min(bytes, ((currd->max_vcpus + 7) / 8) - first_byte)); + } + } + else + memcpy(val, &reg[port & 3], bytes); + } + else + { + unsigned int idx = port & 3; + unsigned int i; + uint8_t *ptr; + + if ( is_cpu_map ) + /* + * CPU map is only read by DSDT's PRSC method and should never + * be written by a guest. + */ + return X86EMUL_UNHANDLEABLE; + + ptr = (uint8_t *)val; + for ( i = 0; i < bytes; i++, idx++ ) + { + if ( idx < 2 ) /* status, write 1 to clear. */ + reg[idx] &= ~(mask[i] & ptr[i]); + else /* enable */ + reg[idx] |= (mask[i] & ptr[i]); + } + } + + return X86EMUL_OKAY; } void hvm_ioreq_init(struct domain *d) diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h index f34d784..f492a2b 100644 --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -87,6 +87,12 @@ struct hvm_domain { } ioreq_server; struct hvm_ioreq_server *default_ioreq_server; + /* PVH guests */ + struct { + uint8_t pm1a[ACPI_PM1A_EVT_BLK_LEN]; + uint8_t gpe[ACPI_GPE0_BLK_LEN_V1]; + } acpi_io; + /* Cached CF8 for guest PCI config cycles */ uint32_t pci_cf8;
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> --- CC: Paul Durrant <paul.durrant@citrix.com> --- Changes in v3: * Introduce a mask for pm1a and gpe0 that lists bits that a guest can operate on. * Lots of small changes. xen/arch/x86/hvm/ioreq.c | 87 +++++++++++++++++++++++++++++++++++++++- xen/include/asm-x86/hvm/domain.h | 6 +++ 2 files changed, 92 insertions(+), 1 deletion(-)