diff mbox

[v4,2/5] target/i386: Populate AMD Processor Cache Information

Message ID 1520888449-4352-3-git-send-email-babu.moger@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Babu Moger March 12, 2018, 9 p.m. UTC
From: Stanislav Lanci <pixo@polepetko.eu>

Add information for CPUID leaf 0x8000001D. Populate cache topology information
for the different cache types (Data Cache, Instruction Cache, L2 and L3) supported
by the 0x8000001D leaf. Please refer to the Processor Programming Reference (PPR)
for AMD Family 17h models for more details.

Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
Signed-off-by: Babu Moger <babu.moger@amd.com>
---
 target/i386/cpu.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 target/i386/kvm.c | 29 ++++++++++++++++++++++---
 2 files changed, 91 insertions(+), 3 deletions(-)

Comments

Eduardo Habkost March 15, 2018, 7:04 p.m. UTC | #1
Hi,

Sorry for not reviewing the previous versions of this series (and
making it miss soft freeze).


On Mon, Mar 12, 2018 at 05:00:46PM -0400, Babu Moger wrote:
> From: Stanislav Lanci <pixo@polepetko.eu>
> 
> Add information for cpuid 0x8000001D leaf. Populate cache topology information
> for different cache types(Data Cache, Instruction Cache, L2 and L3) supported
> by 0x8000001D leaf. Please refer Processor Programming Reference (PPR) for AMD
> Family 17h Model for more details.
> 
> Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
>  target/i386/cpu.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm.c | 29 ++++++++++++++++++++++---
>  2 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 42dd381..5fdbedd 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -118,6 +118,7 @@
>  #define L1I_LINE_SIZE         64
>  #define L1I_ASSOCIATIVITY      8
>  #define L1I_SETS              64
> +#define L1I_SETS_AMD         256
>  #define L1I_PARTITIONS         1
>  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
>  #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B
> @@ -129,7 +130,9 @@
>  /* Level 2 unified cache: */
>  #define L2_LINE_SIZE          64
>  #define L2_ASSOCIATIVITY      16
> +#define L2_ASSOCIATIVITY_AMD   8
>  #define L2_SETS             4096
> +#define L2_SETS_AMD         1024
>  #define L2_PARTITIONS          1
>  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */
>  /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
> @@ -146,6 +149,7 @@
>  #define L3_N_LINE_SIZE         64
>  #define L3_N_ASSOCIATIVITY     16
>  #define L3_N_SETS           16384
> +#define L3_N_SETS_AMD        8192
>  #define L3_N_PARTITIONS         1
>  #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B
>  #define L3_N_LINES_PER_TAG      1

I wouldn't like to add even more inconsistencies between
different CPUID leaves.

If you really wish to have different defaults on AMD and Intel,
let's either hide Intel-specific CPUID leaves when using AMD
values, or make all of them agree (and choose the defaults based
on CPU model or vendor id).
Eduardo Habkost March 16, 2018, 6 p.m. UTC | #2
On Mon, Mar 12, 2018 at 05:00:46PM -0400, Babu Moger wrote:
> From: Stanislav Lanci <pixo@polepetko.eu>
> 
> Add information for cpuid 0x8000001D leaf. Populate cache topology information
> for different cache types(Data Cache, Instruction Cache, L2 and L3) supported
> by 0x8000001D leaf. Please refer Processor Programming Reference (PPR) for AMD
> Family 17h Model for more details.
> 
> Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
> Signed-off-by: Babu Moger <babu.moger@amd.com>

The new CPUID leaves don't seem to match the existing AMD cache information
leaves.  Is this intentional?  Why?

Details below:

> ---
>  target/i386/cpu.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm.c | 29 ++++++++++++++++++++++---
>  2 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 42dd381..5fdbedd 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
[...]
> @@ -3590,6 +3594,67 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>              *edx = 0;
>          }
>          break;
> +    case 0x8000001D: /* AMD TOPOEXT cache info */
> +        switch (count) {

Copying macro definitions here, for reference:

> /* L1 data cache: */
> #define L1D_LINE_SIZE         64
> #define L1D_ASSOCIATIVITY      8
> #define L1D_SETS              64
> #define L1D_PARTITIONS         1
> /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
> #define L1D_DESCRIPTOR CPUID_2_L1D_32KB_8WAY_64B
> /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
> #define L1D_LINES_PER_TAG      1
> #define L1D_SIZE_KB_AMD       64
> #define L1D_ASSOCIATIVITY_AMD  2

So, we already have:

CPUID[2]: 32KB 8-way cache, 64-byte lines
CPUID[4]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KB)
CPUID[0x80000005]: 64 KB 2-way cache, 1 line per tag


> +        case 0: /* L1 dcache info */
> +            *eax |= TYPE_DCACHE | \
> +                    CACHE_LEVEL(1) | \
> +                    CACHE_SELF_INIT_LEVEL | \
> +                    ((cs->nr_threads - 1) << 14);
> +            *ebx = (L1D_LINE_SIZE - 1) | \
> +                   ((L1D_PARTITIONS - 1) << 12) | \
> +                   ((L1D_ASSOCIATIVITY - 1) << 22);
> +            *ecx = L1D_SETS - 1;
> +            *edx = 0;
> +            break;

This adds:
CPUID[0x8000001D]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KiB)

This agrees with CPUID[2] and CPUID[4] (Intel leaves, reserved on AMD), but not
with CPUID[0x80000005].

>  
>  /* L1 instruction cache: */
>  #define L1I_LINE_SIZE         64
>  #define L1I_ASSOCIATIVITY      8
>  #define L1I_SETS              64
> +#define L1I_SETS_AMD         256
>  #define L1I_PARTITIONS         1
>  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
>  #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B
>  /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
>  #define L1I_LINES_PER_TAG      1
>  #define L1I_SIZE_KB_AMD       64
>  #define L1I_ASSOCIATIVITY_AMD  2

Currently we have:

CPUID[2]: 32KiB 8-way cache, 64-byte lines
CPUID[4]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KiB)
CPUID[0x80000005]: 64 KiB 2-way cache, 1 line per tag


>  
> +        case 1: /* L1 icache info */
> +            *eax |= TYPE_ICACHE | \
> +                    CACHE_LEVEL(1) | \
> +                    CACHE_SELF_INIT_LEVEL | \
> +                    ((cs->nr_threads - 1) << 14);
> +            *ebx = (L1I_LINE_SIZE - 1) | \
> +                   ((L1I_PARTITIONS - 1) << 12) | \
> +                   ((L1I_ASSOCIATIVITY_AMD - 1) << 22);
> +            *ecx = L1I_SETS_AMD - 1;
> +            *edx = 0;
> +            break;

This adds:
CPUID[0x8000001D]: 2-way cache, 64-byte lines, 256 sets, 1 partition (32 KiB)

This doesn't match any of the existing leaves.


>  /* Level 2 unified cache: */
>  #define L2_LINE_SIZE          64
>  #define L2_ASSOCIATIVITY      16
> +#define L2_ASSOCIATIVITY_AMD   8
>  #define L2_SETS             4096
> +#define L2_SETS_AMD         1024
>  #define L2_PARTITIONS          1
>  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */
>  /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
>  #define L2_DESCRIPTOR CPUID_2_L2_2MB_8WAY_64B
>  /*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */
>  #define L2_LINES_PER_TAG       1
>  #define L2_SIZE_KB_AMD       512

Currently we have:
CPUID[2]: 2MiB 8-way cache, 64-byte lines
CPUID[4]: 64-byte lines, 16-way, 1 partition, 4096 sets (4 MiB)
CPUID[0x80000006]: 512 KiB, 16-way cache, 1 line per tag

>  
> +        case 2: /* L2 cache info */
> +            *eax |= TYPE_UNIFIED | \
> +                    CACHE_LEVEL(2) | \
> +                    CACHE_SELF_INIT_LEVEL | \
> +                    ((cs->nr_threads - 1) << 14);
> +            *ebx = (L2_LINE_SIZE - 1) | \
> +                   ((L2_PARTITIONS - 1) << 12) | \
> +                   ((L2_ASSOCIATIVITY_AMD - 1) << 22);
> +            *ecx = L2_SETS_AMD - 1;
> +            *edx = CACHE_INCLUSIVE;
> +            break;

This adds:
CPUID[0x8000001D]: 64-byte lines, 8-way, 1 partition, 1024 sets (512 KiB).

This doesn't match any of the existing leaves.


>  /* Level 3 unified cache: */
>  #define L3_N_LINE_SIZE         64
>  #define L3_N_ASSOCIATIVITY     16
>  #define L3_N_SETS           16384
> +#define L3_N_SETS_AMD        8192
>  #define L3_N_PARTITIONS         1
>  #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B
>  #define L3_N_LINES_PER_TAG      1
>  #define L3_N_SIZE_KB_AMD    16384
>  

Currently we have:
CPUID[2]: 16MiB 16-way cache, 64-byte lines
CPUID[4]: 64-byte lines, 16-way, 16384 sets, 1 partition (16 MiB)
CPUID[0x80000006]: 16 MiB cache, 16-way, 1 line per tag


> +        case 3: /* L3 cache info */
> +            if (cpu->enable_l3_cache) {
> +                *eax |= TYPE_UNIFIED | \
> +                        CACHE_LEVEL(3) | \
> +                        CACHE_SELF_INIT_LEVEL | \
> +                        ((cs->nr_cores * cs->nr_threads - 1) << 14);
> +                *ebx = (L3_N_LINE_SIZE - 1) | \
> +                       ((L3_N_PARTITIONS - 1) << 12) | \
> +                       ((L3_N_ASSOCIATIVITY - 1) << 22);
> +                *ecx = L3_N_SETS_AMD - 1;
> +                *edx = CACHE_NO_INVD_SHARING;

This adds:
CPUID[0x8000001D]: 64-byte lines, 16-way, 1 partition, 8192 sets (8 MiB)

This doesn't match any of the existing leaves.


> +            } else {
> +                *eax = 0;
> +                *ebx = 0;
> +                *ecx = 0;
> +                *edx = 0;
> +            }
> +            break;
> +        default: /* end of info */
> +            *eax = 0;
> +            *ebx = 0;
> +            *ecx = 0;
> +            *edx = 0;
> +            break;
> +        }
> +        break;
>      case 0xC0000000:
>          *eax = env->cpuid_xlevel2;
>          *ebx = 0;
[...]
Babu Moger March 20, 2018, 5:25 p.m. UTC | #3
Hi Eduardo, Thanks for the comments. Please see the response inline.

> -----Original Message-----
> From: Eduardo Habkost <ehabkost@redhat.com>
> Sent: Friday, March 16, 2018 1:00 PM
> To: Moger, Babu <Babu.Moger@amd.com>
> Cc: pbonzini@redhat.com; rth@twiddle.net; rkrcmar@redhat.com;
> Lendacky, Thomas <Thomas.Lendacky@amd.com>; Singh, Brijesh
> <brijesh.singh@amd.com>; kvm@vger.kernel.org; kash@tripleback.net;
> mtosatti@redhat.com; Hook, Gary <Gary.Hook@amd.com>; qemu-
> devel@nongnu.org
> Subject: Re: [Qemu-devel] [PATCH v4 2/5] target/i386: Populate AMD
> Processor Cache Information
> 
> On Mon, Mar 12, 2018 at 05:00:46PM -0400, Babu Moger wrote:
> > From: Stanislav Lanci <pixo@polepetko.eu>
> >
> > Add information for cpuid 0x8000001D leaf. Populate cache topology
> information
> > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> supported
> > by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
> for AMD
> > Family 17h Model for more details.
> >
> > Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
> > Signed-off-by: Babu Moger <babu.moger@amd.com>
> 
> The new CPUID leaves don't seem to match the existing AMD cache
> information
> leaves.  Is this intentional?  Why?

It is not intentional. These values are from an older family of processors. They have changed in Family 14 and later.
The latest one is Family 17. You can see the differences here:
 https://support.amd.com/TechDocs/41131.pdf
https://support.amd.com/TechDocs/55072_AMD_Family_15h_Models_70h-7Fh_BKDG.pdf
https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf

Some of these are bugs in our code. For others, we need to add checks for the family and correct the values.
You understand the code much better than I do. Correct me if I missed something.

Note that older families of processors do not support topology extensions.

> 
> Details below:
> 
> > ---
> >  target/i386/cpu.c | 65
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  target/i386/kvm.c | 29 ++++++++++++++++++++++---
> >  2 files changed, 91 insertions(+), 3 deletions(-)
> >
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 42dd381..5fdbedd 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> [...]
> > @@ -3590,6 +3594,67 @@ void cpu_x86_cpuid(CPUX86State *env,
> uint32_t index, uint32_t count,
> >              *edx = 0;
> >          }
> >          break;
> > +    case 0x8000001D: /* AMD TOPOEXT cache info */
> > +        switch (count) {
> 
> Copying macro definitions here, for reference:
> 
> > /* L1 data cache: */
> > #define L1D_LINE_SIZE         64
> > #define L1D_ASSOCIATIVITY      8
> > #define L1D_SETS              64
> > #define L1D_PARTITIONS         1
> > /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
> > #define L1D_DESCRIPTOR CPUID_2_L1D_32KB_8WAY_64B
> > /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
> > #define L1D_LINES_PER_TAG      1
> > #define L1D_SIZE_KB_AMD       64
> > #define L1D_ASSOCIATIVITY_AMD  2
> 
> So, we already have:
> 
> CPUID[2]: 32KB 8-way cache, 64-byte lines
> CPUID[4]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KB)
> CPUID[0x80000005]: 64 KB 2-way cache, 1 line per tag

The 64 KiB, 2-way values are for the older family products.
The newer ones should be 8-way, 32 KiB. We will need to add checks here.

> 
> 
> > +        case 0: /* L1 dcache info */
> > +            *eax |= TYPE_DCACHE | \
> > +                    CACHE_LEVEL(1) | \
> > +                    CACHE_SELF_INIT_LEVEL | \
> > +                    ((cs->nr_threads - 1) << 14);
> > +            *ebx = (L1D_LINE_SIZE - 1) | \
> > +                   ((L1D_PARTITIONS - 1) << 12) | \
> > +                   ((L1D_ASSOCIATIVITY - 1) << 22);
> > +            *ecx = L1D_SETS - 1;
> > +            *edx = 0;
> > +            break;
> 
> This adds:
> CPUID[0x8000001D]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KiB)

Should match after the above new check.

> 
> This agrees with CPUID[2] and CPUID[4] (Intel leaves, reserved on AMD), but
> not
> with CPUID[0x80000005].
> 
> >
> >  /* L1 instruction cache: */
> >  #define L1I_LINE_SIZE         64
> >  #define L1I_ASSOCIATIVITY      8
> >  #define L1I_SETS              64
> > +#define L1I_SETS_AMD         256
> >  #define L1I_PARTITIONS         1
> >  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
> >  #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B
> >  /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
> >  #define L1I_LINES_PER_TAG      1
> >  #define L1I_SIZE_KB_AMD       64
> >  #define L1I_ASSOCIATIVITY_AMD  2
> 
> Currently we have:
> 
> CPUID[2]: 32KiB 8-way cache, 64-byte lines
> CPUID[4]: 8-way cache, 64-byte lines, 64 sets, 1 partition (32 KiB)
> CPUID[0x80000005]: 64 KiB 2-way cache, 1 line per tag

This is for the older family (64 KiB, 2-way). The newer one is 64 KiB, 4-way. We need to add a check here.
> 
> 
> >
> > +        case 1: /* L1 icache info */
> > +            *eax |= TYPE_ICACHE | \
> > +                    CACHE_LEVEL(1) | \
> > +                    CACHE_SELF_INIT_LEVEL | \
> > +                    ((cs->nr_threads - 1) << 14);
> > +            *ebx = (L1I_LINE_SIZE - 1) | \
> > +                   ((L1I_PARTITIONS - 1) << 12) | \
> > +                   ((L1I_ASSOCIATIVITY_AMD - 1) << 22);
> > +            *ecx = L1I_SETS_AMD - 1;
> > +            *edx = 0;
> > +            break;
> 
> This adds:
> CPUID[0x8000001D]: 2-way cache, 64-byte lines, 256 sets, 1 partition (32 KiB)
> 
> This doesn't match any of the existing leaves.

This is a bug in my code. This should be 4-way. It should match after the fix.
> 
> 
> >  /* Level 2 unified cache: */
> >  #define L2_LINE_SIZE          64
> >  #define L2_ASSOCIATIVITY      16
> > +#define L2_ASSOCIATIVITY_AMD   8
> >  #define L2_SETS             4096
> > +#define L2_SETS_AMD         1024
> >  #define L2_PARTITIONS          1
> >  /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */
> >  /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
> >  #define L2_DESCRIPTOR CPUID_2_L2_2MB_8WAY_64B
> >  /*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */
> >  #define L2_LINES_PER_TAG       1
> >  #define L2_SIZE_KB_AMD       512
> 
> Currently we have:
> CPUID[2]: 4MiB 8-way cache, 64-byte lines
> CPUID[4]: 64-byte lines, 16-way, 1 partition, 4096 sets (4 MiB)
> CPUID[0x80000006]: 512 KiB, 16-way cache, 1 line per tag

This should have been 8-way. This is a bug; I will fix it.
The encoding should have been (AMD_ENC_ASSOC(L2_ASSOCIATIVITY_AMD) << 12).


> 
> >
> > +        case 2: /* L2 cache info */
> > +            *eax |= TYPE_UNIFIED | \
> > +                    CACHE_LEVEL(2) | \
> > +                    CACHE_SELF_INIT_LEVEL | \
> > +                    ((cs->nr_threads - 1) << 14);
> > +            *ebx = (L2_LINE_SIZE - 1) | \
> > +                   ((L2_PARTITIONS - 1) << 12) | \
> > +                   ((L2_ASSOCIATIVITY_AMD - 1) << 22);
> > +            *ecx = L2_SETS_AMD - 1;
> > +            *edx = CACHE_INCLUSIVE;
> > +            break;
> 
> This adds:
> CPUID[0x8000001D]: 64-byte lines, 8-way, 1 partition, 1024 sets (512 KiB).
> 
> This doesn't match any of the existing leaves.

Should match after above fix.
> 
> 
> >  /* Level 3 unified cache: */
> >  #define L3_N_LINE_SIZE         64
> >  #define L3_N_ASSOCIATIVITY     16
> >  #define L3_N_SETS           16384
> > +#define L3_N_SETS_AMD        8192
> >  #define L3_N_PARTITIONS         1
> >  #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B
> >  #define L3_N_LINES_PER_TAG      1
> >  #define L3_N_SIZE_KB_AMD    16384
> >
> 
> Currently we have:
> CPUID[2]: 16MiB 16-way cache, 64-byte lines
> CPUID[4]: 64-byte lines, 16-way, 16384 sets, 1 partition (16 MiB)
> CPUID[0x80000006]: 16 MiB cache, 16-way, 1 line per tag
> 
> 
> > +        case 3: /* L3 cache info */
> > +            if (cpu->enable_l3_cache) {
> > +                *eax |= TYPE_UNIFIED | \
> > +                        CACHE_LEVEL(3) | \
> > +                        CACHE_SELF_INIT_LEVEL | \
> > +                        ((cs->nr_cores * cs->nr_threads - 1) << 14);
> > +                *ebx = (L3_N_LINE_SIZE - 1) | \
> > +                       ((L3_N_PARTITIONS - 1) << 12) | \
> > +                       ((L3_N_ASSOCIATIVITY - 1) << 22);
> > +                *ecx = L3_N_SETS_AMD - 1;
> > +                *edx = CACHE_NO_INVD_SHARING;
> 
> This adds:
> CPUID[0x8000001D]: 64-byte lines, 16-way, 1 partition, 8192 sets (8 MiB)

I am not sure about this one. It appears the number of sets should be 16384.
Let me talk to someone and confirm.

> 
> This doesn't match any of the existing leaves.
> 
> 
> > +            } else {
> > +                *eax = 0;
> > +                *ebx = 0;
> > +                *ecx = 0;
> > +                *edx = 0;
> > +            }
> > +            break;
> > +        default: /* end of info */
> > +            *eax = 0;
> > +            *ebx = 0;
> > +            *ecx = 0;
> > +            *edx = 0;
> > +            break;
> > +        }
> > +        break;
> >      case 0xC0000000:
> >          *eax = env->cpuid_xlevel2;
> >          *ebx = 0;
> [...]
> 
> --
> Eduardo
Eduardo Habkost March 20, 2018, 5:54 p.m. UTC | #4
On Tue, Mar 20, 2018 at 05:25:52PM +0000, Moger, Babu wrote:
> Hi Eduardo, Thanks for the comments. Please see the response inline.
> 
> > -----Original Message-----
> > From: Eduardo Habkost <ehabkost@redhat.com>
> > Sent: Friday, March 16, 2018 1:00 PM
> > To: Moger, Babu <Babu.Moger@amd.com>
> > Cc: pbonzini@redhat.com; rth@twiddle.net; rkrcmar@redhat.com;
> > Lendacky, Thomas <Thomas.Lendacky@amd.com>; Singh, Brijesh
> > <brijesh.singh@amd.com>; kvm@vger.kernel.org; kash@tripleback.net;
> > mtosatti@redhat.com; Hook, Gary <Gary.Hook@amd.com>; qemu-
> > devel@nongnu.org
> > Subject: Re: [Qemu-devel] [PATCH v4 2/5] target/i386: Populate AMD
> > Processor Cache Information
> > 
> > On Mon, Mar 12, 2018 at 05:00:46PM -0400, Babu Moger wrote:
> > > From: Stanislav Lanci <pixo@polepetko.eu>
> > >
> > > Add information for cpuid 0x8000001D leaf. Populate cache topology
> > information
> > > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> > supported
> > > by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
> > for AMD
> > > Family 17h Model for more details.
> > >
> > > Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
> > > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > 
> > The new CPUID leaves don't seem to match the existing AMD cache
> > information
> > leaves.  Is this intentional?  Why?
> 
> It is not intentional. These values are from older family of processors. These values have changed from Family 14  or later.
> The latest one is Family 17. You can see the differences here.
>  https://support.amd.com/TechDocs/41131.pdf
> https://support.amd.com/TechDocs/55072_AMD_Family_15h_Models_70h-7Fh_BKDG.pdf
> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
> 
> Some of these are bugs in our code. For some we need to add checks for the family and correct these values.
> You understand the code much better than me. Correct me if I missed something. 
> 
> Note that older family of processors don't support topology extensions.  

If you want to make the cache size/topology look different
depending on the CPU model/options, this would require more work,
but it would be an interesting feature.

The "i386: Helpers to encode cache information consistently"
patch I sent last week might be a useful starting point for that.

If you plan to implement that, please keep in mind that existing
CPUID cache info needs to be kept on previous machine-types (this
is implemented by adding QOM properties that can be used to
enable the old behavior, and by setting them at
MachineClass::compat_props).
Babu Moger March 20, 2018, 7:20 p.m. UTC | #5
> -----Original Message-----
> From: Eduardo Habkost <ehabkost@redhat.com>
> Sent: Tuesday, March 20, 2018 12:54 PM
> To: Moger, Babu <Babu.Moger@amd.com>
> Cc: pbonzini@redhat.com; rth@twiddle.net; rkrcmar@redhat.com;
> Lendacky, Thomas <Thomas.Lendacky@amd.com>; Singh, Brijesh
> <brijesh.singh@amd.com>; kvm@vger.kernel.org; kash@tripleback.net;
> mtosatti@redhat.com; Hook, Gary <Gary.Hook@amd.com>; qemu-
> devel@nongnu.org
> Subject: Re: [Qemu-devel] [PATCH v4 2/5] target/i386: Populate AMD
> Processor Cache Information
> 
> On Tue, Mar 20, 2018 at 05:25:52PM +0000, Moger, Babu wrote:
> > Hi Eduardo, Thanks for the comments. Please see the response inline.
> >
> > > -----Original Message-----
> > > From: Eduardo Habkost <ehabkost@redhat.com>
> > > Sent: Friday, March 16, 2018 1:00 PM
> > > To: Moger, Babu <Babu.Moger@amd.com>
> > > Cc: pbonzini@redhat.com; rth@twiddle.net; rkrcmar@redhat.com;
> > > Lendacky, Thomas <Thomas.Lendacky@amd.com>; Singh, Brijesh
> > > <brijesh.singh@amd.com>; kvm@vger.kernel.org; kash@tripleback.net;
> > > mtosatti@redhat.com; Hook, Gary <Gary.Hook@amd.com>; qemu-
> > > devel@nongnu.org
> > > Subject: Re: [Qemu-devel] [PATCH v4 2/5] target/i386: Populate AMD
> > > Processor Cache Information
> > >
> > > On Mon, Mar 12, 2018 at 05:00:46PM -0400, Babu Moger wrote:
> > > > From: Stanislav Lanci <pixo@polepetko.eu>
> > > >
> > > > Add information for cpuid 0x8000001D leaf. Populate cache topology
> > > information
> > > > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> > > supported
> > > > by 0x8000001D leaf. Please refer Processor Programming Reference
> (PPR)
> > > for AMD
> > > > Family 17h Model for more details.
> > > >
> > > > Signed-off-by: Stanislav Lanci <pixo@polepetko.eu>
> > > > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > >
> > > The new CPUID leaves don't seem to match the existing AMD cache
> > > information
> > > leaves.  Is this intentional?  Why?
> >
> > It is not intentional. These values are from older family of processors.
> These values have changed from Family 14  or later.
> > The latest one is Family 17. You can see the differences here.
> >  https://support.amd.com/TechDocs/41131.pdf
> >
> https://support.amd.com/TechDocs/55072_AMD_Family_15h_Models_70h-
> 7Fh_BKDG.pdf
> >
> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-
> 0Fh.pdf
> >
> > Some of these are bugs in our code. For some we need to add checks for
> the family and correct these values.
> > You understand the code much better than me. Correct me if I missed
> something.
> >
> > Note that older family of processors don't support topology extensions.
> 
> If you want to make the cache size/topology look different
> depending on the CPU model/options, this would require more work,
> but it would be an interesting feature.
> 
> The "i386: Helpers to encode cache information consistently"
> patch I sent last week might be a useful starting point for that.

Yes. Looking at your patch.
> 
> If you plan to implement that, please keep in mind that existing
> CPUID cache info needs to be kept on previous machine-types (this
> is implemented by adding QOM properties that can be used to
> enable the old behavior, and by setting them at
> MachineClass::compat_props).

Yes. Will look into it.  This code is new to me.  Let me take a look.
Thanks

> 
> --
> Eduardo
diff mbox

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 42dd381..5fdbedd 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -118,6 +118,7 @@ 
 #define L1I_LINE_SIZE         64
 #define L1I_ASSOCIATIVITY      8
 #define L1I_SETS              64
+#define L1I_SETS_AMD         256
 #define L1I_PARTITIONS         1
 /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
 #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B
@@ -129,7 +130,9 @@ 
 /* Level 2 unified cache: */
 #define L2_LINE_SIZE          64
 #define L2_ASSOCIATIVITY      16
+#define L2_ASSOCIATIVITY_AMD   8
 #define L2_SETS             4096
+#define L2_SETS_AMD         1024
 #define L2_PARTITIONS          1
 /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */
 /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
@@ -146,6 +149,7 @@ 
 #define L3_N_LINE_SIZE         64
 #define L3_N_ASSOCIATIVITY     16
 #define L3_N_SETS           16384
+#define L3_N_SETS_AMD        8192
 #define L3_N_PARTITIONS         1
 #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B
 #define L3_N_LINES_PER_TAG      1
@@ -3590,6 +3594,67 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             *edx = 0;
         }
         break;
+    case 0x8000001D: /* AMD TOPOEXT cache info */
+        switch (count) {
+        case 0: /* L1 dcache info */
+            *eax |= TYPE_DCACHE | \
+                    CACHE_LEVEL(1) | \
+                    CACHE_SELF_INIT_LEVEL | \
+                    ((cs->nr_threads - 1) << 14);
+            *ebx = (L1D_LINE_SIZE - 1) | \
+                   ((L1D_PARTITIONS - 1) << 12) | \
+                   ((L1D_ASSOCIATIVITY - 1) << 22);
+            *ecx = L1D_SETS - 1;
+            *edx = 0;
+            break;
+        case 1: /* L1 icache info */
+            *eax |= TYPE_ICACHE | \
+                    CACHE_LEVEL(1) | \
+                    CACHE_SELF_INIT_LEVEL | \
+                    ((cs->nr_threads - 1) << 14);
+            *ebx = (L1I_LINE_SIZE - 1) | \
+                   ((L1I_PARTITIONS - 1) << 12) | \
+                   ((L1I_ASSOCIATIVITY_AMD - 1) << 22);
+            *ecx = L1I_SETS_AMD - 1;
+            *edx = 0;
+            break;
+        case 2: /* L2 cache info */
+            *eax |= TYPE_UNIFIED | \
+                    CACHE_LEVEL(2) | \
+                    CACHE_SELF_INIT_LEVEL | \
+                    ((cs->nr_threads - 1) << 14);
+            *ebx = (L2_LINE_SIZE - 1) | \
+                   ((L2_PARTITIONS - 1) << 12) | \
+                   ((L2_ASSOCIATIVITY_AMD - 1) << 22);
+            *ecx = L2_SETS_AMD - 1;
+            *edx = CACHE_INCLUSIVE;
+            break;
+        case 3: /* L3 cache info */
+            if (cpu->enable_l3_cache) {
+                *eax |= TYPE_UNIFIED | \
+                        CACHE_LEVEL(3) | \
+                        CACHE_SELF_INIT_LEVEL | \
+                        ((cs->nr_cores * cs->nr_threads - 1) << 14);
+                *ebx = (L3_N_LINE_SIZE - 1) | \
+                       ((L3_N_PARTITIONS - 1) << 12) | \
+                       ((L3_N_ASSOCIATIVITY - 1) << 22);
+                *ecx = L3_N_SETS_AMD - 1;
+                *edx = CACHE_NO_INVD_SHARING;
+            } else {
+                *eax = 0;
+                *ebx = 0;
+                *ecx = 0;
+                *edx = 0;
+            }
+            break;
+        default: /* end of info */
+            *eax = 0;
+            *ebx = 0;
+            *ecx = 0;
+            *edx = 0;
+            break;
+        }
+        break;
     case 0xC0000000:
         *eax = env->cpuid_xlevel2;
         *ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index ad4b159..0eb39b52 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -909,9 +909,32 @@  int kvm_arch_init_vcpu(CPUState *cs)
         }
         c = &cpuid_data.entries[cpuid_i++];
 
-        c->function = i;
-        c->flags = 0;
-        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+        switch (i) {
+        case 0x8000001d:
+            /* Query for all AMD cache information leaves */
+            for (j = 0; ; j++) {
+                c->function = i;
+                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                c->index = j;
+                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+                if (c->eax == 0) {
+                    break;
+                }
+                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                    fprintf(stderr, "cpuid_data is full, no space for "
+                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+                    abort();
+                }
+                c = &cpuid_data.entries[cpuid_i++];
+            }
+            break;
+        default:
+            c->function = i;
+            c->flags = 0;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            break;
+        }
     }
 
     /* Call Centaur's CPUID instructions they are supported. */