Message ID | 20230213093625.158170-15-zhao1.liu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support smp.clusters for x86 | expand |
Hi Zhao, 在 2023/2/13 17:36, Zhao Liu 写道: > From: Zhao Liu <zhao1.liu@intel.com> > > Currently, by default, the cache topology is encoded as: > 1. i/d cache is shared in one core. > 2. L2 cache is shared in one core. > 3. L3 cache is shared in one die. > > This default general setting has caused a misunderstanding, that is, the > cache topology is completely equated with a specific cpu topology, such > as the connection between L2 cache and core level, and the connection > between L3 cache and die level. > > In fact, the settings of these topologies depend on the specific > platform and are not static. For example, on Alder Lake-P, every > four Atom cores share the same L2 cache. > > Thus, we should explicitly define the corresponding cache topology for > different cache models to increase scalability. > > Except legacy_l2_cache_cpuid2 (its default topo level is INVALID), It seems like its default topo level is UNKNOWN in this case. > explicitly set the corresponding topology level for all other cache > models. In order to be compatible with the existing cache topology, set > the CORE level for the i/d cache, set the CORE level for L2 cache, and > set the DIE level for L3 cache. > > The field for CPUID[4].EAX[bits 25:14] or CPUID[0x8000001D].EAX[bits > 25:14] will be set based on CPUCacheInfo.share_level. > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > --- > target/i386/cpu.c | 19 +++++++++++++++++++ > target/i386/cpu.h | 16 ++++++++++++++++ > 2 files changed, 35 insertions(+) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 27bbbc36b11c..364534e84b1b 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -433,6 +433,7 @@ static CPUCacheInfo legacy_l1d_cache = { > .sets = 64, > .partitions = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }; > > /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ > @@ -447,6 +448,7 @@ static CPUCacheInfo legacy_l1d_cache_amd = { > .partitions = 1, > .lines_per_tag = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }; > > /* L1 instruction cache: */ > @@ -460,6 +462,7 @@ static CPUCacheInfo legacy_l1i_cache = { > .sets = 64, > .partitions = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }; > > /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ > @@ -474,6 +477,7 @@ static CPUCacheInfo legacy_l1i_cache_amd = { > .partitions = 1, > .lines_per_tag = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }; > > /* Level 2 unified cache: */ > @@ -487,6 +491,7 @@ static CPUCacheInfo legacy_l2_cache = { > .sets = 4096, > .partitions = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }; > > /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ > @@ -509,6 +514,7 @@ static CPUCacheInfo legacy_l2_cache_amd = { > .associativity = 16, > .sets = 512, > .partitions = 1, > + .share_level = CORE, > }; > > /* Level 3 unified cache: */ > @@ -524,6 +530,7 @@ static CPUCacheInfo legacy_l3_cache = { > .self_init = true, > .inclusive = true, > .complex_indexing = true, > + .share_level = DIE, > }; > > /* TLB definitions: */ > @@ -1668,6 +1675,7 @@ static const CPUCaches epyc_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -1680,6 +1688,7 @@ static const CPUCaches epyc_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1690,6 +1699,7 @@ static const CPUCaches epyc_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1703,6 +1713,7 @@ static const CPUCaches epyc_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = true, > + .share_level = DIE, > }, > }; > > @@ -1718,6 +1729,7 @@ static const CPUCaches epyc_rome_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -1730,6 +1742,7 @@ static const CPUCaches epyc_rome_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1740,6 +1753,7 @@ static const CPUCaches epyc_rome_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1753,6 +1767,7 @@ static const CPUCaches epyc_rome_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = true, > + .share_level = DIE, > }, > }; > > @@ -1768,6 +1783,7 @@ static const CPUCaches epyc_milan_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -1780,6 +1796,7 @@ static const CPUCaches epyc_milan_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1790,6 +1807,7 @@ static const CPUCaches epyc_milan_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1803,6 +1821,7 @@ static const CPUCaches epyc_milan_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = true, > + .share_level = DIE, > }, > }; > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 8668e74e0c87..5a955431f759 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1476,6 +1476,15 @@ enum CacheType { > UNIFIED_CACHE > }; > > +enum CPUTopoLevel { > + INVALID = 0, Maybe UNKNOWN? > + SMT, > + CORE, > + MODULE, > + DIE, > + PACKAGE, Do we need a prefix like CPU_TOPO_LEVEL_* or shorter CPU_TL_* to avoid possible naming conflicts with other micros or enums? Not sure.. Thanks, Yanan > +}; > + > typedef struct CPUCacheInfo { > enum CacheType type; > uint8_t level; > @@ -1517,6 +1526,13 @@ typedef struct CPUCacheInfo { > * address bits. CPUID[4].EDX[bit 2]. > */ > bool complex_indexing; > + > + /* > + * Cache Topology. The level that cache is shared in. > + * Used to encode CPUID[4].EAX[bits 25:14] or > + * CPUID[0x8000001D].EAX[bits 25:14]. > + */ > + enum CPUTopoLevel share_level; > } CPUCacheInfo; > >
On Wed, Feb 15, 2023 at 08:17:22PM +0800, wangyanan (Y) wrote: > Date: Wed, 15 Feb 2023 20:17:22 +0800 > From: "wangyanan (Y)" <wangyanan55@huawei.com> > Subject: Re: [PATCH RESEND 14/18] i386: Add cache topology info in > CPUCacheInfo > > Hi Zhao, > > 在 2023/2/13 17:36, Zhao Liu 写道: > > From: Zhao Liu <zhao1.liu@intel.com> > > > > Currently, by default, the cache topology is encoded as: > > 1. i/d cache is shared in one core. > > 2. L2 cache is shared in one core. > > 3. L3 cache is shared in one die. > > > > This default general setting has caused a misunderstanding, that is, the > > cache topology is completely equated with a specific cpu topology, such > > as the connection between L2 cache and core level, and the connection > > between L3 cache and die level. > > > > In fact, the settings of these topologies depend on the specific > > platform and are not static. For example, on Alder Lake-P, every > > four Atom cores share the same L2 cache. > > > > Thus, we should explicitly define the corresponding cache topology for > > different cache models to increase scalability. > > > > Except legacy_l2_cache_cpuid2 (its default topo level is INVALID), > It seems like its default topo level is UNKNOWN in this case. > > explicitly set the corresponding topology level for all other cache > > models. In order to be compatible with the existing cache topology, set > > the CORE level for the i/d cache, set the CORE level for L2 cache, and > > set the DIE level for L3 cache. > > > > The field for CPUID[4].EAX[bits 25:14] or CPUID[0x8000001D].EAX[bits > > 25:14] will be set based on CPUCacheInfo.share_level. > > > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > > --- > > target/i386/cpu.c | 19 +++++++++++++++++++ > > target/i386/cpu.h | 16 ++++++++++++++++ > > 2 files changed, 35 insertions(+) > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index 27bbbc36b11c..364534e84b1b 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -433,6 +433,7 @@ static CPUCacheInfo legacy_l1d_cache = { > > .sets = 64, > > .partitions = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }; > > /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ > > @@ -447,6 +448,7 @@ static CPUCacheInfo legacy_l1d_cache_amd = { > > .partitions = 1, > > .lines_per_tag = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }; > > /* L1 instruction cache: */ > > @@ -460,6 +462,7 @@ static CPUCacheInfo legacy_l1i_cache = { > > .sets = 64, > > .partitions = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }; > > /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ > > @@ -474,6 +477,7 @@ static CPUCacheInfo legacy_l1i_cache_amd = { > > .partitions = 1, > > .lines_per_tag = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }; > > /* Level 2 unified cache: */ > > @@ -487,6 +491,7 @@ static CPUCacheInfo legacy_l2_cache = { > > .sets = 4096, > > .partitions = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }; > > /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ > > @@ -509,6 +514,7 @@ static CPUCacheInfo legacy_l2_cache_amd = { > > .associativity = 16, > > .sets = 512, > > .partitions = 1, > > + .share_level = CORE, > > }; > > /* Level 3 unified cache: */ > > @@ -524,6 +530,7 @@ static CPUCacheInfo legacy_l3_cache = { > > .self_init = true, > > .inclusive = true, > > .complex_indexing = true, > > + .share_level = DIE, > > }; > > /* TLB definitions: */ > > @@ -1668,6 +1675,7 @@ static const CPUCaches epyc_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l1i_cache = &(CPUCacheInfo) { > > .type = INSTRUCTION_CACHE, > > @@ -1680,6 +1688,7 @@ static const CPUCaches epyc_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l2_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1690,6 +1699,7 @@ static const CPUCaches epyc_cache_info = { > > .partitions = 1, > > .sets = 1024, > > .lines_per_tag = 1, > > + .share_level = CORE, > > }, > > .l3_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1703,6 +1713,7 @@ static const CPUCaches epyc_cache_info = { > > .self_init = true, > > .inclusive = true, > > .complex_indexing = true, > > + .share_level = DIE, > > }, > > }; > > @@ -1718,6 +1729,7 @@ static const CPUCaches epyc_rome_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l1i_cache = &(CPUCacheInfo) { > > .type = INSTRUCTION_CACHE, > > @@ -1730,6 +1742,7 @@ static const CPUCaches epyc_rome_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l2_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1740,6 +1753,7 @@ static const CPUCaches epyc_rome_cache_info = { > > .partitions = 1, > > .sets = 1024, > > .lines_per_tag = 1, > > + .share_level = CORE, > > }, > > .l3_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1753,6 +1767,7 @@ static const CPUCaches epyc_rome_cache_info = { > > .self_init = true, > > .inclusive = true, > > .complex_indexing = true, > > + .share_level = DIE, > > }, > > }; > > @@ -1768,6 +1783,7 @@ static const CPUCaches epyc_milan_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l1i_cache = &(CPUCacheInfo) { > > .type = INSTRUCTION_CACHE, > > @@ -1780,6 +1796,7 @@ static const CPUCaches epyc_milan_cache_info = { > > .lines_per_tag = 1, > > .self_init = 1, > > .no_invd_sharing = true, > > + .share_level = CORE, > > }, > > .l2_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1790,6 +1807,7 @@ static const CPUCaches epyc_milan_cache_info = { > > .partitions = 1, > > .sets = 1024, > > .lines_per_tag = 1, > > + .share_level = CORE, > > }, > > .l3_cache = &(CPUCacheInfo) { > > .type = UNIFIED_CACHE, > > @@ -1803,6 +1821,7 @@ static const CPUCaches epyc_milan_cache_info = { > > .self_init = true, > > .inclusive = true, > > .complex_indexing = true, > > + .share_level = DIE, > > }, > > }; > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > > index 8668e74e0c87..5a955431f759 100644 > > --- a/target/i386/cpu.h > > +++ b/target/i386/cpu.h > > @@ -1476,6 +1476,15 @@ enum CacheType { > > UNIFIED_CACHE > > }; > > +enum CPUTopoLevel { > > + INVALID = 0, > Maybe UNKNOWN? I agree, it's a better name than INVALID. > > + SMT, > > + CORE, > > + MODULE, > > + DIE, > > + PACKAGE, > Do we need a prefix like CPU_TOPO_LEVEL_* or shorter CPU_TL_* > to avoid possible naming conflicts with other micros or enums? > Not sure.. Yes, adding a prefix for enum is more in line with the QEMU naming style, I will add it. I support the first name, it looks more clear. Thanks, Zhao > > Thanks, > Yanan > > +}; > > + > > typedef struct CPUCacheInfo { > > enum CacheType type; > > uint8_t level; > > @@ -1517,6 +1526,13 @@ typedef struct CPUCacheInfo { > > * address bits. CPUID[4].EDX[bit 2]. > > */ > > bool complex_indexing; > > + > > + /* > > + * Cache Topology. The level that cache is shared in. > > + * Used to encode CPUID[4].EAX[bits 25:14] or > > + * CPUID[0x8000001D].EAX[bits 25:14]. > > + */ > > + enum CPUTopoLevel share_level; > > } CPUCacheInfo; >
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 27bbbc36b11c..364534e84b1b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -433,6 +433,7 @@ static CPUCacheInfo legacy_l1d_cache = { .sets = 64, .partitions = 1, .no_invd_sharing = true, + .share_level = CORE, }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ @@ -447,6 +448,7 @@ static CPUCacheInfo legacy_l1d_cache_amd = { .partitions = 1, .lines_per_tag = 1, .no_invd_sharing = true, + .share_level = CORE, }; /* L1 instruction cache: */ @@ -460,6 +462,7 @@ static CPUCacheInfo legacy_l1i_cache = { .sets = 64, .partitions = 1, .no_invd_sharing = true, + .share_level = CORE, }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ @@ -474,6 +477,7 @@ static CPUCacheInfo legacy_l1i_cache_amd = { .partitions = 1, .lines_per_tag = 1, .no_invd_sharing = true, + .share_level = CORE, }; /* Level 2 unified cache: */ @@ -487,6 +491,7 @@ static CPUCacheInfo legacy_l2_cache = { .sets = 4096, .partitions = 1, .no_invd_sharing = true, + .share_level = CORE, }; /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ @@ -509,6 +514,7 @@ static CPUCacheInfo legacy_l2_cache_amd = { .associativity = 16, .sets = 512, .partitions = 1, + .share_level = CORE, }; /* Level 3 unified cache: */ @@ -524,6 +530,7 @@ static CPUCacheInfo legacy_l3_cache = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = DIE, }; /* TLB definitions: */ @@ -1668,6 +1675,7 @@ static const CPUCaches epyc_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1680,6 +1688,7 @@ static const CPUCaches epyc_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1690,6 +1699,7 @@ static const CPUCaches epyc_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1703,6 +1713,7 @@ static const CPUCaches epyc_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = DIE, }, }; @@ -1718,6 +1729,7 @@ static const CPUCaches epyc_rome_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1730,6 +1742,7 @@ static const CPUCaches epyc_rome_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1740,6 +1753,7 @@ static const CPUCaches epyc_rome_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1753,6 +1767,7 @@ static const CPUCaches epyc_rome_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = DIE, }, }; @@ -1768,6 +1783,7 @@ static const CPUCaches epyc_milan_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1780,6 +1796,7 @@ static const CPUCaches epyc_milan_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1790,6 +1807,7 @@ static const CPUCaches epyc_milan_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1803,6 +1821,7 @@ static const CPUCaches epyc_milan_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = DIE, }, }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8668e74e0c87..5a955431f759 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1476,6 +1476,15 @@ enum CacheType { UNIFIED_CACHE }; +enum CPUTopoLevel { + INVALID = 0, + SMT, + CORE, + MODULE, + DIE, + PACKAGE, +}; + typedef struct CPUCacheInfo { enum CacheType type; uint8_t level; @@ -1517,6 +1526,13 @@ typedef struct CPUCacheInfo { * address bits. CPUID[4].EDX[bit 2]. */ bool complex_indexing; + + /* + * Cache Topology. The level that cache is shared in. + * Used to encode CPUID[4].EAX[bits 25:14] or + * CPUID[0x8000001D].EAX[bits 25:14]. + */ + enum CPUTopoLevel share_level; } CPUCacheInfo;