Message ID: 1647331137-69890-1-git-send-email-wangqing@vivo.com
State: New, archived
Series: sched: dynamic config sd_flags if described in DT
On Tue, Mar 15, 2022 at 12:58:54AM -0700, Qing Wang wrote:

> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -1527,6 +1527,7 @@ sd_init(struct sched_domain_topology_level *tl,
>  	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
>  	int sd_id, sd_weight, sd_flags = 0;
>  	struct cpumask *sd_span;
> +	int ret;
>
>  #ifdef CONFIG_NUMA
>  	/*
> @@ -1539,6 +1540,15 @@ sd_init(struct sched_domain_topology_level *tl,
>
>  	if (tl->sd_flags)
>  		sd_flags = (*tl->sd_flags)();
> +
> +#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
> +	ret = cpus_share_self_cache(cpu_map);
> +	if (ret == 1)
> +		sd_flags |= SD_SHARE_PKG_RESOURCES;
> +	else if (ret == 0)
> +		sd_flags &= ~SD_SHARE_PKG_RESOURCES;
> +#endif
> +
>  	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
>  			"wrong sd_flags in topology description\n"))
>  		sd_flags &= TOPOLOGY_SD_FLAGS;

Still NAK, you don't have to touch generic code for any of this.
Hi Qing,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on driver-core/driver-core-testing tip/sched/core linus/master v5.17-rc8 next-20220310]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/0day-ci/linux/commits/Qing-Wang/sched-dynamic-config-sd_flags-if-described-in-DT/20220315-160039
base:     https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
config:   riscv-nommu_k210_sdcard_defconfig (https://download.01.org/0day-ci/archive/20220315/202203151939.NI7Rsold-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/3322560249c42b0a3e719b19842a3ace7d5ffb6a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Qing-Wang/sched-dynamic-config-sd_flags-if-described-in-DT/20220315-160039
        git checkout 3322560249c42b0a3e719b19842a3ace7d5ffb6a
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=riscv SHELL=/bin/bash drivers/base/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/base/arch_topology.c: In function 'cpus_share_self_cache':
>> drivers/base/arch_topology.c:648:37: error: 'cpu' undeclared (first use in this function)
     648 |                 if (!cache_topology[cpu][cache_level])
         |                                     ^~~
   drivers/base/arch_topology.c:648:37: note: each undeclared identifier is reported only once for each function it appears in

vim +/cpu +648 drivers/base/arch_topology.c

   639
   640  int cpus_share_self_cache(const struct cpumask *cpu_map)
   641  {
   642          int cache_level, cpu_id;
   643          int first, last;
   644          int id = cpumask_first(cpu_map);
   645          int size = cpumask_weight(cpu_map);
   646
   647          for (cache_level = 0; cache_level < MAX_CACHE_LEVEL; cache_level++) {
 > 648                  if (!cache_topology[cpu][cache_level])
   649                          return -1;
   650
   651                  first = -1;
   652                  last = id;
   653                  for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
   654                          if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
   655                                  if (cpu_id < id || cpu_id >= id + size)
   656                                          return 0;
   657
   658                                  first = (first == -1)?cpu_id:first;
   659                                  last = cpu_id;
   660                          }
   661                  }
   662
   663                  if (first == id && last == id + size)
   664                          return 1;
   665          }
   666
   667          return 0;
   668  }
   669

---
0-DAY CI Kernel Test Service
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Qing,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on driver-core/driver-core-testing tip/sched/core linus/master v5.17-rc8 next-20220310]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/0day-ci/linux/commits/Qing-Wang/sched-dynamic-config-sd_flags-if-described-in-DT/20220315-160039
base:     https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
config:   riscv-randconfig-r025-20220314 (https://download.01.org/0day-ci/archive/20220315/202203152145.vPr4Qv42-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project a6b2f50fb47da3baeee10b1906da6e30ac5d26ec)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/3322560249c42b0a3e719b19842a3ace7d5ffb6a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Qing-Wang/sched-dynamic-config-sd_flags-if-described-in-DT/20220315-160039
        git checkout 3322560249c42b0a3e719b19842a3ace7d5ffb6a
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/base/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/base/arch_topology.c:648:23: error: use of undeclared identifier 'cpu'
                   if (!cache_topology[cpu][cache_level])
                                       ^
   drivers/base/arch_topology.c:654:23: error: use of undeclared identifier 'cpu'
                   if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
                                       ^
   2 errors generated.

vim +/cpu +648 drivers/base/arch_topology.c

   639
   640  int cpus_share_self_cache(const struct cpumask *cpu_map)
   641  {
   642          int cache_level, cpu_id;
   643          int first, last;
   644          int id = cpumask_first(cpu_map);
   645          int size = cpumask_weight(cpu_map);
   646
   647          for (cache_level = 0; cache_level < MAX_CACHE_LEVEL; cache_level++) {
 > 648                  if (!cache_topology[cpu][cache_level])
   649                          return -1;
   650
   651                  first = -1;
   652                  last = id;
   653                  for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
   654                          if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
   655                                  if (cpu_id < id || cpu_id >= id + size)
   656                                          return 0;
   657
   658                                  first = (first == -1)?cpu_id:first;
   659                                  last = cpu_id;
   660                          }
   661                  }
   662
   663                  if (first == id && last == id + size)
   664                          return 1;
   665          }
   666
   667          return 0;
   668  }
   669

---
0-DAY CI Kernel Test Service
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
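[Note: both robot reports point at the same defect: cpus_share_self_cache() indexes cache_topology[] with `cpu`, which is never declared in that function; from the surrounding code the intended index appears to be `id`, the first CPU of cpu_map. The final `last == id + size` comparison also looks off by one, since `last` can be at most `id + size - 1`. A minimal sketch of the function with those two assumed fixes follows; it is not a posted revision of the patch.]

/*
 * Sketch only: assumes the undeclared 'cpu' was meant to be 'id' (the
 * first CPU of cpu_map) and that the final range check is inclusive.
 */
int cpus_share_self_cache(const struct cpumask *cpu_map)
{
        int cache_level, cpu_id;
        int first, last;
        int id = cpumask_first(cpu_map);
        int size = cpumask_weight(cpu_map);

        for (cache_level = 0; cache_level < MAX_CACHE_LEVEL; cache_level++) {
                /* No cache node described at this level for the first CPU. */
                if (!cache_topology[id][cache_level])
                        return -1;

                first = -1;
                last = id;
                for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
                        if (cache_topology[id][cache_level] == cache_topology[cpu_id][cache_level]) {
                                /* A CPU outside cpu_map shares this cache: not exclusive. */
                                if (cpu_id < id || cpu_id >= id + size)
                                        return 0;

                                first = (first == -1) ? cpu_id : first;
                                last = cpu_id;
                        }
                }

                /* Exactly the CPUs of cpu_map share this cache level. */
                if (first == id && last == id + size - 1)
                        return 1;
        }

        return 0;
}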
On 15/03/2022 08:58, Qing Wang wrote:
> From: Wang Qing <wangqing@vivo.com>
(1) Can you share more information about your CPU topology?
I guess it is a single DSU (DynamIQ Shared Unit) ARMv9 system with 8
CPUs? So L3 spans over [CPU0..CPU7].
You also mentioned complexes. Am I right in assuming that [CPU0..CPU3]
are Cortex-A510 cores where each 2 CPUs share a complex?
What kind of uarch are the CPUs in [CPU4..CPU7]? Are they Cortex-A510's
as well? I'm not sure after reading your email:
https://lkml.kernel.org/r/SL2PR06MB30828CF9FF2879AFC9DC53D2BD0C9@SL2PR06MB3082.apcprd06.prod.outlook.com
You might run into the issue that individual CPUs of your system see a
different SD hierarchy in case that [CPU4..CPU7] aren't Cortex-A510's,
i.e. CPUs not sharing complexes.
(2) Related to your MC Sched Domain (SD) layer:
If you have a single DSU ARMv9 system, then in Linux kernel mainline you
shouldn't have sub-clustering of [CPU0..CPU3] and [CPU4...CPU7].
I.e. the cpu-map entry in your dts file should only list cores, not
clusters.
I know that in Android the cluster entries are used to sub-group
different uarch CPUs in an asymmetric CPU capacity system (a.k.a. Arm
DynamIQ and Phantom domains) but this is eclipsing the true L3 (LLC)
information and is not "supported" (in the sense of "used") in mainline.
But I have a hard time seeing what [CPU0..CPU3] or [CPU4..CPU7] are
sharing in your system.
(3) Why do you want this different SD hierarchy?
I assume in mainline your system will have a single SD which is MC (w/o
the Phantom domain approach from Android).
You mentioned cpus_share_cache(). Or is it the extra SD level which
changes the behaviour of CFS load-balancing? I'm just wondering since
EAS wouldn't be affected here. I'm sure I can understand this better
once we know more about your CPU topology.
[...]
>(1) Can you share more information about your CPU topology?
>
>I guess it is a single DSU (DynamIQ Shared Unit) ARMv9 system with 8
>CPUs? So L3 spans over [CPU0..CPU7].
>
>You also mentioned complexes. Am I right in assuming that [CPU0..CPU3]
>are Cortex-A510 cores where each 2 CPUs share a complex?
>
>What kind of uarch are the CPUs in [CPU4..CPU7]? Are they Cortex-A510's
>as well? I'm not sure after reading your email:

Yes. Android systems currently use the default_domain with wrong sd_flags.
Take Qualcomm SM8450 as an example; its CPU and cache topology (1+3+4) is:

| DSU                                                      |
| cluster0                | cluster1          | cluster2   |
| core0 core1 core2 core3 | core4 core5 core6 | core7      |
| complex0   | complex1   | ------------------------------ |
| L2 cache   | L2 cache   | L2    | L2    | L2    | L2     |
| L3 cache                                                 |

The sched domains now:
DIE[0-7]          (no SD_SHARE_PKG_RESOURCES)
MC[0-3][4-6][7]   (SD_SHARE_PKG_RESOURCES)

The sched domains should be:
DIE[0-7]          (SD_SHARE_PKG_RESOURCES)
MC[0-3][4-6][7]   (no SD_SHARE_PKG_RESOURCES)
*CLS[0-1][2-3]    (SD_SHARE_PKG_RESOURCES)

>https://lkml.kernel.org/r/SL2PR06MB30828CF9FF2879AFC9DC53D2BD0C9@SL2PR06MB3082.apcprd06.prod.outlook.com
>
>You might run into the issue that individual CPUs of your system see a
>different SD hierarchy in case that [CPU4..CPU7] aren't Cortex-A510's,
>i.e. CPUs not sharing complexes.
>
>(2) Related to your MC Sched Domain (SD) layer:
>
>If you have a single DSU ARMv9 system, then in Linux kernel mainline you
>shouldn't have sub-clustering of [CPU0..CPU3] and [CPU4..CPU7].
>
>I.e. the cpu-map entry in your dts file should only list cores, not
>clusters.

But in fact we will, as mentioned above.

>I know that in Android the cluster entries are used to sub-group
>different uarch CPUs in an asymmetric CPU capacity system (a.k.a. Arm
>DynamIQ and Phantom domains) but this is eclipsing the true L3 (LLC)
>information and is not "supported" (in the sense of "used") in mainline.
>
>But I have a hard time seeing what [CPU0..CPU3] or [CPU4..CPU7] are
>sharing in your system.

They share the L3 cache, but not L2, which is only shared within a complex.

>(3) Why do you want this different SD hierarchy?
>
>I assume in mainline your system will have a single SD which is MC (w/o
>the Phantom domain approach from Android).
>
>You mentioned cpus_share_cache(). Or is it the extra SD level which
>changes the behaviour of CFS load-balancing? I'm just wondering since
>EAS wouldn't be affected here. I'm sure I can understand this better
>once we know more about your CPU topology.

What I want to do is:
1. Configure the right sd_llc for the SD, ideally derived dynamically from DT.
2. Benefit from the shared (L2) cache of the complex, i.e. when we look for
   a sibling idle CPU, prefer the CPU sharing L2 (inside the complex) if L3
   is shared by all CPUs anyway.

Thanks,
Wang
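[Note: as an illustration of point 2 above, not part of the posted series: if the complex were exposed to the scheduler as a cluster level (CONFIG_SCHED_CLUSTER), an idle-CPU search could scan the L2-sharing siblings before the rest of the LLC span. The sketch below is hypothetical; it relies on topology_cluster_cpumask(), which already exists in include/linux/arch_topology.h (visible as context in the diff at the end of this thread), and on the existing available_idle_cpu() helper.]

/*
 * Hypothetical sketch, not from the posted patch: prefer an idle CPU
 * that shares L2 (same complex) with 'target' before falling back to
 * the remaining CPUs under the same LLC (shared L3).
 */
static int find_idle_cpu_prefer_complex(int target, const struct cpumask *llc_mask)
{
        const struct cpumask *cls_mask = topology_cluster_cpumask(target);
        int cpu;

        /* First pass: CPUs in the same complex (shared L2). */
        for_each_cpu_and(cpu, cls_mask, cpu_online_mask) {
                if (available_idle_cpu(cpu))
                        return cpu;
        }

        /* Second pass: the rest of the LLC (shared L3 only). */
        for_each_cpu_and(cpu, llc_mask, cpu_online_mask) {
                if (cpumask_test_cpu(cpu, cls_mask))
                        continue;
                if (available_idle_cpu(cpu))
                        return cpu;
        }

        return -1;
}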
On 16/03/2022 03:46, 王擎 wrote:
>
>> (1) Can you share more information about your CPU topology?
>>
>> I guess it is a single DSU (DynamIQ Shared Unit) ARMv9 system with 8
>> CPUs? So L3 spans over [CPU0..CPU7].
>>
>> You also mentioned complexes. Am I right in assuming that [CPU0..CPU3]
>> are Cortex-A510 cores where each 2 CPUs share a complex?
>>
>> What kind of uarch are the CPUs in [CPU4..CPU7]? Are they Cortex-A510's
>> as well? I'm not sure after reading your email:
>
> Yes. Android systems currently use the default_domain with wrong sd_flags.
> Take Qualcomm SM8450 as an example; its CPU and cache topology (1+3+4) is:

Ah, your system looks like this:

      .---------------.
CPU   |0 1 2 3 4 5 6 7|
      +---------------+
uarch |l l l l m m m b|  (so called tri-gear: little, medium, big)
      +---------------+
L2    |   |   | | | | |
      +---------------+
L3    |<--         -->|
      +---------------+
      |<-- cluster -->|
      +---------------+
      |<--   DSU   -->|
      '---------------'

> | DSU                                                      |
> | cluster0                | cluster1          | cluster2   |

^^^ Those aren't real clusters, hence the name <Phantom> SD. The cluster
is [CPU0..CPU7]. Android uses the Phantom SD to subgroup CPUs with the same
uarch. That's why you get your MC->DIE SDs on your system and
SHARE_PKG_RESOURCES (ShPR) on MC, rather than DIE.

Note, you should already have an asymmetric SD hierarchy. CPU7 should
only have DIE, not MC! Each CPU has its own SD hierarchy!

> | core0 core1 core2 core3 | core4 core5 core6 | core7      |
> | complex0   | complex1   | ------------------------------ |
> | L2 cache   | L2 cache   | L2    | L2    | L2    | L2     |
> | L3 cache                                                  |
>
> The sched domains now:
> DIE[0-7]          (no SD_SHARE_PKG_RESOURCES)
> MC[0-3][4-6][7]   (SD_SHARE_PKG_RESOURCES)
>
> The sched domains should be:
> DIE[0-7]          (SD_SHARE_PKG_RESOURCES)
> MC[0-3][4-6][7]   (no SD_SHARE_PKG_RESOURCES)

First remember, using the Phantom SD in Android is already a hack. Normally
your system should only have an MC SD for each CPU (with ShPR).

Now, if you want to move ShPR from MC to DIE then a custom topology
table should do it, i.e. you don't have to change any generic task
scheduler code:

static inline int cpu_cpu_flags(void)
{
        return SD_SHARE_PKG_RESOURCES;
}

static struct sched_domain_topology_level custom_topology[] = {
#ifdef CONFIG_SCHED_SMT
        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif

#ifdef CONFIG_SCHED_CLUSTER
        { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
#endif

#ifdef CONFIG_SCHED_MC
        { cpu_coregroup_mask, SD_INIT_NAME(MC) },
                         ^^^^
#endif
        { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) },
                        ^^^^^^^^^^^^^
        { NULL, },
};

set_sched_topology(custom_topology);

> *CLS[0-1][2-3]    (SD_SHARE_PKG_RESOURCES)

But why do you want to have yet another SD underneath MC for CPU0-CPU3?
sd_llc is assigned to the highest ShPR SD, which would be DIE.

>> https://lkml.kernel.org/r/SL2PR06MB30828CF9FF2879AFC9DC53D2BD0C9@SL2PR06MB3082.apcprd06.prod.outlook.com
>>
>> You might run into the issue that individual CPUs of your system see a
>> different SD hierarchy in case that [CPU4..CPU7] aren't Cortex-A510's,
>> i.e. CPUs not sharing complexes.
>>
>> (2) Related to your MC Sched Domain (SD) layer:
>>
>> If you have a single DSU ARMv9 system, then in Linux kernel mainline you
>> shouldn't have sub-clustering of [CPU0..CPU3] and [CPU4..CPU7].
>>
>> I.e. the cpu-map entry in your dts file should only list cores, not
>> clusters.
>
> But in fact we will, as mentioned above.

OK, so your system needs this `fake` sub-grouping on uarch boundaries.
Probably because of (out-of-tree) Android/platform code?

Have you tried to run on a clean mainline SD hierarchy (only MC)? Is the
Phantom SD still required?

>> I know that in Android the cluster entries are used to sub-group
>> different uarch CPUs in an asymmetric CPU capacity system (a.k.a. Arm
>> DynamIQ and Phantom domains) but this is eclipsing the true L3 (LLC)
>> information and is not "supported" (in the sense of "used") in mainline.
>>
>> But I have a hard time seeing what [CPU0..CPU3] or [CPU4..CPU7] are
>> sharing in your system.
>
> They share the L3 cache, but not L2, which is only shared within a complex.

I should have written: What do they share exclusively? I can only see
that they ([CPU0..CPU3], [CPU4..CPU6], [CPU7]) share uarch exclusively.
Which relates to this fake (Phantom) SD. L3 is shared between all CPUs.

>> (3) Why do you want this different SD hierarchy?
>>
>> I assume in mainline your system will have a single SD which is MC (w/o
>> the Phantom domain approach from Android).
>>
>> You mentioned cpus_share_cache(). Or is it the extra SD level which
>> changes the behaviour of CFS load-balancing? I'm just wondering since
>> EAS wouldn't be affected here. I'm sure I can understand this better
>> once we know more about your CPU topology.
>
> What I want to do is:
> 1. Configure the right sd_llc for the SD, ideally derived dynamically from DT.

So this should be ShPR on DIE. You have 2 choices here: use the flat MC SD
(mainline) or the Phantom SDs + custom_topology.

> 2. Benefit from the shared (L2) cache of the complex, i.e. when we look for
>    a sibling idle CPU, prefer the CPU sharing L2 (inside the complex) if L3
>    is shared by all CPUs anyway.

But cpus_share_cache() operates on sd_llc_id, which relates to the DIE SD
even for [CPU0..CPU3]?
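[Note: for context on that last question, cpus_share_cache() in mainline is simply a comparison of the per-CPU sd_llc_id values, so it follows whatever domain ends up as sd_llc. Roughly (the exact mainline code may differ in detail):]

/* Two CPUs count as cache-sharing iff they resolve to the same sd_llc. */
bool cpus_share_cache(int this_cpu, int that_cpu)
{
        if (this_cpu == that_cpu)
                return true;

        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}

[With ShPR moved up to DIE, sd_llc spans all eight CPUs, so an extra CLS level below MC would not change what cpus_share_cache() returns.]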
>>
>>> (1) Can you share more information about your CPU topology?
>>>
>>> I guess it is a single DSU (DynamIQ Shared Unit) ARMv9 system with 8
>>> CPUs? So L3 spans over [CPU0..CPU7].
>>>
>>> You also mentioned complexes. Am I right in assuming that [CPU0..CPU3]
>>> are Cortex-A510 cores where each 2 CPUs share a complex?
>>>
>>> What kind of uarch are the CPUs in [CPU4..CPU7]? Are they Cortex-A510's
>>> as well? I'm not sure after reading your email:
>>
>> Yes. Android systems currently use the default_domain with wrong sd_flags.
>> Take Qualcomm SM8450 as an example; its CPU and cache topology (1+3+4) is:
>
>Ah, your system looks like this:
>
>      .---------------.
>CPU   |0 1 2 3 4 5 6 7|
>      +---------------+
>uarch |l l l l m m m b|  (so called tri-gear: little, medium, big)
>      +---------------+
>L2    |   |   | | | | |
>      +---------------+
>L3    |<--         -->|
>      +---------------+
>      |<-- cluster -->|
>      +---------------+
>      |<--   DSU   -->|
>      '---------------'
>
>> | DSU                                                      |
>> | cluster0                | cluster1          | cluster2   |
>
>^^^ Those aren't real clusters, hence the name <Phantom> SD. The cluster
>is [CPU0..CPU7]. Android uses the Phantom SD to subgroup CPUs with the same
>uarch. That's why you get your MC->DIE SDs on your system and
>SHARE_PKG_RESOURCES (ShPR) on MC, rather than DIE.
>
>Note, you should already have an asymmetric SD hierarchy. CPU7 should
>only have DIE, not MC! Each CPU has its own SD hierarchy!
>
>> | core0 core1 core2 core3 | core4 core5 core6 | core7      |
>> | complex0   | complex1   | ------------------------------ |
>> | L2 cache   | L2 cache   | L2    | L2    | L2    | L2     |
>> | L3 cache                                                  |
>>
>> The sched domains now:
>> DIE[0-7]          (no SD_SHARE_PKG_RESOURCES)
>> MC[0-3][4-6][7]   (SD_SHARE_PKG_RESOURCES)
>>
>> The sched domains should be:
>> DIE[0-7]          (SD_SHARE_PKG_RESOURCES)
>> MC[0-3][4-6][7]   (no SD_SHARE_PKG_RESOURCES)
>
>First remember, using the Phantom SD in Android is already a hack. Normally
>your system should only have an MC SD for each CPU (with ShPR).
>
>Now, if you want to move ShPR from MC to DIE then a custom topology
>table should do it, i.e. you don't have to change any generic task
>scheduler code:
>
>static inline int cpu_cpu_flags(void)
>{
>        return SD_SHARE_PKG_RESOURCES;
>}
>
>static struct sched_domain_topology_level custom_topology[] = {
>#ifdef CONFIG_SCHED_SMT
>        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
>#endif
>
>#ifdef CONFIG_SCHED_CLUSTER
>        { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
>#endif
>
>#ifdef CONFIG_SCHED_MC
>        { cpu_coregroup_mask, SD_INIT_NAME(MC) },
>                         ^^^^
>#endif
>        { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) },
>                        ^^^^^^^^^^^^^
>        { NULL, },
>};
>
>set_sched_topology(custom_topology);

However, due to the limitation of GKI, we cannot change the sd topology
by ourselves. But we can configure the CPU and cache topology through DT.
So why not get the ShPR from DT first? If it is not configured, use the
default.

>
>> *CLS[0-1][2-3]    (SD_SHARE_PKG_RESOURCES)
>
>But why do you want to have yet another SD underneath MC for CPU0-CPU3?
>sd_llc is assigned to the highest ShPR SD, which would be DIE.

We want to do something with the shared L2 cache of the complex (like WALT);
you can ignore it here, we can talk about it when it is done.

Thanks,
Wang
On 23/03/2022 07:45, 王擎 wrote:

[...]

>> Now, if you want to move ShPR from MC to DIE then a custom topology
>> table should do it, i.e. you don't have to change any generic task
>> scheduler code:
>>
>> static inline int cpu_cpu_flags(void)
>> {
>>         return SD_SHARE_PKG_RESOURCES;
>> }
>>
>> static struct sched_domain_topology_level custom_topology[] = {
>> #ifdef CONFIG_SCHED_SMT
>>         { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
>> #endif
>>
>> #ifdef CONFIG_SCHED_CLUSTER
>>         { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
>> #endif
>>
>> #ifdef CONFIG_SCHED_MC
>>         { cpu_coregroup_mask, SD_INIT_NAME(MC) },
>>                          ^^^^
>> #endif
>>         { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) },
>>                         ^^^^^^^^^^^^^
>>         { NULL, },
>> };
>>
>> set_sched_topology(custom_topology);
>
> However, due to the limitation of GKI, we cannot change the sd topology
> by ourselves. But we can configure the CPU and cache topology through DT.

IMHO, mainline can't do anything here. You should talk to your Android
platform provider in this case. Android concepts like the Generic Kernel
Image (GKI) are normally not discussed here.

From the mainline perspective we're OK with scheduling such a system flat,
e.g. only with a single MC SD [CPU0..CPU7] for each CPU. It could be that
the Phantom SD is still needed for additional proprietary or Android
add-ons though?

In case you would remove `clusterX` from your DT cpu-map (the Phantom SD
information, i.e. the reason why you have e.g. for CPU0: `MC (ShPR)
[CPU0..CPU3] and DIE [CPU0..CPU7]`), you should see the natural topology:
only `MC (ShPR) [CPU0..CPU7]`.

> So why not get the ShPR from DT first? If it is not configured, use the
> default.

I'm not convinced that mainline will accept a change which is necessary
for an out-of-tree tweak (the Phantom SD).

>>> *CLS[0-1][2-3]    (SD_SHARE_PKG_RESOURCES)
>>
>> But why do you want to have yet another SD underneath MC for CPU0-CPU3?
>> sd_llc is assigned to the highest ShPR SD, which would be DIE.
>
> We want to do something with the shared L2 cache of the complex (like WALT);
> you can ignore it here, we can talk about it when it is done.

I assume you refer to the proprietary load-tracking mechanism `Window
Assisted Load Tracking` (WALT) here? It's also not in mainline.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 73fc645..62bbd9a
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -508,6 +508,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int ncores = num_possible_cpus();
 
 	init_cpu_topology();
+	init_cpu_cache_topology();
 
 	smp_store_cpu_info(smp_processor_id());
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 27df5c1..94cf649
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -723,6 +723,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int this_cpu;
 
 	init_cpu_topology();
+	init_cpu_cache_topology();
 
 	this_cpu = smp_processor_id();
 	store_cpu_topology(this_cpu);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 622f226..4f5a8b7
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -41,6 +41,7 @@ static DECLARE_COMPLETION(cpu_running);
 void __init smp_prepare_boot_cpu(void)
 {
 	init_cpu_topology();
+	init_cpu_cache_topology();
 #ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE
 	apply_boot_alternatives();
 #endif
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 9761541..127f540
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -609,6 +609,65 @@ static int __init parse_dt_topology(void)
 #endif
 
 /*
+ * cpu cache topology table
+ */
+#define MAX_CACHE_LEVEL 7
+struct device_node *cache_topology[NR_CPUS][MAX_CACHE_LEVEL];
+
+void init_cpu_cache_topology(void)
+{
+	struct device_node *node_cpu, *node_cache;
+	int cpu, level;
+
+	for_each_possible_cpu(cpu) {
+		node_cpu = of_get_cpu_node(cpu, NULL);
+		if (!node_cpu)
+			continue;
+
+		level = 0;
+		node_cache = node_cpu;
+		while (level < MAX_CACHE_LEVEL) {
+			node_cache = of_parse_phandle(node_cache, "next-level-cache", 0);
+			if (!node_cache)
+				break;
+
+			cache_topology[cpu][level++] = node_cache;
+		}
+		of_node_put(node_cpu);
+	}
+}
+
+int cpus_share_self_cache(const struct cpumask *cpu_map)
+{
+	int cache_level, cpu_id;
+	int first, last;
+	int id = cpumask_first(cpu_map);
+	int size = cpumask_weight(cpu_map);
+
+	for (cache_level = 0; cache_level < MAX_CACHE_LEVEL; cache_level++) {
+		if (!cache_topology[cpu][cache_level])
+			return -1;
+
+		first = -1;
+		last = id;
+		for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
+			if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
+				if (cpu_id < id || cpu_id >= id + size)
+					return 0;
+
+				first = (first == -1)?cpu_id:first;
+				last = cpu_id;
+			}
+		}
+
+		if (first == id && last == id + size)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
  * cpu topology table
  */
 struct cpu_topology cpu_topology[NR_CPUS];
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b..862e584
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -82,6 +82,8 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_cluster_cpumask(cpu)	(&cpu_topology[cpu].cluster_sibling)
 #define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
 void init_cpu_topology(void);
+void init_cpu_cache_topology(void);
+int cpus_share_self_cache(const struct cpumask *cpu_map);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 const struct cpumask *cpu_clustergroup_mask(int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d201a70..8264e2d
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1527,6 +1527,7 @@ sd_init(struct sched_domain_topology_level *tl,
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	int sd_id, sd_weight, sd_flags = 0;
 	struct cpumask *sd_span;
+	int ret;
 
 #ifdef CONFIG_NUMA
 	/*
@@ -1539,6 +1540,15 @@ sd_init(struct sched_domain_topology_level *tl,
 
 	if (tl->sd_flags)
 		sd_flags = (*tl->sd_flags)();
+
+#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
+	ret = cpus_share_self_cache(cpu_map);
+	if (ret == 1)
+		sd_flags |= SD_SHARE_PKG_RESOURCES;
+	else if (ret == 0)
+		sd_flags &= ~SD_SHARE_PKG_RESOURCES;
+#endif
+
 	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
 			"wrong sd_flags in topology description\n"))
 		sd_flags &= TOPOLOGY_SD_FLAGS;