@@ -229,6 +229,132 @@ static const int a15irqmap[] = {
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
};
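+
+/*
+ * Walk CLIDR_EL1 of CPU 0 (CPUs are assumed homogeneous) and decode the
+ * matching banked CCSIDR_EL1 value for every implemented cache, filling
+ * @caches with the geometry of each one.  A "separate" CLIDR entry is
+ * expanded into two records (data and instruction).  Returns the number
+ * of records written.
+ */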
+unsigned int virt_get_caches(const VirtMachineState *vms,
+ CPUCaches *caches)
+{
+ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0)); /* assume homogeneous CPUs */
+ bool ccidx = cpu_isar_feature(any_ccidx, armcpu);
+ unsigned int num_cache, i;
+ int level_instr = 1, level_data = 1;
+
+ for (i = 0, num_cache = 0; i < CPU_MAX_CACHES; i++, num_cache++) {
+ int type = (armcpu->clidr >> (3 * i)) & 7;
+ int bank_index;
+ int level;
+ CPUCacheType cache_type;
+
+ if (type == 0) {
+ break;
+ }
+
+ switch (type) {
+ case 1:
+ cache_type = INSTRUCTION;
+ level = level_instr;
+ break;
+ case 2:
+ cache_type = DATA;
+ level = level_data;
+ break;
+ case 4:
+ cache_type = UNIFIED;
+ level = level_instr > level_data ? level_instr : level_data;
+ break;
+ case 3: /* Split - Do data first */
+ cache_type = DATA;
+ level = level_data;
+ break;
+ default:
+ error_setg(&error_abort, "Unrecognized cache type");
+ return 0;
+ }
+ /*
+ * ccsidr is indexed using both the level and whether it is
+ * an instruction cache. Unified caches use the same storage
+ * as data caches.
+ */
+ bank_index = (i * 2) | ((type == 1) ? 1 : 0);
+ if (ccidx) {
+ caches[num_cache] = (CPUCaches) {
+ .type = cache_type,
+ .level = level,
+ .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ CCIDX_LINESIZE) + 4),
+ .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ CCIDX_ASSOCIATIVITY) + 1,
+ .sets = FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
+ CCIDX_NUMSETS) + 1,
+ };
+ } else {
+ caches[num_cache] = (CPUCaches) {
+ .type = cache_type,
+ .level = level,
+ .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1, LINESIZE) + 4),
+ .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ ASSOCIATIVITY) + 1,
+ .sets = FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
+ NUMSETS) + 1,
+ };
+ }
+ caches[num_cache].size = caches[num_cache].associativity *
+ caches[num_cache].sets * caches[num_cache].linesize;
+
+ /* Break one 'split' entry up into two records */
+ if (type == 3) {
+ num_cache++;
+ bank_index = (i * 2) | 1;
+ if (ccidx) {
+ /* Instruction cache: bottom bit set when reading banked reg */
+ caches[num_cache] = (CPUCaches) {
+ .type = INSTRUCTION,
+ .level = level_instr,
+ .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ CCIDX_LINESIZE) + 4),
+ .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ CCIDX_ASSOCIATIVITY) + 1,
+ .sets = FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
+ CCIDX_NUMSETS) + 1,
+ };
+ } else {
+ caches[num_cache] = (CPUCaches) {
+ .type = INSTRUCTION,
+ .level = level_instr,
+ .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1, LINESIZE) + 4),
+ .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
+ CCSIDR_EL1,
+ ASSOCIATIVITY) + 1,
+ .sets = FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
+ NUMSETS) + 1,
+ };
+ }
+ caches[num_cache].size = caches[num_cache].associativity *
+ caches[num_cache].sets * caches[num_cache].linesize;
+ }
+ switch (type) {
+ case 1:
+ level_instr++;
+ break;
+ case 2:
+ level_data++;
+ break;
+ case 3:
+ case 4:
+ level_instr++;
+ level_data++;
+ break;
+ }
+ }
+
+ return num_cache;
+}
+
static void create_randomness(MachineState *ms, const char *node)
{
struct {
@@ -412,13 +538,96 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms)
}
}
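+
+/*
+ * Emit a single DT cache node at @nodepath describing @cache.  Unless this
+ * is the (assumed top-level) L3 cache, its "next-level-cache" is pointed
+ * at the previously created, higher-level node; *next_level is then
+ * updated to this node's phandle so the next, lower level can link to it.
+ */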
+static void add_cache_node(void *fdt, char *nodepath, CPUCaches cache,
+                           uint32_t *next_level)
+{
+ /* Assume L2/3 are unified caches. */
+
+ uint32_t phandle;
+
+ qemu_fdt_add_path(fdt, nodepath);
+ phandle = qemu_fdt_alloc_phandle(fdt);
+ qemu_fdt_setprop_cell(fdt, nodepath, "phandle", phandle);
+ qemu_fdt_setprop_cell(fdt, nodepath, "cache-level", cache.level);
+ qemu_fdt_setprop_cell(fdt, nodepath, "cache-size", cache.size);
+ qemu_fdt_setprop_cell(fdt, nodepath, "cache-block-size", cache.linesize);
+ qemu_fdt_setprop_cell(fdt, nodepath, "cache-sets", cache.sets);
+ qemu_fdt_setprop(fdt, nodepath, "cache-unified", NULL, 0);
+    if (cache.level != 3) {
+        /* L3 is assumed to be the top level, so it gets no next-level-cache. */
+ qemu_fdt_setprop_cell(fdt, nodepath, "next-level-cache", *next_level);
+ }
+
+ *next_level = phandle;
+}
+
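+/*
+ * Create the l<level>-cache DT nodes for @cpu_id, from @top_level down to
+ * @bottom_level, chaining them together (and to whatever *next_level
+ * already points at) via "next-level-cache".  Returns true if at least one
+ * node was created; *next_level ends up holding the phandle of the lowest
+ * level created.
+ */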
+static bool add_cpu_cache_hierarchy(void *fdt, CPUCaches *cache,
+                                    uint32_t cache_cnt,
+                                    uint32_t top_level,
+                                    uint32_t bottom_level,
+                                    uint32_t cpu_id,
+                                    uint32_t *next_level)
+{
+ bool found_cache = false;
+ char *nodepath;
+
+ for (int level = top_level; level >= bottom_level; level--) {
+ for (int i = 0; i < cache_cnt; i++) {
+            if (cache[i].level != level) {
+ continue;
+ }
+
+ nodepath = g_strdup_printf("/cpus/cpu@%d/l%d-cache",
+ cpu_id, level);
+ add_cache_node(fdt, nodepath, cache[i], next_level);
+ found_cache = true;
+ g_free(nodepath);
+ }
+ }
+
+ return found_cache;
+}
+
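+/*
+ * Describe an L1 cache directly in the CPU node through the
+ * <prefix>-block-size, <prefix>-size and <prefix>-sets properties.
+ */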
+static void set_cache_properties(void *fdt, const char *nodename,
+ const char *prefix, CPUCaches cache)
+{
+ char prop_name[64];
+
+ snprintf(prop_name, sizeof(prop_name), "%s-block-size", prefix);
+ qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.linesize);
+
+ snprintf(prop_name, sizeof(prop_name), "%s-size", prefix);
+ qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.size);
+
+ snprintf(prop_name, sizeof(prop_name), "%s-sets", prefix);
+ qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.sets);
+}
+
static void fdt_add_cpu_nodes(const VirtMachineState *vms)
{
int cpu;
int addr_cells = 1;
const MachineState *ms = MACHINE(vms);
+ const MachineClass *mc = MACHINE_GET_CLASS(ms);
const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
int smp_cpus = ms->smp.cpus;
+ int socket_id, cluster_id, core_id, thread_id;
+ uint32_t next_level = 0;
+ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
+ uint32_t thread_offset = 0;
+ int last_socket = -1, last_cluster = -1, last_core = -1, last_thread = -1;
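+    /*
+     * For each topology level (socket/cluster/core/thread), [bottom, top]
+     * is the range of cache levels whose nodes get attached there; every
+     * lower topology level then continues below the previous bottom.
+     * Start from L3, the deepest level handled here.
+     */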
+ int top_node = 3, top_cluster = 3, top_core = 3, top_thread = 3;
+ int bottom_node = 3, bottom_cluster = 3, bottom_core = 3, bottom_thread = 3;
+ unsigned int num_cache;
+    CPUCaches caches[CPU_MAX_CACHES];
+ bool cache_created = false;
+
+ num_cache = virt_get_caches(vms, caches);
+
+ if (mc->smp_props.has_caches &&
+ partial_cache_description(ms, caches, num_cache)) {
+ error_setg(&error_fatal, "Missing cache description");
+ return;
+ }
/*
* See Linux Documentation/devicetree/bindings/arm/cpus.yaml
@@ -447,9 +656,15 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
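+        /*
+         * Decompose the linear CPU index into its topology coordinates;
+         * threads vary fastest, then cores, clusters and sockets.
+         */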
+ socket_id = cpu / (ms->smp.clusters * ms->smp.cores * ms->smp.threads);
+ cluster_id = cpu / (ms->smp.cores * ms->smp.threads) % ms->smp.clusters;
+ core_id = cpu / (ms->smp.threads) % ms->smp.cores;
+        thread_id = cpu % ms->smp.threads;
+
char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
CPUState *cs = CPU(armcpu);
+ const char *prefix = NULL;
qemu_fdt_add_subnode(ms->fdt, nodename);
qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
@@ -479,6 +694,180 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
qemu_fdt_alloc_phandle(ms->fdt));
}
+ if (!vmc->no_cpu_topology && num_cache) {
+ for (uint8_t i = 0; i < num_cache; i++) {
+ /* only level 1 in the CPU entry */
+ if (caches[i].level > 1) {
+ continue;
+ }
+
+ if (caches[i].type == INSTRUCTION) {
+ prefix = "i-cache";
+ } else if (caches[i].type == DATA) {
+ prefix = "d-cache";
+ } else if (caches[i].type == UNIFIED) {
+ error_setg(&error_fatal,
+ "Unified type is not implemented at level %d",
+ caches[i].level);
+ return;
+ } else {
+ error_setg(&error_fatal, "Undefined cache type");
+ return;
+ }
+
+ set_cache_properties(ms->fdt, nodename, prefix, caches[i]);
+ }
+ }
+
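+        /*
+         * Emit the shared cache nodes once per socket, cluster, core and
+         * thread, the first time a new id is seen at that level.  The
+         * *_offset phandles chain each level's lowest cache to the level
+         * above, and finally to this CPU's "next-level-cache".
+         */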
+ if (socket_id != last_socket) {
+ bottom_node = top_node;
+ /* this assumes socket as the highest topological level */
+ socket_offset = 0;
+ cluster_offset = 0;
+ if (cache_described_at(ms, CPU_TOPOLOGY_LEVEL_SOCKET) &&
+ find_the_lowest_level_cache_defined_at_level(ms,
+ &bottom_node,
+ CPU_TOPOLOGY_LEVEL_SOCKET)) {
+
+ if (bottom_node == 1) {
+ error_report(
+                        "Cannot share L1 at socket_id %d. DT limitation on "
+                        "sharing at cache level = 1",
+                        socket_id);
+ }
+
+ cache_created = add_cpu_cache_hierarchy(ms->fdt, caches,
+ num_cache,
+ top_node,
+ bottom_node, cpu,
+ &socket_offset);
+
+ if (!cache_created) {
+ error_setg(&error_fatal,
+ "Socket: No caches at levels %d-%d",
+ top_node, bottom_node);
+ return;
+ }
+
+ top_cluster = bottom_node - 1;
+ }
+
+ last_socket = socket_id;
+ }
+
+ if (cluster_id != last_cluster) {
+ bottom_cluster = top_cluster;
+ cluster_offset = socket_offset;
+ core_offset = 0;
+ if (cache_described_at(ms, CPU_TOPOLOGY_LEVEL_CLUSTER) &&
+ find_the_lowest_level_cache_defined_at_level(ms,
+ &bottom_cluster,
+ CPU_TOPOLOGY_LEVEL_CLUSTER)) {
+
+ cache_created = add_cpu_cache_hierarchy(ms->fdt, caches,
+ num_cache,
+ top_cluster,
+ bottom_cluster, cpu,
+ &cluster_offset);
+ if (bottom_cluster == 1) {
+ error_report(
+ "Cannot share L1 at socket_id %d, cluster_id %d. "
+ "DT limitation on sharing at cache level = 1.",
+ socket_id, cluster_id);
+ }
+
+ if (!cache_created) {
+ error_setg(&error_fatal,
+ "Cluster: No caches at levels %d-%d",
+ top_cluster, bottom_cluster);
+ return;
+ }
+
+ top_core = bottom_cluster - 1;
+ top_thread = top_core;
+ } else if (top_cluster == bottom_node - 1) {
+ top_core = bottom_node - 1;
+ top_thread = top_core;
+ }
+
+ last_cluster = cluster_id;
+ }
+
+ if (core_id != last_core) {
+ bottom_core = top_core;
+ core_offset = cluster_offset;
+ if (cache_described_at(ms, CPU_TOPOLOGY_LEVEL_CORE) &&
+ find_the_lowest_level_cache_defined_at_level(ms,
+ &bottom_core,
+ CPU_TOPOLOGY_LEVEL_CORE)) {
+
+ if (bottom_core == 1) {
+ bottom_core++;
+ } else {
+ cache_created = add_cpu_cache_hierarchy(ms->fdt,
+ caches,
+ num_cache,
+ top_core,
+ bottom_core, cpu,
+ &core_offset);
+
+ if (!cache_created) {
+ error_setg(&error_fatal,
+ "Core: No caches at levels %d-%d",
+ top_core, bottom_core);
+ return;
+ }
+ }
+
+ top_thread = bottom_core - 1;
+ } else if (top_cluster == bottom_node - 1) {
+ /* socket cache but no cluster cache and no core cache */
+ top_thread = top_cluster;
+ } else if (top_core == bottom_cluster - 1) {
+                /* cluster cache but no socket cache and no core cache */
+ top_thread = top_core;
+ }
+
+ last_core = core_id;
+ }
+
+ if (ms->smp.threads > 1) {
+ thread_offset = core_offset;
+ if (thread_id != last_thread) {
+ bottom_thread = top_thread;
+ if (cache_described_at(ms, CPU_TOPOLOGY_LEVEL_THREAD) &&
+ find_the_lowest_level_cache_defined_at_level(ms,
+ &bottom_thread,
+ CPU_TOPOLOGY_LEVEL_THREAD)) {
+
+ if (bottom_thread == 1) {
+ bottom_thread++;
+ } else {
+ cache_created = add_cpu_cache_hierarchy(ms->fdt,
+ caches,
+ num_cache,
+ top_thread,
+ bottom_thread,
+ cpu,
+ &thread_offset);
+
+ if (!cache_created) {
+                            error_setg(&error_fatal,
+                                       "Thread: No caches at levels %d-%d",
+                                       top_thread, bottom_thread);
+ return;
+ }
+ }
+ }
+
+ last_thread = thread_id;
+ }
+ }
+
+        next_level = (ms->smp.threads > 1) ? thread_offset : core_offset;
+        if (next_level) {
+            qemu_fdt_setprop_cell(ms->fdt, nodename, "next-level-cache",
+                                  next_level);
+        }
+
g_free(nodename);
}
@@ -3147,6 +3536,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
hc->unplug = virt_machine_device_unplug_cb;
mc->nvdimm_supported = true;
mc->smp_props.clusters_supported = true;
+ /* Supported caches */
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1D] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1I] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L2] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L3] = true;
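+    /*
+     * Example (assuming the generic smp-cache machine property syntax):
+     *   -machine virt,smp-cache.0.cache=l2,smp-cache.0.topology=cluster
+     */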
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
/* platform instead of architectural choice */
@@ -102,4 +102,96 @@ static void cpu_core_register_types(void)
type_register_static(&cpu_core_type_info);
}
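+
+/*
+ * Return true if the machine's cache description places any cache
+ * (L1I, L1D, L2 or L3) at the given CPU topology level.
+ */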
+bool cache_described_at(const MachineState *ms, CpuTopologyLevel level)
+{
+ if (machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3) == level ||
+ machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2) == level ||
+ machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I) == level ||
+ machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D) == level) {
+ return true;
+ }
+ return false;
+}
+
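+/*
+ * Check that every cache level reported by the CPU also has a topology
+ * level assigned in the machine description.  Returns the first cache
+ * level still left at CPU_TOPOLOGY_LEVEL_DEFAULT, or 0 if the description
+ * is complete.
+ */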
+int partial_cache_description(const MachineState *ms, CPUCaches *caches,
+ int num_caches)
+{
+ int level, c;
+
+ for (level = 1; level < num_caches; level++) {
+ for (c = 0; c < num_caches; c++) {
+ if (caches[c].level != level) {
+ continue;
+ }
+
+ switch (level) {
+ case 1:
+                /*
+                 * Assume a split L1 with both L1I and L1D present;
+                 * checking L1I here stands in for both, though strictly
+                 * each should be checked.
+                 */
+ if (machine_get_cache_topo_level(ms,
+ CACHE_LEVEL_AND_TYPE_L1I) ==
+ CPU_TOPOLOGY_LEVEL_DEFAULT) {
+ return level;
+ }
+ break;
+ case 2:
+ if (machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2) ==
+ CPU_TOPOLOGY_LEVEL_DEFAULT) {
+ return level;
+ }
+ break;
+ case 3:
+ if (machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3) ==
+ CPU_TOPOLOGY_LEVEL_DEFAULT) {
+ return level;
+ }
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Find the lowest cache level that the machine description defines at the
+ * given topology level.  L2 and L3 are assumed to be unified caches and L1
+ * to be split into L1D and L1I.  The result is fed to the cache-node
+ * builders so that caches are created at the right level.
+ */
+bool find_the_lowest_level_cache_defined_at_level(const MachineState *ms,
+                                                  int *level_found,
+                                                  CpuTopologyLevel topo_level)
+{
+ CpuTopologyLevel level;
+
+ level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I);
+ if (level == topo_level) {
+ *level_found = 1;
+ return true;
+ }
+
+ level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D);
+ if (level == topo_level) {
+ *level_found = 1;
+ return true;
+ }
+
+ level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2);
+ if (level == topo_level) {
+ *level_found = 2;
+ return true;
+ }
+
+ level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3);
+ if (level == topo_level) {
+ *level_found = 3;
+ return true;
+ }
+
+ return false;
+}
+
type_init(cpu_core_register_types)
@@ -39,6 +39,7 @@
#include "system/kvm.h"
#include "hw/intc/arm_gicv3_common.h"
#include "qom/object.h"
+#include "hw/cpu/core.h"
#define NUM_GICV2M_SPIS 64
#define NUM_VIRTIO_TRANSPORTS 32
@@ -50,6 +51,8 @@
/* GPIO pins */
#define GPIO_PIN_POWER_BUTTON 3
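+
+/* Maximum number of CLIDR_EL1 cache levels walked by virt_get_caches() */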
+#define CPU_MAX_CACHES 16
+
enum {
VIRT_FLASH,
VIRT_MEM,
@@ -189,6 +192,7 @@ OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE)
void virt_acpi_setup(VirtMachineState *vms);
bool virt_is_acpi_enabled(VirtMachineState *vms);
+unsigned int virt_get_caches(const VirtMachineState *vms, CPUCaches *caches);
/* Return number of redistributors that fit in the specified region */
static uint32_t virt_redist_capacity(VirtMachineState *vms, int region)
@@ -25,6 +25,32 @@ struct CPUCore {
int nr_threads;
};
+typedef enum CPUCacheType {
+ DATA,
+ INSTRUCTION,
+ UNIFIED,
+} CPUCacheType;
+
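+/* Type and geometry of a single CPU cache */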
+typedef struct CPUCaches {
+ CPUCacheType type;
+ uint32_t pptt_id;
+ uint32_t sets;
+ uint32_t size;
+ uint32_t level;
+ uint16_t linesize;
+ uint8_t attributes; /* write policy: 0x0 write back, 0x1 write through */
+ uint8_t associativity;
+} CPUCaches;
+
+int partial_cache_description(const MachineState *ms, CPUCaches *caches,
+ int num_caches);
+
+bool cache_described_at(const MachineState *ms, CpuTopologyLevel level);
+
+bool find_the_lowest_level_cache_defined_at_level(const MachineState *ms,
+ int *level_found,
+ CpuTopologyLevel topo_level);
+
/* Note: topology field names need to be kept in sync with
* 'CpuInstanceProperties' */