@@ -1,15 +1,20 @@
#ifndef KVM__KVM_CONFIG_ARCH_H
#define KVM__KVM_CONFIG_ARCH_H
+int vcpu_affinity_parser(const struct option *opt, const char *arg, int unset);
+
#define ARM_OPT_ARCH_RUN(cfg) \
OPT_BOOLEAN('\0', "aarch32", &(cfg)->aarch32_guest, \
"Run AArch32 guest"), \
OPT_BOOLEAN('\0', "pmu", &(cfg)->has_pmuv3, \
"Create PMUv3 device. The emulated PMU will be" \
" set to the PMU associated with the" \
- " main thread"), \
+ " main thread, unless --vcpu-affinity is set"), \
OPT_BOOLEAN('\0', "disable-mte", &(cfg)->mte_disabled, \
"Disable Memory Tagging Extension"), \
+ OPT_CALLBACK('\0', "vcpu-affinity", kvm, "cpulist", \
+ "Specify the CPU affinity that will apply to " \
+ "all VCPUs", vcpu_affinity_parser, kvm), \
OPT_U64('\0', "kaslr-seed", &(cfg)->kaslr_seed, \
"Specify random seed for Kernel Address Space " \
"Layout Randomization (KASLR)"), \
@@ -88,7 +88,16 @@ static void reset_vcpu_aarch64(struct kvm_cpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_one_reg reg;
+ cpu_set_t *affinity;
u64 data;
+ int ret;
+
+ affinity = kvm->arch.vcpu_affinity_cpuset;
+ if (affinity) {
+ ret = sched_setaffinity(0, sizeof(cpu_set_t), affinity);
+ if (ret == -1)
+ die_perror("sched_setaffinity");
+ }
reg.addr = (u64)&data;
@@ -3,8 +3,40 @@
#include <asm/image.h>
#include <linux/byteorder.h>
+#include <linux/cpumask.h>
+
#include <kvm/util.h>
+/*
+ * Option callback for --vcpu-affinity: parse the cpulist in @arg and store the
+ * resulting CPU set in kvm->arch.vcpu_affinity_cpuset.
+ *
+ * @opt:   option descriptor; opt->ptr is the struct kvm being configured
+ * @arg:   cpulist string supplied on the command line
+ * @unset: not used by this callback
+ *
+ * Returns 0 on success, or the error returned by cpulist_parse(), in which
+ * case kvm->arch.vcpu_affinity_cpuset is left untouched. Allocation failures
+ * terminate the program through die_perror().
+ */
+int vcpu_affinity_parser(const struct option *opt, const char *arg, int unset)
+{
+	struct kvm *kvm = opt->ptr;
+	const char *cpulist = arg;
+	size_t cpuset_size = CPU_ALLOC_SIZE(NR_CPUS);
+	cpumask_t *cpumask;
+	cpu_set_t *cpuset;
+	int cpu, ret;
+
+	kvm->cfg.arch.vcpu_affinity = cpulist;
+
+	cpumask = calloc(1, cpumask_size());
+	if (!cpumask)
+		die_perror("calloc");
+
+	ret = cpulist_parse(cpulist, cpumask);
+	if (ret)
+		goto out_free;
+
+	cpuset = CPU_ALLOC(NR_CPUS);
+	if (!cpuset)
+		die_perror("CPU_ALLOC");
+	CPU_ZERO_S(cpuset_size, cpuset);
+
+	/*
+	 * The set is dynamically sized, so use the _S variant with the same
+	 * size that was used to allocate and clear it.
+	 */
+	for_each_cpu(cpu, cpumask)
+		CPU_SET_S(cpu, cpuset_size, cpuset);
+
+	/*
+	 * Release the set left behind by an earlier --vcpu-affinity occurrence,
+	 * if any, before installing the new one. CPU_FREE(NULL) is harmless.
+	 */
+	CPU_FREE(kvm->arch.vcpu_affinity_cpuset);
+	kvm->arch.vcpu_affinity_cpuset = cpuset;
+
+out_free:
+	/* The intermediate cpumask is only needed while parsing. */
+	free(cpumask);
+	return ret;
+}
+
/*
* Return the TEXT_OFFSET value that the guest kernel expects. Note
* that pre-3.17 kernels expose this value using the native endianness
@@ -49,34 +49,19 @@ static void set_pmu_attr(struct kvm_cpu *vcpu, void *addr, u64 attr)
*/
#define PMU_ID_MAXLEN 12
-/*
- * In the case of homogeneous systems, there only one hardware PMU, and all
- * VCPUs will use the same PMU, regardless of where the attribute gets set.
- *
- * For heterogeneous systems, the assumption is that the user has pinned the VM
- * (via taskset or similar) to a set of CPUs that share the same hardware PMU.
- * This simplifies things for kvmtool, as correctness is not affected by setting
- * the PMU for each VCPU from the main thread, instead of setting it from each
- * individual VCPU thread.
- */
-static int find_pmu(void)
+static int find_pmu_cpumask(struct kvm *kvm, cpumask_t *cpumask)
{
+ cpumask_t pmu_cpumask, tmp;
char buf[PMU_ID_MAXLEN];
struct dirent *dirent;
char *cpulist, *path;
int pmu_id = -ENXIO;
unsigned long val;
- cpumask_t cpumask;
ssize_t fd_sz;
- int this_cpu;
int fd, ret;
DIR *dir;
- memset(buf, 0, PMU_ID_MAXLEN);
-
- this_cpu = sched_getcpu();
- if (this_cpu < 0)
- return -errno;
+ memset(buf, 0, sizeof(buf));
cpulist = calloc(1, PAGE_SIZE);
if (!cpulist)
@@ -112,15 +97,27 @@ static int find_pmu(void)
}
close(fd);
- ret = cpulist_parse(cpulist, &cpumask);
+ ret = cpulist_parse(cpulist, &pmu_cpumask);
if (ret) {
pmu_id = ret;
goto out_free;
}
- if (!cpumask_test_cpu(this_cpu, &cpumask))
+ if (!cpumask_and(&tmp, cpumask, &pmu_cpumask))
goto next_dir;
+ /*
+ * One CPU cannot have more than one PMU, hence the set of CPUs which
+ * share PMU A and the set of CPUs which share PMU B are
+ * disjoint. If the target CPUs and the current PMU have at
+ * least one CPU in common, but the target CPUs are not a subset
+ * of the current PMU, then a PMU which is associated with all
+ * the target CPUs does not exist. Stop searching for a PMU when
+ * this happens.
+ */
+ if (!cpumask_subset(cpumask, &pmu_cpumask))
+ goto out_free;
+
strcpy(&path[strlen(path) - 4], "type");
fd = open(path, O_RDONLY);
if (fd < 0)
@@ -154,6 +151,46 @@ out_free:
return pmu_id;
}
+/*
+ * In the case of homogeneous systems, there is only one hardware PMU, and all
+ * VCPUs will use the same PMU, regardless of the physical CPUs on which the
+ * VCPU threads will be executing.
+ *
+ * For heterogeneous systems, there are 2 ways for the user to ensure that the
+ * VM runs on CPUs that have the same PMU:
+ *
+ * 1. By pinning the entire VM to the desired CPUs, in which case kvmtool will
+ * choose the PMU associated with the CPU on which the main thread is executing
+ * (the thread that calls find_pmu()).
+ *
+ * 2. By setting the affinity mask for the VCPUs with the --vcpu-affinity
+ * command line argument. All CPUs in the affinity mask must have the same PMU,
+ * otherwise kvmtool will not be able to set a PMU.
+ *
+ * Returns the result of find_pmu_cpumask() for the selected CPUs, or -errno if
+ * sched_getcpu() fails.
+ */
+static int find_pmu(struct kvm *kvm)
+{
+	cpumask_t *cpumask;
+	int i, this_cpu;
+	int ret;
+
+	cpumask = calloc(1, cpumask_size());
+	if (!cpumask)
+		die_perror("calloc");
+
+	if (!kvm->arch.vcpu_affinity_cpuset) {
+		this_cpu = sched_getcpu();
+		if (this_cpu < 0) {
+			/* Capture errno before the cleanup path runs. */
+			ret = -errno;
+			goto out_free;
+		}
+		cpumask_set_cpu(this_cpu, cpumask);
+	} else {
+		/*
+		 * CPU_ISSET() addresses a fixed CPU_SETSIZE-bit set, while both
+		 * the affinity set and the cpumask are sized for NR_CPUS CPUs;
+		 * stay below both limits.
+		 */
+		for (i = 0; i < CPU_SETSIZE && i < NR_CPUS; i++) {
+			if (CPU_ISSET(i, kvm->arch.vcpu_affinity_cpuset))
+				cpumask_set_cpu(i, cpumask);
+		}
+	}
+
+	ret = find_pmu_cpumask(kvm, cpumask);
+
+out_free:
+	/* The temporary mask is only needed for the lookup above. */
+	free(cpumask);
+	return ret;
+}
+
void pmu__generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
const char compatible[] = "arm,armv8-pmuv3";
@@ -174,7 +211,7 @@ void pmu__generate_fdt_nodes(void *fdt, struct kvm *kvm)
return;
if (pmu_has_attr(kvm->cpus[0], KVM_ARM_VCPU_PMU_V3_SET_PMU)) {
- pmu_id = find_pmu();
+ pmu_id = find_pmu(kvm);
if (pmu_id < 0) {
pr_debug("Failed to find a PMU (errno: %d), "
"PMU events might not work", -pmu_id);
@@ -2,6 +2,11 @@
#define ARM_COMMON__KVM_ARCH_H
#include <stdbool.h>
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+
#include <linux/const.h>
#include <linux/types.h>
@@ -105,6 +110,8 @@ struct kvm_arch {
u64 initrd_guest_start;
u64 initrd_size;
u64 dtb_guest_start;
+
+ cpu_set_t *vcpu_affinity_cpuset;
};
#endif /* ARM_COMMON__KVM_ARCH_H */
@@ -5,6 +5,7 @@
struct kvm_config_arch {
const char *dump_dtb_filename;
+ const char *vcpu_affinity;
unsigned int force_cntfrq;
bool virtio_trans_pci;
bool aarch32_guest;
@@ -11,6 +11,11 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
#define cpumask_bits(maskp) ((maskp)->bits)
+/*
+ * Byte size of BITS_TO_LONGS(NR_CPUS) longs. Callers use it as the allocation
+ * size for a heap-allocated cpumask_t.
+ */
+static inline unsigned int cpumask_size(void)
+{
+	unsigned int nr_longs = BITS_TO_LONGS(NR_CPUS);
+
+	return nr_longs * sizeof(long);
+}
+
static inline void cpumask_set_cpu(int cpu, cpumask_t *dstp)
{
set_bit(cpu, cpumask_bits(dstp));
Add a new command line argument, --vcpu-affinity, to set the CPU affinity
for the VCPUs. The affinity is expressed as a cpulist and will apply to all
VCPU threads.

This gives the user a second option for choosing the PMU on a heterogeneous
system. The PMU setup code, when --vcpu-affinity is specified, will search
for the PMU associated with the CPUs specified with this command line
argument instead of the PMU associated with the CPU on which the main
thread is executing.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 arm/aarch64/include/kvm/kvm-config-arch.h |  7 +-
 arm/aarch64/kvm-cpu.c                     |  9 +++
 arm/aarch64/kvm.c                         | 32 +++++++++
 arm/aarch64/pmu.c                         | 79 +++++++++++++++++------
 arm/include/arm-common/kvm-arch.h         |  7 ++
 arm/include/arm-common/kvm-config-arch.h  |  1 +
 include/linux/cpumask.h                   |  5 ++
 7 files changed, 118 insertions(+), 22 deletions(-)