diff mbox series

[1/2] trace-cmd: Do not use KVM debug vcpu directories as the CPU mapping

Message ID 20220708014244.677826-2-rostedt@goodmis.org (mailing list archive)
State Accepted
Commit 741dd08b6abd7c902f19e64b76b130241372d826
Headers show
Series trace-cmd: Fix mappings when kvm vcpus do not match the guests CPU numbers | expand

Commit Message

Steven Rostedt July 8, 2022, 1:42 a.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

There are some setups where the vcpu directories in:

  /sys/kernel/debug/kvm/<pid>/vcpuX

Does not match the CPU of the guest. That is, the guest CPU 3 may be
vcpu8.

This breaks the algorithm used when setting up the kvm time
synchronization. As it expects these files to map to the CPU used, and if
one of the vcpuX is greater than the number of CPUs of the guest, it will
fail.

Currently, the locations that do this at least have the guest CPUS
consecutive (non sparse) and they map to the kvm vcpuX in order. That is,
if the vcpuX is sorted by the X it will then map to the CPUs of the guest.

This may not hold true for all setups, but we are currently just going to
solve this one. It appears that the vcpuX in kvm maps to the guest's
apicid in /proc/cpuinfo. More can be done later to make sure the two are
in order and if not to put in meta data that will allow the kvm mappings
to still work.

For now restructure the code by creating a single kvm_clock_files
structure to hold the offset, scaling and frac arrays in a single
structure, and also add the vcpu to it when adding new information from
the vcpuX directories. Then sort the list by the vcpu such that it should
map to the 0-nr_cpus of the guest.

Link: https://lore.kernel.org/all/20220504010242.1388192-1-vineethrp@google.com/

Reported-by: Vineeth Pillai <vineethrp@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 lib/trace-cmd/trace-timesync-kvm.c | 104 ++++++++++++++++-------------
 1 file changed, 58 insertions(+), 46 deletions(-)
diff mbox series

Patch

diff --git a/lib/trace-cmd/trace-timesync-kvm.c b/lib/trace-cmd/trace-timesync-kvm.c
index 1db63d94f545..aa1799b1eefb 100644
--- a/lib/trace-cmd/trace-timesync-kvm.c
+++ b/lib/trace-cmd/trace-timesync-kvm.c
@@ -33,15 +33,20 @@  typedef __s64 s64;
 #define KVM_ACCURACY	0
 #define KVM_NAME	"kvm"
 
+struct kvm_clock_files {
+	int		vcpu;
+	char		*offsets;
+	char		*scalings;
+	char		*frac;
+};
+
 struct kvm_clock_sync {
-	int vcpu_count;
-	char **vcpu_offsets;
-	char **vcpu_scalings;
-	char **vcpu_frac;
-	int marker_fd;
-	struct tep_handle *tep;
-	int raw_id;
-	unsigned long long ts;
+	int			vcpu_count;
+	int			marker_fd;
+	struct kvm_clock_files	*clock_files;
+	struct tep_handle	*tep;
+	int			raw_id;
+	unsigned long long	ts;
 };
 
 struct kvm_clock_offset_msg {
@@ -210,7 +215,7 @@  static bool kvm_support_check(bool guest)
 	return kvm_scaling_check();
 }
 
-static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
+static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int i, char *dir_str)
 {
 	struct dirent *entry;
 	char path[PATH_MAX];
@@ -224,21 +229,21 @@  static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
 			if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE)) {
 				snprintf(path, sizeof(path), "%s/%s",
 					 dir_str, entry->d_name);
-				kvm->vcpu_offsets[cpu] = strdup(path);
+				kvm->clock_files[i].offsets = strdup(path);
 			}
 			if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE)) {
 				snprintf(path, sizeof(path), "%s/%s",
 					 dir_str, entry->d_name);
-				kvm->vcpu_scalings[cpu] = strdup(path);
+				kvm->clock_files[i].scalings = strdup(path);
 			}
 			if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) {
 				snprintf(path, sizeof(path), "%s/%s",
 					 dir_str, entry->d_name);
-				kvm->vcpu_frac[cpu] = strdup(path);
+				kvm->clock_files[i].frac = strdup(path);
 			}
 		}
 	}
-	if (!kvm->vcpu_offsets[cpu])
+	if (!kvm->clock_files[i].offsets)
 		goto error;
 	closedir(dir);
 	return 0;
@@ -246,15 +251,25 @@  static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
 error:
 	if (dir)
 		closedir(dir);
-	free(kvm->vcpu_offsets[cpu]);
-	kvm->vcpu_offsets[cpu] = NULL;
-	free(kvm->vcpu_scalings[cpu]);
-	kvm->vcpu_scalings[cpu] = NULL;
-	free(kvm->vcpu_frac[cpu]);
-	kvm->vcpu_frac[cpu] = NULL;
+	free(kvm->clock_files[i].offsets);
+	kvm->clock_files[i].offsets = NULL;
+	free(kvm->clock_files[i].scalings);
+	kvm->clock_files[i].scalings = NULL;
+	free(kvm->clock_files[i].frac);
+	kvm->clock_files[i].frac = NULL;
 	return -1;
 }
 
+static int cmp_clock(const void *A, const void *B)
+{
+	const struct kvm_clock_files *a = A;
+	const struct kvm_clock_files *b = B;
+
+	if (a->vcpu < b->vcpu)
+		return -1;
+	return a->vcpu > b->vcpu;
+}
+
 static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid)
 {
 	char *vm_dir_str = NULL;
@@ -284,21 +299,25 @@  static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid)
 	dir = opendir(vm_dir_str);
 	if (!dir)
 		goto error;
+	i = 0;
 	while ((entry = readdir(dir))) {
 		if (!(entry->d_type == DT_DIR &&
 		    !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR))))
 			continue;
-		vcpu =  strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10);
-		if (vcpu < 0 || vcpu >= kvm->vcpu_count)
-			continue;
-		snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name);
-		if (kvm_open_vcpu_dir(kvm, vcpu, path) < 0)
+		if (i == kvm->vcpu_count)
 			goto error;
-	}
-	for (i = 0; i < kvm->vcpu_count; i++) {
-		if (!kvm->vcpu_offsets[i])
+		vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10);
+		kvm->clock_files[i].vcpu = vcpu;
+		snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name);
+		if (kvm_open_vcpu_dir(kvm, i, path) < 0)
 			goto error;
+		i++;
 	}
+	if (i < kvm->vcpu_count)
+		goto error;
+
+	qsort(kvm->clock_files, kvm->vcpu_count, sizeof(*kvm->clock_files), cmp_clock);
+
 	closedir(dir);
 	free(pid_str);
 	free(vm_dir_str);
@@ -315,19 +334,15 @@  static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
 				    struct kvm_clock_sync *kvm)
 {
 	kvm->vcpu_count = tsync->vcpu_count;
-	kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
-	kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
-	kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
-	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
+	kvm->clock_files = calloc(kvm->vcpu_count, sizeof(*kvm->clock_files));
+	if (!kvm->clock_files)
 		goto error;
 	if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
 		goto error;
 	return 0;
 
 error:
-	free(kvm->vcpu_offsets);
-	free(kvm->vcpu_scalings);
-	free(kvm->vcpu_frac);
+	free(kvm->clock_files);
 	return -1;
 }
 
@@ -414,12 +429,9 @@  static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
 		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
 	if (kvm) {
 		for (i = 0; i < kvm->vcpu_count; i++) {
-			free(kvm->vcpu_offsets[i]);
-			kvm->vcpu_offsets[i] = NULL;
-			free(kvm->vcpu_scalings[i]);
-			kvm->vcpu_scalings[i] = NULL;
-			free(kvm->vcpu_frac[i]);
-			kvm->vcpu_frac[i] = NULL;
+			free(kvm->clock_files[i].offsets);
+			free(kvm->clock_files[i].scalings);
+			free(kvm->clock_files[i].frac);
 		}
 		if (kvm->tep)
 			tep_free(kvm->tep);
@@ -449,23 +461,23 @@  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
 	clock_context = (struct clock_sync_context *)tsync->context;
 	if (clock_context)
 		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
-	if (!kvm || !kvm->vcpu_offsets || !kvm->vcpu_offsets[0])
+	if (!kvm || !kvm->clock_files || !kvm->clock_files[0].offsets)
 		return -1;
 	if (cpu >= kvm->vcpu_count)
 		return -1;
-	ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
+	ret = read_ll_from_file(kvm->clock_files[cpu].offsets, &kvm_offset);
 	if (ret < 0)
 		return -1;
 
-	if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
-		read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
+	if (kvm->clock_files[cpu].scalings) {
+		read_ll_from_file(kvm->clock_files[cpu].scalings, &kvm_scaling);
 		if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
 		    kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
 			kvm_scaling = 1;
 	}
 
-	if (kvm->vcpu_frac && kvm->vcpu_frac[cpu] && kvm_scaling != 1)
-		ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
+	if (kvm->clock_files[cpu].frac && kvm_scaling != 1)
+		ret = read_ll_from_file(kvm->clock_files[cpu].frac, &kvm_frac);
 	msg = (char *)&packet;
 	size = sizeof(packet);
 	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,