diff mbox series

[RFC,v5,09/29] KVM: selftests: TDX: Add report_fatal_error test

Message ID 20231212204647.2170650-10-sagis@google.com (mailing list archive)
State New
Headers show
Series TDX KVM selftests | expand

Commit Message

Sagi Shahar Dec. 12, 2023, 8:46 p.m. UTC
The test checks report_fatal_error functionality.

Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ryan Afranji <afranji@google.com>
---
 .../selftests/kvm/include/x86_64/tdx/tdx.h    |  6 ++-
 .../kvm/include/x86_64/tdx/tdx_util.h         |  1 +
 .../kvm/include/x86_64/tdx/test_util.h        | 19 ++++++++
 .../selftests/kvm/lib/x86_64/tdx/tdx.c        | 39 ++++++++++++++++
 .../selftests/kvm/lib/x86_64/tdx/tdx_util.c   | 12 +++++
 .../selftests/kvm/lib/x86_64/tdx/test_util.c  | 10 +++++
 .../selftests/kvm/x86_64/tdx_vm_tests.c       | 45 +++++++++++++++++++
 7 files changed, 131 insertions(+), 1 deletion(-)

Comments

Binbin Wu Feb. 29, 2024, 12:31 p.m. UTC | #1
On 12/13/2023 4:46 AM, Sagi Shahar wrote:
> The test checks report_fatal_error functionality.
>
> Signed-off-by: Sagi Shahar <sagis@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ryan Afranji <afranji@google.com>
> ---
>   .../selftests/kvm/include/x86_64/tdx/tdx.h    |  6 ++-
>   .../kvm/include/x86_64/tdx/tdx_util.h         |  1 +
>   .../kvm/include/x86_64/tdx/test_util.h        | 19 ++++++++
>   .../selftests/kvm/lib/x86_64/tdx/tdx.c        | 39 ++++++++++++++++
>   .../selftests/kvm/lib/x86_64/tdx/tdx_util.c   | 12 +++++
>   .../selftests/kvm/lib/x86_64/tdx/test_util.c  | 10 +++++
>   .../selftests/kvm/x86_64/tdx_vm_tests.c       | 45 +++++++++++++++++++
>   7 files changed, 131 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> index a7161efe4ee2..1340c1070002 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> @@ -3,10 +3,14 @@
>   #define SELFTEST_TDX_TDX_H
>   
>   #include <stdint.h>
> +#include "kvm_util_base.h"
>   
> -#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> +#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003
>   
> +#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu);
>   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   				      uint64_t write, uint64_t *data);
> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa);
>   
>   #endif // SELFTEST_TDX_TDX_H
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> index 274b245f200b..32dd6b8fda46 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> @@ -12,5 +12,6 @@ struct kvm_vm *td_create(void);
>   void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
>   		   uint64_t attributes);
>   void td_finalize(struct kvm_vm *vm);
> +void td_vcpu_run(struct kvm_vcpu *vcpu);
>   
>   #endif // SELFTESTS_TDX_KVM_UTIL_H
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> index b570b6d978ff..6d69921136bd 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
>    */
>   void tdx_test_success(void);
>   
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal(uint64_t error_code);
> +
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * @data_gpa may point to an optional shared guest memory holding the error
> + * string.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
> +
>   #endif // SELFTEST_TDX_TEST_UTIL_H
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> index c2414523487a..b854c3aa34ff 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> @@ -1,8 +1,31 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   
> +#include <string.h>
> +
>   #include "tdx/tdcall.h"
>   #include "tdx/tdx.h"
>   
> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> +	uint64_t vmcall_subfunction = vmcall_info->subfunction;
> +
> +	switch (vmcall_subfunction) {
> +	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> +		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +		vcpu->run->system_event.ndata = 3;
> +		vcpu->run->system_event.data[0] =
> +			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> +		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> +		vmcall_info->status_code = 0;
> +		break;
> +	default:
> +		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> +			  vmcall_subfunction);
> +	}
> +}
> +
>   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   				      uint64_t write, uint64_t *data)
>   {
> @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   
>   	return ret;
>   }
> +
> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> +{
> +	struct tdx_hypercall_args args;
> +
> +	memset(&args, 0, sizeof(struct tdx_hypercall_args));
> +
> +	if (data_gpa)
> +		error_code |= 0x8000000000000000;
> +
> +	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +	args.r12 = error_code;
> +	args.r13 = data_gpa;
> +
> +	__tdx_hypercall(&args, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> index b302060049d5..d745bb6287c1 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> @@ -10,6 +10,7 @@
>   
>   #include "kvm_util.h"
>   #include "test_util.h"
> +#include "tdx/tdx.h"
>   #include "tdx/td_boot.h"
>   #include "kvm_util_base.h"
>   #include "processor.h"
> @@ -519,3 +520,14 @@ void td_finalize(struct kvm_vm *vm)
>   
>   	tdx_td_finalizemr(vm);
>   }
> +
> +void td_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	vcpu_run(vcpu);
> +
> +	/* Handle TD VMCALLs that require userspace handling. */
> +	if (vcpu->run->exit_reason == KVM_EXIT_TDX &&
> +	    vcpu->run->tdx.type == KVM_EXIT_TDX_VMCALL) {
> +		handle_userspace_tdg_vp_vmcall_exit(vcpu);
> +	}
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> index 6905d0ca3877..7f3cd8089cea 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> @@ -32,3 +32,13 @@ void tdx_test_success(void)
>   				     TDX_TEST_SUCCESS_SIZE,
>   				     TDG_VP_VMCALL_INSTRUCTION_IO_WRITE, &code);
>   }
> +
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa)
> +{
> +	tdg_vp_vmcall_report_fatal_error(error_code, data_gpa);
> +}
> +
> +void tdx_test_fatal(uint64_t error_code)
> +{
> +	tdx_test_fatal_with_data(error_code, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> index a18d1c9d6026..8638c7bbedaa 100644
> --- a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> +++ b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> @@ -2,6 +2,7 @@
>   
>   #include <signal.h>
>   #include "kvm_util_base.h"
> +#include "tdx/tdx.h"
>   #include "tdx/tdx_util.h"
>   #include "tdx/test_util.h"
>   #include "test_util.h"
> @@ -30,6 +31,49 @@ void verify_td_lifecycle(void)
>   	printf("\t ... PASSED\n");
>   }
>   
> +void guest_code_report_fatal_error(void)
> +{
> +	uint64_t err;
> +
> +	/*
> +	 * Note: err should follow the GHCI spec definition:
> +	 * bits 31:0 should be set to 0.
> +	 * bits 62:32 are used for TD-specific extended error code.
> +	 * bit 63 is used to mark additional information in shared memory.
> +	 */
> +	err = 0x0BAAAAAD00000000;
> +	if (err)
> +		tdx_test_fatal(err);

Nit:
Since the error code is a constant, no need to use if statement.
Or even save the variable?


> +
> +	tdx_test_success();
> +}
> +void verify_report_fatal_error(void)
> +{
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *vcpu;
> +
> +	vm = td_create();
> +	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
> +	vcpu = td_vcpu_add(vm, 0, guest_code_report_fatal_error);
> +	td_finalize(vm);
> +
> +	printf("Verifying report_fatal_error:\n");
> +
> +	td_vcpu_run(vcpu);
> +
> +	TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_SYSTEM_EVENT);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 3);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[0], TDG_VP_VMCALL_REPORT_FATAL_ERROR);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[1], 0x0BAAAAAD00000000);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[2], 0);
> +
> +	vcpu_run(vcpu);
> +	TDX_TEST_ASSERT_SUCCESS(vcpu);
> +
> +	kvm_vm_free(vm);
> +	printf("\t ... PASSED\n");
> +}
> +
>   int main(int argc, char **argv)
>   {
>   	setbuf(stdout, NULL);
> @@ -40,6 +84,7 @@ int main(int argc, char **argv)
>   	}
>   
>   	run_in_new_process(&verify_td_lifecycle);
> +	run_in_new_process(&verify_report_fatal_error);
>   
>   	return 0;
>   }
Binbin Wu March 1, 2024, 6:52 a.m. UTC | #2
On 12/13/2023 4:46 AM, Sagi Shahar wrote:
> The test checks report_fatal_error functionality.
>
> Signed-off-by: Sagi Shahar <sagis@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ryan Afranji <afranji@google.com>
> ---
>   .../selftests/kvm/include/x86_64/tdx/tdx.h    |  6 ++-
>   .../kvm/include/x86_64/tdx/tdx_util.h         |  1 +
>   .../kvm/include/x86_64/tdx/test_util.h        | 19 ++++++++
>   .../selftests/kvm/lib/x86_64/tdx/tdx.c        | 39 ++++++++++++++++
>   .../selftests/kvm/lib/x86_64/tdx/tdx_util.c   | 12 +++++
>   .../selftests/kvm/lib/x86_64/tdx/test_util.c  | 10 +++++
>   .../selftests/kvm/x86_64/tdx_vm_tests.c       | 45 +++++++++++++++++++
>   7 files changed, 131 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> index a7161efe4ee2..1340c1070002 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> @@ -3,10 +3,14 @@
>   #define SELFTEST_TDX_TDX_H
>   
>   #include <stdint.h>
> +#include "kvm_util_base.h"
>   
> -#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> +#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003
>   
> +#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu);
>   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   				      uint64_t write, uint64_t *data);
> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa);
>   
>   #endif // SELFTEST_TDX_TDX_H
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> index 274b245f200b..32dd6b8fda46 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> @@ -12,5 +12,6 @@ struct kvm_vm *td_create(void);
>   void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
>   		   uint64_t attributes);
>   void td_finalize(struct kvm_vm *vm);
> +void td_vcpu_run(struct kvm_vcpu *vcpu);
>   
>   #endif // SELFTESTS_TDX_KVM_UTIL_H
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> index b570b6d978ff..6d69921136bd 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
>    */
>   void tdx_test_success(void);
>   
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal(uint64_t error_code);
> +
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * @data_gpa may point to an optional shared guest memory holding the error
> + * string.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
> +
>   #endif // SELFTEST_TDX_TEST_UTIL_H
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> index c2414523487a..b854c3aa34ff 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> @@ -1,8 +1,31 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   
> +#include <string.h>
> +
>   #include "tdx/tdcall.h"
>   #include "tdx/tdx.h"
>   
> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> +	uint64_t vmcall_subfunction = vmcall_info->subfunction;
> +
> +	switch (vmcall_subfunction) {
> +	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> +		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +		vcpu->run->system_event.ndata = 3;
> +		vcpu->run->system_event.data[0] =
> +			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> +		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> +		vmcall_info->status_code = 0;
> +		break;
> +	default:
> +		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> +			  vmcall_subfunction);
> +	}
> +}
> +
>   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   				      uint64_t write, uint64_t *data)
>   {
> @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>   
>   	return ret;
>   }
> +
> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> +{
> +	struct tdx_hypercall_args args;
> +
> +	memset(&args, 0, sizeof(struct tdx_hypercall_args));
> +
> +	if (data_gpa)
> +		error_code |= 0x8000000000000000;
> +
> +	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +	args.r12 = error_code;
> +	args.r13 = data_gpa;
> +
> +	__tdx_hypercall(&args, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> index b302060049d5..d745bb6287c1 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> @@ -10,6 +10,7 @@
>   
>   #include "kvm_util.h"
>   #include "test_util.h"
> +#include "tdx/tdx.h"
>   #include "tdx/td_boot.h"
>   #include "kvm_util_base.h"
>   #include "processor.h"
> @@ -519,3 +520,14 @@ void td_finalize(struct kvm_vm *vm)
>   
>   	tdx_td_finalizemr(vm);
>   }
> +
> +void td_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	vcpu_run(vcpu);
> +
> +	/* Handle TD VMCALLs that require userspace handling. */
> +	if (vcpu->run->exit_reason == KVM_EXIT_TDX &&
> +	    vcpu->run->tdx.type == KVM_EXIT_TDX_VMCALL) {
> +		handle_userspace_tdg_vp_vmcall_exit(vcpu);
> +	}
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> index 6905d0ca3877..7f3cd8089cea 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> @@ -32,3 +32,13 @@ void tdx_test_success(void)
>   				     TDX_TEST_SUCCESS_SIZE,
>   				     TDG_VP_VMCALL_INSTRUCTION_IO_WRITE, &code);
>   }
> +
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa)
> +{
> +	tdg_vp_vmcall_report_fatal_error(error_code, data_gpa);
> +}
> +
> +void tdx_test_fatal(uint64_t error_code)
> +{
> +	tdx_test_fatal_with_data(error_code, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> index a18d1c9d6026..8638c7bbedaa 100644
> --- a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> +++ b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> @@ -2,6 +2,7 @@
>   
>   #include <signal.h>
>   #include "kvm_util_base.h"
> +#include "tdx/tdx.h"
>   #include "tdx/tdx_util.h"
>   #include "tdx/test_util.h"
>   #include "test_util.h"
> @@ -30,6 +31,49 @@ void verify_td_lifecycle(void)
>   	printf("\t ... PASSED\n");
>   }
>   
> +void guest_code_report_fatal_error(void)
> +{
> +	uint64_t err;
> +
> +	/*
> +	 * Note: err should follow the GHCI spec definition:
> +	 * bits 31:0 should be set to 0.
> +	 * bits 62:32 are used for TD-specific extended error code.
> +	 * bit 63 is used to mark additional information in shared memory.
> +	 */
> +	err = 0x0BAAAAAD00000000;
> +	if (err)
> +		tdx_test_fatal(err);

I find tdx_test_fatal() is called a lot and each call site checks the err
before calling it. Is it simpler to move the check of err inside of
tdx_test_fatal() so that the callers just call it without check it every 
time?


> +
> +	tdx_test_success();
> +}
> +void verify_report_fatal_error(void)
> +{
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *vcpu;
> +
> +	vm = td_create();
> +	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
> +	vcpu = td_vcpu_add(vm, 0, guest_code_report_fatal_error);
> +	td_finalize(vm);
> +
> +	printf("Verifying report_fatal_error:\n");
> +
> +	td_vcpu_run(vcpu);
> +
> +	TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_SYSTEM_EVENT);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 3);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[0], TDG_VP_VMCALL_REPORT_FATAL_ERROR);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[1], 0x0BAAAAAD00000000);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[2], 0);
> +
> +	vcpu_run(vcpu);
> +	TDX_TEST_ASSERT_SUCCESS(vcpu);
> +
> +	kvm_vm_free(vm);
> +	printf("\t ... PASSED\n");
> +}
> +
>   int main(int argc, char **argv)
>   {
>   	setbuf(stdout, NULL);
> @@ -40,6 +84,7 @@ int main(int argc, char **argv)
>   	}
>   
>   	run_in_new_process(&verify_td_lifecycle);
> +	run_in_new_process(&verify_report_fatal_error);
>   
>   	return 0;
>   }
Yan Zhao March 1, 2024, 12:09 p.m. UTC | #3
...
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> index b570b6d978ff..6d69921136bd 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
>   */
>  void tdx_test_success(void);
>  
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal(uint64_t error_code);
> +
> +/**
> + * Report an error with @error_code to userspace.
> + *
> + * @data_gpa may point to an optional shared guest memory holding the error
> + * string.
> + *
> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> + * is not expected to continue beyond this point.
> + */
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
I found nowhere is using "data_gpa" as a gpa, even in patch 23, it's
usage is to pass a line number ("tdx_test_fatal_with_data(ret, __LINE__)").


>  #endif // SELFTEST_TDX_TEST_UTIL_H
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> index c2414523487a..b854c3aa34ff 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> @@ -1,8 +1,31 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  
> +#include <string.h>
> +
>  #include "tdx/tdcall.h"
>  #include "tdx/tdx.h"
>  
> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> +	uint64_t vmcall_subfunction = vmcall_info->subfunction;
> +
> +	switch (vmcall_subfunction) {
> +	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> +		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +		vcpu->run->system_event.ndata = 3;
> +		vcpu->run->system_event.data[0] =
> +			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> +		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> +		vmcall_info->status_code = 0;
> +		break;
> +	default:
> +		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> +			  vmcall_subfunction);
> +	}
> +}
> +
>  uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>  				      uint64_t write, uint64_t *data)
>  {
> @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
>  
>  	return ret;
>  }
> +
> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> +{
> +	struct tdx_hypercall_args args;
> +
> +	memset(&args, 0, sizeof(struct tdx_hypercall_args));
> +
> +	if (data_gpa)
> +		error_code |= 0x8000000000000000;
> 
So, why this error_code needs to set bit 63?


> +	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> +	args.r12 = error_code;
> +	args.r13 = data_gpa;
> +
> +	__tdx_hypercall(&args, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> index b302060049d5..d745bb6287c1 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> @@ -10,6 +10,7 @@
>  
>  #include "kvm_util.h"
>  #include "test_util.h"
> +#include "tdx/tdx.h"
>  #include "tdx/td_boot.h"
>  #include "kvm_util_base.h"
>  #include "processor.h"
> @@ -519,3 +520,14 @@ void td_finalize(struct kvm_vm *vm)
>  
>  	tdx_td_finalizemr(vm);
>  }
> +
> +void td_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	vcpu_run(vcpu);
> +
> +	/* Handle TD VMCALLs that require userspace handling. */
> +	if (vcpu->run->exit_reason == KVM_EXIT_TDX &&
> +	    vcpu->run->tdx.type == KVM_EXIT_TDX_VMCALL) {
> +		handle_userspace_tdg_vp_vmcall_exit(vcpu);
> +	}
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> index 6905d0ca3877..7f3cd8089cea 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> @@ -32,3 +32,13 @@ void tdx_test_success(void)
>  				     TDX_TEST_SUCCESS_SIZE,
>  				     TDG_VP_VMCALL_INSTRUCTION_IO_WRITE, &code);
>  }
> +
> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa)
> +{
> +	tdg_vp_vmcall_report_fatal_error(error_code, data_gpa);
> +}
> +
> +void tdx_test_fatal(uint64_t error_code)
> +{
> +	tdx_test_fatal_with_data(error_code, 0);
> +}
> diff --git a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> index a18d1c9d6026..8638c7bbedaa 100644
> --- a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> +++ b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> @@ -2,6 +2,7 @@
>  
>  #include <signal.h>
>  #include "kvm_util_base.h"
> +#include "tdx/tdx.h"
>  #include "tdx/tdx_util.h"
>  #include "tdx/test_util.h"
>  #include "test_util.h"
> @@ -30,6 +31,49 @@ void verify_td_lifecycle(void)
>  	printf("\t ... PASSED\n");
>  }
>  
> +void guest_code_report_fatal_error(void)
> +{
> +	uint64_t err;
> +
> +	/*
> +	 * Note: err should follow the GHCI spec definition:
> +	 * bits 31:0 should be set to 0.
> +	 * bits 62:32 are used for TD-specific extended error code.
> +	 * bit 63 is used to mark additional information in shared memory.
> +	 */
> +	err = 0x0BAAAAAD00000000;
> +	if (err)
> +		tdx_test_fatal(err);
> +
> +	tdx_test_success();
> +}
> +void verify_report_fatal_error(void)
> +{
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *vcpu;
> +
> +	vm = td_create();
> +	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
> +	vcpu = td_vcpu_add(vm, 0, guest_code_report_fatal_error);
> +	td_finalize(vm);
> +
> +	printf("Verifying report_fatal_error:\n");
> +
> +	td_vcpu_run(vcpu);
> +
> +	TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_SYSTEM_EVENT);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 3);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[0], TDG_VP_VMCALL_REPORT_FATAL_ERROR);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[1], 0x0BAAAAAD00000000);
> +	TEST_ASSERT_EQ(vcpu->run->system_event.data[2], 0);
> +
> +	vcpu_run(vcpu);
> +	TDX_TEST_ASSERT_SUCCESS(vcpu);
> +
> +	kvm_vm_free(vm);
> +	printf("\t ... PASSED\n");
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	setbuf(stdout, NULL);
> @@ -40,6 +84,7 @@ int main(int argc, char **argv)
>  	}
>  
>  	run_in_new_process(&verify_td_lifecycle);
> +	run_in_new_process(&verify_report_fatal_error);
>  
>  	return 0;
>  }
> -- 
> 2.43.0.472.g3155946c3a-goog
> 
>
Yan Zhao April 12, 2024, 11:57 a.m. UTC | #4
On Fri, Apr 12, 2024 at 04:56:36AM +0000, Ackerley Tng wrote:
> Yan Zhao <yan.y.zhao@intel.com> writes:
> 
> > ...
> >> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> index b570b6d978ff..6d69921136bd 100644
> >> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
> >>   */
> >>  void tdx_test_success(void);
> >>  
> >> +/**
> >> + * Report an error with @error_code to userspace.
> >> + *
> >> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> >> + * is not expected to continue beyond this point.
> >> + */
> >> +void tdx_test_fatal(uint64_t error_code);
> >> +
> >> +/**
> >> + * Report an error with @error_code to userspace.
> >> + *
> >> + * @data_gpa may point to an optional shared guest memory holding the error
> >> + * string.
> >> + *
> >> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> >> + * is not expected to continue beyond this point.
> >> + */
> >> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
> > I found nowhere is using "data_gpa" as a gpa, even in patch 23, it's
> > usage is to pass a line number ("tdx_test_fatal_with_data(ret, __LINE__)").
> >
> >
> 
> This function tdx_test_fatal_with_data() is meant to provide a generic
> interface for TDX tests to use TDG.VP.VMCALL<ReportFatalError>, and so
> the parameters of tdx_test_fatal_with_data() generically allow error_code and
> data_gpa to be specified.
> 
> The tests just happen to use the data_gpa parameter to pass __LINE__ to
> the host VMM, but other tests in future that use the
> tdx_test_fatal_with_data() function in the TDX testing library could
> actually pass a GPA through using data_gpa.
> 
> >>  #endif // SELFTEST_TDX_TEST_UTIL_H
> >> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> index c2414523487a..b854c3aa34ff 100644
> >> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> @@ -1,8 +1,31 @@
> >>  // SPDX-License-Identifier: GPL-2.0-only
> >>  
> >> +#include <string.h>
> >> +
> >>  #include "tdx/tdcall.h"
> >>  #include "tdx/tdx.h"
> >>  
> >> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> >> +{
> >> +	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> >> +	uint64_t vmcall_subfunction = vmcall_info->subfunction;
> >> +
> >> +	switch (vmcall_subfunction) {
> >> +	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> >> +		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> >> +		vcpu->run->system_event.ndata = 3;
> >> +		vcpu->run->system_event.data[0] =
> >> +			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> >> +		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> >> +		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> >> +		vmcall_info->status_code = 0;
> >> +		break;
> >> +	default:
> >> +		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> >> +			  vmcall_subfunction);
> >> +	}
> >> +}
> >> +
> >>  uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >>  				      uint64_t write, uint64_t *data)
> >>  {
> >> @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >>  
> >>  	return ret;
> >>  }
> >> +
> >> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> >> +{
> >> +	struct tdx_hypercall_args args;
> >> +
> >> +	memset(&args, 0, sizeof(struct tdx_hypercall_args));
> >> +
> >> +	if (data_gpa)
> >> +		error_code |= 0x8000000000000000;
> >> 
> > So, why this error_code needs to set bit 63?
> >
> >
> 
> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
But above "__LINE__" is obviously not a valid GPA.

Do you think it's better to check "data_gpa" is with shared bit on and
aligned in 4K before setting bit 63?

> generic TDX testing library function, this check allows the user to use
> tdg_vp_vmcall_report_fatal_error() with error_code and data_gpa and not
> worry about setting bit 63 before calling
> tdg_vp_vmcall_report_fatal_error(), though if the user set bit 63 before
> that, there is no issue.
> 
> >> +	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> >> +	args.r12 = error_code;
> >> +	args.r13 = data_gpa;
> >> +
> >> +	__tdx_hypercall(&args, 0);
> >> +}
> 
> >> <snip>
>
Yan Zhao April 15, 2024, 10:09 a.m. UTC | #5
On Mon, Apr 15, 2024 at 08:05:49AM +0000, Ackerley Tng wrote:
> Yan Zhao <yan.y.zhao@intel.com> writes:
> 
> > On Fri, Apr 12, 2024 at 04:56:36AM +0000, Ackerley Tng wrote:
> >> Yan Zhao <yan.y.zhao@intel.com> writes:
> >> 
> >> > ...
> >> >> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> >> index b570b6d978ff..6d69921136bd 100644
> >> >> --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> >> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> >> >> @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
> >> >>   */
> >> >>  void tdx_test_success(void);
> >> >>  
> >> >> +/**
> >> >> + * Report an error with @error_code to userspace.
> >> >> + *
> >> >> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> >> >> + * is not expected to continue beyond this point.
> >> >> + */
> >> >> +void tdx_test_fatal(uint64_t error_code);
> >> >> +
> >> >> +/**
> >> >> + * Report an error with @error_code to userspace.
> >> >> + *
> >> >> + * @data_gpa may point to an optional shared guest memory holding the error
> >> >> + * string.
> >> >> + *
> >> >> + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> >> >> + * is not expected to continue beyond this point.
> >> >> + */
> >> >> +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
> >> > I found nowhere is using "data_gpa" as a gpa, even in patch 23, it's
> >> > usage is to pass a line number ("tdx_test_fatal_with_data(ret, __LINE__)").
> >> >
> >> >
> >> 
> >> This function tdx_test_fatal_with_data() is meant to provide a generic
> >> interface for TDX tests to use TDG.VP.VMCALL<ReportFatalError>, and so
> >> the parameters of tdx_test_fatal_with_data() generically allow error_code and
> >> data_gpa to be specified.
> >> 
> >> The tests just happen to use the data_gpa parameter to pass __LINE__ to
> >> the host VMM, but other tests in future that use the
> >> tdx_test_fatal_with_data() function in the TDX testing library could
> >> actually pass a GPA through using data_gpa.
> >> 
> >> >>  #endif // SELFTEST_TDX_TEST_UTIL_H
> >> >> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> >> index c2414523487a..b854c3aa34ff 100644
> >> >> --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> >> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> >> >> @@ -1,8 +1,31 @@
> >> >>  // SPDX-License-Identifier: GPL-2.0-only
> >> >>  
> >> >> +#include <string.h>
> >> >> +
> >> >>  #include "tdx/tdcall.h"
> >> >>  #include "tdx/tdx.h"
> >> >>  
> >> >> +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> >> >> +{
> >> >> +	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> >> >> +	uint64_t vmcall_subfunction = vmcall_info->subfunction;
> >> >> +
> >> >> +	switch (vmcall_subfunction) {
> >> >> +	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> >> >> +		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> >> >> +		vcpu->run->system_event.ndata = 3;
> >> >> +		vcpu->run->system_event.data[0] =
> >> >> +			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> >> >> +		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> >> >> +		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> >> >> +		vmcall_info->status_code = 0;
> >> >> +		break;
> >> >> +	default:
> >> >> +		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> >> >> +			  vmcall_subfunction);
> >> >> +	}
> >> >> +}
> >> >> +
> >> >>  uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >> >>  				      uint64_t write, uint64_t *data)
> >> >>  {
> >> >> @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >> >>  
> >> >>  	return ret;
> >> >>  }
> >> >> +
> >> >> +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> >> >> +{
> >> >> +	struct tdx_hypercall_args args;
> >> >> +
> >> >> +	memset(&args, 0, sizeof(struct tdx_hypercall_args));
> >> >> +
> >> >> +	if (data_gpa)
> >> >> +		error_code |= 0x8000000000000000;
> >> >> 
> >> > So, why this error_code needs to set bit 63?
> >> >
> >> >
> >> 
> >> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
> > But above "__LINE__" is obviously not a valid GPA.
> >
> > Do you think it's better to check "data_gpa" is with shared bit on and
> > aligned in 4K before setting bit 63?
> >
> 
> I read "valid" in the spec to mean that the value in R13 "should be
> considered as useful" or "should be passed on to the host VMM via the
> TDX module", and not so much as in "validated".
> 
> We could validate the data_gpa as you suggested to check alignment and
> shared bit, but perhaps that could be a higher-level function that calls
> tdg_vp_vmcall_report_fatal_error()?
> 
> If it helps, shall we rename "data_gpa" to "data" for this lower-level,
> generic helper function and remove these two lines
> 
> if (data_gpa)
> 	error_code |= 0x8000000000000000;
> 
> A higher-level function could perhaps do the validation as you suggested
> and then set bit 63.
This could be all right. But I'm not sure if it would be a burden for
higher-level function to set bit 63 which is of GHCI details.

What about adding another "data_gpa_valid" parameter and then test
"data_gpa_valid" rather than test "data_gpa" to set bit 63?

> 
> Are you objecting to the use of R13 to hold extra test information, such
> as __LINE__?
> 
> I feel that R13 is just another register that could be used to hold
> error information, and in the case of this test, we can use it to send
> __LINE__ to aid in debugging selftests. On the host side of the
> selftest we can printf() :).
>
Hmm, I just feel it's confusing to use R13 as error code holder and set
gpa_valid bit on at the same time.
As it looks complicated to convert __LINE__ to a string in a shared GPA,
maybe it's ok to pass it in R13 :)

> >> generic TDX testing library function, this check allows the user to use
> >> tdg_vp_vmcall_report_fatal_error() with error_code and data_gpa and not
> >> worry about setting bit 63 before calling
> >> tdg_vp_vmcall_report_fatal_error(), though if the user set bit 63 before
> >> that, there is no issue.
> >> 
> >> >> +	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> >> >> +	args.r12 = error_code;
> >> >> +	args.r13 = data_gpa;
> >> >> +
> >> >> +	__tdx_hypercall(&args, 0);
> >> >> +}
> >> 
> >> >> <snip>
> >>
Sean Christopherson April 16, 2024, 6:50 p.m. UTC | #6
On Mon, Apr 15, 2024, Yan Zhao wrote:
> On Mon, Apr 15, 2024 at 08:05:49AM +0000, Ackerley Tng wrote:
> > >> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
> > > But above "__LINE__" is obviously not a valid GPA.
> > >
> > > Do you think it's better to check "data_gpa" is with shared bit on and
> > > aligned in 4K before setting bit 63?
> > >
> > 
> > I read "valid" in the spec to mean that the value in R13 "should be
> > considered as useful" or "should be passed on to the host VMM via the
> > TDX module", and not so much as in "validated".
> > 
> > We could validate the data_gpa as you suggested to check alignment and
> > shared bit, but perhaps that could be a higher-level function that calls
> > tdg_vp_vmcall_report_fatal_error()?
> > 
> > If it helps, shall we rename "data_gpa" to "data" for this lower-level,
> > generic helper function and remove these two lines
> > 
> > if (data_gpa)
> > 	error_code |= 0x8000000000000000;
> > 
> > A higher-level function could perhaps do the validation as you suggested
> > and then set bit 63.
> This could be all right. But I'm not sure if it would be a burden for
> higher-level function to set bit 63 which is of GHCI details.
> 
> What about adding another "data_gpa_valid" parameter and then test
> "data_gpa_valid" rather than test "data_gpa" to set bit 63?

Who cares what the GHCI says about validity?  The GHCI is a spec for getting
random guests to play nice with random hosts.  Selftests own both, and the goal
of selftests is to test that KVM (and KVM's dependencies) adhere to their relevant
specs.  And more importantly, KVM is NOT inheriting the GHCI ABI verbatim[*].

So except for the bits and bobs that *KVM* (or the TDX module) gets involved in,
just ignore the GHCI (or even deliberately abuse it).  To put it differently, use
selftests verify *KVM's* ABI and functionality.

As it pertains to this thread, while I haven't looked at any of this in detail,
I'm guessing that whether or not bit 63 is set is a complete "don't care", i.e.
KVM and the TDX Module should pass it through as-is.

[*] https://lore.kernel.org/all/Zg18ul8Q4PGQMWam@google.com
Yan Zhao April 17, 2024, 10:41 p.m. UTC | #7
On Tue, Apr 16, 2024 at 11:50:19AM -0700, Sean Christopherson wrote:
> On Mon, Apr 15, 2024, Yan Zhao wrote:
> > On Mon, Apr 15, 2024 at 08:05:49AM +0000, Ackerley Tng wrote:
> > > >> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
> > > > But above "__LINE__" is obviously not a valid GPA.
> > > >
> > > > Do you think it's better to check "data_gpa" is with shared bit on and
> > > > aligned in 4K before setting bit 63?
> > > >
> > > 
> > > I read "valid" in the spec to mean that the value in R13 "should be
> > > considered as useful" or "should be passed on to the host VMM via the
> > > TDX module", and not so much as in "validated".
> > > 
> > > We could validate the data_gpa as you suggested to check alignment and
> > > shared bit, but perhaps that could be a higher-level function that calls
> > > tdg_vp_vmcall_report_fatal_error()?
> > > 
> > > If it helps, shall we rename "data_gpa" to "data" for this lower-level,
> > > generic helper function and remove these two lines
> > > 
> > > if (data_gpa)
> > > 	error_code |= 0x8000000000000000;
> > > 
> > > A higher-level function could perhaps do the validation as you suggested
> > > and then set bit 63.
> > This could be all right. But I'm not sure if it would be a burden for
> > higher-level function to set bit 63 which is of GHCI details.
> > 
> > What about adding another "data_gpa_valid" parameter and then test
> > "data_gpa_valid" rather than test "data_gpa" to set bit 63?
> 
> Who cares what the GHCI says about validity?  The GHCI is a spec for getting
> random guests to play nice with random hosts.  Selftests own both, and the goal
> of selftests is to test that KVM (and KVM's dependencies) adhere to their relevant
> specs.  And more importantly, KVM is NOT inheriting the GHCI ABI verbatim[*].
> 
> So except for the bits and bobs that *KVM* (or the TDX module) gets involved in,
> just ignore the GHCI (or even deliberately abuse it).  To put it differently, use
> selftests verify *KVM's* ABI and functionality.
> 
> As it pertains to this thread, while I haven't looked at any of this in detail,
> I'm guessing that whether or not bit 63 is set is a complete "don't care", i.e.
> KVM and the TDX Module should pass it through as-is.
> 
> [*] https://lore.kernel.org/all/Zg18ul8Q4PGQMWam@google.com
Ok. It makes sense to KVM_EXIT_TDX.
But what if the TDVMCALL is handled in TDX specific code in kernel in future?
(not possible?)
Should guest set bits correctly according to GHCI?
Sean Christopherson April 22, 2024, 9:23 p.m. UTC | #8
On Thu, Apr 18, 2024, Yan Zhao wrote:
> On Tue, Apr 16, 2024 at 11:50:19AM -0700, Sean Christopherson wrote:
> > On Mon, Apr 15, 2024, Yan Zhao wrote:
> > > On Mon, Apr 15, 2024 at 08:05:49AM +0000, Ackerley Tng wrote:
> > > > >> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
> > > > > But above "__LINE__" is obviously not a valid GPA.
> > > > >
> > > > > Do you think it's better to check "data_gpa" is with shared bit on and
> > > > > aligned in 4K before setting bit 63?
> > > > >
> > > > 
> > > > I read "valid" in the spec to mean that the value in R13 "should be
> > > > considered as useful" or "should be passed on to the host VMM via the
> > > > TDX module", and not so much as in "validated".
> > > > 
> > > > We could validate the data_gpa as you suggested to check alignment and
> > > > shared bit, but perhaps that could be a higher-level function that calls
> > > > tdg_vp_vmcall_report_fatal_error()?
> > > > 
> > > > If it helps, shall we rename "data_gpa" to "data" for this lower-level,
> > > > generic helper function and remove these two lines
> > > > 
> > > > if (data_gpa)
> > > > 	error_code |= 0x8000000000000000;
> > > > 
> > > > A higher-level function could perhaps do the validation as you suggested
> > > > and then set bit 63.
> > > This could be all right. But I'm not sure if it would be a burden for
> > > higher-level function to set bit 63 which is of GHCI details.
> > > 
> > > What about adding another "data_gpa_valid" parameter and then test
> > > "data_gpa_valid" rather than test "data_gpa" to set bit 63?
> > 
> > Who cares what the GHCI says about validity?  The GHCI is a spec for getting
> > random guests to play nice with random hosts.  Selftests own both, and the goal
> > of selftests is to test that KVM (and KVM's dependencies) adhere to their relevant
> > specs.  And more importantly, KVM is NOT inheriting the GHCI ABI verbatim[*].
> > 
> > So except for the bits and bobs that *KVM* (or the TDX module) gets involved in,
> > just ignore the GHCI (or even deliberately abuse it).  To put it differently, use
> > selftests verify *KVM's* ABI and functionality.
> > 
> > As it pertains to this thread, while I haven't looked at any of this in detail,
> > I'm guessing that whether or not bit 63 is set is a complete "don't care", i.e.
> > KVM and the TDX Module should pass it through as-is.
> > 
> > [*] https://lore.kernel.org/all/Zg18ul8Q4PGQMWam@google.com
> Ok. It makes sense to KVM_EXIT_TDX.
> But what if the TDVMCALL is handled in TDX specific code in kernel in future?
> (not possible?)

KVM will "handle" ReportFatalError, and will do so before this code lands[*], but
I *highly* doubt KVM will ever do anything but forward the information to userspace,
e.g. as KVM_SYSTEM_EVENT_CRASH with data[] filled in with the raw register information.  

> Should guest set bits correctly according to GHCI?

No.  Selftests exist first and foremost to verify KVM behavior, not to verify
firmware behavior.  We can and should use selftests to verify that *KVM* doesn't
*violate* the GHCI, but that doesn't mean that selftests themselves can't ignore
and/or abuse the GCHI, especially since the GHCI definition for ReportFatalError
is frankly awful.

E.g. the GHCI prescibes actual behavior for R13, but then doesn't say *anything*
about what's in the data page.  Why!?!?!  If the format in the data page is
completely undefined, what's the point of restricting R13 to only be allowed to
hold a GPA?

And the wording is just as awful:

  The VMM must validate that this GPA has the Shared bit set. In other words,
  that a shared-mapping is used, and that this is a valid mapping for the TD.

I'm pretty sure it's just saying that the TDX module isn't going to verify the
operate, i.e. that the VMM needs to protect itself, but it would be so much
better to simply state "The TDX Module does not verify this GPA", because saying
the VMM "must" do something leads to pointless discussions like this one, where
we're debating over whether or *our* VMM should inject an error into *our* guest.

Anyways, we should do what makes sense for selftests and ignore the stupidity of
the GHCI when doing so yields better code.  If that means abusing R13, go for it.
If it's a sticking point for anyone, just use one of the "optional" registers.

Whatever we do, bury the host and guest side of selftests behind #defines or helpers
so that there are at most two pieces of code that care which register holds which
piece of information. 

[*] https://lore.kernel.org/all/20240404230247.GU2444378@ls.amr.corp.intel.com
Sagi Shahar July 25, 2024, 8:37 p.m. UTC | #9
On Fri, Mar 1, 2024 at 12:52 AM Binbin Wu <binbin.wu@linux.intel.com> wrote:
>
>
>
> On 12/13/2023 4:46 AM, Sagi Shahar wrote:
> > The test checks report_fatal_error functionality.
> >
> > Signed-off-by: Sagi Shahar <sagis@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ryan Afranji <afranji@google.com>
> > ---
> >   .../selftests/kvm/include/x86_64/tdx/tdx.h    |  6 ++-
> >   .../kvm/include/x86_64/tdx/tdx_util.h         |  1 +
> >   .../kvm/include/x86_64/tdx/test_util.h        | 19 ++++++++
> >   .../selftests/kvm/lib/x86_64/tdx/tdx.c        | 39 ++++++++++++++++
> >   .../selftests/kvm/lib/x86_64/tdx/tdx_util.c   | 12 +++++
> >   .../selftests/kvm/lib/x86_64/tdx/test_util.c  | 10 +++++
> >   .../selftests/kvm/x86_64/tdx_vm_tests.c       | 45 +++++++++++++++++++
> >   7 files changed, 131 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> > index a7161efe4ee2..1340c1070002 100644
> > --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> > +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
> > @@ -3,10 +3,14 @@
> >   #define SELFTEST_TDX_TDX_H
> >
> >   #include <stdint.h>
> > +#include "kvm_util_base.h"
> >
> > -#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> > +#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003
> >
> > +#define TDG_VP_VMCALL_INSTRUCTION_IO 30
> > +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu);
> >   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >                                     uint64_t write, uint64_t *data);
> > +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa);
> >
> >   #endif // SELFTEST_TDX_TDX_H
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> > index 274b245f200b..32dd6b8fda46 100644
> > --- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> > +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> > @@ -12,5 +12,6 @@ struct kvm_vm *td_create(void);
> >   void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> >                  uint64_t attributes);
> >   void td_finalize(struct kvm_vm *vm);
> > +void td_vcpu_run(struct kvm_vcpu *vcpu);
> >
> >   #endif // SELFTESTS_TDX_KVM_UTIL_H
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> > index b570b6d978ff..6d69921136bd 100644
> > --- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> > +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
> > @@ -49,4 +49,23 @@ bool is_tdx_enabled(void);
> >    */
> >   void tdx_test_success(void);
> >
> > +/**
> > + * Report an error with @error_code to userspace.
> > + *
> > + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> > + * is not expected to continue beyond this point.
> > + */
> > +void tdx_test_fatal(uint64_t error_code);
> > +
> > +/**
> > + * Report an error with @error_code to userspace.
> > + *
> > + * @data_gpa may point to an optional shared guest memory holding the error
> > + * string.
> > + *
> > + * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
> > + * is not expected to continue beyond this point.
> > + */
> > +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
> > +
> >   #endif // SELFTEST_TDX_TEST_UTIL_H
> > diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> > index c2414523487a..b854c3aa34ff 100644
> > --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> > +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
> > @@ -1,8 +1,31 @@
> >   // SPDX-License-Identifier: GPL-2.0-only
> >
> > +#include <string.h>
> > +
> >   #include "tdx/tdcall.h"
> >   #include "tdx/tdx.h"
> >
> > +void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
> > +{
> > +     struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
> > +     uint64_t vmcall_subfunction = vmcall_info->subfunction;
> > +
> > +     switch (vmcall_subfunction) {
> > +     case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
> > +             vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> > +             vcpu->run->system_event.ndata = 3;
> > +             vcpu->run->system_event.data[0] =
> > +                     TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> > +             vcpu->run->system_event.data[1] = vmcall_info->in_r12;
> > +             vcpu->run->system_event.data[2] = vmcall_info->in_r13;
> > +             vmcall_info->status_code = 0;
> > +             break;
> > +     default:
> > +             TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
> > +                       vmcall_subfunction);
> > +     }
> > +}
> > +
> >   uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >                                     uint64_t write, uint64_t *data)
> >   {
> > @@ -25,3 +48,19 @@ uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
> >
> >       return ret;
> >   }
> > +
> > +void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
> > +{
> > +     struct tdx_hypercall_args args;
> > +
> > +     memset(&args, 0, sizeof(struct tdx_hypercall_args));
> > +
> > +     if (data_gpa)
> > +             error_code |= 0x8000000000000000;
> > +
> > +     args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
> > +     args.r12 = error_code;
> > +     args.r13 = data_gpa;
> > +
> > +     __tdx_hypercall(&args, 0);
> > +}
> > diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> > index b302060049d5..d745bb6287c1 100644
> > --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> > +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> > @@ -10,6 +10,7 @@
> >
> >   #include "kvm_util.h"
> >   #include "test_util.h"
> > +#include "tdx/tdx.h"
> >   #include "tdx/td_boot.h"
> >   #include "kvm_util_base.h"
> >   #include "processor.h"
> > @@ -519,3 +520,14 @@ void td_finalize(struct kvm_vm *vm)
> >
> >       tdx_td_finalizemr(vm);
> >   }
> > +
> > +void td_vcpu_run(struct kvm_vcpu *vcpu)
> > +{
> > +     vcpu_run(vcpu);
> > +
> > +     /* Handle TD VMCALLs that require userspace handling. */
> > +     if (vcpu->run->exit_reason == KVM_EXIT_TDX &&
> > +         vcpu->run->tdx.type == KVM_EXIT_TDX_VMCALL) {
> > +             handle_userspace_tdg_vp_vmcall_exit(vcpu);
> > +     }
> > +}
> > diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> > index 6905d0ca3877..7f3cd8089cea 100644
> > --- a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> > +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
> > @@ -32,3 +32,13 @@ void tdx_test_success(void)
> >                                    TDX_TEST_SUCCESS_SIZE,
> >                                    TDG_VP_VMCALL_INSTRUCTION_IO_WRITE, &code);
> >   }
> > +
> > +void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa)
> > +{
> > +     tdg_vp_vmcall_report_fatal_error(error_code, data_gpa);
> > +}
> > +
> > +void tdx_test_fatal(uint64_t error_code)
> > +{
> > +     tdx_test_fatal_with_data(error_code, 0);
> > +}
> > diff --git a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> > index a18d1c9d6026..8638c7bbedaa 100644
> > --- a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> > +++ b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
> > @@ -2,6 +2,7 @@
> >
> >   #include <signal.h>
> >   #include "kvm_util_base.h"
> > +#include "tdx/tdx.h"
> >   #include "tdx/tdx_util.h"
> >   #include "tdx/test_util.h"
> >   #include "test_util.h"
> > @@ -30,6 +31,49 @@ void verify_td_lifecycle(void)
> >       printf("\t ... PASSED\n");
> >   }
> >
> > +void guest_code_report_fatal_error(void)
> > +{
> > +     uint64_t err;
> > +
> > +     /*
> > +      * Note: err should follow the GHCI spec definition:
> > +      * bits 31:0 should be set to 0.
> > +      * bits 62:32 are used for TD-specific extended error code.
> > +      * bit 63 is used to mark additional information in shared memory.
> > +      */
> > +     err = 0x0BAAAAAD00000000;
> > +     if (err)
> > +             tdx_test_fatal(err);
>
> I find tdx_test_fatal() is called a lot and each call site checks the err
> before calling it. Is it simpler to move the check of err inside of
> tdx_test_fatal() so that the callers just call it without check it every
> time?
>
tdx_test_fatal is used for more cases than just testing error code but
we can add another higher level helper. Maybe tdx_assert_error.
>
> > +
> > +     tdx_test_success();
> > +}
> > +void verify_report_fatal_error(void)
> > +{
> > +     struct kvm_vm *vm;
> > +     struct kvm_vcpu *vcpu;
> > +
> > +     vm = td_create();
> > +     td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
> > +     vcpu = td_vcpu_add(vm, 0, guest_code_report_fatal_error);
> > +     td_finalize(vm);
> > +
> > +     printf("Verifying report_fatal_error:\n");
> > +
> > +     td_vcpu_run(vcpu);
> > +
> > +     TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_SYSTEM_EVENT);
> > +     TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 3);
> > +     TEST_ASSERT_EQ(vcpu->run->system_event.data[0], TDG_VP_VMCALL_REPORT_FATAL_ERROR);
> > +     TEST_ASSERT_EQ(vcpu->run->system_event.data[1], 0x0BAAAAAD00000000);
> > +     TEST_ASSERT_EQ(vcpu->run->system_event.data[2], 0);
> > +
> > +     vcpu_run(vcpu);
> > +     TDX_TEST_ASSERT_SUCCESS(vcpu);
> > +
> > +     kvm_vm_free(vm);
> > +     printf("\t ... PASSED\n");
> > +}
> > +
> >   int main(int argc, char **argv)
> >   {
> >       setbuf(stdout, NULL);
> > @@ -40,6 +84,7 @@ int main(int argc, char **argv)
> >       }
> >
> >       run_in_new_process(&verify_td_lifecycle);
> > +     run_in_new_process(&verify_report_fatal_error);
> >
> >       return 0;
> >   }
>
Binbin Wu July 28, 2024, 11:16 a.m. UTC | #10
On 4/23/2024 5:23 AM, Sean Christopherson wrote:
> On Thu, Apr 18, 2024, Yan Zhao wrote:
>> On Tue, Apr 16, 2024 at 11:50:19AM -0700, Sean Christopherson wrote:
>>> On Mon, Apr 15, 2024, Yan Zhao wrote:
>>>> On Mon, Apr 15, 2024 at 08:05:49AM +0000, Ackerley Tng wrote:
>>>>>>> The Intel GHCI Spec says in R12, bit 63 is set if the GPA is valid. As a
>>>>>> But above "__LINE__" is obviously not a valid GPA.
>>>>>>
>>>>>> Do you think it's better to check "data_gpa" is with shared bit on and
>>>>>> aligned in 4K before setting bit 63?
>>>>>>
>>>>> I read "valid" in the spec to mean that the value in R13 "should be
>>>>> considered as useful" or "should be passed on to the host VMM via the
>>>>> TDX module", and not so much as in "validated".
>>>>>
>>>>> We could validate the data_gpa as you suggested to check alignment and
>>>>> shared bit, but perhaps that could be a higher-level function that calls
>>>>> tdg_vp_vmcall_report_fatal_error()?
>>>>>
>>>>> If it helps, shall we rename "data_gpa" to "data" for this lower-level,
>>>>> generic helper function and remove these two lines
>>>>>
>>>>> if (data_gpa)
>>>>> 	error_code |= 0x8000000000000000;
>>>>>
>>>>> A higher-level function could perhaps do the validation as you suggested
>>>>> and then set bit 63.
>>>> This could be all right. But I'm not sure if it would be a burden for
>>>> higher-level function to set bit 63 which is of GHCI details.
>>>>
>>>> What about adding another "data_gpa_valid" parameter and then test
>>>> "data_gpa_valid" rather than test "data_gpa" to set bit 63?
>>> Who cares what the GHCI says about validity?  The GHCI is a spec for getting
>>> random guests to play nice with random hosts.  Selftests own both, and the goal
>>> of selftests is to test that KVM (and KVM's dependencies) adhere to their relevant
>>> specs.  And more importantly, KVM is NOT inheriting the GHCI ABI verbatim[*].
>>>
>>> So except for the bits and bobs that *KVM* (or the TDX module) gets involved in,
>>> just ignore the GHCI (or even deliberately abuse it).  To put it differently, use
>>> selftests verify *KVM's* ABI and functionality.
>>>
>>> As it pertains to this thread, while I haven't looked at any of this in detail,
>>> I'm guessing that whether or not bit 63 is set is a complete "don't care", i.e.
>>> KVM and the TDX Module should pass it through as-is.
>>>
>>> [*] https://lore.kernel.org/all/Zg18ul8Q4PGQMWam@google.com
>> Ok. It makes sense to KVM_EXIT_TDX.
>> But what if the TDVMCALL is handled in TDX specific code in kernel in future?
>> (not possible?)
> KVM will "handle" ReportFatalError, and will do so before this code lands[*], but
> I *highly* doubt KVM will ever do anything but forward the information to userspace,
> e.g. as KVM_SYSTEM_EVENT_CRASH with data[] filled in with the raw register information.
>
>> Should guest set bits correctly according to GHCI?
> No.  Selftests exist first and foremost to verify KVM behavior, not to verify
> firmware behavior.  We can and should use selftests to verify that *KVM* doesn't
> *violate* the GHCI, but that doesn't mean that selftests themselves can't ignore
> and/or abuse the GCHI, especially since the GHCI definition for ReportFatalError
> is frankly awful.
>
> E.g. the GHCI prescibes actual behavior for R13, but then doesn't say *anything*
> about what's in the data page.  Why!?!?!  If the format in the data page is
> completely undefined, what's the point of restricting R13 to only be allowed to
> hold a GPA?

The description of R13 in GHCI:
   4KB-aligned GPA where additional error data is shared by the TD. The
   VMM must validate that this GPA has the Shared bit set. In other words,
   that a shared-mapping is used, and that this is a valid mapping for the
   TD. This shared memory region is expected to hold a zero-terminated
   string.

IIUC, according the GHCI, R13 is a 4K aligned shared buffer provided by
the TDX guest to pass additional error message to VMM, i.e., it needs to
be a shared GPA.  And the content in the buffer is expected to hold a
zero-terminated string.

Do you think "a zero-terminated string" describes the format in the data
page?


>
> And the wording is just as awful:
>
>    The VMM must validate that this GPA has the Shared bit set. In other words,
>    that a shared-mapping is used, and that this is a valid mapping for the TD.
>
> I'm pretty sure it's just saying that the TDX module isn't going to verify the
> operate, i.e. that the VMM needs to protect itself, but it would be so much
> better to simply state "The TDX Module does not verify this GPA", because saying
> the VMM "must" do something leads to pointless discussions like this one, where
> we're debating over whether or *our* VMM should inject an error into *our* guest.
>
> Anyways, we should do what makes sense for selftests and ignore the stupidity of
> the GHCI when doing so yields better code.  If that means abusing R13, go for it.
> If it's a sticking point for anyone, just use one of the "optional" registers.
>
> Whatever we do, bury the host and guest side of selftests behind #defines or helpers
> so that there are at most two pieces of code that care which register holds which
> piece of information.
>
> [*] https://lore.kernel.org/all/20240404230247.GU2444378@ls.amr.corp.intel.com
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
index a7161efe4ee2..1340c1070002 100644
--- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx.h
@@ -3,10 +3,14 @@ 
 #define SELFTEST_TDX_TDX_H
 
 #include <stdint.h>
+#include "kvm_util_base.h"
 
-#define TDG_VP_VMCALL_INSTRUCTION_IO 30
+#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003
 
+#define TDG_VP_VMCALL_INSTRUCTION_IO 30
+void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu);
 uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
 				      uint64_t write, uint64_t *data);
+void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa);
 
 #endif // SELFTEST_TDX_TDX_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
index 274b245f200b..32dd6b8fda46 100644
--- a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
@@ -12,5 +12,6 @@  struct kvm_vm *td_create(void);
 void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
 		   uint64_t attributes);
 void td_finalize(struct kvm_vm *vm);
+void td_vcpu_run(struct kvm_vcpu *vcpu);
 
 #endif // SELFTESTS_TDX_KVM_UTIL_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
index b570b6d978ff..6d69921136bd 100644
--- a/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/tdx/test_util.h
@@ -49,4 +49,23 @@  bool is_tdx_enabled(void);
  */
 void tdx_test_success(void);
 
+/**
+ * Report an error with @error_code to userspace.
+ *
+ * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
+ * is not expected to continue beyond this point.
+ */
+void tdx_test_fatal(uint64_t error_code);
+
+/**
+ * Report an error with @error_code to userspace.
+ *
+ * @data_gpa may point to an optional shared guest memory holding the error
+ * string.
+ *
+ * Return value from tdg_vp_vmcall_report_fatal_error is ignored since execution
+ * is not expected to continue beyond this point.
+ */
+void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa);
+
 #endif // SELFTEST_TDX_TEST_UTIL_H
diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
index c2414523487a..b854c3aa34ff 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx.c
@@ -1,8 +1,31 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <string.h>
+
 #include "tdx/tdcall.h"
 #include "tdx/tdx.h"
 
+void handle_userspace_tdg_vp_vmcall_exit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_tdx_vmcall *vmcall_info = &vcpu->run->tdx.u.vmcall;
+	uint64_t vmcall_subfunction = vmcall_info->subfunction;
+
+	switch (vmcall_subfunction) {
+	case TDG_VP_VMCALL_REPORT_FATAL_ERROR:
+		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+		vcpu->run->system_event.ndata = 3;
+		vcpu->run->system_event.data[0] =
+			TDG_VP_VMCALL_REPORT_FATAL_ERROR;
+		vcpu->run->system_event.data[1] = vmcall_info->in_r12;
+		vcpu->run->system_event.data[2] = vmcall_info->in_r13;
+		vmcall_info->status_code = 0;
+		break;
+	default:
+		TEST_FAIL("TD VMCALL subfunction %lu is unsupported.\n",
+			  vmcall_subfunction);
+	}
+}
+
 uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
 				      uint64_t write, uint64_t *data)
 {
@@ -25,3 +48,19 @@  uint64_t tdg_vp_vmcall_instruction_io(uint64_t port, uint64_t size,
 
 	return ret;
 }
+
+void tdg_vp_vmcall_report_fatal_error(uint64_t error_code, uint64_t data_gpa)
+{
+	struct tdx_hypercall_args args;
+
+	memset(&args, 0, sizeof(struct tdx_hypercall_args));
+
+	if (data_gpa)
+		error_code |= 0x8000000000000000;
+
+	args.r11 = TDG_VP_VMCALL_REPORT_FATAL_ERROR;
+	args.r12 = error_code;
+	args.r13 = data_gpa;
+
+	__tdx_hypercall(&args, 0);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
index b302060049d5..d745bb6287c1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
@@ -10,6 +10,7 @@ 
 
 #include "kvm_util.h"
 #include "test_util.h"
+#include "tdx/tdx.h"
 #include "tdx/td_boot.h"
 #include "kvm_util_base.h"
 #include "processor.h"
@@ -519,3 +520,14 @@  void td_finalize(struct kvm_vm *vm)
 
 	tdx_td_finalizemr(vm);
 }
+
+void td_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	vcpu_run(vcpu);
+
+	/* Handle TD VMCALLs that require userspace handling. */
+	if (vcpu->run->exit_reason == KVM_EXIT_TDX &&
+	    vcpu->run->tdx.type == KVM_EXIT_TDX_VMCALL) {
+		handle_userspace_tdg_vp_vmcall_exit(vcpu);
+	}
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
index 6905d0ca3877..7f3cd8089cea 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/test_util.c
@@ -32,3 +32,13 @@  void tdx_test_success(void)
 				     TDX_TEST_SUCCESS_SIZE,
 				     TDG_VP_VMCALL_INSTRUCTION_IO_WRITE, &code);
 }
+
+void tdx_test_fatal_with_data(uint64_t error_code, uint64_t data_gpa)
+{
+	tdg_vp_vmcall_report_fatal_error(error_code, data_gpa);
+}
+
+void tdx_test_fatal(uint64_t error_code)
+{
+	tdx_test_fatal_with_data(error_code, 0);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
index a18d1c9d6026..8638c7bbedaa 100644
--- a/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/tdx_vm_tests.c
@@ -2,6 +2,7 @@ 
 
 #include <signal.h>
 #include "kvm_util_base.h"
+#include "tdx/tdx.h"
 #include "tdx/tdx_util.h"
 #include "tdx/test_util.h"
 #include "test_util.h"
@@ -30,6 +31,49 @@  void verify_td_lifecycle(void)
 	printf("\t ... PASSED\n");
 }
 
+void guest_code_report_fatal_error(void)
+{
+	uint64_t err;
+
+	/*
+	 * Note: err should follow the GHCI spec definition:
+	 * bits 31:0 should be set to 0.
+	 * bits 62:32 are used for TD-specific extended error code.
+	 * bit 63 is used to mark additional information in shared memory.
+	 */
+	err = 0x0BAAAAAD00000000;
+	if (err)
+		tdx_test_fatal(err);
+
+	tdx_test_success();
+}
+void verify_report_fatal_error(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+
+	vm = td_create();
+	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
+	vcpu = td_vcpu_add(vm, 0, guest_code_report_fatal_error);
+	td_finalize(vm);
+
+	printf("Verifying report_fatal_error:\n");
+
+	td_vcpu_run(vcpu);
+
+	TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_SYSTEM_EVENT);
+	TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 3);
+	TEST_ASSERT_EQ(vcpu->run->system_event.data[0], TDG_VP_VMCALL_REPORT_FATAL_ERROR);
+	TEST_ASSERT_EQ(vcpu->run->system_event.data[1], 0x0BAAAAAD00000000);
+	TEST_ASSERT_EQ(vcpu->run->system_event.data[2], 0);
+
+	vcpu_run(vcpu);
+	TDX_TEST_ASSERT_SUCCESS(vcpu);
+
+	kvm_vm_free(vm);
+	printf("\t ... PASSED\n");
+}
+
 int main(int argc, char **argv)
 {
 	setbuf(stdout, NULL);
@@ -40,6 +84,7 @@  int main(int argc, char **argv)
 	}
 
 	run_in_new_process(&verify_td_lifecycle);
+	run_in_new_process(&verify_report_fatal_error);
 
 	return 0;
 }