Message ID | 20210707204249.3046665-6-sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Add TDX Guest Support (Attestation support) | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On 7/7/21 1:42 PM, Kuppuswamy Sathyanarayanan wrote: > The interaction with the TDX module is like a RPM protocol here. There > are several operations (get tdreport, get quote) that need to input a > blob, and then output another blob. It was considered to use a sysfs > interface for this, but it doesn't fit well into the standard sysfs > model for configuring values. It would be possible to do read/write on > files, but it would need multiple file descriptors, which would be > somewhat messy. ioctls seems to be the best fitting and simplest model > here. There is one ioctl per operation, that takes the input blob and > returns the output blob, and as well as auxiliary ioctls to return the > blob lengths. The ioctls are documented in the header file. > > Reviewed-by: Tony Luck <tony.luck@intel.com> > Reviewed-by: Andi Kleen <ak@linux.intel.com> > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/platform/x86/Kconfig | 9 ++ > drivers/platform/x86/Makefile | 1 + > drivers/platform/x86/intel_tdx_attest.c | 171 ++++++++++++++++++++++++ > include/uapi/misc/tdx.h | 37 +++++ > 4 files changed, 218 insertions(+) > create mode 100644 drivers/platform/x86/intel_tdx_attest.c > create mode 100644 include/uapi/misc/tdx.h > > diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig > index 60592fb88e7a..7d01c473aef6 100644 > --- a/drivers/platform/x86/Kconfig > +++ b/drivers/platform/x86/Kconfig > @@ -1301,6 +1301,15 @@ config INTEL_SCU_IPC_UTIL > low level access for debug work and updating the firmware. Say > N unless you will be doing this on an Intel MID platform. > > +config INTEL_TDX_ATTESTATION > + tristate "Intel TDX attestation driver" > + depends on INTEL_TDX_GUEST > + help > + The TDX attestation driver provides IOCTL or MMAP interfaces to > + the user to request TDREPORT from the TDX module or request quote > + from VMM. It is mainly used to get secure disk decryption keys from > + the key server. What's the MMAP interface > + > config INTEL_TELEMETRY > tristate "Intel SoC Telemetry Driver" > depends on X86_64 > diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile > index dcc8cdb95b4d..83439990ae47 100644 > --- a/drivers/platform/x86/Makefile > +++ b/drivers/platform/x86/Makefile > @@ -138,6 +138,7 @@ obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o > obj-$(CONFIG_INTEL_SCU_PLATFORM) += intel_scu_pltdrv.o > obj-$(CONFIG_INTEL_SCU_WDT) += intel_scu_wdt.o > obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o > +obj-$(CONFIG_INTEL_TDX_ATTESTATION) += intel_tdx_attest.o > obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \ > intel_telemetry_pltdrv.o \ > intel_telemetry_debugfs.o > diff --git a/drivers/platform/x86/intel_tdx_attest.c b/drivers/platform/x86/intel_tdx_attest.c > new file mode 100644 > index 000000000000..a0225d053851 > --- /dev/null > +++ b/drivers/platform/x86/intel_tdx_attest.c > @@ -0,0 +1,171 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * intel_tdx_attest.c - TDX guest attestation interface driver. > + * > + * Implements user interface to trigger attestation process and > + * read the TD Quote result. > + * > + * Copyright (C) 2020 Intel Corporation > + * > + * Author: > + * Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > + */ > + > +#define pr_fmt(fmt) "x86/tdx: attest: " fmt > + > +#include <linux/module.h> > +#include <linux/miscdevice.h> > +#include <linux/uaccess.h> > +#include <linux/fs.h> > +#include <linux/mm.h> > +#include <linux/slab.h> > +#include <linux/set_memory.h> > +#include <linux/io.h> > +#include <asm/apic.h> > +#include <asm/tdx.h> > +#include <asm/irq_vectors.h> > +#include <uapi/misc/tdx.h> > + > +#define VERSION "1.0" > + > +/* Used in Quote memory allocation */ > +#define QUOTE_SIZE (2 * PAGE_SIZE) > + > +/* Mutex to synchronize attestation requests */ > +static DEFINE_MUTEX(attestation_lock); > +/* Completion object to track attestation status */ > +static DECLARE_COMPLETION(attestation_done); > + > +static void attestation_callback_handler(void) > +{ > + complete(&attestation_done); > +} > + > +static long tdg_attest_ioctl(struct file *file, unsigned int cmd, > + unsigned long arg) > +{ > + u64 data = virt_to_phys(file->private_data); > + void __user *argp = (void __user *)arg; > + u8 *reportdata; > + long ret = 0; > + > + mutex_lock(&attestation_lock); > + > + reportdata = kzalloc(TDX_TDREPORT_LEN, GFP_KERNEL); > + if (!reportdata) { > + mutex_unlock(&attestation_lock); > + return -ENOMEM; > + } > + > + switch (cmd) { > + case TDX_CMD_GET_TDREPORT: > + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { > + ret = -EFAULT; > + break; > + } This copies from user memory to reportdata. > + > + /* Generate TDREPORT_STRUCT */ > + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { > + ret = -EIO; > + break; > + } This does the hypercall. > + > + if (copy_to_user(argp, file->private_data, TDX_TDREPORT_LEN)) > + ret = -EFAULT; This copies from private_data to user memory. How did the report get to private_data? > + break; > + case TDX_CMD_GEN_QUOTE: > + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { > + ret = -EFAULT; > + break; > + } > + > + /* Generate TDREPORT_STRUCT */ > + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { > + ret = -EIO; > + break; > + } > + > + ret = set_memory_decrypted((unsigned long)file->private_data, > + 1UL << get_order(QUOTE_SIZE)); > + if (ret) > + break; Now private_data is decrypted. (And this operation is *expensive*. Why is it done at ioctl time?) > + > + /* Submit GetQuote Request */ > + if (tdx_hcall_get_quote(data)) { > + ret = -EIO; > + goto done; > + } > + > + /* Wait for attestation completion */ > + wait_for_completion_interruptible(&attestation_done); > + > + if (copy_to_user(argp, file->private_data, QUOTE_SIZE)) > + ret = -EFAULT; > +done: > + ret = set_memory_encrypted((unsigned long)file->private_data, > + 1UL << get_order(QUOTE_SIZE)); And this is, again, quite expensive. > + > + break; > + case TDX_CMD_GET_QUOTE_SIZE: > + if (put_user(QUOTE_SIZE, (u64 __user *)argp)) > + ret = -EFAULT; > + > + break; > + default: > + pr_err("cmd %d not supported\n", cmd); > + break; > + } > + > + mutex_unlock(&attestation_lock); > + > + kfree(reportdata); > + > + return ret; > +} > + > +static int tdg_attest_open(struct inode *inode, struct file *file) > +{ > + /* > + * Currently tdg_event_notify_handler is only used in attestation > + * driver. But, WRITE_ONCE is used as benign data race notice. > + */ > + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); > + > + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, > + get_order(QUOTE_SIZE)); This allocation has negligible cost compared to changing memory to decrypted. Shouldn't you allocate a buffer once at driver load time or even at boot and just keep reusing it as needed? You could have a few pages of shared memory for the specific purposes of hypercalls, and you could check them out and release them when you need some.
On 7/8/21 3:21 PM, Andy Lutomirski wrote: >> + ret = set_memory_decrypted((unsigned long)file->private_data, >> + 1UL << get_order(QUOTE_SIZE)); >> + if (ret) >> + break; > Now private_data is decrypted. (And this operation is *expensive*. Why > is it done at ioctl time?) Expensive and permanently fractures the direct map. I'm struggling to figure out why the direct map is even touched here. Why not just use a vmalloc area mapping? You really just need *a* decrypted mapping to the page. You don't need to make *every* mapping to the page decrypted.
Hi, On 7/8/21 3:21 PM, Andy Lutomirski wrote: > On 7/7/21 1:42 PM, Kuppuswamy Sathyanarayanan wrote: > >> The interaction with the TDX module is like a RPM protocol here. There >> are several operations (get tdreport, get quote) that need to input a >> blob, and then output another blob. It was considered to use a sysfs >> interface for this, but it doesn't fit well into the standard sysfs >> model for configuring values. It would be possible to do read/write on >> files, but it would need multiple file descriptors, which would be >> somewhat messy. ioctls seems to be the best fitting and simplest model >> here. There is one ioctl per operation, that takes the input blob and >> returns the output blob, and as well as auxiliary ioctls to return the >> blob lengths. The ioctls are documented in the header file. >> >> Reviewed-by: Tony Luck <tony.luck@intel.com> >> Reviewed-by: Andi Kleen <ak@linux.intel.com> >> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> --- >> +config INTEL_TDX_ATTESTATION >> + tristate "Intel TDX attestation driver" >> + depends on INTEL_TDX_GUEST >> + help >> + The TDX attestation driver provides IOCTL or MMAP interfaces to >> + the user to request TDREPORT from the TDX module or request quote >> + from VMM. It is mainly used to get secure disk decryption keys from >> + the key server. > > What's the MMAP interface Initially this driver supported MMAP to allow user directly get the quote data without internal copies. But it was later removed based on internal review comments to simplify the driver interfaces. During the cleanup I somehow missed its reference in Kconfig file. Sorry, I will fix it in next version. >> +static long tdg_attest_ioctl(struct file *file, unsigned int cmd, >> + unsigned long arg) >> +{ >> + u64 data = virt_to_phys(file->private_data); > > >> + void __user *argp = (void __user *)arg; >> + u8 *reportdata; >> + long ret = 0; >> + >> + mutex_lock(&attestation_lock); >> + >> + reportdata = kzalloc(TDX_TDREPORT_LEN, GFP_KERNEL); >> + if (!reportdata) { >> + mutex_unlock(&attestation_lock); >> + return -ENOMEM; >> + } >> + >> + switch (cmd) { >> + case TDX_CMD_GET_TDREPORT: >> + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { >> + ret = -EFAULT; >> + break; >> + } > > This copies from user memory to reportdata. > >> + >> + /* Generate TDREPORT_STRUCT */ >> + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { >> + ret = -EIO; >> + break; >> + } > > This does the hypercall. > >> + >> + if (copy_to_user(argp, file->private_data, TDX_TDREPORT_LEN)) >> + ret = -EFAULT; > > This copies from private_data to user memory. How did the report get to > private_data? Report data is copied by previous TDX module call. The pointer we pass to it is the physical address of file->private_data. > >> + break; >> + case TDX_CMD_GEN_QUOTE: >> + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { >> + ret = -EFAULT; >> + break; >> + } >> + >> + /* Generate TDREPORT_STRUCT */ >> + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { >> + ret = -EIO; >> + break; >> + } >> + >> + ret = set_memory_decrypted((unsigned long)file->private_data, >> + 1UL << get_order(QUOTE_SIZE)); >> + if (ret) >> + break; > > Now private_data is decrypted. (And this operation is *expensive*. Why > is it done at ioctl time?) We are trying to use the same buffer in both tdx_mcall_*() and tdx_hcall_*() functions to avoid unnecessary data copies. Since we cannot pass decrypted page address to TDX module call, we have decrypted it in IOCTL after doing the TDX module call. We can move this decrypted/encrypted mapping code to open()/close() functions. But in that case, after the TDX module call, we need to separately copy the report data to the quote buffer before making the GetQuote hypercall. > >> + >> + /* Submit GetQuote Request */ >> + if (tdx_hcall_get_quote(data)) { >> + ret = -EIO; >> + goto done; >> + } >> + >> + /* Wait for attestation completion */ >> + wait_for_completion_interruptible(&attestation_done); >> + >> + if (copy_to_user(argp, file->private_data, QUOTE_SIZE)) >> + ret = -EFAULT; >> +done: >> + ret = set_memory_encrypted((unsigned long)file->private_data, >> + 1UL << get_order(QUOTE_SIZE)); > > And this is, again, quite expensive. Same as above. > >> + >> + break; >> + case TDX_CMD_GET_QUOTE_SIZE: >> + if (put_user(QUOTE_SIZE, (u64 __user *)argp)) >> + ret = -EFAULT; >> + >> + break; >> + default: >> + pr_err("cmd %d not supported\n", cmd); >> + break; >> + } >> + >> + mutex_unlock(&attestation_lock); >> + >> + kfree(reportdata); >> + >> + return ret; >> +} >> + >> +static int tdg_attest_open(struct inode *inode, struct file *file) >> +{ >> + /* >> + * Currently tdg_event_notify_handler is only used in attestation >> + * driver. But, WRITE_ONCE is used as benign data race notice. >> + */ >> + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); >> + >> + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, >> + get_order(QUOTE_SIZE)); > > This allocation has negligible cost compared to changing memory to > decrypted. > > Shouldn't you allocate a buffer once at driver load time or even at boot > and just keep reusing it as needed? You could have a few pages of > shared memory for the specific purposes of hypercalls, and you could > check them out and release them when you need some. If you are fine with additional data copy cost, I can move the decrypted/ encrypted mapping code to open/close() functions. >
On Wed, Jul 7, 2021 at 1:43 PM Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> wrote: > > TDX guest supports encrypted disk as root or secondary drives. > Decryption keys required to access such drives are usually maintained > by 3rd party key servers. Attestation is required by 3rd party key > servers to get the key for an encrypted disk volume, or possibly other > encrypted services. Attestation is used to prove to the key server that > the TD guest is running in a valid TD and the kernel and virtual BIOS > and other environment are secure. > > During the boot process various components before the kernel accumulate > hashes in the TDX module, which can then combined into a report. This > would typically include a hash of the bios, bios configuration, boot > loader, command line, kernel, initrd. After checking the hashes the > key server will securely release the keys. > > The actual details of the attestation protocol depend on the particular > key server configuration, but some parts are common and need to > communicate with the TDX module. > > This communication is implemented in the attestation driver. > > The supported steps are: > > 1. TD guest generates the TDREPORT that contains version information > about the Intel TDX module, measurement of the TD, along with a > TD-specified nonce. > 2. TD guest shares the TDREPORT with TD host via GetQuote hypercall > which is used by the host to generate a quote via quoting > enclave (QE). > 3. Quote generation completion notification is sent to TD OS via > callback interrupt vector configured by TD using > SetupEventNotifyInterrupt hypercall. > 4. After receiving the generated TDQUOTE, a remote verifier can be > used to verify the quote and confirm the trustworthiness of the > TD. > > Attestation agent uses IOCTLs implemented by the attestation driver to > complete the various steps of the attestation process. > > Also note that, explicit access permissions are not enforced in this > driver because the quote and measurements are not a secret. However > the access permissions of the device node can be used to set any > desired access policy. The udev default is usually root access > only. > > TDX_CMD_GEN_QUOTE IOCTL can be used to create an computation on the > host, but TDX assumes that the host is able to deal with malicious > guest flooding it anyways. > > The interaction with the TDX module is like a RPM protocol here. There > are several operations (get tdreport, get quote) that need to input a > blob, and then output another blob. It was considered to use a sysfs > interface for this, but it doesn't fit well into the standard sysfs > model for configuring values. It would be possible to do read/write on > files, but it would need multiple file descriptors, which would be > somewhat messy. ioctls seems to be the best fitting and simplest model > here. There is one ioctl per operation, that takes the input blob and > returns the output blob, and as well as auxiliary ioctls to return the > blob lengths. The ioctls are documented in the header file. > > Reviewed-by: Tony Luck <tony.luck@intel.com> > Reviewed-by: Andi Kleen <ak@linux.intel.com> > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/platform/x86/Kconfig | 9 ++ > drivers/platform/x86/Makefile | 1 + > drivers/platform/x86/intel_tdx_attest.c | 171 ++++++++++++++++++++++++ > include/uapi/misc/tdx.h | 37 +++++ > 4 files changed, 218 insertions(+) > create mode 100644 drivers/platform/x86/intel_tdx_attest.c > create mode 100644 include/uapi/misc/tdx.h > > diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig > index 60592fb88e7a..7d01c473aef6 100644 > --- a/drivers/platform/x86/Kconfig > +++ b/drivers/platform/x86/Kconfig > @@ -1301,6 +1301,15 @@ config INTEL_SCU_IPC_UTIL > low level access for debug work and updating the firmware. Say > N unless you will be doing this on an Intel MID platform. > > +config INTEL_TDX_ATTESTATION > + tristate "Intel TDX attestation driver" > + depends on INTEL_TDX_GUEST > + help > + The TDX attestation driver provides IOCTL or MMAP interfaces to > + the user to request TDREPORT from the TDX module or request quote > + from VMM. It is mainly used to get secure disk decryption keys from > + the key server. > + > config INTEL_TELEMETRY > tristate "Intel SoC Telemetry Driver" > depends on X86_64 > diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile > index dcc8cdb95b4d..83439990ae47 100644 > --- a/drivers/platform/x86/Makefile > +++ b/drivers/platform/x86/Makefile > @@ -138,6 +138,7 @@ obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o > obj-$(CONFIG_INTEL_SCU_PLATFORM) += intel_scu_pltdrv.o > obj-$(CONFIG_INTEL_SCU_WDT) += intel_scu_wdt.o > obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o > +obj-$(CONFIG_INTEL_TDX_ATTESTATION) += intel_tdx_attest.o > obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \ > intel_telemetry_pltdrv.o \ > intel_telemetry_debugfs.o > diff --git a/drivers/platform/x86/intel_tdx_attest.c b/drivers/platform/x86/intel_tdx_attest.c > new file mode 100644 > index 000000000000..a0225d053851 > --- /dev/null > +++ b/drivers/platform/x86/intel_tdx_attest.c > @@ -0,0 +1,171 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * intel_tdx_attest.c - TDX guest attestation interface driver. > + * > + * Implements user interface to trigger attestation process and > + * read the TD Quote result. > + * > + * Copyright (C) 2020 Intel Corporation > + * > + * Author: > + * Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > + */ > + > +#define pr_fmt(fmt) "x86/tdx: attest: " fmt > + > +#include <linux/module.h> > +#include <linux/miscdevice.h> > +#include <linux/uaccess.h> > +#include <linux/fs.h> > +#include <linux/mm.h> > +#include <linux/slab.h> > +#include <linux/set_memory.h> > +#include <linux/io.h> > +#include <asm/apic.h> > +#include <asm/tdx.h> > +#include <asm/irq_vectors.h> > +#include <uapi/misc/tdx.h> > + > +#define VERSION "1.0" Individual module versions are typically useless as the kernel version is sufficient. > + > +/* Used in Quote memory allocation */ > +#define QUOTE_SIZE (2 * PAGE_SIZE) > + > +/* Mutex to synchronize attestation requests */ > +static DEFINE_MUTEX(attestation_lock); > +/* Completion object to track attestation status */ > +static DECLARE_COMPLETION(attestation_done); > + > +static void attestation_callback_handler(void) > +{ > + complete(&attestation_done); > +} > + > +static long tdg_attest_ioctl(struct file *file, unsigned int cmd, > + unsigned long arg) > +{ > + u64 data = virt_to_phys(file->private_data); > + void __user *argp = (void __user *)arg; > + u8 *reportdata; > + long ret = 0; > + > + mutex_lock(&attestation_lock); > + > + reportdata = kzalloc(TDX_TDREPORT_LEN, GFP_KERNEL); > + if (!reportdata) { > + mutex_unlock(&attestation_lock); > + return -ENOMEM; > + } > + > + switch (cmd) { > + case TDX_CMD_GET_TDREPORT: > + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { > + ret = -EFAULT; > + break; > + } > + > + /* Generate TDREPORT_STRUCT */ > + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { > + ret = -EIO; > + break; > + } > + > + if (copy_to_user(argp, file->private_data, TDX_TDREPORT_LEN)) > + ret = -EFAULT; > + break; > + case TDX_CMD_GEN_QUOTE: > + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { > + ret = -EFAULT; > + break; > + } > + > + /* Generate TDREPORT_STRUCT */ > + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { > + ret = -EIO; > + break; > + } > + > + ret = set_memory_decrypted((unsigned long)file->private_data, > + 1UL << get_order(QUOTE_SIZE)); > + if (ret) > + break; > + > + /* Submit GetQuote Request */ > + if (tdx_hcall_get_quote(data)) { > + ret = -EIO; > + goto done; > + } > + > + /* Wait for attestation completion */ > + wait_for_completion_interruptible(&attestation_done); > + > + if (copy_to_user(argp, file->private_data, QUOTE_SIZE)) > + ret = -EFAULT; > +done: > + ret = set_memory_encrypted((unsigned long)file->private_data, > + 1UL << get_order(QUOTE_SIZE)); This wants a get_nr_pages() helper. > + > + break; > + case TDX_CMD_GET_QUOTE_SIZE: > + if (put_user(QUOTE_SIZE, (u64 __user *)argp)) > + ret = -EFAULT; > + > + break; > + default: > + pr_err("cmd %d not supported\n", cmd); > + break; > + } > + > + mutex_unlock(&attestation_lock); > + > + kfree(reportdata); > + > + return ret; > +} > + > +static int tdg_attest_open(struct inode *inode, struct file *file) > +{ > + /* > + * Currently tdg_event_notify_handler is only used in attestation > + * driver. But, WRITE_ONCE is used as benign data race notice. > + */ > + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); Why is this ioctl not part of the driver that registered the interrupt handler for this callback in the first instance? I've never seen this style of cross-driver communication before. > + > + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, > + get_order(QUOTE_SIZE)); Why does this driver abandon all semblance of type-safety and use ->private_data directly? This also seems an easy way to consume memory, just keep opening this device over and over again. AFAICS this buffer is only used ephemerally. I see no reason it needs to be allocated once per open file. Unless you need several threads to be running the attestation process in parallel just allocate a single buffer at module init (statically defined or on the heap) and use a lock to enforce only one user of this buffer at a time. That would also solve your direct-map fracturing problem. All that said, this new user ABI for passing blobs in and out of the kernel is something that the keyutils API already does. Did you consider add_key() / request_key() for this case? That would also be the natural path for the end step of requesting the drive decrypt key. I.e. a chain of key payloads starting with establishing the attestation blob.
On 7/8/21 4:36 PM, Dan Williams wrote: >> +static int tdg_attest_open(struct inode *inode, struct file *file) >> +{ >> + /* >> + * Currently tdg_event_notify_handler is only used in attestation >> + * driver. But, WRITE_ONCE is used as benign data race notice. >> + */ >> + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); > Why is this ioctl not part of the driver that registered the interrupt We cannot club them because they are not functionally related. Even notification is a separate common feature supported by TDX and configured using SetupEventNotifyInterrupt hypercall. It is not related to TDX attestation. Attestation just uses event notification interface to get the quote completion event. > handler for this callback in the first instance? I've never seen this > style of cross-driver communication before. This is similar to x86_platform_ipi_callback() acrn_setup_intr_handler() use cases. > >> + >> + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, >> + get_order(QUOTE_SIZE)); > Why does this driver abandon all semblance of type-safety and use > ->private_data directly? This also seems an easy way to consume > memory, just keep opening this device over and over again. > > AFAICS this buffer is only used ephemerally. I see no reason it needs > to be allocated once per open file. Unless you need several threads to > be running the attestation process in parallel just allocate a single > buffer at module init (statically defined or on the heap) and use a > lock to enforce only one user of this buffer at a time. That would > also solve your direct-map fracturing problem. Theoretically attestation requests can be sent in parallel. I have allocated the memory in open() call mainly for this reason. But current TDX ABI specification does not clearly specify this possibility and I am not sure whether TDX KVM supports it. Let me confirm about it again with TDX KVM owner. If such model is not currently supported, then I will move the memory allocation to init code. > > All that said, this new user ABI for passing blobs in and out of the > kernel is something that the keyutils API already does. Did you > consider add_key() / request_key() for this case? That would also be > the natural path for the end step of requesting the drive decrypt key. > I.e. a chain of key payloads starting with establishing the > attestation blob. I am not sure whether we can use keyutil interface for attestation. AFAIK, there are other use cases for attestation other than getting keys for encrypted drives.
On Thu, Jul 8, 2021 at 4:57 PM Kuppuswamy, Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> wrote: > > > > On 7/8/21 4:36 PM, Dan Williams wrote: > >> +static int tdg_attest_open(struct inode *inode, struct file *file) > >> +{ > >> + /* > >> + * Currently tdg_event_notify_handler is only used in attestation > >> + * driver. But, WRITE_ONCE is used as benign data race notice. > >> + */ > >> + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); > > Why is this ioctl not part of the driver that registered the interrupt > > We cannot club them because they are not functionally related. Even notification > is a separate common feature supported by TDX and configured using > SetupEventNotifyInterrupt hypercall. It is not related to TDX attestation. > Attestation just uses event notification interface to get the quote > completion event. > > > handler for this callback in the first instance? I've never seen this > > style of cross-driver communication before. > > This is similar to x86_platform_ipi_callback() acrn_setup_intr_handler() > use cases. Those appear to be for core functionality, not one off drivers. Where is the code that does the SetupEventNotifyInterrupt, is it a driver? > > > > >> + > >> + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, > >> + get_order(QUOTE_SIZE)); > > Why does this driver abandon all semblance of type-safety and use > > ->private_data directly? This also seems an easy way to consume > > memory, just keep opening this device over and over again. > > > > AFAICS this buffer is only used ephemerally. I see no reason it needs > > to be allocated once per open file. Unless you need several threads to > > be running the attestation process in parallel just allocate a single > > buffer at module init (statically defined or on the heap) and use a > > lock to enforce only one user of this buffer at a time. That would > > also solve your direct-map fracturing problem. > > Theoretically attestation requests can be sent in parallel. I have > allocated the memory in open() call mainly for this reason. But current > TDX ABI specification does not clearly specify this possibility and I am > not sure whether TDX KVM supports it. Let me confirm about it again with > TDX KVM owner. If such model is not currently supported, then I will move > the memory allocation to init code. If you have a lock would TDX KVM even notice that its parallel requests are being handled serially? I.e. even if they said "yes, multiple requests may happen in parallel", until it becomes an actual latency problem in practice it's not clear that this generous use of resources is justified. Scratch that... this driver already has the attestation_lock! So, it's already the case that only one thread can be attesting at a time. The per-file buffer is unecessary. > > > > > All that said, this new user ABI for passing blobs in and out of the > > kernel is something that the keyutils API already does. Did you > > consider add_key() / request_key() for this case? That would also be > > the natural path for the end step of requesting the drive decrypt key. > > I.e. a chain of key payloads starting with establishing the > > attestation blob. > > I am not sure whether we can use keyutil interface for attestation. AFAIK, > there are other use cases for attestation other than getting keys for > encrypted drives. keyutils supports generating and passing blobs into and out of the kernel with a handle associated to those blobs. This driver adds a TDX way to pass blobs into and out of the kernel. If Linux grows other TDX-like attestation requirements in the future (e.g. PCI SPDM) should each of those invent their own user ABI for passing blobs around?
On 7/8/2021 5:20 PM, Dan Williams wrote: > > If you have a lock would TDX KVM even notice that its parallel > requests are being handled serially? I.e. even if they said "yes, > multiple requests may happen in parallel", until it becomes an actual > latency problem in practice it's not clear that this generous use of > resources is justified. The worst case usage is 2 pages * file descriptor. There are lots of other ways to use that much and more memory for each file descriptor. > > Scratch that... this driver already has the attestation_lock! So, it's > already the case that only one thread can be attesting at a time. The > per-file buffer is unecessary. But then you couldn't free the buffer. So it would be leaked forever for likely only one attestation. Not sure what problem you're trying to solve here. > > keyutils supports generating and passing blobs into and out of the > kernel with a handle associated to those blobs. This driver adds a TDX > way to pass blobs into and out of the kernel. If Linux grows other > TDX-like attestation requirements in the future (e.g. PCI SPDM) should > each of those invent their own user ABI for passing blobs around? The TDX blobs are different than any blobs that keyutils supports today. The TDX operations are different too. TDREPORT doesn't even involve any keys, it's just attestation reports. keyutils today nothing related to attestation. I just don't see any commonality. If there was commonality it would be more with the TPM interface, but TDX attestation is different enough that it also isn't feasible to directly convert it into TPM operation (apart from standard TPM being a beast that you better avoid as much as possible anyways) -Andi
> Expensive and permanently fractures the direct map. > > I'm struggling to figure out why the direct map is even touched here. I think Sathya did it this way because the TD interface requires a physical address. > Why not just use a vmalloc area mapping? You really just need *a* > decrypted mapping to the page. You don't need to make *every* mapping > to the page decrypted. Yes it would be possible to use vmap() on the page and only set the vmap encrypted by passing the right flags directly. That would avoid breaking up the direct mapping. -Andi
On Thu, Jul 8, 2021 at 5:36 PM Andi Kleen <ak@linux.intel.com> wrote: > > > On 7/8/2021 5:20 PM, Dan Williams wrote: > > > > If you have a lock would TDX KVM even notice that its parallel > > requests are being handled serially? I.e. even if they said "yes, > > multiple requests may happen in parallel", until it becomes an actual > > latency problem in practice it's not clear that this generous use of > > resources is justified. > The worst case usage is 2 pages * file descriptor. There are lots of > other ways to use that much and more memory for each file descriptor. > > > > > Scratch that... this driver already has the attestation_lock! So, it's > > already the case that only one thread can be attesting at a time. The > > per-file buffer is unecessary. > > But then you couldn't free the buffer. So it would be leaked forever for > likely only one attestation. > > Not sure what problem you're trying to solve here. One allocation for the life of the driver that can have its direct map permissions changed rather than an allocation per-file descriptor and fragmenting the direct map. > > keyutils supports generating and passing blobs into and out of the > > kernel with a handle associated to those blobs. This driver adds a TDX > > way to pass blobs into and out of the kernel. If Linux grows other > > TDX-like attestation requirements in the future (e.g. PCI SPDM) should > > each of those invent their own user ABI for passing blobs around? > > The TDX blobs are different than any blobs that keyutils supports today. > The TDX operations are different too. > > TDREPORT doesn't even involve any keys, it's just attestation reports. > > keyutils today nothing related to attestation. > > I just don't see any commonality. If there was commonality it would be > more with the TPM interface, but TDX attestation is different enough > that it also isn't feasible to directly convert it into TPM operation > (apart from standard TPM being a beast that you better avoid as much as > possible anyways) > Ok. I'll leave that alone for TDX, but I still have my eyes on keyutils for aspects of PCI SPDM.
> One allocation for the life of the driver that can have its direct map > permissions changed rather than an allocation per-file descriptor and > fragmenting the direct map. The vmap() approach discussed in another mail will solve that. -Andi
On Thu, Jul 8, 2021 at 6:44 PM Andi Kleen <ak@linux.intel.com> wrote: > > > > One allocation for the life of the driver that can have its direct map > > permissions changed rather than an allocation per-file descriptor and > > fragmenting the direct map. > > The vmap() approach discussed in another mail will solve that. Ok, not my first choice for how to handle the allocation side of that, but not broken. I'd still feel better if there was an actual data structure assigned to file->private_data rather than using that 'void *' pointer directly and casting throughout the driver.
On 7/8/21 7:04 PM, Dan Williams wrote: > Ok, not my first choice for how to handle the allocation side of that, > but not broken. > > I'd still feel better if there was an actual data structure assigned > to file->private_data rather than using that 'void *' pointer directly > and casting throughout the driver. We can add a data structure if we have more member requirements. Currently we only need to pass the memory pointer. But after moving memory allocation to init code (even for vmap), we may not need to use this private pointer.
Hi Dave, On 7/8/21 5:38 PM, Andi Kleen wrote: >> Expensive and permanently fractures the direct map. >> >> I'm struggling to figure out why the direct map is even touched here. > I think Sathya did it this way because the TD interface requires a physical address. >> Why not just use a vmalloc area mapping? You really just need *a* >> decrypted mapping to the page. You don't need to make *every* mapping >> to the page decrypted. > > Yes it would be possible to use vmap() on the page and only set the vmap encrypted by passing the > right flags directly. Is it alright to have non coherent mappings? If yes, any documentation reference for it? > > That would avoid breaking up the direct mapping.
On 7/12/21 5:33 PM, Kuppuswamy, Sathyanarayanan wrote: > On 7/8/21 5:38 PM, Andi Kleen wrote: >>> Expensive and permanently fractures the direct map. >>> >>> I'm struggling to figure out why the direct map is even touched >>> here. >> I think Sathya did it this way because the TD interface requires a >> physical address. >>> Why not just use a vmalloc area mapping? You really just need >>> *a* decrypted mapping to the page. You don't need to make >>> *every* mapping to the page decrypted. >> >> Yes it would be possible to use vmap() on the page and only set >> the vmap encrypted by passing the right flags directly. > > Is it alright to have non coherent mappings? If yes, any > documentation reference for it? Do you mean non-cache-coherent mappings? I'm not sure what that has to do with creating "unencrypted" (shared) mappings. Are you asking exactly which arguments to pass to vmap() or to vmap_pfn()?
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 60592fb88e7a..7d01c473aef6 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1301,6 +1301,15 @@ config INTEL_SCU_IPC_UTIL low level access for debug work and updating the firmware. Say N unless you will be doing this on an Intel MID platform. +config INTEL_TDX_ATTESTATION + tristate "Intel TDX attestation driver" + depends on INTEL_TDX_GUEST + help + The TDX attestation driver provides IOCTL or MMAP interfaces to + the user to request TDREPORT from the TDX module or request quote + from VMM. It is mainly used to get secure disk decryption keys from + the key server. + config INTEL_TELEMETRY tristate "Intel SoC Telemetry Driver" depends on X86_64 diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index dcc8cdb95b4d..83439990ae47 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -138,6 +138,7 @@ obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o obj-$(CONFIG_INTEL_SCU_PLATFORM) += intel_scu_pltdrv.o obj-$(CONFIG_INTEL_SCU_WDT) += intel_scu_wdt.o obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o +obj-$(CONFIG_INTEL_TDX_ATTESTATION) += intel_tdx_attest.o obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \ intel_telemetry_pltdrv.o \ intel_telemetry_debugfs.o diff --git a/drivers/platform/x86/intel_tdx_attest.c b/drivers/platform/x86/intel_tdx_attest.c new file mode 100644 index 000000000000..a0225d053851 --- /dev/null +++ b/drivers/platform/x86/intel_tdx_attest.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * intel_tdx_attest.c - TDX guest attestation interface driver. + * + * Implements user interface to trigger attestation process and + * read the TD Quote result. + * + * Copyright (C) 2020 Intel Corporation + * + * Author: + * Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> + */ + +#define pr_fmt(fmt) "x86/tdx: attest: " fmt + +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/uaccess.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/set_memory.h> +#include <linux/io.h> +#include <asm/apic.h> +#include <asm/tdx.h> +#include <asm/irq_vectors.h> +#include <uapi/misc/tdx.h> + +#define VERSION "1.0" + +/* Used in Quote memory allocation */ +#define QUOTE_SIZE (2 * PAGE_SIZE) + +/* Mutex to synchronize attestation requests */ +static DEFINE_MUTEX(attestation_lock); +/* Completion object to track attestation status */ +static DECLARE_COMPLETION(attestation_done); + +static void attestation_callback_handler(void) +{ + complete(&attestation_done); +} + +static long tdg_attest_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + u64 data = virt_to_phys(file->private_data); + void __user *argp = (void __user *)arg; + u8 *reportdata; + long ret = 0; + + mutex_lock(&attestation_lock); + + reportdata = kzalloc(TDX_TDREPORT_LEN, GFP_KERNEL); + if (!reportdata) { + mutex_unlock(&attestation_lock); + return -ENOMEM; + } + + switch (cmd) { + case TDX_CMD_GET_TDREPORT: + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { + ret = -EFAULT; + break; + } + + /* Generate TDREPORT_STRUCT */ + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { + ret = -EIO; + break; + } + + if (copy_to_user(argp, file->private_data, TDX_TDREPORT_LEN)) + ret = -EFAULT; + break; + case TDX_CMD_GEN_QUOTE: + if (copy_from_user(reportdata, argp, TDX_REPORT_DATA_LEN)) { + ret = -EFAULT; + break; + } + + /* Generate TDREPORT_STRUCT */ + if (tdx_mcall_tdreport(data, virt_to_phys(reportdata))) { + ret = -EIO; + break; + } + + ret = set_memory_decrypted((unsigned long)file->private_data, + 1UL << get_order(QUOTE_SIZE)); + if (ret) + break; + + /* Submit GetQuote Request */ + if (tdx_hcall_get_quote(data)) { + ret = -EIO; + goto done; + } + + /* Wait for attestation completion */ + wait_for_completion_interruptible(&attestation_done); + + if (copy_to_user(argp, file->private_data, QUOTE_SIZE)) + ret = -EFAULT; +done: + ret = set_memory_encrypted((unsigned long)file->private_data, + 1UL << get_order(QUOTE_SIZE)); + + break; + case TDX_CMD_GET_QUOTE_SIZE: + if (put_user(QUOTE_SIZE, (u64 __user *)argp)) + ret = -EFAULT; + + break; + default: + pr_err("cmd %d not supported\n", cmd); + break; + } + + mutex_unlock(&attestation_lock); + + kfree(reportdata); + + return ret; +} + +static int tdg_attest_open(struct inode *inode, struct file *file) +{ + /* + * Currently tdg_event_notify_handler is only used in attestation + * driver. But, WRITE_ONCE is used as benign data race notice. + */ + WRITE_ONCE(tdg_event_notify_handler, attestation_callback_handler); + + file->private_data = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(QUOTE_SIZE)); + + return !file->private_data ? -ENOMEM : 0; +} + +static int tdg_attest_release(struct inode *inode, struct file *file) +{ + /* + * Currently tdg_event_notify_handler is only used in attestation + * driver. But, WRITE_ONCE is used as benign data race notice. + */ + WRITE_ONCE(tdg_event_notify_handler, NULL); + free_pages((unsigned long)file->private_data, get_order(QUOTE_SIZE)); + file->private_data = NULL; + + return 0; +} + +static const struct file_operations tdg_attest_fops = { + .owner = THIS_MODULE, + .open = tdg_attest_open, + .release = tdg_attest_release, + .unlocked_ioctl = tdg_attest_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice tdg_attest_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "tdx-attest", + .fops = &tdg_attest_fops, +}; +module_misc_device(tdg_attest_device); + +MODULE_AUTHOR("Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>"); +MODULE_DESCRIPTION("TDX attestation driver ver " VERSION); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); diff --git a/include/uapi/misc/tdx.h b/include/uapi/misc/tdx.h new file mode 100644 index 000000000000..59e6561c0892 --- /dev/null +++ b/include/uapi/misc/tdx.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_MISC_TDX_H +#define _UAPI_MISC_TDX_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* Input report data length for TDX_CMD_GET_TDREPORT IOCTL request */ +#define TDX_REPORT_DATA_LEN 64 + +/* Output TD report data length after TDX_CMD_GET_TDREPORT IOCTL execution */ +#define TDX_TDREPORT_LEN 1024 + +/* + * TDX_CMD_GET_TDREPORT IOCTL is used to get TDREPORT data from the TDX + * Module. Users should pass report data of size TDX_REPORT_DATA_LEN bytes + * via user input buffer of size TDX_TDREPORT_LEN. Once IOCTL is successful + * TDREPORT data is copied to the user buffer. + */ +#define TDX_CMD_GET_TDREPORT _IOWR('T', 0x01, __u64) + +/* + * TDX_CMD_GEN_QUOTE IOCTL is used to request TD QUOTE from the VMM. User + * should pass report data of size TDX_REPORT_DATA_LEN bytes via user input + * buffer of quote size. Once IOCTL is successful quote data is copied back to + * the user buffer. + */ +#define TDX_CMD_GEN_QUOTE _IOR('T', 0x02, __u64) + +/* + * TDX_CMD_GET_QUOTE_SIZE IOCTL is used to get the TD Quote size info in bytes. + * This will be used for determining the input buffer allocation size when + * using TDX_CMD_GEN_QUOTE IOCTL. + */ +#define TDX_CMD_GET_QUOTE_SIZE _IOR('T', 0x03, __u64) + +#endif /* _UAPI_MISC_TDX_H */