diff mbox series

[RFC,2/4] x86/sgx: Implement support for MADV_WILLNEED

Message ID 20221019191413.48752-3-haitao.huang@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series x86/sgx: implement support for MADV_WILLNEED | expand

Commit Message

Haitao Huang Oct. 19, 2022, 7:14 p.m. UTC
Add support for madvise(..., MADV_WILLNEED) by adding
pages with EAUG. Implement fops->fadvise() callback to
achieve this behaviour.

Note this is only done with best effort possible. If any
errors encountered or EPC is under swapping, the operation
will stop and return as normal.

Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
 arch/x86/kernel/cpu/sgx/driver.c | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Comments

Jarkko Sakkinen Oct. 23, 2022, 9:23 p.m. UTC | #1
On Wed, Oct 19, 2022 at 12:14:11PM -0700, Haitao Huang wrote:
> Add support for madvise(..., MADV_WILLNEED) by adding
> pages with EAUG. Implement fops->fadvise() callback to
> achieve this behaviour.
> 
> Note this is only done with best effort possible. If any
> errors encountered or EPC is under swapping, the operation
> will stop and return as normal.
> 
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/driver.c | 81 ++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
> index aa9b8b868867..54b24897605b 100644
> --- a/arch/x86/kernel/cpu/sgx/driver.c
> +++ b/arch/x86/kernel/cpu/sgx/driver.c
> @@ -2,6 +2,7 @@
>  /*  Copyright(c) 2016-20 Intel Corporation. */
>  
>  #include <linux/acpi.h>
> +#include <linux/fadvise.h>
>  #include <linux/miscdevice.h>
>  #include <linux/mman.h>
>  #include <linux/security.h>
> @@ -9,6 +10,7 @@
>  #include <asm/traps.h>
>  #include "driver.h"
>  #include "encl.h"
> +#include "encls.h"
>  
>  u64 sgx_attributes_reserved_mask;
>  u64 sgx_xfrm_reserved_mask = ~0x3;
> @@ -97,10 +99,88 @@ static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
>  	vma->vm_ops = &sgx_vm_ops;
>  	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
>  	vma->vm_private_data = encl;
> +	/* Anchor vm_pgoff to the enclave base.
> +	 * So offset passed back to sgx_fadvise hook
> +	 * is relative to the enclave base
> +	 */
> +	vma->vm_pgoff = (vma->vm_start - encl->base) >> PAGE_SHIFT;
>  
>  	return 0;
>  }
>  
> +/*
> + * Add new pages to the enclave sequentially with ENCLS[EAUG] for the WILLNEED advice.
> + * Only do this to existing VMAs in the same enclave and reject the request.
> + * Returns:	0 if EAUG done with best effort, -EINVAL if any sub-range given
> + * is not in the enclave, or enclave is not initialized..
> + */
> +static int sgx_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
> +{
> +	struct sgx_encl *encl = file->private_data;
> +	unsigned long start, end, pos;
> +	int ret = -EINVAL;
> +	struct vm_area_struct *vma = NULL;
> +
> +	/* Only support WILLNEED */
> +	if (advice != POSIX_FADV_WILLNEED)
> +		return -EINVAL;
> +	if (!encl)
> +		return -EINVAL;
> +	if (!cpu_feature_enabled(X86_FEATURE_SGX2))
> +		return -EINVAL;
> +
> +	if (offset + len < offset)
> +		return -EINVAL;
> +	if (encl->base + offset < encl->base)
> +		return -EINVAL;
> +	start  = offset + encl->base;
> +	end = start + len;
> +	if (end < start)
> +		return -EINVAL;
> +	if (end > encl->base + encl->size)
> +		return -EINVAL;
> +
> +	/* EAUG works only for initialized enclaves. */
> +	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
> +		return -EINVAL;
> +
> +	mmap_read_lock(current->mm);
> +
> +	vma = find_vma(current->mm, start);
> +	if (!vma)
> +		goto unlock;
> +	if (vma->vm_private_data != encl)
> +		goto unlock;
> +
> +	pos = start;
> +	if (pos < vma->vm_start || end > vma->vm_end) {
> +		/* Don't allow any gaps */
> +		goto unlock;
> +	}
> +	/* Here: vm_start <= pos < end <= vm_end */
> +	while (pos < end) {
> +		if (xa_load(&encl->page_array, PFN_DOWN(pos)))
> +			continue;
> +		if (signal_pending(current)) {
> +			if (pos == start)
> +				ret = -ERESTARTSYS;
> +			else
> +				ret = -EINTR;
> +			goto unlock;
> +		}
> +		ret = sgx_encl_eaug_page(vma, encl, pos);

You should instead just do the export in the previous commit,
and convert the return values here. Less pollution to the
existing code base.

> +		/* It's OK to not finish */
> +		if (ret)
> +			break;
> +		pos = pos + PAGE_SIZE;
> +		cond_resched();
> +	}
> +	ret = 0;
> +unlock:
> +	mmap_read_unlock(current->mm);
> +	return ret;
> +}
> +
>  static unsigned long sgx_get_unmapped_area(struct file *file,
>  					   unsigned long addr,
>  					   unsigned long len,
> @@ -133,6 +213,7 @@ static const struct file_operations sgx_encl_fops = {
>  	.compat_ioctl		= sgx_compat_ioctl,
>  #endif
>  	.mmap			= sgx_mmap,
> +	.fadvise		= sgx_fadvise,
>  	.get_unmapped_area	= sgx_get_unmapped_area,
>  };
>  
> -- 
> 2.25.1
>
Haitao Huang Oct. 24, 2022, 3:14 p.m. UTC | #2
Hi Jarkko,

On Sun, 23 Oct 2022 16:23:30 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
wrote:
...
>> +	/* Here: vm_start <= pos < end <= vm_end */
>> +	while (pos < end) {
>> +		if (xa_load(&encl->page_array, PFN_DOWN(pos)))
>> +			continue;
>> +		if (signal_pending(current)) {
>> +			if (pos == start)
>> +				ret = -ERESTARTSYS;
>> +			else
>> +				ret = -EINTR;
>> +			goto unlock;
>> +		}
>> +		ret = sgx_encl_eaug_page(vma, encl, pos);
>
> You should instead just do the export in the previous commit,
> and convert the return values here. Less pollution to the
> existing code base.
>

Thanks for the feedback. Just to clarify, you prefer following:

1. do export only in separate patch, no return type and value change
2. change return type and value in this patch.

Note the sgx_fadvise function needs to stop EAUGing and return in case of  
-EBUSY. So sgx_encl_eaug_page has to return error code -EBUSY separately,  
instead of lumping -EBUSY with VM_FAULT_NOPAGE which was fine for page  
fault handler.

Thanks
Haitao
Jarkko Sakkinen Nov. 1, 2022, 12:49 a.m. UTC | #3
On Mon, Oct 24, 2022 at 10:14:03AM -0500, Haitao Huang wrote:
> Hi Jarkko,
> 
> On Sun, 23 Oct 2022 16:23:30 -0500, Jarkko Sakkinen <jarkko@kernel.org>
> wrote:
> ...
> > > +	/* Here: vm_start <= pos < end <= vm_end */
> > > +	while (pos < end) {
> > > +		if (xa_load(&encl->page_array, PFN_DOWN(pos)))
> > > +			continue;
> > > +		if (signal_pending(current)) {
> > > +			if (pos == start)
> > > +				ret = -ERESTARTSYS;
> > > +			else
> > > +				ret = -EINTR;
> > > +			goto unlock;
> > > +		}
> > > +		ret = sgx_encl_eaug_page(vma, encl, pos);
> > 
> > You should instead just do the export in the previous commit,
> > and convert the return values here. Less pollution to the
> > existing code base.
> > 
> 
> Thanks for the feedback. Just to clarify, you prefer following:
> 
> 1. do export only in separate patch, no return type and value change
> 2. change return type and value in this patch.
> 
> Note the sgx_fadvise function needs to stop EAUGing and return in case of
> -EBUSY. So sgx_encl_eaug_page has to return error code -EBUSY separately,
> instead of lumping -EBUSY with VM_FAULT_NOPAGE which was fine for page fault
> handler.

I guess the problem is that any of this is not explained in the commit
message, i.e. should already answer to this question. My preference is
argumented solution.

BR, Jarkko
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..54b24897605b 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -2,6 +2,7 @@ 
 /*  Copyright(c) 2016-20 Intel Corporation. */
 
 #include <linux/acpi.h>
+#include <linux/fadvise.h>
 #include <linux/miscdevice.h>
 #include <linux/mman.h>
 #include <linux/security.h>
@@ -9,6 +10,7 @@ 
 #include <asm/traps.h>
 #include "driver.h"
 #include "encl.h"
+#include "encls.h"
 
 u64 sgx_attributes_reserved_mask;
 u64 sgx_xfrm_reserved_mask = ~0x3;
@@ -97,10 +99,88 @@  static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_ops = &sgx_vm_ops;
 	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
 	vma->vm_private_data = encl;
+	/* Anchor vm_pgoff to the enclave base.
+	 * So offset passed back to sgx_fadvise hook
+	 * is relative to the enclave base
+	 */
+	vma->vm_pgoff = (vma->vm_start - encl->base) >> PAGE_SHIFT;
 
 	return 0;
 }
 
+/*
+ * Add new pages to the enclave sequentially with ENCLS[EAUG] for the WILLNEED advice.
+ * Only do this to existing VMAs in the same enclave and reject the request.
+ * Returns:	0 if EAUG done with best effort, -EINVAL if any sub-range given
+ * is not in the enclave, or enclave is not initialized..
+ */
+static int sgx_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
+{
+	struct sgx_encl *encl = file->private_data;
+	unsigned long start, end, pos;
+	int ret = -EINVAL;
+	struct vm_area_struct *vma = NULL;
+
+	/* Only support WILLNEED */
+	if (advice != POSIX_FADV_WILLNEED)
+		return -EINVAL;
+	if (!encl)
+		return -EINVAL;
+	if (!cpu_feature_enabled(X86_FEATURE_SGX2))
+		return -EINVAL;
+
+	if (offset + len < offset)
+		return -EINVAL;
+	if (encl->base + offset < encl->base)
+		return -EINVAL;
+	start  = offset + encl->base;
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+	if (end > encl->base + encl->size)
+		return -EINVAL;
+
+	/* EAUG works only for initialized enclaves. */
+	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+		return -EINVAL;
+
+	mmap_read_lock(current->mm);
+
+	vma = find_vma(current->mm, start);
+	if (!vma)
+		goto unlock;
+	if (vma->vm_private_data != encl)
+		goto unlock;
+
+	pos = start;
+	if (pos < vma->vm_start || end > vma->vm_end) {
+		/* Don't allow any gaps */
+		goto unlock;
+	}
+	/* Here: vm_start <= pos < end <= vm_end */
+	while (pos < end) {
+		if (xa_load(&encl->page_array, PFN_DOWN(pos)))
+			continue;
+		if (signal_pending(current)) {
+			if (pos == start)
+				ret = -ERESTARTSYS;
+			else
+				ret = -EINTR;
+			goto unlock;
+		}
+		ret = sgx_encl_eaug_page(vma, encl, pos);
+		/* It's OK to not finish */
+		if (ret)
+			break;
+		pos = pos + PAGE_SIZE;
+		cond_resched();
+	}
+	ret = 0;
+unlock:
+	mmap_read_unlock(current->mm);
+	return ret;
+}
+
 static unsigned long sgx_get_unmapped_area(struct file *file,
 					   unsigned long addr,
 					   unsigned long len,
@@ -133,6 +213,7 @@  static const struct file_operations sgx_encl_fops = {
 	.compat_ioctl		= sgx_compat_ioctl,
 #endif
 	.mmap			= sgx_mmap,
+	.fadvise		= sgx_fadvise,
 	.get_unmapped_area	= sgx_get_unmapped_area,
 };