[v4,11/18] nitro_enclaves: Add logic for enclave memory region set

Message ID 20200622200329.52996-12-andraprs@amazon.com (mailing list archive)
State New, archived
Series Add support for Nitro Enclaves

Commit Message

Paraschiv, Andra-Irina June 22, 2020, 8:03 p.m. UTC
Another resource that is set for an enclave is memory. User space
memory regions, which need to be backed by contiguous memory regions,
are associated with the enclave.

One solution for allocating / reserving contiguous memory regions, which
is used for integration, is hugetlbfs. The user space process that is
associated with the enclave passes these memory regions to the driver.

The enclave memory regions need to be from the same NUMA node as the
enclave CPUs.

Add ioctl command logic for setting a user space memory region for an
enclave.

Signed-off-by: Alexandru Vasile <lexnv@amazon.com>
Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
---
Changelog

v3 -> v4

* Check enclave memory regions are from the same NUMA node as the
  enclave CPUs.
* Use dev_err instead of custom NE log pattern.
* Update the NE ioctl call to match the decoupling from the KVM API.

v2 -> v3

* Remove the WARN_ON calls.
* Update static calls sanity checks.
* Update kzfree() calls to kfree().

v1 -> v2

* Add log pattern for NE.
* Update goto labels to match their purpose.
* Remove the BUG_ON calls.
* Check if enclave max memory regions is reached when setting an enclave
  memory region.
* Check if enclave state is init when setting an enclave memory region.
---
 drivers/virt/nitro_enclaves/ne_misc_dev.c | 257 ++++++++++++++++++++++
 1 file changed, 257 insertions(+)
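
For context, a minimal user space sketch of driving the ioctl added here.
This assumes the UAPI header from this series exposes
NE_SET_USER_MEMORY_REGION and struct ne_user_memory_region with the
userspace_addr and memory_size fields used in the driver code below; the
helper name is illustrative only.

#include <linux/nitro_enclaves.h>
#include <linux/types.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#define NE_MIN_MEM_REGION_SIZE (2UL * 1024 * 1024)

/* Back one 2 MiB region with a huge page and hand it to the enclave;
 * enclave_fd comes from the enclave creation ioctl, and huge pages must
 * be preallocated, e.g. via /proc/sys/vm/nr_hugepages. */
static int ne_add_mem_region(int enclave_fd)
{
	struct ne_user_memory_region mem_region = {};
	void *addr = mmap(NULL, NE_MIN_MEM_REGION_SIZE,
			  PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (addr == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	mem_region.userspace_addr = (__u64)(unsigned long)addr;
	mem_region.memory_size = NE_MIN_MEM_REGION_SIZE;

	if (ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION, &mem_region) < 0) {
		perror("NE_SET_USER_MEMORY_REGION");
		munmap(addr, NE_MIN_MEM_REGION_SIZE);
		return -1;
	}

	return 0;
}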

Comments

Alexander Graf July 6, 2020, 10:46 a.m. UTC | #1
On 22.06.20 22:03, Andra Paraschiv wrote:
> [...]
> +/**
> + * ne_sanity_check_user_mem_region - Sanity check the userspace memory
> + * region received during the set user memory region ioctl call.
> + *
> + * This function gets called with the ne_enclave mutex held.
> + *
> + * @ne_enclave: private data associated with the current enclave.
> + * @mem_region: user space memory region to be sanity checked.
> + *
> + * @returns: 0 on success, negative return value on failure.
> + */
> +static int ne_sanity_check_user_mem_region(struct ne_enclave *ne_enclave,
> +	struct ne_user_memory_region *mem_region)
> +{
> +	if (ne_enclave->mm != current->mm)
> +		return -EIO;
> +
> +	if ((mem_region->memory_size % NE_MIN_MEM_REGION_SIZE) != 0) {
> +		dev_err_ratelimited(ne_misc_dev.this_device,
> +				    "Mem size not multiple of 2 MiB\n");
> +
> +		return -EINVAL;

Can we make this an error that gets propagated to user space explicitly? 
I'd rather have a clear error return value of this function than a 
random message in dmesg.
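
E.g., one possible shape of that (the NE_ERR_* names and values here are
hypothetical placeholders, not from this series):

/* NE-specific error codes, exposed in the UAPI header so user space
 * can map each failure to a proper message. */
#define NE_ERR_INVALID_MEM_REGION_SIZE		(256)
#define NE_ERR_UNALIGNED_MEM_REGION_ADDR	(257)

	if (!IS_ALIGNED(mem_region->memory_size, NE_MIN_MEM_REGION_SIZE))
		return -NE_ERR_INVALID_MEM_REGION_SIZE;

	if (!IS_ALIGNED(mem_region->userspace_addr, NE_MIN_MEM_REGION_SIZE))
		return -NE_ERR_UNALIGNED_MEM_REGION_ADDR;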

> +	}
> +
> +	if ((mem_region->userspace_addr & (NE_MIN_MEM_REGION_SIZE - 1)) ||

This logic already relies on the fact that NE_MIN_MEM_REGION_SIZE is a 
power of two. Can you do the same above on the memory_size check?
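
I.e., a sketch of the suggested check, valid because
NE_MIN_MEM_REGION_SIZE is a power of two (a value is a multiple of it
iff the low bits are clear):

	if (mem_region->memory_size & (NE_MIN_MEM_REGION_SIZE - 1))
		return -EINVAL;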

> +	    !access_ok((void __user *)(unsigned long)mem_region->userspace_addr,
> +		       mem_region->memory_size)) {
> +		dev_err_ratelimited(ne_misc_dev.this_device,
> +				    "Invalid user space addr range\n");
> +
> +		return -EINVAL;

Same comment again. Return different errors for different conditions, so 
that user space has a chance to print proper errors to its users.

Also, don't we have to check alignment of userspace_addr as well?

> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ne_set_user_memory_region_ioctl - Add user space memory region to the slot
> + * associated with the current enclave.
> + *
> + * This function gets called with the ne_enclave mutex held.
> + *
> + * @ne_enclave: private data associated with the current enclave.
> + * @mem_region: user space memory region to be associated with the given slot.
> + *
> + * @returns: 0 on success, negative return value on failure.
> + */
> +static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
> +	struct ne_user_memory_region *mem_region)
> +{
> +	struct ne_pci_dev_cmd_reply cmd_reply = {};
> +	long gup_rc = 0;
> +	unsigned long i = 0;
> +	struct ne_mem_region *ne_mem_region = NULL;
> +	unsigned long nr_phys_contig_mem_regions = 0;
> +	unsigned long nr_pinned_pages = 0;
> +	struct page **phys_contig_mem_regions = NULL;
> +	int rc = -EINVAL;
> +	struct slot_add_mem_req slot_add_mem_req = {};
> +
> +	rc = ne_sanity_check_user_mem_region(ne_enclave, mem_region);
> +	if (rc < 0)
> +		return rc;
> +
> +	ne_mem_region = kzalloc(sizeof(*ne_mem_region), GFP_KERNEL);
> +	if (!ne_mem_region)
> +		return -ENOMEM;
> +
> +	/*
> +	 * TODO: Update nr_pages value to handle contiguous virtual address
> +	 * ranges mapped to non-contiguous physical regions. Hugetlbfs can give
> +	 * 2 MiB / 1 GiB contiguous physical regions.
> +	 */
> +	ne_mem_region->nr_pages = mem_region->memory_size /
> +		NE_MIN_MEM_REGION_SIZE;
> +
> +	ne_mem_region->pages = kcalloc(ne_mem_region->nr_pages,
> +				       sizeof(*ne_mem_region->pages),
> +				       GFP_KERNEL);
> +	if (!ne_mem_region->pages) {
> +		kfree(ne_mem_region);
> +
> +		return -ENOMEM;

kfree(NULL) is a nop, so you can just set rc and goto free_mem_region 
here and below.
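
I.e., something along these lines (a sketch; kfree(NULL) being a no-op
makes the shared label safe even for allocations that never happened):

	rc = -ENOMEM;

	ne_mem_region->pages = kcalloc(ne_mem_region->nr_pages,
				       sizeof(*ne_mem_region->pages),
				       GFP_KERNEL);
	if (!ne_mem_region->pages)
		goto free_mem_region;

	phys_contig_mem_regions = kcalloc(ne_mem_region->nr_pages,
					  sizeof(*phys_contig_mem_regions),
					  GFP_KERNEL);
	if (!phys_contig_mem_regions)
		goto free_mem_region;

	/* ... */

free_mem_region:
	kfree(phys_contig_mem_regions);
	kfree(ne_mem_region->pages);
	kfree(ne_mem_region);

	return rc;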

> +	}
> +
> +	phys_contig_mem_regions = kcalloc(ne_mem_region->nr_pages,
> +					  sizeof(*phys_contig_mem_regions),
> +					  GFP_KERNEL);
> +	if (!phys_contig_mem_regions) {
> +		kfree(ne_mem_region->pages);
> +		kfree(ne_mem_region);
> +
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * TODO: Handle non-contiguous memory regions received from user space.
> +	 * Hugetlbfs can give 2 MiB / 1 GiB contiguous physical regions. The
> +	 * virtual address space can be seen as contiguous, although it is
> +	 * mapped underneath to 2 MiB / 1 GiB physical regions e.g. 8 MiB
> +	 * virtual address space mapped to 4 physically contiguous regions of 2
> +	 * MiB.
> +	 */
> +	do {
> +		unsigned long tmp_nr_pages = ne_mem_region->nr_pages -
> +			nr_pinned_pages;
> +		struct page **tmp_pages = ne_mem_region->pages +
> +			nr_pinned_pages;
> +		u64 tmp_userspace_addr = mem_region->userspace_addr +
> +			nr_pinned_pages * NE_MIN_MEM_REGION_SIZE;
> +
> +		gup_rc = get_user_pages(tmp_userspace_addr, tmp_nr_pages,
> +					FOLL_GET, tmp_pages, NULL);
> +		if (gup_rc < 0) {
> +			rc = gup_rc;
> +
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Error in gup [rc=%d]\n", rc);
> +
> +			unpin_user_pages(ne_mem_region->pages, nr_pinned_pages);
> +
> +			goto free_mem_region;
> +		}
> +
> +		nr_pinned_pages += gup_rc;
> +
> +	} while (nr_pinned_pages < ne_mem_region->nr_pages);

Can this deadlock the kernel? Shouldn't we rather return an error when 
we can't pin all pages?

> +
> +	/*
> +	 * TODO: Update checks once physically contiguous regions are collected
> +	 * based on the user space address and get_user_pages() results.
> +	 */
> +	for (i = 0; i < ne_mem_region->nr_pages; i++) {
> +		if (!PageHuge(ne_mem_region->pages[i])) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Not a hugetlbfs page\n");
> +
> +			goto unpin_pages;
> +		}
> +
> +		if (huge_page_size(page_hstate(ne_mem_region->pages[i])) !=
> +		    NE_MIN_MEM_REGION_SIZE) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Page size isn't 2 MiB\n");

Why is a huge page size of >2MB a problem? Can't we just make 
huge_page_size() the ne mem slot size?

> +
> +			goto unpin_pages;
> +		}
> +
> +		if (ne_enclave->numa_node !=
> +		    page_to_nid(ne_mem_region->pages[i])) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Page isn't from NUMA node %d\n",
> +					    ne_enclave->numa_node);
> +
> +			goto unpin_pages;

Is there a way to give user space hints on *why* things are going wrong?

> +		}
> +
> +		/*
> +		 * TODO: Update once handled non-contiguous memory regions
> +		 * received from user space.
> +		 */
> +		phys_contig_mem_regions[i] = ne_mem_region->pages[i];
> +	}
> +
> +	/*
> +	 * TODO: Update once handled non-contiguous memory regions received
> +	 * from user space.
> +	 */
> +	nr_phys_contig_mem_regions = ne_mem_region->nr_pages;
> +
> +	if ((ne_enclave->nr_mem_regions + nr_phys_contig_mem_regions) >
> +	    ne_enclave->max_mem_regions) {
> +		dev_err_ratelimited(ne_misc_dev.this_device,
> +				    "Reached max memory regions %lld\n",
> +				    ne_enclave->max_mem_regions);
> +
> +		goto unpin_pages;
> +	}
> +
> +	for (i = 0; i < nr_phys_contig_mem_regions; i++) {
> +		u64 phys_addr = page_to_phys(phys_contig_mem_regions[i]);
> +
> +		slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
> +		slot_add_mem_req.paddr = phys_addr;
> +		/*
> +		 * TODO: Update memory size of physical contiguous memory
> +		 * region, in case of non-contiguous memory regions received
> +		 * from user space.
> +		 */
> +		slot_add_mem_req.size = NE_MIN_MEM_REGION_SIZE;

Yeah, for now, just make it huge_page_size()! :)

> +
> +		rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
> +				   &slot_add_mem_req, sizeof(slot_add_mem_req),
> +				   &cmd_reply, sizeof(cmd_reply));
> +		if (rc < 0) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Error in slot add mem [rc=%d]\n",
> +					    rc);
> +
> +			/* TODO: Only unpin memory regions not added. */

Are we sure we're not creating an unusable system here?

> +			goto unpin_pages;
> +		}
> +
> +		ne_enclave->mem_size += slot_add_mem_req.size;
> +		ne_enclave->nr_mem_regions++;
> +
> +		memset(&slot_add_mem_req, 0, sizeof(slot_add_mem_req));
> +		memset(&cmd_reply, 0, sizeof(cmd_reply));

If you define the variables in the for loop scope, you don't need to 
manually zero them again.
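
I.e., a sketch of the loop-scoped form; the initializers re-zero both
structs on every iteration, so the trailing memset() calls go away:

	for (i = 0; i < nr_phys_contig_mem_regions; i++) {
		struct ne_pci_dev_cmd_reply cmd_reply = {};
		struct slot_add_mem_req slot_add_mem_req = {};

		slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
		slot_add_mem_req.paddr = page_to_phys(phys_contig_mem_regions[i]);
		slot_add_mem_req.size = NE_MIN_MEM_REGION_SIZE;

		rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
				   &slot_add_mem_req, sizeof(slot_add_mem_req),
				   &cmd_reply, sizeof(cmd_reply));
		/* ... error handling as before ... */
	}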


Alex

> +	}
> +
> +	list_add(&ne_mem_region->mem_region_list_entry,
> +		 &ne_enclave->mem_regions_list);
> +
> +	kfree(phys_contig_mem_regions);
> +
> +	return 0;
> +
> +unpin_pages:
> +	unpin_user_pages(ne_mem_region->pages, ne_mem_region->nr_pages);
> +free_mem_region:
> +	kfree(phys_contig_mem_regions);
> +	kfree(ne_mem_region->pages);
> +	kfree(ne_mem_region);
> +
> +	return rc;
> +}
> +
>   static long ne_enclave_ioctl(struct file *file, unsigned int cmd,
>   			     unsigned long arg)
>   {
> @@ -561,6 +788,36 @@ static long ne_enclave_ioctl(struct file *file, unsigned int cmd,
>   		return 0;
>   	}
>   
> +	case NE_SET_USER_MEMORY_REGION: {
> +		struct ne_user_memory_region mem_region = {};
> +		int rc = -EINVAL;
> +
> +		if (copy_from_user(&mem_region, (void *)arg,
> +				   sizeof(mem_region))) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Error in copy from user\n");
> +
> +			return -EFAULT;
> +		}
> +
> +		mutex_lock(&ne_enclave->enclave_info_mutex);
> +
> +		if (ne_enclave->state != NE_STATE_INIT) {
> +			dev_err_ratelimited(ne_misc_dev.this_device,
> +					    "Enclave isn't in init state\n");
> +
> +			mutex_unlock(&ne_enclave->enclave_info_mutex);
> +
> +			return -EINVAL;
> +		}
> +
> +		rc = ne_set_user_memory_region_ioctl(ne_enclave, &mem_region);
> +
> +		mutex_unlock(&ne_enclave->enclave_info_mutex);
> +
> +		return rc;
> +	}
> +
>   	default:
>   		return -ENOTTY;
>   	}
> 



Paraschiv, Andra-Irina July 9, 2020, 7:36 a.m. UTC | #2
On 06/07/2020 13:46, Alexander Graf wrote:
>
>
> On 22.06.20 22:03, Andra Paraschiv wrote:
>> [...]
>>   +/**
>> + * ne_sanity_check_user_mem_region - Sanity check the userspace memory
>> + * region received during the set user memory region ioctl call.
>> + *
>> + * This function gets called with the ne_enclave mutex held.
>> + *
>> + * @ne_enclave: private data associated with the current enclave.
>> + * @mem_region: user space memory region to be sanity checked.
>> + *
>> + * @returns: 0 on success, negative return value on failure.
>> + */
>> +static int ne_sanity_check_user_mem_region(struct ne_enclave 
>> *ne_enclave,
>> +    struct ne_user_memory_region *mem_region)
>> +{
>> +    if (ne_enclave->mm != current->mm)
>> +        return -EIO;
>> +
>> +    if ((mem_region->memory_size % NE_MIN_MEM_REGION_SIZE) != 0) {
>> +        dev_err_ratelimited(ne_misc_dev.this_device,
>> +                    "Mem size not multiple of 2 MiB\n");
>> +
>> +        return -EINVAL;
>
> Can we make this an error that gets propagated to user space 
> explicitly? I'd rather have a clear error return value of this 
> function than a random message in dmesg.

We can do that; I will add NE error codes specific to the memory checks, 
as for the other call paths in the series, e.g. enclave CPU(s) setup.

>
>> +    }
>> +
>> +    if ((mem_region->userspace_addr & (NE_MIN_MEM_REGION_SIZE - 1)) ||
>
> This logic already relies on the fact that NE_MIN_MEM_REGION_SIZE is a 
> power of two. Can you do the same above on the memory_size check?

Done.

>
>> +        !access_ok((void __user *)(unsigned 
>> long)mem_region->userspace_addr,
>> +               mem_region->memory_size)) {
>> +        dev_err_ratelimited(ne_misc_dev.this_device,
>> +                    "Invalid user space addr range\n");
>> +
>> +        return -EINVAL;
>
> Same comment again. Return different errors for different conditions, 
> so that user space has a chance to print proper errors to its users.
>
> Also, don't we have to check alignment of userspace_addr as well?
>

Would need an alignment check for 2 MiB at least, yes.

>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + * ne_set_user_memory_region_ioctl - Add user space memory region to 
>> the slot
>> + * associated with the current enclave.
>> + *
>> + * This function gets called with the ne_enclave mutex held.
>> + *
>> + * @ne_enclave: private data associated with the current enclave.
>> + * @mem_region: user space memory region to be associated with the 
>> given slot.
>> + *
>> + * @returns: 0 on success, negative return value on failure.
>> + */
>> +static int ne_set_user_memory_region_ioctl(struct ne_enclave 
>> *ne_enclave,
>> +    struct ne_user_memory_region *mem_region)
>> +{
>> +    struct ne_pci_dev_cmd_reply cmd_reply = {};
>> +    long gup_rc = 0;
>> +    unsigned long i = 0;
>> +    struct ne_mem_region *ne_mem_region = NULL;
>> +    unsigned long nr_phys_contig_mem_regions = 0;
>> +    unsigned long nr_pinned_pages = 0;
>> +    struct page **phys_contig_mem_regions = NULL;
>> +    int rc = -EINVAL;
>> +    struct slot_add_mem_req slot_add_mem_req = {};
>> +
>> +    rc = ne_sanity_check_user_mem_region(ne_enclave, mem_region);
>> +    if (rc < 0)
>> +        return rc;
>> +
>> +    ne_mem_region = kzalloc(sizeof(*ne_mem_region), GFP_KERNEL);
>> +    if (!ne_mem_region)
>> +        return -ENOMEM;
>> +
>> +    /*
>> +     * TODO: Update nr_pages value to handle contiguous virtual address
>> +     * ranges mapped to non-contiguous physical regions. Hugetlbfs 
>> can give
>> +     * 2 MiB / 1 GiB contiguous physical regions.
>> +     */
>> +    ne_mem_region->nr_pages = mem_region->memory_size /
>> +        NE_MIN_MEM_REGION_SIZE;
>> +
>> +    ne_mem_region->pages = kcalloc(ne_mem_region->nr_pages,
>> +                       sizeof(*ne_mem_region->pages),
>> +                       GFP_KERNEL);
>> +    if (!ne_mem_region->pages) {
>> +        kfree(ne_mem_region);
>> +
>> +        return -ENOMEM;
>
> kfree(NULL) is a nop, so you can just set rc and goto free_mem_region 
> here and below.

Updated both return paths.

>
>> +    }
>> +
>> +    phys_contig_mem_regions = kcalloc(ne_mem_region->nr_pages,
>> +                      sizeof(*phys_contig_mem_regions),
>> +                      GFP_KERNEL);
>> +    if (!phys_contig_mem_regions) {
>> +        kfree(ne_mem_region->pages);
>> +        kfree(ne_mem_region);
>> +
>> +        return -ENOMEM;
>> +    }
>> +
>> +    /*
>> +     * TODO: Handle non-contiguous memory regions received from user 
>> space.
>> +     * Hugetlbfs can give 2 MiB / 1 GiB contiguous physical regions. 
>> The
>> +     * virtual address space can be seen as contiguous, although it is
>> +     * mapped underneath to 2 MiB / 1 GiB physical regions e.g. 8 MiB
>> +     * virtual address space mapped to 4 physically contiguous 
>> regions of 2
>> +     * MiB.
>> +     */
>> +    do {
>> +        unsigned long tmp_nr_pages = ne_mem_region->nr_pages -
>> +            nr_pinned_pages;
>> +        struct page **tmp_pages = ne_mem_region->pages +
>> +            nr_pinned_pages;
>> +        u64 tmp_userspace_addr = mem_region->userspace_addr +
>> +            nr_pinned_pages * NE_MIN_MEM_REGION_SIZE;
>> +
>> +        gup_rc = get_user_pages(tmp_userspace_addr, tmp_nr_pages,
>> +                    FOLL_GET, tmp_pages, NULL);
>> +        if (gup_rc < 0) {
>> +            rc = gup_rc;
>> +
>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>> +                        "Error in gup [rc=%d]\n", rc);
>> +
>> +            unpin_user_pages(ne_mem_region->pages, nr_pinned_pages);
>> +
>> +            goto free_mem_region;
>> +        }
>> +
>> +        nr_pinned_pages += gup_rc;
>> +
>> +    } while (nr_pinned_pages < ne_mem_region->nr_pages);
>
> Can this deadlock the kernel? Shouldn't we rather return an error when 
> we can't pin all pages?

It shouldn't cause a deadlock, based on the return values:

 > Returns either number of pages pinned (which may be less than the
 > number requested), or an error. Details about the return value:
 >
 > -- If nr_pages is 0, returns 0.
 > -- If nr_pages is >0, but no pages were pinned, returns -errno.
 > -- If nr_pages is >0, and some pages were pinned, returns the number of
 > pages pinned. Again, this may be less than nr_pages.


But I can update the logic to have all or nothing.
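
A sketch of the all-or-nothing variant (one gup call, with a partial pin
treated as failure; -EFAULT here is a placeholder error code):

	gup_rc = get_user_pages(mem_region->userspace_addr,
				ne_mem_region->nr_pages, FOLL_GET,
				ne_mem_region->pages, NULL);
	if (gup_rc < 0) {
		rc = gup_rc;

		goto free_mem_region;
	}

	if (gup_rc != ne_mem_region->nr_pages) {
		rc = -EFAULT;

		unpin_user_pages(ne_mem_region->pages, gup_rc);

		goto free_mem_region;
	}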

>
>> +
>> +    /*
>> +     * TODO: Update checks once physically contiguous regions are 
>> collected
>> +     * based on the user space address and get_user_pages() results.
>> +     */
>> +    for (i = 0; i < ne_mem_region->nr_pages; i++) {
>> +        if (!PageHuge(ne_mem_region->pages[i])) {
>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>> +                        "Not a hugetlbfs page\n");
>> +
>> +            goto unpin_pages;
>> +        }
>> +
>> +        if (huge_page_size(page_hstate(ne_mem_region->pages[i])) !=
>> +            NE_MIN_MEM_REGION_SIZE) {
>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>> +                        "Page size isn't 2 MiB\n");
>
> Why is a huge page size of >2MB a problem? Can't we just make 
> huge_page_size() the ne mem slot size?

It's not a problem; this is actually part of the TODO(s) in the current 
function, to support contiguous regions larger than 2 MiB. It's just 
that we started with 2 MiB. :)

>
>> +
>> +            goto unpin_pages;
>> +        }
>> +
>> +        if (ne_enclave->numa_node !=
>> +            page_to_nid(ne_mem_region->pages[i])) {
>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>> +                        "Page isn't from NUMA node %d\n",
>> +                        ne_enclave->numa_node);
>> +
>> +            goto unpin_pages;
>
> Is there a way to give user space hints on *why* things are going wrong?

Yes, one option for giving user space more insight is the specific NE 
error codes you mentioned, so that we can improve the experience even 
further.
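
On the user space side, a hedged sketch of how the process could satisfy
the same-NUMA-node requirement up front, before faulting in the
hugetlbfs pages (mbind(2) via <numaif.h>; the node id would come from
the enclave CPU setup, and the helper name is illustrative):

#include <numaif.h>
#include <stddef.h>

/* Bind the region to one NUMA node before first touch, so the huge
 * pages backing it are allocated from that node. */
static int bind_region_to_node(void *addr, size_t len, int node)
{
	unsigned long nodemask = 1UL << node;

	return mbind(addr, len, MPOL_BIND, &nodemask,
		     sizeof(nodemask) * 8, 0);
}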

>
>> +        }
>> +
>> +        /*
>> +         * TODO: Update once handled non-contiguous memory regions
>> +         * received from user space.
>> +         */
>> +        phys_contig_mem_regions[i] = ne_mem_region->pages[i];
>> +    }
>> +
>> +    /*
>> +     * TODO: Update once handled non-contiguous memory regions received
>> +     * from user space.
>> +     */
>> +    nr_phys_contig_mem_regions = ne_mem_region->nr_pages;
>> +
>> +    if ((ne_enclave->nr_mem_regions + nr_phys_contig_mem_regions) >
>> +        ne_enclave->max_mem_regions) {
>> +        dev_err_ratelimited(ne_misc_dev.this_device,
>> +                    "Reached max memory regions %lld\n",
>> +                    ne_enclave->max_mem_regions);
>> +
>> +        goto unpin_pages;
>> +    }
>> +
>> +    for (i = 0; i < nr_phys_contig_mem_regions; i++) {
>> +        u64 phys_addr = page_to_phys(phys_contig_mem_regions[i]);
>> +
>> +        slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
>> +        slot_add_mem_req.paddr = phys_addr;
>> +        /*
>> +         * TODO: Update memory size of physical contiguous memory
>> +         * region, in case of non-contiguous memory regions received
>> +         * from user space.
>> +         */
>> +        slot_add_mem_req.size = NE_MIN_MEM_REGION_SIZE;
>
> Yeah, for now, just make it huge_page_size()! :)

Yup, I'll handle this to allow other sizes in addition to 2 MiB, e.g. 
1 GiB for hugetlbfs.
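
I.e., roughly:

		/* Size the region by the actual hugetlbfs page size
		 * (2 MiB or 1 GiB) instead of hardcoding the minimum. */
		slot_add_mem_req.size =
			huge_page_size(page_hstate(phys_contig_mem_regions[i]));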

>
>> +
>> +        rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
>> +                   &slot_add_mem_req, sizeof(slot_add_mem_req),
>> +                   &cmd_reply, sizeof(cmd_reply));
>> +        if (rc < 0) {
>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>> +                        "Error in slot add mem [rc=%d]\n",
>> +                        rc);
>> +
>> +            /* TODO: Only unpin memory regions not added. */
>
> Are we sure we're not creating an unusable system here?

The way the requests to the PCI device are structured, we cannot get 
back a memory region / CPU, once added, until the enclave is terminated. 
Say there is an error in the remaining ioctl logic after the region is 
successfully added; the memory region can then be given back to the 
primary / parent VM only once enclave termination (including slot free) 
is done.

We can either have the logic handle one contiguous region per ioctl call 
(user space gives a memory region that is backed by a single contiguous 
physical memory region) or have a for loop to go through all contiguous 
regions (user space gives a memory region that is backed by a set of 
(smaller) contiguous physical memory regions). In the second case, if a 
request to the NE PCI device fails, already added memory regions can be 
given back only on slot free, triggered by the enclave termination, when 
closing the enclave fd.

>
>> +            goto unpin_pages;
>> +        }
>> +
>> +        ne_enclave->mem_size += slot_add_mem_req.size;
>> +        ne_enclave->nr_mem_regions++;
>> +
>> +        memset(&slot_add_mem_req, 0, sizeof(slot_add_mem_req));
>> +        memset(&cmd_reply, 0, sizeof(cmd_reply));
>
> If you define the variables in the for loop scope, you don't need to 
> manually zero them again.

Updated to have the variables in the loop instead.

Thank you.

Andra

Alexander Graf July 9, 2020, 8:40 a.m. UTC | #3
On 09.07.20 09:36, Paraschiv, Andra-Irina wrote:
>>> [...]
>>> +
>>> +        rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
>>> +                   &slot_add_mem_req, sizeof(slot_add_mem_req),
>>> +                   &cmd_reply, sizeof(cmd_reply));
>>> +        if (rc < 0) {
>>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>>> +                        "Error in slot add mem [rc=%d]\n",
>>> +                        rc);
>>> +
>>> +            /* TODO: Only unpin memory regions not added. */
>>
>> Are we sure we're not creating an unusable system here?
> 
> The way the requests to the PCI device are structured is that we cannot 
> get back a memory region / CPU, once added, till the enclave is 
> terminated. Let's say there is an error in the remaining logic from the 
> ioctl, after the region is successfully added, then the memory region 
> can be given back to the primary / parent VM once the enclave 
> termination (including slot free) is done.
> 
> We can either have the logic handle one contiguous region per ioctl call 
> (user space gives a memory region that is backed by a single contiguous 
> physical memory region) or have a for loop to go through all contiguous 
> regions (user space gives a memory region that is backed by a set of 
> (smaller) contiguous physical memory regions). In the second case, if a 
> request to the NE PCI device fails, already added memory regions can be 
> given back only on slot free, triggered by the enclave termination, when 
> closing the enclave fd.

I'm in full agreement with you, but the logic here aborts mid-way, 
explicitly unpins all pages (does that mean use count is now -1 for 
some?) and does not keep track of the fact that some pages may be 
donated already. Does that mean that those pages may be reserved for the 
enclave, but passed to user space again?

I think in the error case, we should not unpin for now, because we can't 
guarantee that the "enclave device" isn't using those pages.


Alex



Paraschiv, Andra-Irina July 9, 2020, 9:41 a.m. UTC | #4
On 09/07/2020 11:40, Alexander Graf wrote:
>>>> [...]
>>>> +
>>>> +        rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
>>>> +                   &slot_add_mem_req, sizeof(slot_add_mem_req),
>>>> +                   &cmd_reply, sizeof(cmd_reply));
>>>> +        if (rc < 0) {
>>>> +            dev_err_ratelimited(ne_misc_dev.this_device,
>>>> +                        "Error in slot add mem [rc=%d]\n",
>>>> +                        rc);
>>>> +
>>>> +            /* TODO: Only unpin memory regions not added. */
>>>
>>> Are we sure we're not creating an unusable system here?
>>
>> The way the requests to the PCI device are structured is that we 
>> cannot get back a memory region / CPU, once added, till the enclave 
>> is terminated. Let's say there is an error in the remaining logic 
>> from the ioctl, after the region is successfully added, then the 
>> memory region can be given back to the primary / parent VM once the 
>> enclave termination (including slot free) is done.
>>
>> We can either have the logic handle one contiguous region per ioctl 
>> call (user space gives a memory region that is backed by a single 
>> contiguous physical memory region) or have a for loop to go through 
>> all contiguous regions (user space gives a memory region that is 
>> backed by a set of (smaller) contiguous physical memory regions). In 
>> the second case, if a request to the NE PCI device fails, already 
>> added memory regions can be given back only on slot free, triggered 
>> by the enclave termination, when closing the enclave fd.
>
> I'm in full agreement with you, but the logic here aborts mid-way, 
> explicitly unpins all pages (does that mean use count is now -1 for 
> some?) and does not keep track of the fact that some pages may be 
> donated already. Does that mean that those pages may be reserved for 
> the enclave, but passed to user space again?
>
> I think in the error case, we should not unpin for now, because we 
> can't guarantee that the "enclave device" isn't using those pages.

True, it's somewhere in the middle. It didn't seem OK to me either; 
that's why I left the TODO in that block for this possible scenario.

After writing the previous reply, I changed the logic to exit the 
function on error, without unpinning the pages or removing the memory 
region state we keep track of. Similar to what you've suggested above.
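
I.e., roughly this shape for the request loop's error path:

		rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
				   &slot_add_mem_req, sizeof(slot_add_mem_req),
				   &cmd_reply, sizeof(cmd_reply));
		if (rc < 0)
			/* Keep the pages pinned and the state tracked; the
			 * regions already added are only released on slot
			 * free, at enclave termination. */
			return rc;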

Thanks,
Andra



Amazon Development Center (Romania) S.R.L. registered office: 27A Sf. Lazar Street, UBC5, floor 2, Iasi, Iasi County, 700045, Romania. Registered in Romania. Registration number J22/2621/2005.

Patch

diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c
index cfdefa52ed2a..17ccb6cdbd75 100644
--- a/drivers/virt/nitro_enclaves/ne_misc_dev.c
+++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c
@@ -476,6 +476,233 @@  static int ne_create_vcpu_ioctl(struct ne_enclave *ne_enclave, u32 vcpu_id)
 	return rc;
 }
 
+/**
+ * ne_sanity_check_user_mem_region - Sanity check the userspace memory
+ * region received during the set user memory region ioctl call.
+ *
+ * This function gets called with the ne_enclave mutex held.
+ *
+ * @ne_enclave: private data associated with the current enclave.
+ * @mem_region: user space memory region to be sanity checked.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_sanity_check_user_mem_region(struct ne_enclave *ne_enclave,
+	struct ne_user_memory_region *mem_region)
+{
+	if (ne_enclave->mm != current->mm)
+		return -EIO;
+
+	if ((mem_region->memory_size % NE_MIN_MEM_REGION_SIZE) != 0) {
+		dev_err_ratelimited(ne_misc_dev.this_device,
+				    "Mem size not multiple of 2 MiB\n");
+
+		return -EINVAL;
+	}
+
+	if ((mem_region->userspace_addr & (NE_MIN_MEM_REGION_SIZE - 1)) ||
+	    !access_ok((void __user *)(unsigned long)mem_region->userspace_addr,
+		       mem_region->memory_size)) {
+		dev_err_ratelimited(ne_misc_dev.this_device,
+				    "Invalid user space addr range\n");
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ne_set_user_memory_region_ioctl - Add user space memory region to the slot
+ * associated with the current enclave.
+ *
+ * This function gets called with the ne_enclave mutex held.
+ *
+ * @ne_enclave: private data associated with the current enclave.
+ * @mem_region: user space memory region to be associated with the given slot.
+ *
+ * @returns: 0 on success, negative return value on failure.
+ */
+static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
+	struct ne_user_memory_region *mem_region)
+{
+	struct ne_pci_dev_cmd_reply cmd_reply = {};
+	long gup_rc = 0;
+	unsigned long i = 0;
+	struct ne_mem_region *ne_mem_region = NULL;
+	unsigned long nr_phys_contig_mem_regions = 0;
+	unsigned long nr_pinned_pages = 0;
+	struct page **phys_contig_mem_regions = NULL;
+	int rc = -EINVAL;
+	struct slot_add_mem_req slot_add_mem_req = {};
+
+	rc = ne_sanity_check_user_mem_region(ne_enclave, mem_region);
+	if (rc < 0)
+		return rc;
+
+	ne_mem_region = kzalloc(sizeof(*ne_mem_region), GFP_KERNEL);
+	if (!ne_mem_region)
+		return -ENOMEM;
+
+	/*
+	 * TODO: Update nr_pages value to handle contiguous virtual address
+	 * ranges mapped to non-contiguous physical regions. Hugetlbfs can give
+	 * 2 MiB / 1 GiB contiguous physical regions.
+	 */
+	ne_mem_region->nr_pages = mem_region->memory_size /
+		NE_MIN_MEM_REGION_SIZE;
+
+	ne_mem_region->pages = kcalloc(ne_mem_region->nr_pages,
+				       sizeof(*ne_mem_region->pages),
+				       GFP_KERNEL);
+	if (!ne_mem_region->pages) {
+		kfree(ne_mem_region);
+
+		return -ENOMEM;
+	}
+
+	phys_contig_mem_regions = kcalloc(ne_mem_region->nr_pages,
+					  sizeof(*phys_contig_mem_regions),
+					  GFP_KERNEL);
+	if (!phys_contig_mem_regions) {
+		kfree(ne_mem_region->pages);
+		kfree(ne_mem_region);
+
+		return -ENOMEM;
+	}
+
+	/*
+	 * TODO: Handle non-contiguous memory regions received from user space.
+	 * Hugetlbfs can give 2 MiB / 1 GiB contiguous physical regions. The
+	 * virtual address space can be seen as contiguous, although it is
+	 * mapped underneath to 2 MiB / 1 GiB physical regions e.g. 8 MiB
+	 * virtual address space mapped to 4 physically contiguous regions of 2
+	 * MiB.
+	 */
+	do {
+		unsigned long tmp_nr_pages = ne_mem_region->nr_pages -
+			nr_pinned_pages;
+		struct page **tmp_pages = ne_mem_region->pages +
+			nr_pinned_pages;
+		u64 tmp_userspace_addr = mem_region->userspace_addr +
+			nr_pinned_pages * NE_MIN_MEM_REGION_SIZE;
+
+		gup_rc = get_user_pages(tmp_userspace_addr, tmp_nr_pages,
+					FOLL_GET, tmp_pages, NULL);
+		if (gup_rc < 0) {
+			rc = gup_rc;
+
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Error in gup [rc=%d]\n", rc);
+
+			unpin_user_pages(ne_mem_region->pages, nr_pinned_pages);
+
+			goto free_mem_region;
+		}
+
+		nr_pinned_pages += gup_rc;
+
+	} while (nr_pinned_pages < ne_mem_region->nr_pages);
+
+	/*
+	 * TODO: Update checks once physically contiguous regions are collected
+	 * based on the user space address and get_user_pages() results.
+	 */
+	for (i = 0; i < ne_mem_region->nr_pages; i++) {
+		if (!PageHuge(ne_mem_region->pages[i])) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Not a hugetlbfs page\n");
+			rc = -EINVAL;
+			goto unpin_pages;
+		}
+
+		if (huge_page_size(page_hstate(ne_mem_region->pages[i])) !=
+		    NE_MIN_MEM_REGION_SIZE) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Page size isn't 2 MiB\n");
+			rc = -EINVAL;
+			goto unpin_pages;
+		}
+
+		if (ne_enclave->numa_node !=
+		    page_to_nid(ne_mem_region->pages[i])) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Page isn't from NUMA node %d\n",
+					    ne_enclave->numa_node);
+			rc = -EINVAL;
+			goto unpin_pages;
+		}
+
+		/*
+		 * TODO: Update once handled non-contiguous memory regions
+		 * received from user space.
+		 */
+		phys_contig_mem_regions[i] = ne_mem_region->pages[i];
+	}
+
+	/*
+	 * TODO: Update once handled non-contiguous memory regions received
+	 * from user space.
+	 */
+	nr_phys_contig_mem_regions = ne_mem_region->nr_pages;
+
+	if ((ne_enclave->nr_mem_regions + nr_phys_contig_mem_regions) >
+	    ne_enclave->max_mem_regions) {
+		dev_err_ratelimited(ne_misc_dev.this_device,
+				    "Reached max memory regions %lld\n",
+				    ne_enclave->max_mem_regions);
+		rc = -EINVAL;
+		goto unpin_pages;
+	}
+
+	for (i = 0; i < nr_phys_contig_mem_regions; i++) {
+		u64 phys_addr = page_to_phys(phys_contig_mem_regions[i]);
+
+		slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
+		slot_add_mem_req.paddr = phys_addr;
+		/*
+		 * TODO: Update memory size of physical contiguous memory
+		 * region, in case of non-contiguous memory regions received
+		 * from user space.
+		 */
+		slot_add_mem_req.size = NE_MIN_MEM_REGION_SIZE;
+
+		rc = ne_do_request(ne_enclave->pdev, SLOT_ADD_MEM,
+				   &slot_add_mem_req, sizeof(slot_add_mem_req),
+				   &cmd_reply, sizeof(cmd_reply));
+		if (rc < 0) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Error in slot add mem [rc=%d]\n",
+					    rc);
+
+			/* TODO: Only unpin memory regions not added. */
+			goto unpin_pages;
+		}
+
+		ne_enclave->mem_size += slot_add_mem_req.size;
+		ne_enclave->nr_mem_regions++;
+
+		memset(&slot_add_mem_req, 0, sizeof(slot_add_mem_req));
+		memset(&cmd_reply, 0, sizeof(cmd_reply));
+	}
+
+	list_add(&ne_mem_region->mem_region_list_entry,
+		 &ne_enclave->mem_regions_list);
+
+	kfree(phys_contig_mem_regions);
+
+	return 0;
+
+unpin_pages:
+	unpin_user_pages(ne_mem_region->pages, ne_mem_region->nr_pages);
+free_mem_region:
+	kfree(phys_contig_mem_regions);
+	kfree(ne_mem_region->pages);
+	kfree(ne_mem_region);
+
+	return rc;
+}
+
 static long ne_enclave_ioctl(struct file *file, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -561,6 +788,36 @@  static long ne_enclave_ioctl(struct file *file, unsigned int cmd,
 		return 0;
 	}
 
+	case NE_SET_USER_MEMORY_REGION: {
+		struct ne_user_memory_region mem_region = {};
+		int rc = -EINVAL;
+
+		if (copy_from_user(&mem_region, (void __user *)arg,
+				   sizeof(mem_region))) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Error in copy from user\n");
+
+			return -EFAULT;
+		}
+
+		mutex_lock(&ne_enclave->enclave_info_mutex);
+
+		if (ne_enclave->state != NE_STATE_INIT) {
+			dev_err_ratelimited(ne_misc_dev.this_device,
+					    "Enclave isn't in init state\n");
+
+			mutex_unlock(&ne_enclave->enclave_info_mutex);
+
+			return -EINVAL;
+		}
+
+		rc = ne_set_user_memory_region_ioctl(ne_enclave, &mem_region);
+
+		mutex_unlock(&ne_enclave->enclave_info_mutex);
+
+		return rc;
+	}
+
 	default:
 		return -ENOTTY;
 	}
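
For completeness, user space is expected to drive this ioctl roughly as
follows (illustrative sketch only, not part of the patch; it assumes the
NE UAPI header from this series exposes NE_SET_USER_MEMORY_REGION and
struct ne_user_memory_region with the userspace_addr / memory_size
fields used above, and it backs the region with one anonymous 2 MiB
huge page mapping):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/nitro_enclaves.h>

	#define NE_MEM_SIZE (2UL * 1024 * 1024) /* one 2 MiB huge page */

	/*
	 * Illustrative sketch: map a 2 MiB hugetlbfs-backed buffer and
	 * associate it with the enclave via its already opened fd.
	 */
	static int ne_add_mem_region(int enclave_fd)
	{
		struct ne_user_memory_region mem_region = {};
		void *addr;

		addr = mmap(NULL, NE_MEM_SIZE, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
			    -1, 0);
		if (addr == MAP_FAILED)
			return -1;

		mem_region.userspace_addr = (uint64_t)addr;
		mem_region.memory_size = NE_MEM_SIZE;

		return ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION,
			     &mem_region);
	}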