diff mbox series

[2/2] selftests/sgx: Add SGX selftest augment_via_eaccept_long

Message ID 20220815233900.11225-2-jarkko@kernel.org (mailing list archive)
State New, archived
Headers show
Series [1/2] x86/sgx: Handle VA page allocation failure for EAUG on PF. | expand

Commit Message

Jarkko Sakkinen Aug. 15, 2022, 11:39 p.m. UTC
From: Vijay Dhanraj <vijay.dhanraj@intel.com>

Add a new test case which is the same as augment_via_eaccept but adds a
larger number of EPC pages to stress test EAUG via EACCEPT.

Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
I removed Githubisms (hyphens), added missing subsystem tag, and
cleaned up the commit message a bit.
 tools/testing/selftests/sgx/load.c      |   5 +-
 tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
 tools/testing/selftests/sgx/main.h      |   3 +-
 tools/testing/selftests/sgx/sigstruct.c |   2 +-
 4 files changed, 125 insertions(+), 5 deletions(-)

Comments

Reinette Chatre Aug. 16, 2022, 4:26 p.m. UTC | #1
Hi Vijay,

Thank you very much for digging into this. A few comments below.

On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:
> From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> 
> Add a new test case which is same as augment_via_eaccept but adds a
> larger number of EPC pages to stress test EAUG via EACCEPT.
> 
> Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
> Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> ---
> I removed Githubisms (hyphens), added missing subsystem tag, and
> cleaned up the commit message a bit.
>  tools/testing/selftests/sgx/load.c      |   5 +-
>  tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
>  tools/testing/selftests/sgx/main.h      |   3 +-
>  tools/testing/selftests/sgx/sigstruct.c |   2 +-
>  4 files changed, 125 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> index 94bdeac1cf04..7de1b15c90b1 100644
> --- a/tools/testing/selftests/sgx/load.c
> +++ b/tools/testing/selftests/sgx/load.c
> @@ -171,7 +171,8 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol)
>  	return 0;
>  }
>  
> -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> +			   unsigned long edmm_size)
>  {
>  	const char device_path[] = "/dev/sgx_enclave";
>  	struct encl_segment *seg;
> @@ -300,7 +301,7 @@ bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
>  
>  	encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
>  
> -	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
> +	for (encl->encl_size = 4096; encl->encl_size < encl->src_size + edmm_size;)
>  		encl->encl_size <<= 1;
>  

This seems to make the enclave a hardcoded 8GB larger for all (SGX1 and SGX2) tests,
not just for the test introduced with this commit (which is the only user of this extra space).
Is this intended? This can be done without impacting all the other tests.

>  	return true;
> diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
> index 9820b3809c69..65e79682f75e 100644
> --- a/tools/testing/selftests/sgx/main.c
> +++ b/tools/testing/selftests/sgx/main.c
> @@ -25,6 +25,8 @@ static const uint64_t MAGIC = 0x1122334455667788ULL;
>  static const uint64_t MAGIC2 = 0x8877665544332211ULL;
>  vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
>  
> +static const unsigned long edmm_size = 8589934592; //8G
> +

Could you please elaborate how this constant was chosen? I understand that this test helped
to uncover a bug and it is useful to add to the kernel. When doing so this test will be
run on systems with a variety of SGX memory sizes, could you please elaborate (and add a
snippet) how 8GB is the right value for all systems?

>  /*
>   * Security Information (SECINFO) data structure needed by a few SGX
>   * instructions (eg. ENCLU[EACCEPT] and ENCLU[EMODPE]) holds meta-data
> @@ -183,7 +185,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
>  	unsigned int i;
>  	void *addr;
>  
> -	if (!encl_load("test_encl.elf", encl, heap_size)) {
> +	if (!encl_load("test_encl.elf", encl, heap_size, edmm_size)) {
>  		encl_delete(encl);
>  		TH_LOG("Failed to load the test enclave.");
>  		return false;
> @@ -1210,6 +1212,122 @@ TEST_F(enclave, augment_via_eaccept)
>  	munmap(addr, PAGE_SIZE);
>  }
>  
> +/*
> + * Test for the addition of large number of pages to an initialized enclave via
> + * a pre-emptive run of EACCEPT on page to be added.

/on page to be added/on every page to be added/ ?

> + */
> +#define TIMEOUT_LONG 900 /* seconds */
> +TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_LONG)
> +{
> +	struct encl_op_get_from_addr get_addr_op;
> +	struct encl_op_put_to_addr put_addr_op;
> +	struct encl_op_eaccept eaccept_op;
> +	size_t total_size = 0;
> +	void *addr;
> +	unsigned long i;

(reverse fir tree order)

> +
> +	if (!sgx2_supported())
> +		SKIP(return, "SGX2 not supported");
> +
> +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
> +
> +	memset(&self->run, 0, sizeof(self->run));
> +	self->run.tcs = self->encl.encl_base;
> +
> +	for (i = 0; i < self->encl.nr_segments; i++) {
> +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> +
> +		total_size += seg->size;
> +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld", total_size, seg->size);
> +	}
> +
> +	/*
> +	 * Actual enclave size is expected to be larger than the loaded
> +	 * test enclave since enclave size must be a power of 2 in bytes while
> +	 * test_encl does not consume it all.
> +	 */
> +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);

Will this test ever fail?

> +
> +	/*
> +	 * mmap() a page at end of existing enclave to be used for dynamic
> +	 * EPC page.

copy&paste line still refers to single page

> +	 *
> +	 * Kernel will allow new mapping using any permissions if it
> +	 * falls into the enclave's address range but not backed
> +	 * by existing enclave pages.
> +	 */
> +	TH_LOG("mmaping pages at end of enclave...");
> +	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
> +			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
> +			self->encl.fd, 0);
> +	EXPECT_NE(addr, MAP_FAILED);
> +
> +	self->run.exception_vector = 0;
> +	self->run.exception_error_code = 0;
> +	self->run.exception_addr = 0;
> +
> +	/*
> +	 * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
> +	 * without a #PF). All should be transparent to userspace.
> +	 */

copy&paste from single page test referring to one page

> +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
> +			edmm_size);
> +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> +	eaccept_op.ret = 0;
> +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> +
> +	for (i = 0; i < edmm_size; i += 4096) {
> +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> +
> +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> +		if (self->run.exception_vector == 14 &&
> +			self->run.exception_error_code == 4 &&
> +			self->run.exception_addr == self->encl.encl_base) {
> +			munmap(addr, edmm_size);
> +			SKIP(return, "Kernel does not support adding pages to initialized enclave");
> +		}
> +
> +		EXPECT_EQ(self->run.exception_vector, 0);
> +		EXPECT_EQ(self->run.exception_error_code, 0);
> +		EXPECT_EQ(self->run.exception_addr, 0);
> +		ASSERT_EQ(eaccept_op.ret, 0);
> +		ASSERT_EQ(self->run.function, EEXIT);
> +	}
> +
> +	/*
> +	 * New page should be accessible from within enclave - attempt to
> +	 * write to it.
> +	 */

This portion below was also copied from previous test and by only testing
a write to the first page of the range the purpose is not clear. Could you
please elaborate if the intention is to only test accessibility of the first
page and why that is sufficient?


> +	put_addr_op.value = MAGIC;
> +	put_addr_op.addr = (unsigned long)addr;
> +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> +
> +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> +
> +	EXPECT_EEXIT(&self->run);
> +	EXPECT_EQ(self->run.exception_vector, 0);
> +	EXPECT_EQ(self->run.exception_error_code, 0);
> +	EXPECT_EQ(self->run.exception_addr, 0);
> +
> +	/*
> +	 * Read memory from newly added page that was just written to,
> +	 * confirming that data previously written (MAGIC) is present.
> +	 */
> +	get_addr_op.value = 0;
> +	get_addr_op.addr = (unsigned long)addr;
> +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> +
> +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> +
> +	EXPECT_EQ(get_addr_op.value, MAGIC);
> +	EXPECT_EEXIT(&self->run);
> +	EXPECT_EQ(self->run.exception_vector, 0);
> +	EXPECT_EQ(self->run.exception_error_code, 0);
> +	EXPECT_EQ(self->run.exception_addr, 0);
> +
> +	munmap(addr, edmm_size);
> +}
> +
>  /*
>   * SGX2 page type modification test in two phases:
>   * Phase 1:
> diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
> index fc585be97e2f..fe5d39ac0e1e 100644
> --- a/tools/testing/selftests/sgx/main.h
> +++ b/tools/testing/selftests/sgx/main.h
> @@ -35,7 +35,8 @@ extern unsigned char sign_key[];
>  extern unsigned char sign_key_end[];
>  
>  void encl_delete(struct encl *ctx);
> -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
> +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> +			   unsigned long edmm_size);
>  bool encl_measure(struct encl *encl);
>  bool encl_build(struct encl *encl);
>  uint64_t encl_get_entry(struct encl *encl, const char *symbol);
> diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
> index 50c5ab1aa6fa..6000cf0e4975 100644
> --- a/tools/testing/selftests/sgx/sigstruct.c
> +++ b/tools/testing/selftests/sgx/sigstruct.c
> @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
>  	if (!ctx)
>  		goto err;
>  
> -	if (!mrenclave_ecreate(ctx, encl->src_size))
> +	if (!mrenclave_ecreate(ctx, encl->encl_size))
>  		goto err;
>  
>  	for (i = 0; i < encl->nr_segments; i++) {


Looking at mrenclave_ecreate() the above snippet seems separate from this test and incomplete
since it now obtains encl->encl_size but continues to compute it again internally. Should
this be a separate fix?

Reinette
Jarkko Sakkinen Aug. 16, 2022, 11:33 p.m. UTC | #2
On Tue, Aug 16, 2022 at 09:26:40AM -0700, Reinette Chatre wrote:
> Hi Vijay,
> 
> Thank you very much for digging into this. A few comments below.
> 
> On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:
> > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > 
> > Add a new test case which is same as augment_via_eaccept but adds a
> > larger number of EPC pages to stress test EAUG via EACCEPT.
> > 
> > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
> > Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> > ---
> > I removed Githubisms (hyphens), added missing subsystem tag, and
> > cleaned up the commit message a bit.
> >  tools/testing/selftests/sgx/load.c      |   5 +-
> >  tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
> >  tools/testing/selftests/sgx/main.h      |   3 +-
> >  tools/testing/selftests/sgx/sigstruct.c |   2 +-
> >  4 files changed, 125 insertions(+), 5 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> > index 94bdeac1cf04..7de1b15c90b1 100644
> > --- a/tools/testing/selftests/sgx/load.c
> > +++ b/tools/testing/selftests/sgx/load.c
> > @@ -171,7 +171,8 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol)
> >  	return 0;
> >  }
> >  
> > -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> > +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> > +			   unsigned long edmm_size)
> >  {
> >  	const char device_path[] = "/dev/sgx_enclave";
> >  	struct encl_segment *seg;
> > @@ -300,7 +301,7 @@ bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> >  
> >  	encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
> >  
> > -	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
> > +	for (encl->encl_size = 4096; encl->encl_size < encl->src_size + edmm_size;)
> >  		encl->encl_size <<= 1;
> >  
> 
> This seems to create the hardcoded 8GB larger enclave for all (SGX1 and SGX2) tests,
> not just the test introduced with this commit (and the only user of this extra space).
> Is this intended? This can be done without impacting all the other tests.

It's a valid point. I can adjust the patch.

> 
> >  	return true;
> > diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
> > index 9820b3809c69..65e79682f75e 100644
> > --- a/tools/testing/selftests/sgx/main.c
> > +++ b/tools/testing/selftests/sgx/main.c
> > @@ -25,6 +25,8 @@ static const uint64_t MAGIC = 0x1122334455667788ULL;
> >  static const uint64_t MAGIC2 = 0x8877665544332211ULL;
> >  vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
> >  
> > +static const unsigned long edmm_size = 8589934592; //8G
> > +
> 
> Could you please elaborate how this constant was chosen? I understand that this test helped
> to uncover a bug and it is useful to add to the kernel. When doing so this test will be
> run on systems with a variety of SGX memory sizes, could you please elaborate (and add a
> snippet) how 8GB is the right value for all systems?

It is the only constant with which I know for sure that some people
(Vijay and Haitao) have been able to reproduce the bug.

Unless someone can show that the same bug reproduces
with a smaller constant, changing it would make the
whole test irrelevant.

> 
> /on page to be added/on every page to be added/ ?
> 
> > + */
> > +#define TIMEOUT_LONG 900 /* seconds */
> > +TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_LONG)
> > +{
> > +	struct encl_op_get_from_addr get_addr_op;
> > +	struct encl_op_put_to_addr put_addr_op;
> > +	struct encl_op_eaccept eaccept_op;
> > +	size_t total_size = 0;
> > +	void *addr;
> > +	unsigned long i;
> 
> (reverse fir tree order)

I would just change this to "int i" instead.

> 
> > +
> > +	if (!sgx2_supported())
> > +		SKIP(return, "SGX2 not supported");
> > +
> > +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
> > +
> > +	memset(&self->run, 0, sizeof(self->run));
> > +	self->run.tcs = self->encl.encl_base;
> > +
> > +	for (i = 0; i < self->encl.nr_segments; i++) {
> > +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> > +
> > +		total_size += seg->size;
> > +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld", total_size, seg->size);
> > +	}
> > +
> > +	/*
> > +	 * Actual enclave size is expected to be larger than the loaded
> > +	 * test enclave since enclave size must be a power of 2 in bytes while
> > +	 * test_encl does not consume it all.
> > +	 */
> > +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
> 
> Will this test ever fail?

With a *quick* look: no.

Vijay, what was the point of this check?

> > +
> > +	/*
> > +	 * mmap() a page at end of existing enclave to be used for dynamic
> > +	 * EPC page.
> 
> copy&paste line still refers to single page
> 
> > +	 *
> > +	 * Kernel will allow new mapping using any permissions if it
> > +	 * falls into the enclave's address range but not backed
> > +	 * by existing enclave pages.
> > +	 */
> > +	TH_LOG("mmaping pages at end of enclave...");
> > +	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
> > +			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
> > +			self->encl.fd, 0);
> > +	EXPECT_NE(addr, MAP_FAILED);
> > +
> > +	self->run.exception_vector = 0;
> > +	self->run.exception_error_code = 0;
> > +	self->run.exception_addr = 0;
> > +
> > +	/*
> > +	 * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
> > +	 * without a #PF). All should be transparent to userspace.
> > +	 */
> 
> copy&paste from single page test referring to one page
> 
> > +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
> > +			edmm_size);
> > +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> > +	eaccept_op.ret = 0;
> > +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> > +
> > +	for (i = 0; i < edmm_size; i += 4096) {
> > +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> > +
> > +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> > +		if (self->run.exception_vector == 14 &&
> > +			self->run.exception_error_code == 4 &&
> > +			self->run.exception_addr == self->encl.encl_base) {
> > +			munmap(addr, edmm_size);
> > +			SKIP(return, "Kernel does not support adding pages to initialized enclave");
> > +		}
> > +
> > +		EXPECT_EQ(self->run.exception_vector, 0);
> > +		EXPECT_EQ(self->run.exception_error_code, 0);
> > +		EXPECT_EQ(self->run.exception_addr, 0);
> > +		ASSERT_EQ(eaccept_op.ret, 0);
> > +		ASSERT_EQ(self->run.function, EEXIT);
> > +	}
> > +
> > +	/*
> > +	 * New page should be accessible from within enclave - attempt to
> > +	 * write to it.
> > +	 */
> 
> This portion below was also copied from previous test and by only testing
> a write to the first page of the range the purpose is not clear. Could you
> please elaborate if the intention is to only test accessibility of the first
> page and why that is sufficient?

It is sufficient because the test reproduces the bug. Rather, it
would need to be explained why you would want to do more than
that.

> > +	put_addr_op.value = MAGIC;
> > +	put_addr_op.addr = (unsigned long)addr;
> > +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> > +
> > +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> > +
> > +	EXPECT_EEXIT(&self->run);
> > +	EXPECT_EQ(self->run.exception_vector, 0);
> > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > +	EXPECT_EQ(self->run.exception_addr, 0);
> > +
> > +	/*
> > +	 * Read memory from newly added page that was just written to,
> > +	 * confirming that data previously written (MAGIC) is present.
> > +	 */
> > +	get_addr_op.value = 0;
> > +	get_addr_op.addr = (unsigned long)addr;
> > +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> > +
> > +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> > +
> > +	EXPECT_EQ(get_addr_op.value, MAGIC);
> > +	EXPECT_EEXIT(&self->run);
> > +	EXPECT_EQ(self->run.exception_vector, 0);
> > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > +	EXPECT_EQ(self->run.exception_addr, 0);
> > +
> > +	munmap(addr, edmm_size);
> > +}
> > +
> >  /*
> >   * SGX2 page type modification test in two phases:
> >   * Phase 1:
> > diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
> > index fc585be97e2f..fe5d39ac0e1e 100644
> > --- a/tools/testing/selftests/sgx/main.h
> > +++ b/tools/testing/selftests/sgx/main.h
> > @@ -35,7 +35,8 @@ extern unsigned char sign_key[];
> >  extern unsigned char sign_key_end[];
> >  
> >  void encl_delete(struct encl *ctx);
> > -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
> > +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> > +			   unsigned long edmm_size);
> >  bool encl_measure(struct encl *encl);
> >  bool encl_build(struct encl *encl);
> >  uint64_t encl_get_entry(struct encl *encl, const char *symbol);
> > diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
> > index 50c5ab1aa6fa..6000cf0e4975 100644
> > --- a/tools/testing/selftests/sgx/sigstruct.c
> > +++ b/tools/testing/selftests/sgx/sigstruct.c
> > @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
> >  	if (!ctx)
> >  		goto err;
> >  
> > -	if (!mrenclave_ecreate(ctx, encl->src_size))
> > +	if (!mrenclave_ecreate(ctx, encl->encl_size))
> >  		goto err;
> >  
> >  	for (i = 0; i < encl->nr_segments; i++) {
> 
> 
> Looking at mrenclave_ecreate() the above snippet seems separate from this test and incomplete
> since it now obtains encl->encl_size but continues to compute it again internally. Should
> this be a separate fix?

I would remove this part completely but this also needs
comment from Vijay.

> Reinette


BR, Jarkko
Jarkko Sakkinen Aug. 16, 2022, 11:37 p.m. UTC | #3
On Wed, Aug 17, 2022 at 02:33:54AM +0300, Jarkko Sakkinen wrote:
> On Tue, Aug 16, 2022 at 09:26:40AM -0700, Reinette Chatre wrote:
> > Hi Vijay,
> > 
> > Thank you very much for digging into this. A few comments below.
> > 
> > On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:
> > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > 
> > > Add a new test case which is same as augment_via_eaccept but adds a
> > > larger number of EPC pages to stress test EAUG via EACCEPT.
> > > 
> > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
> > > Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> > > ---
> > > I removed Githubisms (hyphens), added missing subsystem tag, and
> > > cleaned up the commit message a bit.
> > >  tools/testing/selftests/sgx/load.c      |   5 +-
> > >  tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
> > >  tools/testing/selftests/sgx/main.h      |   3 +-
> > >  tools/testing/selftests/sgx/sigstruct.c |   2 +-
> > >  4 files changed, 125 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> > > index 94bdeac1cf04..7de1b15c90b1 100644
> > > --- a/tools/testing/selftests/sgx/load.c
> > > +++ b/tools/testing/selftests/sgx/load.c
> > > @@ -171,7 +171,8 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol)
> > >  	return 0;
> > >  }
> > >  
> > > -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> > > +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> > > +			   unsigned long edmm_size)
> > >  {
> > >  	const char device_path[] = "/dev/sgx_enclave";
> > >  	struct encl_segment *seg;
> > > @@ -300,7 +301,7 @@ bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> > >  
> > >  	encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
> > >  
> > > -	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
> > > +	for (encl->encl_size = 4096; encl->encl_size < encl->src_size + edmm_size;)
> > >  		encl->encl_size <<= 1;
> > >  
> > 
> > This seems to create the hardcoded 8GB larger enclave for all (SGX1 and SGX2) tests,
> > not just the test introduced with this commit (and the only user of this extra space).
> > Is this intended? This can be done without impacting all the other tests.
> 
> It's a valid point. I can adjust the patch.
> 
> > 
> > >  	return true;
> > > diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
> > > index 9820b3809c69..65e79682f75e 100644
> > > --- a/tools/testing/selftests/sgx/main.c
> > > +++ b/tools/testing/selftests/sgx/main.c
> > > @@ -25,6 +25,8 @@ static const uint64_t MAGIC = 0x1122334455667788ULL;
> > >  static const uint64_t MAGIC2 = 0x8877665544332211ULL;
> > >  vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
> > >  
> > > +static const unsigned long edmm_size = 8589934592; //8G
> > > +
> > 
> > Could you please elaborate how this constant was chosen? I understand that this test helped
> > to uncover a bug and it is useful to add to the kernel. When doing so this test will be
> > run on systems with a variety of SGX memory sizes, could you please elaborate (and add a
> > snippet) how 8GB is the right value for all systems?
> 
> It is the only constant I know for sure that some people
> (Vijay and Haitao) have been able to reproduce the bug.
> 
> Unless someone can show that the same bug reproduces
> with a smaller constant, changing it would make the
> whole test irrelevant.
> 
> > 
> > /on page to be added/on every page to be added/ ?
> > 
> > > + */
> > > +#define TIMEOUT_LONG 900 /* seconds */
> > > +TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_LONG)
> > > +{
> > > +	struct encl_op_get_from_addr get_addr_op;
> > > +	struct encl_op_put_to_addr put_addr_op;
> > > +	struct encl_op_eaccept eaccept_op;
> > > +	size_t total_size = 0;
> > > +	void *addr;
> > > +	unsigned long i;
> > 
> > (reverse fir tree order)
> 
> I would just change this to "int i" instead.
> 
> > 
> > > +
> > > +	if (!sgx2_supported())
> > > +		SKIP(return, "SGX2 not supported");
> > > +
> > > +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
> > > +
> > > +	memset(&self->run, 0, sizeof(self->run));
> > > +	self->run.tcs = self->encl.encl_base;
> > > +
> > > +	for (i = 0; i < self->encl.nr_segments; i++) {
> > > +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> > > +
> > > +		total_size += seg->size;
> > > +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld", total_size, seg->size);
> > > +	}
> > > +
> > > +	/*
> > > +	 * Actual enclave size is expected to be larger than the loaded
> > > +	 * test enclave since enclave size must be a power of 2 in bytes while
> > > +	 * test_encl does not consume it all.
> > > +	 */
> > > +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
> > 
> > Will this test ever fail?
> 
> With a *quick* look: no.
> 
> Vijay, what was the point of this check?
> 
> > > +
> > > +	/*
> > > +	 * mmap() a page at end of existing enclave to be used for dynamic
> > > +	 * EPC page.
> > 
> > copy&paste line still refers to single page
> > 
> > > +	 *
> > > +	 * Kernel will allow new mapping using any permissions if it
> > > +	 * falls into the enclave's address range but not backed
> > > +	 * by existing enclave pages.
> > > +	 */
> > > +	TH_LOG("mmaping pages at end of enclave...");
> > > +	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
> > > +			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
> > > +			self->encl.fd, 0);
> > > +	EXPECT_NE(addr, MAP_FAILED);
> > > +
> > > +	self->run.exception_vector = 0;
> > > +	self->run.exception_error_code = 0;
> > > +	self->run.exception_addr = 0;
> > > +
> > > +	/*
> > > +	 * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
> > > +	 * without a #PF). All should be transparent to userspace.
> > > +	 */
> > 
> > copy&paste from single page test referring to one page
> > 
> > > +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
> > > +			edmm_size);
> > > +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> > > +	eaccept_op.ret = 0;
> > > +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> > > +
> > > +	for (i = 0; i < edmm_size; i += 4096) {
> > > +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> > > +
> > > +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> > > +		if (self->run.exception_vector == 14 &&
> > > +			self->run.exception_error_code == 4 &&
> > > +			self->run.exception_addr == self->encl.encl_base) {
> > > +			munmap(addr, edmm_size);
> > > +			SKIP(return, "Kernel does not support adding pages to initialized enclave");
> > > +		}
> > > +
> > > +		EXPECT_EQ(self->run.exception_vector, 0);
> > > +		EXPECT_EQ(self->run.exception_error_code, 0);
> > > +		EXPECT_EQ(self->run.exception_addr, 0);
> > > +		ASSERT_EQ(eaccept_op.ret, 0);
> > > +		ASSERT_EQ(self->run.function, EEXIT);
> > > +	}
> > > +
> > > +	/*
> > > +	 * New page should be accessible from within enclave - attempt to
> > > +	 * write to it.
> > > +	 */
> > 
> > This portion below was also copied from previous test and by only testing
> > a write to the first page of the range the purpose is not clear. Could you
> > please elaborate if the intention is to only test accessibility of the first
> > page and why that is sufficient?
> 
> It is sufficient because the test reproduces the bug. It would
> have to be rather elaborated why you would possibly want to do
> more than that.
> 
> > > +	put_addr_op.value = MAGIC;
> > > +	put_addr_op.addr = (unsigned long)addr;
> > > +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> > > +
> > > +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> > > +
> > > +	EXPECT_EEXIT(&self->run);
> > > +	EXPECT_EQ(self->run.exception_vector, 0);
> > > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > > +	EXPECT_EQ(self->run.exception_addr, 0);
> > > +
> > > +	/*
> > > +	 * Read memory from newly added page that was just written to,
> > > +	 * confirming that data previously written (MAGIC) is present.
> > > +	 */
> > > +	get_addr_op.value = 0;
> > > +	get_addr_op.addr = (unsigned long)addr;
> > > +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> > > +
> > > +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> > > +
> > > +	EXPECT_EQ(get_addr_op.value, MAGIC);
> > > +	EXPECT_EEXIT(&self->run);
> > > +	EXPECT_EQ(self->run.exception_vector, 0);
> > > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > > +	EXPECT_EQ(self->run.exception_addr, 0);
> > > +
> > > +	munmap(addr, edmm_size);
> > > +}
> > > +
> > >  /*
> > >   * SGX2 page type modification test in two phases:
> > >   * Phase 1:
> > > diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
> > > index fc585be97e2f..fe5d39ac0e1e 100644
> > > --- a/tools/testing/selftests/sgx/main.h
> > > +++ b/tools/testing/selftests/sgx/main.h
> > > @@ -35,7 +35,8 @@ extern unsigned char sign_key[];
> > >  extern unsigned char sign_key_end[];
> > >  
> > >  void encl_delete(struct encl *ctx);
> > > -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
> > > +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> > > +			   unsigned long edmm_size);
> > >  bool encl_measure(struct encl *encl);
> > >  bool encl_build(struct encl *encl);
> > >  uint64_t encl_get_entry(struct encl *encl, const char *symbol);
> > > diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
> > > index 50c5ab1aa6fa..6000cf0e4975 100644
> > > --- a/tools/testing/selftests/sgx/sigstruct.c
> > > +++ b/tools/testing/selftests/sgx/sigstruct.c
> > > @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
> > >  	if (!ctx)
> > >  		goto err;
> > >  
> > > -	if (!mrenclave_ecreate(ctx, encl->src_size))
> > > +	if (!mrenclave_ecreate(ctx, encl->encl_size))
> > >  		goto err;
> > >  
> > >  	for (i = 0; i < encl->nr_segments; i++) {
> > 
> > 
> > Looking at mrenclave_ecreate() the above snippet seems separate from this test and incomplete
> > since it now obtains encl->encl_size but continues to compute it again internally. Should
> > this be a separate fix?
> 
> I would remove this part completely but this also needs
> comment from Vijay.
> 
> > Reinette

Related:

https://github.com/jarkkojs/bpftrace-sgx/blob/main/sgx-alloc-error.bt

I thought that this might be useful if there is still some
need to discuss the bug, e.g. to compare the results.
It can be run with bpftrace.

The bug did not reproduce on my side, even after changing
to the 2GB PRMRR configuration.

BR, Jarkko
Dhanraj, Vijay Aug. 17, 2022, 1:27 a.m. UTC | #4
Hi Jarkko, Reinette,

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Tuesday, August 16, 2022 4:34 PM
> To: Chatre, Reinette <reinette.chatre@intel.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>; linux-
> sgx@vger.kernel.org; Dhanraj, Vijay <vijay.dhanraj@intel.com>; Shuah Khan
> <shuah@kernel.org>; open list:KERNEL SELFTEST FRAMEWORK <linux-
> kselftest@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
> augment_via_eaccept_long
> 
> On Tue, Aug 16, 2022 at 09:26:40AM -0700, Reinette Chatre wrote:
> > Hi Vijay,
> >
> > Thank you very much for digging into this. A few comments below.
> >
> > On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:
> > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > >
> > > Add a new test case which is same as augment_via_eaccept but adds a
> > > larger number of EPC pages to stress test EAUG via EACCEPT.
> > >
> > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
> > > Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> > > ---
> > > I removed Githubisms (hyphens), added missing subsystem tag, and
> > > cleaned up the commit message a bit.
> > >  tools/testing/selftests/sgx/load.c      |   5 +-
> > >  tools/testing/selftests/sgx/main.c      | 120
> +++++++++++++++++++++++-
> > >  tools/testing/selftests/sgx/main.h      |   3 +-
> > >  tools/testing/selftests/sgx/sigstruct.c |   2 +-
> > >  4 files changed, 125 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/tools/testing/selftests/sgx/load.c
> > > b/tools/testing/selftests/sgx/load.c
> > > index 94bdeac1cf04..7de1b15c90b1 100644
> > > --- a/tools/testing/selftests/sgx/load.c
> > > +++ b/tools/testing/selftests/sgx/load.c
> > > @@ -171,7 +171,8 @@ uint64_t encl_get_entry(struct encl *encl, const
> char *symbol)
> > >  	return 0;
> > >  }
> > >
> > > -bool encl_load(const char *path, struct encl *encl, unsigned long
> > > heap_size)
> > > +bool encl_load(const char *path, struct encl *encl, unsigned long
> heap_size,
> > > +			   unsigned long edmm_size)
> > >  {
> > >  	const char device_path[] = "/dev/sgx_enclave";
> > >  	struct encl_segment *seg;
> > > @@ -300,7 +301,7 @@ bool encl_load(const char *path, struct encl
> > > *encl, unsigned long heap_size)
> > >
> > >  	encl->src_size = encl->segment_tbl[j].offset +
> > > encl->segment_tbl[j].size;
> > >
> > > -	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
> > > +	for (encl->encl_size = 4096; encl->encl_size < encl->src_size +
> > > +edmm_size;)
> > >  		encl->encl_size <<= 1;
> > >
> >
> > This seems to create the hardcoded 8GB larger enclave for all (SGX1
> > and SGX2) tests, not just the test introduced with this commit (and the only
> user of this extra space).
> > Is this intended? This can be done without impacting all the other tests.
> 
> It's a valid point. I can adjust the patch.

Thanks Jarkko.

> 
> >
> > >  	return true;
> > > diff --git a/tools/testing/selftests/sgx/main.c
> > > b/tools/testing/selftests/sgx/main.c
> > > index 9820b3809c69..65e79682f75e 100644
> > > --- a/tools/testing/selftests/sgx/main.c
> > > +++ b/tools/testing/selftests/sgx/main.c
> > > @@ -25,6 +25,8 @@ static const uint64_t MAGIC =
> > > 0x1122334455667788ULL;  static const uint64_t MAGIC2 =
> > > 0x8877665544332211ULL;  vdso_sgx_enter_enclave_t
> > > vdso_sgx_enter_enclave;
> > >
> > > +static const unsigned long edmm_size = 8589934592; //8G
> > > +
> >
> > Could you please elaborate how this constant was chosen? I understand
> > that this test helped to uncover a bug and it is useful to add to the
> > kernel. When doing so this test will be run on systems with a variety
> > of SGX memory sizes, could you please elaborate (and add a
> > snippet) how 8GB is the right value for all systems?
> 
> It is the only constant I know for sure that some people (Vijay and Haitao)
> have been able to reproduce the bug.
> 
> Unless someone can show that the same bug reproduces with a smaller
> constant, changing it would make the whole test irrelevant.

I tried with 2GB and it always succeeded, and with 4GB I was able to repro sporadically. But with 8GB the failure was consistent. One thing to note is that even with 8GB Haitao couldn't reproduce this every time. So I am not sure if it is good for all systems, but on my ICX system I was able to consistently repro with this value.
 
> 
> >
> > /on page to be added/on every page to be added/ ?
> >
> > > + */
> > > +#define TIMEOUT_LONG 900 /* seconds */ TEST_F_TIMEOUT(enclave,
> > > +augment_via_eaccept_long, TIMEOUT_LONG) {
> > > +	struct encl_op_get_from_addr get_addr_op;
> > > +	struct encl_op_put_to_addr put_addr_op;
> > > +	struct encl_op_eaccept eaccept_op;
> > > +	size_t total_size = 0;
> > > +	void *addr;
> > > +	unsigned long i;
> >
> > (reverse fir tree order)
> 
> I would just change this to "int i" instead.

I think changing it to "int i" will cause a buffer overflow with edmm_size being 8GB.

> 
> >
> > > +
> > > +	if (!sgx2_supported())
> > > +		SKIP(return, "SGX2 not supported");
> > > +
> > > +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self-
> >encl,
> > > +_metadata));
> > > +
> > > +	memset(&self->run, 0, sizeof(self->run));
> > > +	self->run.tcs = self->encl.encl_base;
> > > +
> > > +	for (i = 0; i < self->encl.nr_segments; i++) {
> > > +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> > > +
> > > +		total_size += seg->size;
> > > +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld",
> total_size, seg->size);
> > > +	}
> > > +
> > > +	/*
> > > +	 * Actual enclave size is expected to be larger than the loaded
> > > +	 * test enclave since enclave size must be a power of 2 in bytes while
> > > +	 * test_encl does not consume it all.
> > > +	 */
> > > +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
> >
> > Will this test ever fail?
> 
> With a *quick* look: no.
> 
> Vijay, what was the point of this check?

Yes we can remove this check. I tried to copy from `augment_via_eaccept` and just changed the request size.

> 
> > > +
> > > +	/*
> > > +	 * mmap() a page at end of existing enclave to be used for dynamic
> > > +	 * EPC page.
> >
> > copy&paste line still refers to single page
> >
> > > +	 *
> > > +	 * Kernel will allow new mapping using any permissions if it
> > > +	 * falls into the enclave's address range but not backed
> > > +	 * by existing enclave pages.
> > > +	 */
> > > +	TH_LOG("mmaping pages at end of enclave...");
> > > +	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
> > > +			PROT_READ | PROT_WRITE | PROT_EXEC,
> MAP_SHARED | MAP_FIXED,
> > > +			self->encl.fd, 0);
> > > +	EXPECT_NE(addr, MAP_FAILED);
> > > +
> > > +	self->run.exception_vector = 0;
> > > +	self->run.exception_error_code = 0;
> > > +	self->run.exception_addr = 0;
> > > +
> > > +	/*
> > > +	 * Run EACCEPT on new page to trigger the #PF->EAUG-
> >EACCEPT(again
> > > +	 * without a #PF). All should be transparent to userspace.
> > > +	 */
> >
> > copy&paste from single page test referring to one page
> >
> > > +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd
> bytes may take a while ...",
> > > +			edmm_size);
> > > +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W |
> SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> > > +	eaccept_op.ret = 0;
> > > +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> > > +
> > > +	for (i = 0; i < edmm_size; i += 4096) {
> > > +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> > > +
> > > +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> > > +		if (self->run.exception_vector == 14 &&
> > > +			self->run.exception_error_code == 4 &&
> > > +			self->run.exception_addr == self->encl.encl_base) {
> > > +			munmap(addr, edmm_size);
> > > +			SKIP(return, "Kernel does not support adding pages
> to initialized enclave");
> > > +		}
> > > +
> > > +		EXPECT_EQ(self->run.exception_vector, 0);
> > > +		EXPECT_EQ(self->run.exception_error_code, 0);
> > > +		EXPECT_EQ(self->run.exception_addr, 0);
> > > +		ASSERT_EQ(eaccept_op.ret, 0);
> > > +		ASSERT_EQ(self->run.function, EEXIT);
> > > +	}
> > > +
> > > +	/*
> > > +	 * New page should be accessible from within enclave - attempt to
> > > +	 * write to it.
> > > +	 */
> >
> > This portion below was also copied from previous test and by only
> > testing a write to the first page of the range the purpose is not
> > clear. Could you please elaborate if the intention is to only test
> > accessibility of the first page and why that is sufficient?
> 
> It is sufficient because the test reproduces the bug. It would have to be
> rather elaborated why you would possibly want to do more than that.
> 
> > > +	put_addr_op.value = MAGIC;
> > > +	put_addr_op.addr = (unsigned long)addr;
> > > +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> > > +
> > > +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> > > +
> > > +	EXPECT_EEXIT(&self->run);
> > > +	EXPECT_EQ(self->run.exception_vector, 0);
> > > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > > +	EXPECT_EQ(self->run.exception_addr, 0);
> > > +
> > > +	/*
> > > +	 * Read memory from newly added page that was just written to,
> > > +	 * confirming that data previously written (MAGIC) is present.
> > > +	 */
> > > +	get_addr_op.value = 0;
> > > +	get_addr_op.addr = (unsigned long)addr;
> > > +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> > > +
> > > +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> > > +
> > > +	EXPECT_EQ(get_addr_op.value, MAGIC);
> > > +	EXPECT_EEXIT(&self->run);
> > > +	EXPECT_EQ(self->run.exception_vector, 0);
> > > +	EXPECT_EQ(self->run.exception_error_code, 0);
> > > +	EXPECT_EQ(self->run.exception_addr, 0);
> > > +
> > > +	munmap(addr, edmm_size);
> > > +}
> > > +
> > >  /*
> > >   * SGX2 page type modification test in two phases:
> > >   * Phase 1:
> > > diff --git a/tools/testing/selftests/sgx/main.h
> > > b/tools/testing/selftests/sgx/main.h
> > > index fc585be97e2f..fe5d39ac0e1e 100644
> > > --- a/tools/testing/selftests/sgx/main.h
> > > +++ b/tools/testing/selftests/sgx/main.h
> > > @@ -35,7 +35,8 @@ extern unsigned char sign_key[];  extern unsigned
> > > char sign_key_end[];
> > >
> > >  void encl_delete(struct encl *ctx); -bool encl_load(const char
> > > *path, struct encl *encl, unsigned long heap_size);
> > > +bool encl_load(const char *path, struct encl *encl, unsigned long
> heap_size,
> > > +			   unsigned long edmm_size);
> > >  bool encl_measure(struct encl *encl);  bool encl_build(struct encl
> > > *encl);  uint64_t encl_get_entry(struct encl *encl, const char
> > > *symbol); diff --git a/tools/testing/selftests/sgx/sigstruct.c
> > > b/tools/testing/selftests/sgx/sigstruct.c
> > > index 50c5ab1aa6fa..6000cf0e4975 100644
> > > --- a/tools/testing/selftests/sgx/sigstruct.c
> > > +++ b/tools/testing/selftests/sgx/sigstruct.c
> > > @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
> > >  	if (!ctx)
> > >  		goto err;
> > >
> > > -	if (!mrenclave_ecreate(ctx, encl->src_size))
> > > +	if (!mrenclave_ecreate(ctx, encl->encl_size))
> > >  		goto err;
> > >
> > >  	for (i = 0; i < encl->nr_segments; i++) {
> >
> >
> > Looking at mrenclave_ecreate() the above snippet seems separate from
> > this test and incomplete since it now obtains encl->encl_size but
> > continues to compute it again internally. Should this be a separate fix?
> 
> I would remove this part completely but this also needs comment from Vijay.

If we restrict the large enclave size just for this test, then the above change can be reverted. Calling `mrenclave_ecreate` with src_size results in EINIT failure, and I think the reason is an incorrect MRENCLAVE.
> 
> > Reinette
> 
> 
> BR, Jarkko

Regards, Vijay
Reinette Chatre Aug. 17, 2022, 4:35 a.m. UTC | #5
Hi Vijay,

On 8/16/2022 6:27 PM, Dhanraj, Vijay wrote:
> Hi Jarkko, Reinette,
> 
>> -----Original Message-----
>> From: Jarkko Sakkinen <jarkko@kernel.org>
>> Sent: Tuesday, August 16, 2022 4:34 PM
>> To: Chatre, Reinette <reinette.chatre@intel.com>
>> Cc: Dave Hansen <dave.hansen@linux.intel.com>; linux-
>> sgx@vger.kernel.org; Dhanraj, Vijay <vijay.dhanraj@intel.com>; Shuah Khan
>> <shuah@kernel.org>; open list:KERNEL SELFTEST FRAMEWORK <linux-
>> kselftest@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
>> Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
>> augment_via_eaccept_long
>>
>> On Tue, Aug 16, 2022 at 09:26:40AM -0700, Reinette Chatre wrote:
>>> Hi Vijay,
>>>
>>> Thank you very much for digging into this. A few comments below.
>>>
>>> On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:

...

>>>> @@ -25,6 +25,8 @@ static const uint64_t MAGIC =
>>>> 0x1122334455667788ULL;  static const uint64_t MAGIC2 =
>>>> 0x8877665544332211ULL;  vdso_sgx_enter_enclave_t
>>>> vdso_sgx_enter_enclave;
>>>>
>>>> +static const unsigned long edmm_size = 8589934592; //8G
>>>> +
>>>
>>> Could you please elaborate how this constant was chosen? I understand
>>> that this test helped to uncover a bug and it is useful to add to the
>>> kernel. When doing so this test will be run on systems with a variety
>>> of SGX memory sizes, could you please elaborate (and add a
>>> snippet) how 8GB is the right value for all systems?
>>
>> It is the only constant I know for sure that some people (Vijay and Haitao)
>> have been able to reproduce the bug.
>>
>> Unless someone can show that the same bug reproduces with a smaller
>> constant, changing it would make the whole test irrelevant.
> 
> I tried with 2GB and it always succeeded, and with 4GB I was able to repro sporadically. But with 8GB the failure was consistent. One thing to note is that even with 8GB Haitao couldn't reproduce this every time. So I am not sure if it is good for all systems, but on my ICX system I was able to consistently repro with this value.
>  

Could all of this information be placed in a description of this constant? At this time
it appears to be arbitrary.

>>>> +
>>>> +	if (!sgx2_supported())
>>>> +		SKIP(return, "SGX2 not supported");
>>>> +
>>>> +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self-
>>> encl,
>>>> +_metadata));
>>>> +
>>>> +	memset(&self->run, 0, sizeof(self->run));
>>>> +	self->run.tcs = self->encl.encl_base;
>>>> +
>>>> +	for (i = 0; i < self->encl.nr_segments; i++) {
>>>> +		struct encl_segment *seg = &self->encl.segment_tbl[i];
>>>> +
>>>> +		total_size += seg->size;
>>>> +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld",
>> total_size, seg->size);
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * Actual enclave size is expected to be larger than the loaded
>>>> +	 * test enclave since enclave size must be a power of 2 in bytes while
>>>> +	 * test_encl does not consume it all.
>>>> +	 */
>>>> +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
>>>
>>> Will this test ever fail?
>>
>> With a *quick* look: no.
>>
>> Vijay, what was the point of this check?
> 
> Yes we can remove this check. I tried to copy from `augment_via_eaccept` and just changed the request size.
> 

In augment_via_eaccept the check is required since augment_via_eaccept assumes
that there is enough address space in the existing enclave for dynamic memory addition
without needing to change the enclave size. If anybody later changes the test
enclave to break this assumption then that check will pick it up.

In this new test the enclave size is set to accommodate the planned
dynamic memory addition and thus adding a test to check if the enclave
has enough space for the dynamic memory is not needed.

>>>> +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd
>> bytes may take a while ...",
>>>> +			edmm_size);
>>>> +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W |
>> SGX_SECINFO_REG | SGX_SECINFO_PENDING;
>>>> +	eaccept_op.ret = 0;
>>>> +	eaccept_op.header.type = ENCL_OP_EACCEPT;
>>>> +
>>>> +	for (i = 0; i < edmm_size; i += 4096) {
>>>> +		eaccept_op.epc_addr = (uint64_t)(addr + i);
>>>> +
>>>> +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
>>>> +		if (self->run.exception_vector == 14 &&
>>>> +			self->run.exception_error_code == 4 &&
>>>> +			self->run.exception_addr == self->encl.encl_base) {
>>>> +			munmap(addr, edmm_size);
>>>> +			SKIP(return, "Kernel does not support adding pages
>> to initialized enclave");
>>>> +		}
>>>> +
>>>> +		EXPECT_EQ(self->run.exception_vector, 0);
>>>> +		EXPECT_EQ(self->run.exception_error_code, 0);
>>>> +		EXPECT_EQ(self->run.exception_addr, 0);
>>>> +		ASSERT_EQ(eaccept_op.ret, 0);
>>>> +		ASSERT_EQ(self->run.function, EEXIT);
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * New page should be accessible from within enclave - attempt to
>>>> +	 * write to it.
>>>> +	 */
>>>
>>> This portion below was also copied from previous test and by only
>>> testing a write to the first page of the range the purpose is not
>>> clear. Could you please elaborate if the intention is to only test
>>> accessibility of the first page and why that is sufficient?
>>
>> It is sufficient because the test reproduces the bug. It would have to be
>> rather elaborated why you would possibly want to do more than that.

That is fair. An accurate comment (currently an inaccurate copy&paste) would
help to explain this part of the test.

>>>> +	put_addr_op.value = MAGIC;
>>>> +	put_addr_op.addr = (unsigned long)addr;
>>>> +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
>>>> +
>>>> +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
>>>> +
>>>> +	EXPECT_EEXIT(&self->run);
>>>> +	EXPECT_EQ(self->run.exception_vector, 0);
>>>> +	EXPECT_EQ(self->run.exception_error_code, 0);
>>>> +	EXPECT_EQ(self->run.exception_addr, 0);
>>>> +
>>>> +	/*
>>>> +	 * Read memory from newly added page that was just written to,
>>>> +	 * confirming that data previously written (MAGIC) is present.
>>>> +	 */
>>>> +	get_addr_op.value = 0;
>>>> +	get_addr_op.addr = (unsigned long)addr;
>>>> +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
>>>> +
>>>> +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
>>>> +
>>>> +	EXPECT_EQ(get_addr_op.value, MAGIC);
>>>> +	EXPECT_EEXIT(&self->run);
>>>> +	EXPECT_EQ(self->run.exception_vector, 0);
>>>> +	EXPECT_EQ(self->run.exception_error_code, 0);
>>>> +	EXPECT_EQ(self->run.exception_addr, 0);
>>>> +
>>>> +	munmap(addr, edmm_size);
>>>> +}
>>>> +
>>>>  /*
>>>>   * SGX2 page type modification test in two phases:
>>>>   * Phase 1:
>>>> diff --git a/tools/testing/selftests/sgx/main.h
>>>> b/tools/testing/selftests/sgx/main.h
>>>> index fc585be97e2f..fe5d39ac0e1e 100644
>>>> --- a/tools/testing/selftests/sgx/main.h
>>>> +++ b/tools/testing/selftests/sgx/main.h
>>>> @@ -35,7 +35,8 @@ extern unsigned char sign_key[];  extern unsigned
>>>> char sign_key_end[];
>>>>
>>>>  void encl_delete(struct encl *ctx); -bool encl_load(const char
>>>> *path, struct encl *encl, unsigned long heap_size);
>>>> +bool encl_load(const char *path, struct encl *encl, unsigned long
>> heap_size,
>>>> +			   unsigned long edmm_size);
>>>>  bool encl_measure(struct encl *encl);  bool encl_build(struct encl
>>>> *encl);  uint64_t encl_get_entry(struct encl *encl, const char
>>>> *symbol); diff --git a/tools/testing/selftests/sgx/sigstruct.c
>>>> b/tools/testing/selftests/sgx/sigstruct.c
>>>> index 50c5ab1aa6fa..6000cf0e4975 100644
>>>> --- a/tools/testing/selftests/sgx/sigstruct.c
>>>> +++ b/tools/testing/selftests/sgx/sigstruct.c
>>>> @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
>>>>  	if (!ctx)
>>>>  		goto err;
>>>>
>>>> -	if (!mrenclave_ecreate(ctx, encl->src_size))
>>>> +	if (!mrenclave_ecreate(ctx, encl->encl_size))
>>>>  		goto err;
>>>>
>>>>  	for (i = 0; i < encl->nr_segments; i++) {
>>>
>>>
>>> Looking at mrenclave_ecreate() the above snippet seems separate from
>>> this test and incomplete since it now obtains encl->encl_size but
>>> continues to compute it again internally. Should this be a separate fix?
>>
>> I would remove this part completely but this also needs comment from Vijay.
> 
> If we restrict the large enclave size just for this test, then the above change can be reverted. Calling `mrenclave_ecreate` with src_size results in EINIT failure, and I think the reason is an incorrect MRENCLAVE.

From what I understand this change is needed since the enclave size is no longer just
the size of all the segments at enclave creation. I think it is incomplete though since it
still recomputes the enclave size even though it is now provided as parameter.
This change does not need to be part of this test addition.

Reinette
Jarkko Sakkinen Aug. 17, 2022, 2:38 p.m. UTC | #6
On Wed, Aug 17, 2022 at 01:27:38AM +0000, Dhanraj, Vijay wrote:
> I think changing it to "int i" will cause a buffer overflow with
> edmm_size being 8GB.

Hmm.. 'i' iterates segments. Am I missing something?

BR, Jarkko
Jarkko Sakkinen Aug. 17, 2022, 2:44 p.m. UTC | #7
On Tue, Aug 16, 2022 at 09:35:27PM -0700, Reinette Chatre wrote:
> >>> This portion below was also copied from previous test and by only
> >>> testing a write to the first page of the range the purpose is not
> >>> clear. Could you please elaborate if the intention is to only test
> >>> accessibility of the first page and why that is sufficient?
> >>
> >> It is sufficient because the test reproduces the bug. It would have to be
> >> rather elaborated why you would possibly want to do more than that.
> 
> That is fair. An accurate comment (currently an inaccurate copy&paste) would
> help to explain this part of the test.

I would simply add something like:

/* 
 * Define memory pool size big enough to trigger the reclaimer in the EAUG
 * path of the page reclaimer.
 */

Suggestions/edits obviously welcome for the comment.

BR, Jarkko
Jarkko Sakkinen Aug. 17, 2022, 2:53 p.m. UTC | #8
On Wed, Aug 17, 2022 at 05:44:31PM +0300, Jarkko Sakkinen wrote:
> On Tue, Aug 16, 2022 at 09:35:27PM -0700, Reinette Chatre wrote:
> > >>> This portion below was also copied from previous test and by only
> > >>> testing a write to the first page of the range the purpose is not
> > >>> clear. Could you please elaborate if the intention is to only test
> > >>> accessibility of the first page and why that is sufficient?
> > >>
> > >> It is sufficient because the test reproduces the bug. It would have to be
> > >> rather elaborated why you would possibly want to do more than that.
> > 
> > That is fair. An accurate comment (currently an inaccurate copy&paste) would
> > help to explain this part of the test.
> 
> I would simply add something like:
> 
> /* 
>  * Define memory pool size big enough to trigger the reclaimer in the EAUG
>  * path of the page reclaimer.
>  */
> 
> Suggestions/edits obviously welcome for the comment.

I wonder if we could put .bt files somewhere to make them available. In
root causing this bug, bpftrace scripting was the key, so it would be nice
to have them available along with kselftest.

I could imagine that we will also run into allocation bugs in the future,
so it would be good to have the script available when you clone the kernel
tree, and possibly more scripts in the future.

E.g. add bt/alloc-error.bt under tools/testing/selftests/sgx.

BR, Jarkko
Dhanraj, Vijay Aug. 17, 2022, 3:39 p.m. UTC | #9
> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Wednesday, August 17, 2022 7:39 AM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: Chatre, Reinette <reinette.chatre@intel.com>; Dave Hansen
> <dave.hansen@linux.intel.com>; linux-sgx@vger.kernel.org; Shuah Khan
> <shuah@kernel.org>; open list:KERNEL SELFTEST FRAMEWORK <linux-
> kselftest@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
> augment_via_eaccept_long
> 
> On Wed, Aug 17, 2022 at 01:27:38AM +0000, Dhanraj, Vijay wrote:
> > I think changing it to "int i" will cause a buffer overflow with
> > edmm_size being 8GB.
> 
> Hmm.. 'i' iterates segments. Amd I missing something?
> 
> BR, Jarkko

It is also used when iterating over pages to eaccept. This might cause an issue.

	for (i = 0; i < edmm_size; i += 4096) {
		eaccept_op.epc_addr = (uint64_t)(addr + i);

Regards, Vijay
Reinette Chatre Aug. 17, 2022, 3:43 p.m. UTC | #10
Hi Jarkko,

On 8/17/2022 7:53 AM, Jarkko Sakkinen wrote:
> On Wed, Aug 17, 2022 at 05:44:31PM +0300, Jarkko Sakkinen wrote:
>> On Tue, Aug 16, 2022 at 09:35:27PM -0700, Reinette Chatre wrote:
>>>>>> This portion below was also copied from previous test and by only
>>>>>> testing a write to the first page of the range the purpose is not
>>>>>> clear. Could you please elaborate if the intention is to only test
>>>>>> accessibility of the first page and why that is sufficient?
>>>>>
>>>>> It is sufficient because the test reproduces the bug. It would have to be
>>>>> rather elaborated why you would possibly want to do more than that.
>>>
>>> That is fair. An accurate comment (currently an inaccurate copy&paste) would
>>> help to explain this part of the test.
>>
>> I would simply add something like:
>>
>> /* 
>>  * Define memory pool size big enough to trigger the reclaimer in the EAUG
>>  * path of the page reclaimer.
>>  */
>>
>> Suggestions/edits obviously welcome for the comment.

The comment seems to better match the code below than the area referred to above:
        static const unsigned long edmm_size = 8589934592; //8G

Even so, I think that raises the point that this is platform specific since
edmm_size of 8GB would not trigger reclaimer on all platforms.

How about adjusting it to:
/*
 * Define memory pool size big enough to trigger the reclaimer in the EAUG
 * path of the page reclaimer on some platforms. This constant has been
 * successful in triggering a bug on some platforms (independent of the
 * platforms where the reclaimer is triggered) and thus considered
 * appropriate for general use.
 */ 


Regarding the area referred to above, a comment like below may help:

/*
 * Pool of pages were successfully added to enclave. Perform sanity
 * check on first page of the pool only to ensure data can be written
 * to and read from a dynamically added enclave page.
 */

> 
> I wonder if we could put .bt files somewhere to make them available. In
> root causing this bug bpftrace scripting was the key so it would nice to
> have them available along with kselftest.
> 
> I could imagine that we end up also in future to bugs allocation so
> it would have the script when you clone the kernel tree, and possibly
> more scripts in future.
> 
> E.g. add bt/alloc-error.bt under tools/testing/selftests/sgx.

Thank you very much for helping to debug this issue. I also think
the scripts you created are very valuable and making them easily
accessible sounds great.

Reinette
Dhanraj, Vijay Aug. 17, 2022, 4:14 p.m. UTC | #11
Hi Reinette,

> -----Original Message-----
> From: Chatre, Reinette <reinette.chatre@intel.com>
> Sent: Tuesday, August 16, 2022 9:35 PM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>; Jarkko Sakkinen
> <jarkko@kernel.org>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>; linux-
> sgx@vger.kernel.org; Shuah Khan <shuah@kernel.org>; open list:KERNEL
> SELFTEST FRAMEWORK <linux-kselftest@vger.kernel.org>; open list <linux-
> kernel@vger.kernel.org>
> Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
> augment_via_eaccept_long
> 
> Hi Vijay,
> 
> On 8/16/2022 6:27 PM, Dhanraj, Vijay wrote:
> > Hi Jarkko, Reinette,
> >
> >> -----Original Message-----
> >> From: Jarkko Sakkinen <jarkko@kernel.org>
> >> Sent: Tuesday, August 16, 2022 4:34 PM
> >> To: Chatre, Reinette <reinette.chatre@intel.com>
> >> Cc: Dave Hansen <dave.hansen@linux.intel.com>; linux-
> >> sgx@vger.kernel.org; Dhanraj, Vijay <vijay.dhanraj@intel.com>; Shuah
> >> Khan <shuah@kernel.org>; open list:KERNEL SELFTEST FRAMEWORK
> <linux-
> >> kselftest@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
> >> Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
> >> augment_via_eaccept_long
> >>
> >> On Tue, Aug 16, 2022 at 09:26:40AM -0700, Reinette Chatre wrote:
> >>> Hi Vijay,
> >>>
> >>> Thank you very much for digging into this. A few comments below.
> >>>
> >>> On 8/15/2022 4:39 PM, Jarkko Sakkinen wrote:
> 
> ...
> 
> >>>> @@ -25,6 +25,8 @@ static const uint64_t MAGIC =
> >>>> 0x1122334455667788ULL;  static const uint64_t MAGIC2 =
> >>>> 0x8877665544332211ULL;  vdso_sgx_enter_enclave_t
> >>>> vdso_sgx_enter_enclave;
> >>>>
> >>>> +static const unsigned long edmm_size = 8589934592; //8G
> >>>> +
> >>>
> >>> Could you please elaborate how this constant was chosen? I
> >>> understand that this test helped to uncover a bug and it is useful
> >>> to add to the kernel. When doing so this test will be run on systems
> >>> with a variety of SGX memory sizes, could you please elaborate (and
> >>> add a
> >>> snippet) how 8GB is the right value for all systems?
> >>
> >> It is the only constant I know for sure that some people (Vijay and
> >> Haitao) have been able to reproduce the bug.
> >>
> >> Unless someone can show that the same bug reproduces with a smaller
> >> constant, changing it would make the whole test irrelevant.
> >
> > I tried with 2GB and it always succeed and with 4GB was able to repro
> sporadically. But with 8GB failure was consistent. One thing to note is even
> with 8GB Haitao couldn't reproduce this every time. So not sure if it good for
> all the systems but on my ICX system, I was able to consistently repro with
> this value.
> >
> 
> Could all of this information be placed in a description of this constant? At this
> time it appears to be arbitrary.

Yes it makes sense to record the reason for this constant.
> 
> >>>> +
> >>>> +	if (!sgx2_supported())
> >>>> +		SKIP(return, "SGX2 not supported");
> >>>> +
> >>>> +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self-
> >>> encl,
> >>>> +_metadata));
> >>>> +
> >>>> +	memset(&self->run, 0, sizeof(self->run));
> >>>> +	self->run.tcs = self->encl.encl_base;
> >>>> +
> >>>> +	for (i = 0; i < self->encl.nr_segments; i++) {
> >>>> +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> >>>> +
> >>>> +		total_size += seg->size;
> >>>> +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld",
> >> total_size, seg->size);
> >>>> +	}
> >>>> +
> >>>> +	/*
> >>>> +	 * Actual enclave size is expected to be larger than the loaded
> >>>> +	 * test enclave since enclave size must be a power of 2 in bytes while
> >>>> +	 * test_encl does not consume it all.
> >>>> +	 */
> >>>> +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
> >>>
> >>> Will this test ever fail?
> >>
> >> With a *quick* look: no.
> >>
> >> Vijay, what was the point of this check?
> >
> > Yes we can remove this check. I tried to copy from `augment_via_eaccept`
> and just changed the request size.
> >
> 
> In augment_via_eaccept the check is required since augment_via_eaccept
> assumes that there is enough address space in the existing enclave for
> dynamic memory addition without needing to change the enclave size. If
> anybody later changes the test enclave to break this assumption then that
> check will pick it up.


Got it, thanks. Yes, this check can be removed.

> 
> In this new test the enclave size is set to accommodate the planned dynamic
> memory addition and thus adding a test to check if the enclave has enough
> space for the dynamic memory is not needed.
> 
> >>>> +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd
> >> bytes may take a while ...",
> >>>> +			edmm_size);
> >>>> +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W |
> >> SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> >>>> +	eaccept_op.ret = 0;
> >>>> +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> >>>> +
> >>>> +	for (i = 0; i < edmm_size; i += 4096) {
> >>>> +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> >>>> +
> >>>> +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> >>>> +		if (self->run.exception_vector == 14 &&
> >>>> +			self->run.exception_error_code == 4 &&
> >>>> +			self->run.exception_addr == self->encl.encl_base) {
> >>>> +			munmap(addr, edmm_size);
> >>>> +			SKIP(return, "Kernel does not support adding pages
> >> to initialized enclave");
> >>>> +		}
> >>>> +
> >>>> +		EXPECT_EQ(self->run.exception_vector, 0);
> >>>> +		EXPECT_EQ(self->run.exception_error_code, 0);
> >>>> +		EXPECT_EQ(self->run.exception_addr, 0);
> >>>> +		ASSERT_EQ(eaccept_op.ret, 0);
> >>>> +		ASSERT_EQ(self->run.function, EEXIT);
> >>>> +	}
> >>>> +
> >>>> +	/*
> >>>> +	 * New page should be accessible from within enclave - attempt to
> >>>> +	 * write to it.
> >>>> +	 */
> >>>
> >>> This portion below was also copied from previous test and by only
> >>> testing a write to the first page of the range the purpose is not
> >>> clear. Could you please elaborate if the intention is to only test
> >>> accessibility of the first page and why that is sufficient?
> >>
> >> It is sufficient because the test reproduces the bug. It would have
> >> to be rather elaborated why you would possibly want to do more than
> that.
> 
> That is fair. An accurate comment (currently an inaccurate copy&paste)
> would help to explain this part of the test.
> 
> >>>> +	put_addr_op.value = MAGIC;
> >>>> +	put_addr_op.addr = (unsigned long)addr;
> >>>> +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> >>>> +
> >>>> +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> >>>> +
> >>>> +	EXPECT_EEXIT(&self->run);
> >>>> +	EXPECT_EQ(self->run.exception_vector, 0);
> >>>> +	EXPECT_EQ(self->run.exception_error_code, 0);
> >>>> +	EXPECT_EQ(self->run.exception_addr, 0);
> >>>> +
> >>>> +	/*
> >>>> +	 * Read memory from newly added page that was just written to,
> >>>> +	 * confirming that data previously written (MAGIC) is present.
> >>>> +	 */
> >>>> +	get_addr_op.value = 0;
> >>>> +	get_addr_op.addr = (unsigned long)addr;
> >>>> +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> >>>> +
> >>>> +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> >>>> +
> >>>> +	EXPECT_EQ(get_addr_op.value, MAGIC);
> >>>> +	EXPECT_EEXIT(&self->run);
> >>>> +	EXPECT_EQ(self->run.exception_vector, 0);
> >>>> +	EXPECT_EQ(self->run.exception_error_code, 0);
> >>>> +	EXPECT_EQ(self->run.exception_addr, 0);
> >>>> +
> >>>> +	munmap(addr, edmm_size);
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * SGX2 page type modification test in two phases:
> >>>>   * Phase 1:
> >>>> diff --git a/tools/testing/selftests/sgx/main.h
> >>>> b/tools/testing/selftests/sgx/main.h
> >>>> index fc585be97e2f..fe5d39ac0e1e 100644
> >>>> --- a/tools/testing/selftests/sgx/main.h
> >>>> +++ b/tools/testing/selftests/sgx/main.h
> >>>> @@ -35,7 +35,8 @@ extern unsigned char sign_key[];  extern unsigned
> >>>> char sign_key_end[];
> >>>>
> >>>>  void encl_delete(struct encl *ctx); -bool encl_load(const char
> >>>> *path, struct encl *encl, unsigned long heap_size);
> >>>> +bool encl_load(const char *path, struct encl *encl, unsigned long
> >> heap_size,
> >>>> +			   unsigned long edmm_size);
> >>>>  bool encl_measure(struct encl *encl);  bool encl_build(struct encl
> >>>> *encl);  uint64_t encl_get_entry(struct encl *encl, const char
> >>>> *symbol); diff --git a/tools/testing/selftests/sgx/sigstruct.c
> >>>> b/tools/testing/selftests/sgx/sigstruct.c
> >>>> index 50c5ab1aa6fa..6000cf0e4975 100644
> >>>> --- a/tools/testing/selftests/sgx/sigstruct.c
> >>>> +++ b/tools/testing/selftests/sgx/sigstruct.c
> >>>> @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
> >>>>  	if (!ctx)
> >>>>  		goto err;
> >>>>
> >>>> -	if (!mrenclave_ecreate(ctx, encl->src_size))
> >>>> +	if (!mrenclave_ecreate(ctx, encl->encl_size))
> >>>>  		goto err;
> >>>>
> >>>>  	for (i = 0; i < encl->nr_segments; i++) {
> >>>
> >>>
> >>> Looking at mrenclave_ecreate() the above snippet seems separate from
> >>> this test and incomplete since it now obtains encl->encl_size but
> >>> continues to compute it again internally. Should this be a separate fix?
> >>
> >> I would remove this part completely but this also needs comment from
> Vijay.
> >
> > If we restrict the large enclave size just for this test, then the above change
> can be reverted. Calling `mrenclave_ecreate` with src_size results in EINIT
> failure and I think the reason is because of incorrect MRenclave.
> 
> From what I understand this change is needed since the enclave size is no
> longer just the size of all the segments at enclave creation. I think it is
> incomplete though since it still recomputes the enclave size even though it is
> now provided as parameter.
> This change does not need to be part of this test addition.

I see your point and this change can be removed from the test.
> 
> Reinette

Regards, Vijay
Jarkko Sakkinen Aug. 17, 2022, 4:36 p.m. UTC | #12
On Wed, Aug 17, 2022 at 08:43:57AM -0700, Reinette Chatre wrote:
> Hi Jarkko,
> 
> On 8/17/2022 7:53 AM, Jarkko Sakkinen wrote:
> > On Wed, Aug 17, 2022 at 05:44:31PM +0300, Jarkko Sakkinen wrote:
> >> On Tue, Aug 16, 2022 at 09:35:27PM -0700, Reinette Chatre wrote:
> >>>>>> This portion below was also copied from previous test and by only
> >>>>>> testing a write to the first page of the range the purpose is not
> >>>>>> clear. Could you please elaborate if the intention is to only test
> >>>>>> accessibility of the first page and why that is sufficient?
> >>>>>
> >>>>> It is sufficient because the test reproduces the bug. It would have to be
> >>>>> rather elaborated why you would possibly want to do more than that.
> >>>
> >>> That is fair. An accurate comment (currently an inaccurate copy&paste) would
> >>> help to explain this part of the test.
> >>
> >> I would simply add something like:
> >>
> >> /* 
> >>  * Define memory pool size big enough to trigger the reclaimer in the EAUG
> >>  * path of the page reclaimer.
> >>  */
> >>
> >> Suggestions/edits obviously welcome for the comment.
> 
> The comment seems to better match the code below than the area referred to above:
>         static const unsigned long edmm_size = 8589934592; //8G
> 
> Even so, I think that raises the point that this is platform specific since
> edmm_size of 8GB would not trigger reclaimer on all platforms.
> 
> How about adjusting it to:
> /*
>  * Define memory pool size big enough to trigger the reclaimer in the EAUG
>  * path of the page reclaimer on some platforms. This constant has been
>  * successful in triggering a bug on some platforms (independent of the
>  * platforms where the reclaimer is triggered) and thus considered
>  * appropriate for general use.
>  */ 
> 
> 
> Regarding the area referred to above, a comment like below may help:
> 
> /*
>  * Pool of pages were successfully added to enclave. Perform sanity
>  * check on first page of the pool only to ensure data can be written
>  * to and read from a dynamically added enclave page.
>  */
> 
> > 
> > I wonder if we could put .bt files somewhere to make them available. In
> > root causing this bug bpftrace scripting was the key so it would be nice to
> > have them available along with kselftest.
> > 
> > I could imagine that we will also run into allocation bugs in the future,
> > so it would be good to have the script available when you clone the kernel
> > tree, and possibly more scripts in future.
> > 
> > E.g. add bt/alloc-error.bt under tools/testing/selftests/sgx.
> 
> Thank you very much for helping to debug this issue. I also think
> the scripts you created are very valuable and making them easily
> accessible sounds great.

Yeah, I mean they do no harm there, even if not directly used
by the test program.

Thanks for the valuable feedback. I will incorporate it into the
next version.

BR, Jarkko
Jarkko Sakkinen Aug. 25, 2022, 1:28 a.m. UTC | #13
On Wed, Aug 17, 2022 at 03:39:36PM +0000, Dhanraj, Vijay wrote:
> 
> 
> > -----Original Message-----
> > From: Jarkko Sakkinen <jarkko@kernel.org>
> > Sent: Wednesday, August 17, 2022 7:39 AM
> > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > Cc: Chatre, Reinette <reinette.chatre@intel.com>; Dave Hansen
> > <dave.hansen@linux.intel.com>; linux-sgx@vger.kernel.org; Shuah Khan
> > <shuah@kernel.org>; open list:KERNEL SELFTEST FRAMEWORK <linux-
> > kselftest@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
> > Subject: Re: [PATCH 2/2] selftests/sgx: Add SGX selftest
> > augment_via_eaccept_long
> > 
> > On Wed, Aug 17, 2022 at 01:27:38AM +0000, Dhanraj, Vijay wrote:
> > > I think changing it to "int i" will cause a buffer overflow with
> > > edmm_size being 8GB.
> > 
> > Hmm.. 'i' iterates segments. Am I missing something?
> > 
> > BR, Jarkko
> 
> It is also used when iterating over pages to eaccept. This might cause an issue.
> 
> 	for (i = 0; i < edmm_size; i += 4096) {
> 		eaccept_op.epc_addr = (uint64_t)(addr + i);
> 
> Regards, Vijay

Ah got it, thanks.

BR, Jarkko
diff mbox series

Patch

diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 94bdeac1cf04..7de1b15c90b1 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -171,7 +171,8 @@  uint64_t encl_get_entry(struct encl *encl, const char *symbol)
 	return 0;
 }
 
-bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
+			   unsigned long edmm_size)
 {
 	const char device_path[] = "/dev/sgx_enclave";
 	struct encl_segment *seg;
@@ -300,7 +301,7 @@  bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
 
 	encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
 
-	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
+	for (encl->encl_size = 4096; encl->encl_size < encl->src_size + edmm_size;)
 		encl->encl_size <<= 1;
 
 	return true;
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 9820b3809c69..65e79682f75e 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -25,6 +25,8 @@  static const uint64_t MAGIC = 0x1122334455667788ULL;
 static const uint64_t MAGIC2 = 0x8877665544332211ULL;
 vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
 
+static const unsigned long edmm_size = 8589934592; //8G
+
 /*
  * Security Information (SECINFO) data structure needed by a few SGX
  * instructions (eg. ENCLU[EACCEPT] and ENCLU[EMODPE]) holds meta-data
@@ -183,7 +185,7 @@  static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	unsigned int i;
 	void *addr;
 
-	if (!encl_load("test_encl.elf", encl, heap_size)) {
+	if (!encl_load("test_encl.elf", encl, heap_size, edmm_size)) {
 		encl_delete(encl);
 		TH_LOG("Failed to load the test enclave.");
 		return false;
@@ -1210,6 +1212,122 @@  TEST_F(enclave, augment_via_eaccept)
 	munmap(addr, PAGE_SIZE);
 }
 
+/*
+ * Test for the addition of large number of pages to an initialized enclave via
+ * a pre-emptive run of EACCEPT on page to be added.
+ */
+#define TIMEOUT_LONG 900 /* seconds */
+TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_LONG)
+{
+	struct encl_op_get_from_addr get_addr_op;
+	struct encl_op_put_to_addr put_addr_op;
+	struct encl_op_eaccept eaccept_op;
+	size_t total_size = 0;
+	void *addr;
+	unsigned long i;
+
+	if (!sgx2_supported())
+		SKIP(return, "SGX2 not supported");
+
+	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+	memset(&self->run, 0, sizeof(self->run));
+	self->run.tcs = self->encl.encl_base;
+
+	for (i = 0; i < self->encl.nr_segments; i++) {
+		struct encl_segment *seg = &self->encl.segment_tbl[i];
+
+		total_size += seg->size;
+		TH_LOG("test enclave: total_size = %ld, seg->size = %ld", total_size, seg->size);
+	}
+
+	/*
+	 * Actual enclave size is expected to be larger than the loaded
+	 * test enclave since enclave size must be a power of 2 in bytes while
+	 * test_encl does not consume it all.
+	 */
+	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
+
+	/*
+	 * mmap() a page at end of existing enclave to be used for dynamic
+	 * EPC page.
+	 *
+	 * Kernel will allow new mapping using any permissions if it
+	 * falls into the enclave's address range but not backed
+	 * by existing enclave pages.
+	 */
+	TH_LOG("mmaping pages at end of enclave...");
+	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
+			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
+			self->encl.fd, 0);
+	EXPECT_NE(addr, MAP_FAILED);
+
+	self->run.exception_vector = 0;
+	self->run.exception_error_code = 0;
+	self->run.exception_addr = 0;
+
+	/*
+	 * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
+	 * without a #PF). All should be transparent to userspace.
+	 */
+	TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
+			edmm_size);
+	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
+	eaccept_op.ret = 0;
+	eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+	for (i = 0; i < edmm_size; i += 4096) {
+		eaccept_op.epc_addr = (uint64_t)(addr + i);
+
+		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+		if (self->run.exception_vector == 14 &&
+			self->run.exception_error_code == 4 &&
+			self->run.exception_addr == self->encl.encl_base) {
+			munmap(addr, edmm_size);
+			SKIP(return, "Kernel does not support adding pages to initialized enclave");
+		}
+
+		EXPECT_EQ(self->run.exception_vector, 0);
+		EXPECT_EQ(self->run.exception_error_code, 0);
+		EXPECT_EQ(self->run.exception_addr, 0);
+		ASSERT_EQ(eaccept_op.ret, 0);
+		ASSERT_EQ(self->run.function, EEXIT);
+	}
+
+	/*
+	 * New page should be accessible from within enclave - attempt to
+	 * write to it.
+	 */
+	put_addr_op.value = MAGIC;
+	put_addr_op.addr = (unsigned long)addr;
+	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+	EXPECT_EEXIT(&self->run);
+	EXPECT_EQ(self->run.exception_vector, 0);
+	EXPECT_EQ(self->run.exception_error_code, 0);
+	EXPECT_EQ(self->run.exception_addr, 0);
+
+	/*
+	 * Read memory from newly added page that was just written to,
+	 * confirming that data previously written (MAGIC) is present.
+	 */
+	get_addr_op.value = 0;
+	get_addr_op.addr = (unsigned long)addr;
+	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+	EXPECT_EQ(get_addr_op.value, MAGIC);
+	EXPECT_EEXIT(&self->run);
+	EXPECT_EQ(self->run.exception_vector, 0);
+	EXPECT_EQ(self->run.exception_error_code, 0);
+	EXPECT_EQ(self->run.exception_addr, 0);
+
+	munmap(addr, edmm_size);
+}
+
 /*
  * SGX2 page type modification test in two phases:
  * Phase 1:
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index fc585be97e2f..fe5d39ac0e1e 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -35,7 +35,8 @@  extern unsigned char sign_key[];
 extern unsigned char sign_key_end[];
 
 void encl_delete(struct encl *ctx);
-bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
+			   unsigned long edmm_size);
 bool encl_measure(struct encl *encl);
 bool encl_build(struct encl *encl);
 uint64_t encl_get_entry(struct encl *encl, const char *symbol);
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index 50c5ab1aa6fa..6000cf0e4975 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -343,7 +343,7 @@  bool encl_measure(struct encl *encl)
 	if (!ctx)
 		goto err;
 
-	if (!mrenclave_ecreate(ctx, encl->src_size))
+	if (!mrenclave_ecreate(ctx, encl->encl_size))
 		goto err;
 
 	for (i = 0; i < encl->nr_segments; i++) {