diff mbox series

cxl/test: Enhance event testing

Message ID 20240401-enhance-event-test-v1-1-6669a524ed38@intel.com
State Accepted
Headers show
Series cxl/test: Enhance event testing | expand

Commit Message

Ira Weiny April 2, 2024, 5:31 a.m. UTC
An issue was found in the processing of event logs when the output
buffer length was not reset.[1]

This bug was not caught with cxl-test for 2 reasons.  First, the test
harness mbox_send command [mock_get_event()] does not set the output
size based on the amount of data returned like the hardware command
does.  Second, the simplistic event log testing always returned the same
number of elements per-get command.

Enhance the simulation of the event log mailbox to better match the bug
found with real hardware to cover potential regressions.

NOTE: These changes will cause cxl-events.sh in ndctl to fail without
the fix from Kwangjin.  However, no changes to the user space test was
required.  Therefore ndctl itself will be compatible with old or new
kernels once both patches land in the new kernel.

[1] Link: https://lore.kernel.org/all/20240401091057.1044-1-kwangjin.ko@sk.com/

Cc: Kwangjin Ko <kwangjin.ko@sk.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)


---
base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
change-id: 20240401-enhance-event-test-cebeff5189cd

Best regards,

Comments

Jonathan Cameron April 3, 2024, 3:41 p.m. UTC | #1
On Mon, 01 Apr 2024 22:31:58 -0700
Ira Weiny <ira.weiny@intel.com> wrote:

> An issue was found in the processing of event logs when the output
> buffer length was not reset.[1]
> 
> This bug was not caught with cxl-test for 2 reasons.  First, the test
> harness mbox_send command [mock_get_event()] does not set the output
> size based on the amount of data returned like the hardware command
> does.  Second, the simplistic event log testing always returned the same
> number of elements per-get command.
> 
> Enhance the simulation of the event log mailbox to better match the bug
> found with real hardware to cover potential regressions.
> 
> NOTE: These changes will cause cxl-events.sh in ndctl to fail without
> the fix from Kwangjin.  However, no changes to the user space test was
> required.  Therefore ndctl itself will be compatible with old or new
> kernels once both patches land in the new kernel.

Good to state how many events are read out (22 I think) as that allows
reader of this patch to see that it will cycle all the way around, so we
will get the 4 to 1 reduction between two reads ensuring things work
correctly when smaller number of events are returned.

1, 2, 3, 4, 1, 2, 3, 4, 1, 1 
I think.
> 
> [1] Link: https://lore.kernel.org/all/20240401091057.1044-1-kwangjin.ko@sk.com/
> 
> Cc: Kwangjin Ko <kwangjin.ko@sk.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>


> ---
>  tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
>  1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
> index 35ee41e435ab..6584443144de 100644
> --- a/tools/testing/cxl/test/mem.c
> +++ b/tools/testing/cxl/test/mem.c
> @@ -127,7 +127,7 @@ static struct {
>  #define CXL_TEST_EVENT_CNT_MAX 15
>  
>  /* Set a number of events to return at a time for simulation.  */
> -#define CXL_TEST_EVENT_CNT 3
> +#define CXL_TEST_EVENT_RET_MAX 4
>  
>  struct mock_event_log {
>  	u16 clear_idx;
> @@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes,
>  	log->nr_events++;
>  }
>  
> +/*
> + * Vary the number of events returned to simulate events occuring while the
> + * logs are being read.
> + */
> +static int ret_limit = 0;
> +
>  static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
>  {
>  	struct cxl_get_event_payload *pl;
> @@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
>  	if (cmd->size_in != sizeof(log_type))
>  		return -EINVAL;
>  
> -	if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
> +	ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
> +	if (!ret_limit)
> +		ret_limit = 1;
> +
> +	if (cmd->size_out < struct_size(pl, records, ret_limit))
>  		return -EINVAL;
>  
>  	log_type = *((u8 *)cmd->payload_in);
>  	if (log_type >= CXL_EVENT_TYPE_MAX)
>  		return -EINVAL;
>  
> -	memset(cmd->payload_out, 0, cmd->size_out);
> +	memset(cmd->payload_out, 0, struct_size(pl, records, 0));
>  
>  	log = event_find_log(dev, log_type);
>  	if (!log || event_log_empty(log))
> @@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
>  
>  	pl = cmd->payload_out;
>  
> -	for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
> +	for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
>  		memcpy(&pl->records[i], event_get_current(log),
>  		       sizeof(pl->records[i]));
>  		pl->records[i].event.generic.hdr.handle =
> @@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
>  		log->cur_idx++;
>  	}
>  
> +	cmd->size_out = struct_size(pl, records, i);
>  	pl->record_count = cpu_to_le16(i);
>  	if (!event_log_empty(log))
>  		pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
> 
> ---
> base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
> change-id: 20240401-enhance-event-test-cebeff5189cd
> 
> Best regards,
Ira Weiny April 4, 2024, 8:18 p.m. UTC | #2
Jonathan Cameron wrote:
> On Mon, 01 Apr 2024 22:31:58 -0700
> Ira Weiny <ira.weiny@intel.com> wrote:
> 
> > An issue was found in the processing of event logs when the output
> > buffer length was not reset.[1]
> > 
> > This bug was not caught with cxl-test for 2 reasons.  First, the test
> > harness mbox_send command [mock_get_event()] does not set the output
> > size based on the amount of data returned like the hardware command
> > does.  Second, the simplistic event log testing always returned the same
> > number of elements per-get command.
> > 
> > Enhance the simulation of the event log mailbox to better match the bug
> > found with real hardware to cover potential regressions.
> > 
> > NOTE: These changes will cause cxl-events.sh in ndctl to fail without
> > the fix from Kwangjin.  However, no changes to the user space test was
> > required.  Therefore ndctl itself will be compatible with old or new
> > kernels once both patches land in the new kernel.
> 
> Good to state how many events are read out (22 I think) as that allows
> reader of this patch to see that it will cycle all the way around, so we
> will get the 4 to 1 reduction between two reads ensuring things work
> correctly when smaller number of events are returned.

It is actually:

num_overflow_expected=1
num_fatal_expected=2
num_failure_expected=16
num_info_expected=3

...because each log is read separately.  But the 16 failures is what tests
this.

> 
> 1, 2, 3, 4, 1, 2, 3, 4, 1, 1 
> I think.

1, 2, 3

> > 
> > [1] Link: https://lore.kernel.org/all/20240401091057.1044-1-kwangjin.ko@sk.com/
> > 
> > Cc: Kwangjin Ko <kwangjin.ko@sk.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks, Dave do you want me to reroll with an updated commit message?

Ira

> 
> 
> > ---
> >  tools/testing/cxl/test/mem.c | 19 +++++++++++++++----
> >  1 file changed, 15 insertions(+), 4 deletions(-)
> > 
> > diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
> > index 35ee41e435ab..6584443144de 100644
> > --- a/tools/testing/cxl/test/mem.c
> > +++ b/tools/testing/cxl/test/mem.c
> > @@ -127,7 +127,7 @@ static struct {
> >  #define CXL_TEST_EVENT_CNT_MAX 15
> >  
> >  /* Set a number of events to return at a time for simulation.  */
> > -#define CXL_TEST_EVENT_CNT 3
> > +#define CXL_TEST_EVENT_RET_MAX 4
> >  
> >  struct mock_event_log {
> >  	u16 clear_idx;
> > @@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes,
> >  	log->nr_events++;
> >  }
> >  
> > +/*
> > + * Vary the number of events returned to simulate events occuring while the
> > + * logs are being read.
> > + */
> > +static int ret_limit = 0;
> > +
> >  static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> >  {
> >  	struct cxl_get_event_payload *pl;
> > @@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> >  	if (cmd->size_in != sizeof(log_type))
> >  		return -EINVAL;
> >  
> > -	if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
> > +	ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
> > +	if (!ret_limit)
> > +		ret_limit = 1;
> > +
> > +	if (cmd->size_out < struct_size(pl, records, ret_limit))
> >  		return -EINVAL;
> >  
> >  	log_type = *((u8 *)cmd->payload_in);
> >  	if (log_type >= CXL_EVENT_TYPE_MAX)
> >  		return -EINVAL;
> >  
> > -	memset(cmd->payload_out, 0, cmd->size_out);
> > +	memset(cmd->payload_out, 0, struct_size(pl, records, 0));
> >  
> >  	log = event_find_log(dev, log_type);
> >  	if (!log || event_log_empty(log))
> > @@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> >  
> >  	pl = cmd->payload_out;
> >  
> > -	for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
> > +	for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
> >  		memcpy(&pl->records[i], event_get_current(log),
> >  		       sizeof(pl->records[i]));
> >  		pl->records[i].event.generic.hdr.handle =
> > @@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
> >  		log->cur_idx++;
> >  	}
> >  
> > +	cmd->size_out = struct_size(pl, records, i);
> >  	pl->record_count = cpu_to_le16(i);
> >  	if (!event_log_empty(log))
> >  		pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
> > 
> > ---
> > base-commit: 8d025e2092e29bfd13e56c78e22af25fac83c8ec
> > change-id: 20240401-enhance-event-test-cebeff5189cd
> > 
> > Best regards,
>
diff mbox series

Patch

diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 35ee41e435ab..6584443144de 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -127,7 +127,7 @@  static struct {
 #define CXL_TEST_EVENT_CNT_MAX 15
 
 /* Set a number of events to return at a time for simulation.  */
-#define CXL_TEST_EVENT_CNT 3
+#define CXL_TEST_EVENT_RET_MAX 4
 
 struct mock_event_log {
 	u16 clear_idx;
@@ -222,6 +222,12 @@  static void mes_add_event(struct mock_event_store *mes,
 	log->nr_events++;
 }
 
+/*
+ * Vary the number of events returned to simulate events occuring while the
+ * logs are being read.
+ */
+static int ret_limit = 0;
+
 static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 {
 	struct cxl_get_event_payload *pl;
@@ -233,14 +239,18 @@  static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 	if (cmd->size_in != sizeof(log_type))
 		return -EINVAL;
 
-	if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
+	ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
+	if (!ret_limit)
+		ret_limit = 1;
+
+	if (cmd->size_out < struct_size(pl, records, ret_limit))
 		return -EINVAL;
 
 	log_type = *((u8 *)cmd->payload_in);
 	if (log_type >= CXL_EVENT_TYPE_MAX)
 		return -EINVAL;
 
-	memset(cmd->payload_out, 0, cmd->size_out);
+	memset(cmd->payload_out, 0, struct_size(pl, records, 0));
 
 	log = event_find_log(dev, log_type);
 	if (!log || event_log_empty(log))
@@ -248,7 +258,7 @@  static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 
 	pl = cmd->payload_out;
 
-	for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
+	for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
 		memcpy(&pl->records[i], event_get_current(log),
 		       sizeof(pl->records[i]));
 		pl->records[i].event.generic.hdr.handle =
@@ -256,6 +266,7 @@  static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 		log->cur_idx++;
 	}
 
+	cmd->size_out = struct_size(pl, records, i);
 	pl->record_count = cpu_to_le16(i);
 	if (!event_log_empty(log))
 		pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;