diff mbox series

cxl/core: Fix potential payload size confusion in cxl_mem_get_poison()

Message ID 171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com
State Accepted
Commit 4b759dd5765503bd466defac7d93aca14c23a15d
Headers show
Series cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() | expand

Commit Message

Dan Williams April 5, 2024, 10 p.m. UTC
A recent change to cxl_mem_get_records_log() [1] highlighted a subtle
nuance of looping calls to cxl_internal_send_cmd(), i.e. that
cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd
argument. That mechanism is useful for communicating underflow, but it
is unwanted when reusing @mbox_cmd for a subsequent submission. It turns
out that cxl_xfer_log() avoids this scenario by always redefining
@mbox_cmd each iteration.

Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the
same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration.
The cxl_mem_get_records_log() change is just a style fixup, but the
cxl_mem_get_poison() change is a potential fix, per Alison [2]:

    Poison list retrieval can hit this case if the MORE flag is set and
    a follow-on read of the list delivers more records than the previous
    read, i.e. the device gives one record, sets the _MORE flag, then gives 5.

Not an urgent fix since this behavior has not been seen in the wild,
but worth tracking as a fix.

Cc: Kwangjin Ko <kwangjin.ko@sk.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command")
Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1]
Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/mbox.c |   38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

Comments

Alison Schofield April 5, 2024, 11:59 p.m. UTC | #1
On Fri, Apr 05, 2024 at 03:00:16PM -0700, Dan Williams wrote:
> A recent change to cxl_mem_get_records_log() [1] highlighted a subtle
> nuance of looping calls to cxl_internal_send_cmd(), i.e. that
> cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd
> argument. That mechanism is useful for communicating underflow, but it
> is unwanted when reusing @mbox_cmd for a subsequent submission. It turns
> out that cxl_xfer_log() avoids this scenario by always redefining
> @mbox_cmd each iteration.
> 
> Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the
> same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration.
> The cxl_mem_get_records_log() change is just a style fixup, but the
> cxl_mem_get_poison() change is a potential fix, per Alison [2]:
> 
>     Poison list retrieval can hit this case if the MORE flag is set and
>     a follow-on read of the list delivers more records than the previous
>     read, i.e. the device gives one record, sets the _MORE flag, then gives 5.
> 
> Not an urgent fix since this behavior has not been seen in the wild,
> but worth tracking as a fix.
> 
> Cc: Kwangjin Ko <kwangjin.ko@sk.com>
> Cc: Alison Schofield <alison.schofield@intel.com>
> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command")
> Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1]
> Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2]
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Thanks Dan -

Reviewed-by: Alison Schofield <alison.schofield@intel.com>


> ---
>  drivers/cxl/core/mbox.c |   38 +++++++++++++++++---------------------
>  1 file changed, 17 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index f0f54aeccc87..65185c9fa001 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -946,25 +946,22 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
>  	struct device *dev = mds->cxlds.dev;
>  	struct cxl_get_event_payload *payload;
> -	struct cxl_mbox_cmd mbox_cmd;
>  	u8 log_type = type;
>  	u16 nr_rec;
>  
>  	mutex_lock(&mds->event.log_lock);
>  	payload = mds->event.buf;
>  
> -	mbox_cmd = (struct cxl_mbox_cmd) {
> -		.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
> -		.payload_in = &log_type,
> -		.size_in = sizeof(log_type),
> -		.payload_out = payload,
> -		.min_out = struct_size(payload, records, 0),
> -	};
> -
>  	do {
>  		int rc, i;
> -
> -		mbox_cmd.size_out = mds->payload_size;
> +		struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
> +			.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
> +			.payload_in = &log_type,
> +			.size_in = sizeof(log_type),
> +			.payload_out = payload,
> +			.size_out = mds->payload_size,
> +			.min_out = struct_size(payload, records, 0),
> +		};
>  
>  		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
>  		if (rc) {
> @@ -1297,7 +1294,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
>  	struct cxl_mbox_poison_out *po;
>  	struct cxl_mbox_poison_in pi;
> -	struct cxl_mbox_cmd mbox_cmd;
>  	int nr_records = 0;
>  	int rc;
>  
> @@ -1309,16 +1305,16 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>  	pi.offset = cpu_to_le64(offset);
>  	pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
>  
> -	mbox_cmd = (struct cxl_mbox_cmd) {
> -		.opcode = CXL_MBOX_OP_GET_POISON,
> -		.size_in = sizeof(pi),
> -		.payload_in = &pi,
> -		.size_out = mds->payload_size,
> -		.payload_out = po,
> -		.min_out = struct_size(po, record, 0),
> -	};
> -
>  	do {
> +		struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){
> +			.opcode = CXL_MBOX_OP_GET_POISON,
> +			.size_in = sizeof(pi),
> +			.payload_in = &pi,
> +			.size_out = mds->payload_size,
> +			.payload_out = po,
> +			.min_out = struct_size(po, record, 0),
> +		};
> +
>  		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
>  		if (rc)
>  			break;
>
Ira Weiny April 20, 2024, 12:09 a.m. UTC | #2
Dan Williams wrote:
> A recent change to cxl_mem_get_records_log() [1] highlighted a subtle
> nuance of looping calls to cxl_internal_send_cmd(), i.e. that
> cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd
> argument. That mechanism is useful for communicating underflow, but it
> is unwanted when reusing @mbox_cmd for a subsequent submission. It turns
> out that cxl_xfer_log() avoids this scenario by always redefining
> @mbox_cmd each iteration.
> 
> Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the
> same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration.
> The cxl_mem_get_records_log() change is just a style fixup, but the
> cxl_mem_get_poison() change is a potential fix, per Alison [2]:
> 
>     Poison list retrieval can hit this case if the MORE flag is set and
>     a follow-on read of the list delivers more records than the previous
>     read, i.e. the device gives one record, sets the _MORE flag, then gives 5.
> 
> Not an urgent fix since this behavior has not been seen in the wild,
> but worth tracking as a fix.
> 
> Cc: Kwangjin Ko <kwangjin.ko@sk.com>
> Cc: Alison Schofield <alison.schofield@intel.com>
> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command")
> Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1]
> Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2]
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index f0f54aeccc87..65185c9fa001 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -946,25 +946,22 @@  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
 	struct device *dev = mds->cxlds.dev;
 	struct cxl_get_event_payload *payload;
-	struct cxl_mbox_cmd mbox_cmd;
 	u8 log_type = type;
 	u16 nr_rec;
 
 	mutex_lock(&mds->event.log_lock);
 	payload = mds->event.buf;
 
-	mbox_cmd = (struct cxl_mbox_cmd) {
-		.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
-		.payload_in = &log_type,
-		.size_in = sizeof(log_type),
-		.payload_out = payload,
-		.min_out = struct_size(payload, records, 0),
-	};
-
 	do {
 		int rc, i;
-
-		mbox_cmd.size_out = mds->payload_size;
+		struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
+			.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
+			.payload_in = &log_type,
+			.size_in = sizeof(log_type),
+			.payload_out = payload,
+			.size_out = mds->payload_size,
+			.min_out = struct_size(payload, records, 0),
+		};
 
 		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 		if (rc) {
@@ -1297,7 +1294,6 @@  int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_poison_out *po;
 	struct cxl_mbox_poison_in pi;
-	struct cxl_mbox_cmd mbox_cmd;
 	int nr_records = 0;
 	int rc;
 
@@ -1309,16 +1305,16 @@  int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 	pi.offset = cpu_to_le64(offset);
 	pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
 
-	mbox_cmd = (struct cxl_mbox_cmd) {
-		.opcode = CXL_MBOX_OP_GET_POISON,
-		.size_in = sizeof(pi),
-		.payload_in = &pi,
-		.size_out = mds->payload_size,
-		.payload_out = po,
-		.min_out = struct_size(po, record, 0),
-	};
-
 	do {
+		struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){
+			.opcode = CXL_MBOX_OP_GET_POISON,
+			.size_in = sizeof(pi),
+			.payload_in = &pi,
+			.size_out = mds->payload_size,
+			.payload_out = po,
+			.min_out = struct_size(po, record, 0),
+		};
+
 		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 		if (rc)
 			break;