diff mbox series

[v2,3/4] cxl: Add extended linear cache address alias emission for cxl events

Message ID 20250110151913.3462283-4-dave.jiang@intel.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series acpi/hmat / cxl: Add exclusive caching enumeration and RAS support | expand

Commit Message

Dave Jiang Jan. 10, 2025, 3:17 p.m. UTC
Add the aliased address of extended linear cache when emitting event
trace for DRAM and general media of CXL events.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
v2:
- Emit hpa_alias0 instead of hpa_alias. (Jonathan)
- Check valid cxlr before dereference. (Jonathan)
---
 drivers/cxl/core/core.h   |  5 +++++
 drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
 drivers/cxl/core/region.c | 12 ++++++++++++
 drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
 4 files changed, 62 insertions(+), 12 deletions(-)

Comments

Jonathan Cameron Jan. 10, 2025, 4:32 p.m. UTC | #1
On Fri, 10 Jan 2025 08:17:46 -0700
Dave Jiang <dave.jiang@intel.com> wrote:

> Add the aliased address of extended linear cache when emitting event
> trace for DRAM and general media of CXL events.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
I think the value set when there isn't a value differs between no
cache and no region. That seems an odd bit of ABI.  Maybe go with ~0ULL in
both cases?

Other than tidying that up this looks fine to me.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
> v2:
> - Emit hpa_alias0 instead of hpa_alias. (Jonathan)
> - Check valid cxlr before dereference. (Jonathan)
> ---
>  drivers/cxl/core/core.h   |  5 +++++
>  drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
>  drivers/cxl/core/region.c | 12 ++++++++++++
>  drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
>  4 files changed, 62 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 0fb779b612d1..afbefc72c8fa 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -30,8 +30,13 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  		   u64 dpa);
> +int cxl_region_nid(struct cxl_region *cxlr);
>  
>  #else
> +static inline int cxl_region_nid(struct cxl_region *cxlr)
> +{
> +	return NUMA_NO_NODE;
> +}
>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
>  				 const struct cxl_memdev *cxlmd, u64 dpa)
>  {
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 548564c770c0..d7999260f004 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -856,6 +856,28 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
>  
> +static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
> +{
> +	struct cxl_region_params *p;
> +	int nid;
> +
> +	if (!cxlr)
> +		return ~0ULL;

> +	p = &cxlr->params;
> +	if (!p->cache_size)
> +		return ~0ULL;

So no cache is all 1s. .. See below.

> +
> +	nid = cxl_region_nid(cxlr);
> +	if (nid == NUMA_NO_NODE)
> +		nid = 0;
> +
> +	if (hpa >= p->res->start + p->cache_size)
> +		return hpa - p->cache_size;
> +
> +	return hpa + p->cache_size;
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -871,7 +893,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  	}
>  
>  	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
> -		u64 dpa, hpa = ULLONG_MAX;
> +		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;

If I read this right if there is a region but no alias then the hpa_alias is
~0ULL but if there no region it is 0.  Seems a tiny bit inconsistent.

>  		struct cxl_region *cxlr;
>  
>  		/*
> @@ -884,14 +906,17 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  
>  		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
>  		cxlr = cxl_dpa_to_region(cxlmd, dpa);
> -		if (cxlr)
> +		if (cxlr) {
>  			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
> +			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
> +		}
>  
>  		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
>  			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
> -						&evt->gen_media);
> +						hpa_alias, &evt->gen_media);
>  		else if (event_type == CXL_CPER_EVENT_DRAM)
> -			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
> +			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
> +				       &evt->dram);
>  	}
>  }
Dave Jiang Jan. 10, 2025, 5:27 p.m. UTC | #2
On 1/10/25 9:32 AM, Jonathan Cameron wrote:
> On Fri, 10 Jan 2025 08:17:46 -0700
> Dave Jiang <dave.jiang@intel.com> wrote:
> 
>> Add the aliased address of extended linear cache when emitting event
>> trace for DRAM and general media of CXL events.
>>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> I think the value set when there isn't a value differs between no
> cache and no region. That seems an odd bit of ABI.  Maybe go with ~0ULL in
> both cases?

Not sure I follow. Currently both cases do return ~0ULL.

DJ

> 
> Other than tidying that up this looks fine to me.
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
>> ---
>> v2:
>> - Emit hpa_alias0 instead of hpa_alias. (Jonathan)
>> - Check valid cxlr before dereference. (Jonathan)
>> ---
>>  drivers/cxl/core/core.h   |  5 +++++
>>  drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
>>  drivers/cxl/core/region.c | 12 ++++++++++++
>>  drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
>>  4 files changed, 62 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 0fb779b612d1..afbefc72c8fa 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -30,8 +30,13 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>>  		   u64 dpa);
>> +int cxl_region_nid(struct cxl_region *cxlr);
>>  
>>  #else
>> +static inline int cxl_region_nid(struct cxl_region *cxlr)
>> +{
>> +	return NUMA_NO_NODE;
>> +}
>>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
>>  				 const struct cxl_memdev *cxlmd, u64 dpa)
>>  {
>> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
>> index 548564c770c0..d7999260f004 100644
>> --- a/drivers/cxl/core/mbox.c
>> +++ b/drivers/cxl/core/mbox.c
>> @@ -856,6 +856,28 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>>  }
>>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
>>  
>> +static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
>> +{
>> +	struct cxl_region_params *p;
>> +	int nid;
>> +
>> +	if (!cxlr)
>> +		return ~0ULL;
> 
>> +	p = &cxlr->params;
>> +	if (!p->cache_size)
>> +		return ~0ULL;
> 
> So no cache is all 1s. .. See below.
> 
>> +
>> +	nid = cxl_region_nid(cxlr);
>> +	if (nid == NUMA_NO_NODE)
>> +		nid = 0;
>> +
>> +	if (hpa >= p->res->start + p->cache_size)
>> +		return hpa - p->cache_size;
>> +
>> +	return hpa + p->cache_size;
>> +}
>> +
>>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>  			    enum cxl_event_log_type type,
>>  			    enum cxl_event_type event_type,
>> @@ -871,7 +893,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>  	}
>>  
>>  	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
>> -		u64 dpa, hpa = ULLONG_MAX;
>> +		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;
> 
> If I read this right if there is a region but no alias then the hpa_alias is
> ~0ULL but if there no region it is 0.  Seems a tiny bit inconsistent.
> 
>>  		struct cxl_region *cxlr;
>>  
>>  		/*
>> @@ -884,14 +906,17 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>  
>>  		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
>>  		cxlr = cxl_dpa_to_region(cxlmd, dpa);
>> -		if (cxlr)
>> +		if (cxlr) {
>>  			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
>> +			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
>> +		}
>>  
>>  		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
>>  			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
>> -						&evt->gen_media);
>> +						hpa_alias, &evt->gen_media);
>>  		else if (event_type == CXL_CPER_EVENT_DRAM)
>> -			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
>> +			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
>> +				       &evt->dram);
>>  	}
>>  }
> 
>
Dave Jiang Jan. 10, 2025, 5:29 p.m. UTC | #3
On 1/10/25 8:17 AM, Dave Jiang wrote:
> Add the aliased address of extended linear cache when emitting event
> trace for DRAM and general media of CXL events.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> v2:
> - Emit hpa_alias0 instead of hpa_alias. (Jonathan)
> - Check valid cxlr before dereference. (Jonathan)
> ---
>  drivers/cxl/core/core.h   |  5 +++++
>  drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
>  drivers/cxl/core/region.c | 12 ++++++++++++
>  drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
>  4 files changed, 62 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 0fb779b612d1..afbefc72c8fa 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -30,8 +30,13 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  		   u64 dpa);
> +int cxl_region_nid(struct cxl_region *cxlr);
>  
>  #else
> +static inline int cxl_region_nid(struct cxl_region *cxlr)
> +{
> +	return NUMA_NO_NODE;
> +}
>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
>  				 const struct cxl_memdev *cxlmd, u64 dpa)
>  {
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 548564c770c0..d7999260f004 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -856,6 +856,28 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
>  
> +static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
> +{
> +	struct cxl_region_params *p;
> +	int nid;
> +
> +	if (!cxlr)
> +		return ~0ULL;
> +
> +	p = &cxlr->params;
> +	if (!p->cache_size)
> +		return ~0ULL;
> +
> +	nid = cxl_region_nid(cxlr);
> +	if (nid == NUMA_NO_NODE)
> +		nid = 0;

Just noticed that this chunk and the related function can be dropped. Not used.

DJ


> +
> +	if (hpa >= p->res->start + p->cache_size)
> +		return hpa - p->cache_size;
> +
> +	return hpa + p->cache_size;
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -871,7 +893,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  	}
>  
>  	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
> -		u64 dpa, hpa = ULLONG_MAX;
> +		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;
>  		struct cxl_region *cxlr;
>  
>  		/*
> @@ -884,14 +906,17 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  
>  		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
>  		cxlr = cxl_dpa_to_region(cxlmd, dpa);
> -		if (cxlr)
> +		if (cxlr) {
>  			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
> +			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
> +		}
>  
>  		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
>  			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
> -						&evt->gen_media);
> +						hpa_alias, &evt->gen_media);
>  		else if (event_type == CXL_CPER_EVENT_DRAM)
> -			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
> +			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
> +				       &evt->dram);
>  	}
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL");
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 2d8699a86b24..5d23bd26d9ba 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2417,6 +2417,18 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  	return true;
>  }
>  
> +int cxl_region_nid(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct resource *res;
> +
> +	guard(rwsem_read)(&cxl_region_rwsem);
> +	res = p->res;
> +	if (!res)
> +		return NUMA_NO_NODE;
> +	return phys_to_target_node(res->start);
> +}
> +
>  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  					  unsigned long action, void *arg)
>  {
> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> index 8389a94adb1a..257f60f16e4c 100644
> --- a/drivers/cxl/core/trace.h
> +++ b/drivers/cxl/core/trace.h
> @@ -316,9 +316,10 @@ TRACE_EVENT(cxl_generic_event,
>  TRACE_EVENT(cxl_general_media,
>  
>  	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
> -		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec),
> +		 struct cxl_region *cxlr, u64 hpa, u64 hpa_alias,
> +		 struct cxl_event_gen_media *rec),
>  
> -	TP_ARGS(cxlmd, log, cxlr, hpa, rec),
> +	TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias, rec),
>  
>  	TP_STRUCT__entry(
>  		CXL_EVT_TP_entry
> @@ -332,6 +333,7 @@ TRACE_EVENT(cxl_general_media,
>  		__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
>  		/* Following are out of order to pack trace record */
>  		__field(u64, hpa)
> +		__field(u64, hpa_alias)
>  		__field_struct(uuid_t, region_uuid)
>  		__field(u16, validity_flags)
>  		__field(u8, rank)
> @@ -358,6 +360,7 @@ TRACE_EVENT(cxl_general_media,
>  			CXL_EVENT_GEN_MED_COMP_ID_SIZE);
>  		__entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags);
>  		__entry->hpa = hpa;
> +		__entry->hpa_alias = hpa_alias;
>  		if (cxlr) {
>  			__assign_str(region_name);
>  			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
> @@ -370,7 +373,7 @@ TRACE_EVENT(cxl_general_media,
>  	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
>  		"descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
>  		"device=%x comp_id=%s validity_flags='%s' " \
> -		"hpa=%llx region=%s region_uuid=%pUb",
> +		"hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb",
>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>  		show_event_desc_flags(__entry->descriptor),
>  		show_gmer_mem_event_type(__entry->type),
> @@ -378,7 +381,8 @@ TRACE_EVENT(cxl_general_media,
>  		__entry->channel, __entry->rank, __entry->device,
>  		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
>  		show_valid_flags(__entry->validity_flags),
> -		__entry->hpa, __get_str(region_name), &__entry->region_uuid
> +		__entry->hpa, __entry->hpa_alias, __get_str(region_name),
> +		&__entry->region_uuid
>  	)
>  );
>  
> @@ -424,9 +428,10 @@ TRACE_EVENT(cxl_general_media,
>  TRACE_EVENT(cxl_dram,
>  
>  	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
> -		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec),
> +		 struct cxl_region *cxlr, u64 hpa, u64 hpa_alias,
> +		 struct cxl_event_dram *rec),
>  
> -	TP_ARGS(cxlmd, log, cxlr, hpa, rec),
> +	TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias, rec),
>  
>  	TP_STRUCT__entry(
>  		CXL_EVT_TP_entry
> @@ -442,6 +447,7 @@ TRACE_EVENT(cxl_dram,
>  		__field(u32, row)
>  		__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
>  		__field(u64, hpa)
> +		__field(u64, hpa_alias)
>  		__field_struct(uuid_t, region_uuid)
>  		__field(u8, rank)	/* Out of order to pack trace record */
>  		__field(u8, bank_group)	/* Out of order to pack trace record */
> @@ -472,6 +478,7 @@ TRACE_EVENT(cxl_dram,
>  		memcpy(__entry->cor_mask, &rec->correction_mask,
>  			CXL_EVENT_DER_CORRECTION_MASK_SIZE);
>  		__entry->hpa = hpa;
> +		__entry->hpa_alias = hpa_alias;
>  		if (cxlr) {
>  			__assign_str(region_name);
>  			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
> @@ -485,7 +492,7 @@ TRACE_EVENT(cxl_dram,
>  		"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
>  		"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
>  		"validity_flags='%s' " \
> -		"hpa=%llx region=%s region_uuid=%pUb",
> +		"hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb",
>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>  		show_event_desc_flags(__entry->descriptor),
>  		show_dram_mem_event_type(__entry->type),
> @@ -495,7 +502,8 @@ TRACE_EVENT(cxl_dram,
>  		__entry->row, __entry->column,
>  		__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
>  		show_dram_valid_flags(__entry->validity_flags),
> -		__entry->hpa, __get_str(region_name), &__entry->region_uuid
> +		__entry->hpa_alias, __entry->hpa, __get_str(region_name),
> +		&__entry->region_uuid
>  	)
>  );
>
Jonathan Cameron Jan. 13, 2025, 12:53 p.m. UTC | #4
On Fri, 10 Jan 2025 10:27:52 -0700
Dave Jiang <dave.jiang@intel.com> wrote:

> On 1/10/25 9:32 AM, Jonathan Cameron wrote:
> > On Fri, 10 Jan 2025 08:17:46 -0700
> > Dave Jiang <dave.jiang@intel.com> wrote:
> >   
> >> Add the aliased address of extended linear cache when emitting event
> >> trace for DRAM and general media of CXL events.
> >>
> >> Signed-off-by: Dave Jiang <dave.jiang@intel.com>  
> > I think the value set when there isn't a value differs between no
> > cache and no region. That seems an odd bit of ABI.  Maybe go with ~0ULL in
> > both cases?  
> 
> Not sure I follow. Currently both cases do return ~0ULL.

See inline.  I don't think they do... In the no region case
hpa is ~0ULL, hpa_alias is 0


> 
> DJ
> 
> > 
> > Other than tidying that up this looks fine to me.
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >   
> >> ---
> >> v2:
> >> - Emit hpa_alias0 instead of hpa_alias. (Jonathan)
> >> - Check valid cxlr before dereference. (Jonathan)
> >> ---
> >>  drivers/cxl/core/core.h   |  5 +++++
> >>  drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
> >>  drivers/cxl/core/region.c | 12 ++++++++++++
> >>  drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
> >>  4 files changed, 62 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> >> index 0fb779b612d1..afbefc72c8fa 100644
> >> --- a/drivers/cxl/core/core.h
> >> +++ b/drivers/cxl/core/core.h
> >> @@ -30,8 +30,13 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
> >>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
> >>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
> >>  		   u64 dpa);
> >> +int cxl_region_nid(struct cxl_region *cxlr);
> >>  
> >>  #else
> >> +static inline int cxl_region_nid(struct cxl_region *cxlr)
> >> +{
> >> +	return NUMA_NO_NODE;
> >> +}
> >>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
> >>  				 const struct cxl_memdev *cxlmd, u64 dpa)
> >>  {
> >> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> >> index 548564c770c0..d7999260f004 100644
> >> --- a/drivers/cxl/core/mbox.c
> >> +++ b/drivers/cxl/core/mbox.c
> >> @@ -856,6 +856,28 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
> >>  }
> >>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
> >>  
> >> +static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
> >> +{
> >> +	struct cxl_region_params *p;
> >> +	int nid;
> >> +
> >> +	if (!cxlr)
> >> +		return ~0ULL;  
> >   
> >> +	p = &cxlr->params;
> >> +	if (!p->cache_size)
> >> +		return ~0ULL;  
> > 
> > So no cache is all 1s. .. See below.
> >   
> >> +
> >> +	nid = cxl_region_nid(cxlr);
> >> +	if (nid == NUMA_NO_NODE)
> >> +		nid = 0;
> >> +
> >> +	if (hpa >= p->res->start + p->cache_size)
> >> +		return hpa - p->cache_size;
> >> +
> >> +	return hpa + p->cache_size;
> >> +}
> >> +
> >>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> >>  			    enum cxl_event_log_type type,
> >>  			    enum cxl_event_type event_type,
> >> @@ -871,7 +893,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> >>  	}
> >>  
> >>  	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
> >> -		u64 dpa, hpa = ULLONG_MAX;
> >> +		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;  
> > 
> > If I read this right if there is a region but no alias then the hpa_alias is
> > ~0ULL but if there no region it is 0.  Seems a tiny bit inconsistent.

Here you set hpa_alias to 0.

> >   
> >>  		struct cxl_region *cxlr;
> >>  
> >>  		/*
> >> @@ -884,14 +906,17 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> >>  
> >>  		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
> >>  		cxlr = cxl_dpa_to_region(cxlmd, dpa);
> >> -		if (cxlr)
> >> +		if (cxlr) {

If no region, nothing changes hpa_alias.

> >>  			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
> >> +			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
> >> +		}
> >>  
> >>  		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
> >>  			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
> >> -						&evt->gen_media);
> >> +						hpa_alias, &evt->gen_media);
> >>  		else if (event_type == CXL_CPER_EVENT_DRAM)
> >> -			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
> >> +			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
> >> +				       &evt->dram);
> >>  	}
> >>  }  
> > 
> >   
> 
>
Dave Jiang Jan. 13, 2025, 5:54 p.m. UTC | #5
On 1/13/25 5:53 AM, Jonathan Cameron wrote:
> On Fri, 10 Jan 2025 10:27:52 -0700
> Dave Jiang <dave.jiang@intel.com> wrote:
> 
>> On 1/10/25 9:32 AM, Jonathan Cameron wrote:
>>> On Fri, 10 Jan 2025 08:17:46 -0700
>>> Dave Jiang <dave.jiang@intel.com> wrote:
>>>   
>>>> Add the aliased address of extended linear cache when emitting event
>>>> trace for DRAM and general media of CXL events.
>>>>
>>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>  
>>> I think the value set when there isn't a value differs between no
>>> cache and no region. That seems an odd bit of ABI.  Maybe go with ~0ULL in
>>> both cases?  
>>
>> Not sure I follow. Currently both cases do return ~0ULL.
> 
> See inline.  I don't think they do... In the no region case
> hpa is ~0ULL, hpa_alias is 0

Ah I see what you are saying now. Thanks! I'll fix that. 

DJ

> 
> 
>>
>> DJ
>>
>>>
>>> Other than tidying that up this looks fine to me.
>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>   
>>>> ---
>>>> v2:
>>>> - Emit hpa_alias0 instead of hpa_alias. (Jonathan)
>>>> - Check valid cxlr before dereference. (Jonathan)
>>>> ---
>>>>  drivers/cxl/core/core.h   |  5 +++++
>>>>  drivers/cxl/core/mbox.c   | 33 +++++++++++++++++++++++++++++----
>>>>  drivers/cxl/core/region.c | 12 ++++++++++++
>>>>  drivers/cxl/core/trace.h  | 24 ++++++++++++++++--------
>>>>  4 files changed, 62 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>>>> index 0fb779b612d1..afbefc72c8fa 100644
>>>> --- a/drivers/cxl/core/core.h
>>>> +++ b/drivers/cxl/core/core.h
>>>> @@ -30,8 +30,13 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>>>>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>>>>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>>>>  		   u64 dpa);
>>>> +int cxl_region_nid(struct cxl_region *cxlr);
>>>>  
>>>>  #else
>>>> +static inline int cxl_region_nid(struct cxl_region *cxlr)
>>>> +{
>>>> +	return NUMA_NO_NODE;
>>>> +}
>>>>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
>>>>  				 const struct cxl_memdev *cxlmd, u64 dpa)
>>>>  {
>>>> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
>>>> index 548564c770c0..d7999260f004 100644
>>>> --- a/drivers/cxl/core/mbox.c
>>>> +++ b/drivers/cxl/core/mbox.c
>>>> @@ -856,6 +856,28 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>>>>  }
>>>>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
>>>>  
>>>> +static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
>>>> +{
>>>> +	struct cxl_region_params *p;
>>>> +	int nid;
>>>> +
>>>> +	if (!cxlr)
>>>> +		return ~0ULL;  
>>>   
>>>> +	p = &cxlr->params;
>>>> +	if (!p->cache_size)
>>>> +		return ~0ULL;  
>>>
>>> So no cache is all 1s. .. See below.
>>>   
>>>> +
>>>> +	nid = cxl_region_nid(cxlr);
>>>> +	if (nid == NUMA_NO_NODE)
>>>> +		nid = 0;
>>>> +
>>>> +	if (hpa >= p->res->start + p->cache_size)
>>>> +		return hpa - p->cache_size;
>>>> +
>>>> +	return hpa + p->cache_size;
>>>> +}
>>>> +
>>>>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>>>  			    enum cxl_event_log_type type,
>>>>  			    enum cxl_event_type event_type,
>>>> @@ -871,7 +893,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>>>  	}
>>>>  
>>>>  	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
>>>> -		u64 dpa, hpa = ULLONG_MAX;
>>>> +		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;  
>>>
>>> If I read this right if there is a region but no alias then the hpa_alias is
>>> ~0ULL but if there no region it is 0.  Seems a tiny bit inconsistent.
> 
> Here you set hpa_alias to 0.
> 
>>>   
>>>>  		struct cxl_region *cxlr;
>>>>  
>>>>  		/*
>>>> @@ -884,14 +906,17 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>>>>  
>>>>  		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
>>>>  		cxlr = cxl_dpa_to_region(cxlmd, dpa);
>>>> -		if (cxlr)
>>>> +		if (cxlr) {
> 
> If no region, nothing changes hpa_alias.
> 
>>>>  			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
>>>> +			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
>>>> +		}
>>>>  
>>>>  		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
>>>>  			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
>>>> -						&evt->gen_media);
>>>> +						hpa_alias, &evt->gen_media);
>>>>  		else if (event_type == CXL_CPER_EVENT_DRAM)
>>>> -			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
>>>> +			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
>>>> +				       &evt->dram);
>>>>  	}
>>>>  }  
>>>
>>>   
>>
>>
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 0fb779b612d1..afbefc72c8fa 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -30,8 +30,13 @@  int cxl_get_poison_by_endpoint(struct cxl_port *port);
 struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa);
+int cxl_region_nid(struct cxl_region *cxlr);
 
 #else
+static inline int cxl_region_nid(struct cxl_region *cxlr)
+{
+	return NUMA_NO_NODE;
+}
 static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
 				 const struct cxl_memdev *cxlmd, u64 dpa)
 {
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 548564c770c0..d7999260f004 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -856,6 +856,28 @@  int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
 
+static u64 cxlr_hpa_cache_alias(struct cxl_region *cxlr, u64 hpa)
+{
+	struct cxl_region_params *p;
+	int nid;
+
+	if (!cxlr)
+		return ~0ULL;
+
+	p = &cxlr->params;
+	if (!p->cache_size)
+		return ~0ULL;
+
+	nid = cxl_region_nid(cxlr);
+	if (nid == NUMA_NO_NODE)
+		nid = 0;
+
+	if (hpa >= p->res->start + p->cache_size)
+		return hpa - p->cache_size;
+
+	return hpa + p->cache_size;
+}
+
 void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 			    enum cxl_event_log_type type,
 			    enum cxl_event_type event_type,
@@ -871,7 +893,7 @@  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 	}
 
 	if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
-		u64 dpa, hpa = ULLONG_MAX;
+		u64 dpa, hpa = ULLONG_MAX, hpa_alias = 0;
 		struct cxl_region *cxlr;
 
 		/*
@@ -884,14 +906,17 @@  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 
 		dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
 		cxlr = cxl_dpa_to_region(cxlmd, dpa);
-		if (cxlr)
+		if (cxlr) {
 			hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
+			hpa_alias = cxlr_hpa_cache_alias(cxlr, hpa);
+		}
 
 		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
 			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
-						&evt->gen_media);
+						hpa_alias, &evt->gen_media);
 		else if (event_type == CXL_CPER_EVENT_DRAM)
-			trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
+			trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
+				       &evt->dram);
 	}
 }
 EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL");
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 2d8699a86b24..5d23bd26d9ba 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2417,6 +2417,18 @@  static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 	return true;
 }
 
+int cxl_region_nid(struct cxl_region *cxlr)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	struct resource *res;
+
+	guard(rwsem_read)(&cxl_region_rwsem);
+	res = p->res;
+	if (!res)
+		return NUMA_NO_NODE;
+	return phys_to_target_node(res->start);
+}
+
 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 					  unsigned long action, void *arg)
 {
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 8389a94adb1a..257f60f16e4c 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -316,9 +316,10 @@  TRACE_EVENT(cxl_generic_event,
 TRACE_EVENT(cxl_general_media,
 
 	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
-		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec),
+		 struct cxl_region *cxlr, u64 hpa, u64 hpa_alias,
+		 struct cxl_event_gen_media *rec),
 
-	TP_ARGS(cxlmd, log, cxlr, hpa, rec),
+	TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias, rec),
 
 	TP_STRUCT__entry(
 		CXL_EVT_TP_entry
@@ -332,6 +333,7 @@  TRACE_EVENT(cxl_general_media,
 		__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
 		/* Following are out of order to pack trace record */
 		__field(u64, hpa)
+		__field(u64, hpa_alias)
 		__field_struct(uuid_t, region_uuid)
 		__field(u16, validity_flags)
 		__field(u8, rank)
@@ -358,6 +360,7 @@  TRACE_EVENT(cxl_general_media,
 			CXL_EVENT_GEN_MED_COMP_ID_SIZE);
 		__entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags);
 		__entry->hpa = hpa;
+		__entry->hpa_alias = hpa_alias;
 		if (cxlr) {
 			__assign_str(region_name);
 			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
@@ -370,7 +373,7 @@  TRACE_EVENT(cxl_general_media,
 	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
 		"descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
 		"device=%x comp_id=%s validity_flags='%s' " \
-		"hpa=%llx region=%s region_uuid=%pUb",
+		"hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
 		show_gmer_mem_event_type(__entry->type),
@@ -378,7 +381,8 @@  TRACE_EVENT(cxl_general_media,
 		__entry->channel, __entry->rank, __entry->device,
 		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
 		show_valid_flags(__entry->validity_flags),
-		__entry->hpa, __get_str(region_name), &__entry->region_uuid
+		__entry->hpa, __entry->hpa_alias, __get_str(region_name),
+		&__entry->region_uuid
 	)
 );
 
@@ -424,9 +428,10 @@  TRACE_EVENT(cxl_general_media,
 TRACE_EVENT(cxl_dram,
 
 	TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
-		 struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec),
+		 struct cxl_region *cxlr, u64 hpa, u64 hpa_alias,
+		 struct cxl_event_dram *rec),
 
-	TP_ARGS(cxlmd, log, cxlr, hpa, rec),
+	TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias, rec),
 
 	TP_STRUCT__entry(
 		CXL_EVT_TP_entry
@@ -442,6 +447,7 @@  TRACE_EVENT(cxl_dram,
 		__field(u32, row)
 		__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
 		__field(u64, hpa)
+		__field(u64, hpa_alias)
 		__field_struct(uuid_t, region_uuid)
 		__field(u8, rank)	/* Out of order to pack trace record */
 		__field(u8, bank_group)	/* Out of order to pack trace record */
@@ -472,6 +478,7 @@  TRACE_EVENT(cxl_dram,
 		memcpy(__entry->cor_mask, &rec->correction_mask,
 			CXL_EVENT_DER_CORRECTION_MASK_SIZE);
 		__entry->hpa = hpa;
+		__entry->hpa_alias = hpa_alias;
 		if (cxlr) {
 			__assign_str(region_name);
 			uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
@@ -485,7 +492,7 @@  TRACE_EVENT(cxl_dram,
 		"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
 		"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
 		"validity_flags='%s' " \
-		"hpa=%llx region=%s region_uuid=%pUb",
+		"hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
 		show_dram_mem_event_type(__entry->type),
@@ -495,7 +502,8 @@  TRACE_EVENT(cxl_dram,
 		__entry->row, __entry->column,
 		__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
 		show_dram_valid_flags(__entry->validity_flags),
-		__entry->hpa, __get_str(region_name), &__entry->region_uuid
+		__entry->hpa_alias, __entry->hpa, __get_str(region_name),
+		&__entry->region_uuid
 	)
 );