Message ID | 20231005053932.149497-2-suma.hegde@amd.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | [v4,1/3] platform/x86/amd/hsmp: create plat specific struct | expand |
On Thu, 5 Oct 2023, Suma Hegde wrote: > AMD MI300 MCM provides GET_METRICS_TABLE message to retrieve > all the system management information from SMU. > > The metrics table is made available as hexadecimal sysfs binary file > under per socket sysfs directory created at > /sys/devices/platform/amd_hsmp/socket%d/metrics_bin > > Metrics table definitions will be documented as part of Public PPR. > The same is defined in the amd_hsmp.h header. > > Signed-off-by: Suma Hegde <suma.hegde@amd.com> > Reviewed-by: Naveen Krishna Chatradhi <nchatrad@amd.com> > +static int hsmp_create_sysfs_interface(void) > +{ > + const struct attribute_group **hsmp_attr_grps; > + struct bin_attribute **hsmp_bin_attrs; > + struct attribute_group *attr_grp; > + int ret; > + u8 i; > + > + hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group *) * > + (plat_dev.num_sockets + 1), GFP_KERNEL); > + if (!hsmp_attr_grps) > + return -ENOMEM; > + > + /* Create a sysfs directory for each socket */ > + for (i = 0; i < plat_dev.num_sockets; i++) { The change for i to u8 seems not very wise given num_sockets still is u16 as it can turn this into an infinite loop.
On 10/6/2023 6:51 PM, Ilpo Järvinen wrote: > Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding. > > > On Thu, 5 Oct 2023, Suma Hegde wrote: > >> AMD MI300 MCM provides GET_METRICS_TABLE message to retrieve >> all the system management information from SMU. >> >> The metrics table is made available as hexadecimal sysfs binary file >> under per socket sysfs directory created at >> /sys/devices/platform/amd_hsmp/socket%d/metrics_bin >> >> Metrics table definitions will be documented as part of Public PPR. >> The same is defined in the amd_hsmp.h header. >> >> Signed-off-by: Suma Hegde <suma.hegde@amd.com> >> Reviewed-by: Naveen Krishna Chatradhi <nchatrad@amd.com> > >> +static int hsmp_create_sysfs_interface(void) >> +{ >> + const struct attribute_group **hsmp_attr_grps; >> + struct bin_attribute **hsmp_bin_attrs; >> + struct attribute_group *attr_grp; >> + int ret; >> + u8 i; >> + >> + hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group *) * >> + (plat_dev.num_sockets + 1), GFP_KERNEL); >> + if (!hsmp_attr_grps) >> + return -ENOMEM; >> + >> + /* Create a sysfs directory for each socket */ >> + for (i = 0; i < plat_dev.num_sockets; i++) { > The change for i to u8 seems not very wise given num_sockets still is u16 > as it can turn this into an infinite loop. Hi Ilpo, amd_nb_num() API which we use to identify the number of sockets on a node returns u16. So, we used u16 for plat_dev.num_sockets. u8 should be enough, as present AMD processors support upto 8 sockets on a node. Coming to the warning raised: We have defined, HSMP_ATTR_GRP_NAME_SIZE as 10bytes, 6 chars for 'socket', 3 chars for 3digits (socket index) and a null terminator. Socket index may not need more than 3 digits in the foreseeable future. How about, we declare i as u16 and typecast it as (u8) in the snprintf. int ret; - u8 i; + u16 i; hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group *) * (plat_dev.num_sockets + 1), GFP_KERNEL); @@ -449,7 +449,7 @@ static int hsmp_create_sysfs_interface(void) if (!attr_grp) return -ENOMEM; - snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, "socket%u", i); + snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, + "socket%u", (u8)i); attr_grp->name = plat_dev.sock[i].name; Thanks and Regards, Suma > > -- > i. >
On Mon, 9 Oct 2023, Hegde, Suma wrote: > On 10/6/2023 6:51 PM, Ilpo Järvinen wrote: > > On Thu, 5 Oct 2023, Suma Hegde wrote: > > > > > AMD MI300 MCM provides GET_METRICS_TABLE message to retrieve > > > all the system management information from SMU. > > > > > > The metrics table is made available as hexadecimal sysfs binary file > > > under per socket sysfs directory created at > > > /sys/devices/platform/amd_hsmp/socket%d/metrics_bin > > > > > > Metrics table definitions will be documented as part of Public PPR. > > > The same is defined in the amd_hsmp.h header. > > > > > > Signed-off-by: Suma Hegde <suma.hegde@amd.com> > > > Reviewed-by: Naveen Krishna Chatradhi <nchatrad@amd.com> > > > > > +static int hsmp_create_sysfs_interface(void) > > > +{ > > > + const struct attribute_group **hsmp_attr_grps; > > > + struct bin_attribute **hsmp_bin_attrs; > > > + struct attribute_group *attr_grp; > > > + int ret; > > > + u8 i; > > > + > > > + hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct > > > attribute_group *) * > > > + (plat_dev.num_sockets + 1), > > > GFP_KERNEL); > > > + if (!hsmp_attr_grps) > > > + return -ENOMEM; > > > + > > > + /* Create a sysfs directory for each socket */ > > > + for (i = 0; i < plat_dev.num_sockets; i++) { > > The change for i to u8 seems not very wise given num_sockets still is u16 > > as it can turn this into an infinite loop. > > Hi Ilpo, > > amd_nb_num() API which we use to identify the number of sockets on a node > returns u16. So, we used u16 for plat_dev.num_sockets. > u8 should be enough, as present AMD processors support upto 8 sockets on a > node. I wasn't expecting it to fail at the moment, I just don't want to leave a silent trap for the future. > Coming to the warning raised: > We have defined, HSMP_ATTR_GRP_NAME_SIZE as 10bytes, 6 chars for 'socket', 3 > chars for 3digits (socket index) and a null terminator. > Socket index may not need more than 3 digits in the foreseeable future. How > about, we declare i as u16 and typecast it as (u8) in the snprintf. > > int ret; > - u8 i; > + u16 i; > > hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct > attribute_group *) * > (plat_dev.num_sockets + 1), GFP_KERNEL); @@ > -449,7 +449,7 @@ static int hsmp_create_sysfs_interface(void) > if (!attr_grp) > return -ENOMEM; > > - snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, "socket%u", > i); > + snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, > + "socket%u", (u8)i); > attr_grp->name = plat_dev.sock[i].name; This is similarly trappy as it truncates i if num_sockets is > 255. I suggest you just put this into start of the function: /* String formatting is currently limited to u8 sockets */ if (WARN_ON(plat_dev.num_sockets > U8_MAX)) return -ERANGE; to catch the too many sockets case if it is ever going to occur. And explain in the changelog that u8 is enough for foreseeable future and the string formatting triggers a warning if unnecessarily large type is passed to snprintf().
On 10/9/2023 4:53 PM, Ilpo Järvinen wrote: > Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding. > > > On Mon, 9 Oct 2023, Hegde, Suma wrote: >> On 10/6/2023 6:51 PM, Ilpo Järvinen wrote: >>> On Thu, 5 Oct 2023, Suma Hegde wrote: >>> >>>> AMD MI300 MCM provides GET_METRICS_TABLE message to retrieve >>>> all the system management information from SMU. >>>> >>>> The metrics table is made available as hexadecimal sysfs binary file >>>> under per socket sysfs directory created at >>>> /sys/devices/platform/amd_hsmp/socket%d/metrics_bin >>>> >>>> Metrics table definitions will be documented as part of Public PPR. >>>> The same is defined in the amd_hsmp.h header. >>>> >>>> Signed-off-by: Suma Hegde <suma.hegde@amd.com> >>>> Reviewed-by: Naveen Krishna Chatradhi <nchatrad@amd.com> >>>> +static int hsmp_create_sysfs_interface(void) >>>> +{ >>>> + const struct attribute_group **hsmp_attr_grps; >>>> + struct bin_attribute **hsmp_bin_attrs; >>>> + struct attribute_group *attr_grp; >>>> + int ret; >>>> + u8 i; >>>> + >>>> + hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct >>>> attribute_group *) * >>>> + (plat_dev.num_sockets + 1), >>>> GFP_KERNEL); >>>> + if (!hsmp_attr_grps) >>>> + return -ENOMEM; >>>> + >>>> + /* Create a sysfs directory for each socket */ >>>> + for (i = 0; i < plat_dev.num_sockets; i++) { >>> The change for i to u8 seems not very wise given num_sockets still is u16 >>> as it can turn this into an infinite loop. >> Hi Ilpo, >> >> amd_nb_num() API which we use to identify the number of sockets on a node >> returns u16. So, we used u16 for plat_dev.num_sockets. >> u8 should be enough, as present AMD processors support upto 8 sockets on a >> node. > I wasn't expecting it to fail at the moment, I just don't want to leave a > silent trap for the future. > >> Coming to the warning raised: >> We have defined, HSMP_ATTR_GRP_NAME_SIZE as 10bytes, 6 chars for 'socket', 3 >> chars for 3digits (socket index) and a null terminator. >> Socket index may not need more than 3 digits in the foreseeable future. How >> about, we declare i as u16 and typecast it as (u8) in the snprintf. >> >> int ret; >> - u8 i; >> + u16 i; >> >> hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct >> attribute_group *) * >> (plat_dev.num_sockets + 1), GFP_KERNEL); @@ >> -449,7 +449,7 @@ static int hsmp_create_sysfs_interface(void) >> if (!attr_grp) >> return -ENOMEM; >> >> - snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, "socket%u", >> i); >> + snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, >> + "socket%u", (u8)i); >> attr_grp->name = plat_dev.sock[i].name; > This is similarly trappy as it truncates i if num_sockets is > 255. > > I suggest you just put this into start of the function: > > /* String formatting is currently limited to u8 sockets */ > if (WARN_ON(plat_dev.num_sockets > U8_MAX)) > return -ERANGE; > > to catch the too many sockets case if it is ever going to occur. > And explain in the changelog that u8 is enough for foreseeable future > and the string formatting triggers a warning if unnecessarily large type > is passed to snprintf(). Ok Ilpo, I will add this WARN_ON and send v5 patch set. > > -- > i.
diff --git a/Documentation/arch/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst index 440e4b645a1c..72083124b9ca 100644 --- a/Documentation/arch/x86/amd_hsmp.rst +++ b/Documentation/arch/x86/amd_hsmp.rst @@ -41,6 +41,24 @@ In-kernel integration: * Locking across callers is taken care by the driver. +HSMP sysfs interface +==================== + +1. Metrics table binary sysfs + +AMD MI300A MCM provides GET_METRICS_TABLE message to retrieve +most of the system management information from SMU in one go. + +The metrics table is made available as hexadecimal sysfs binary file +under per socket sysfs directory created at +/sys/devices/platform/amd_hsmp/socket%d/metrics_bin + +Note: lseek is not supported as entire metrics table is read + +Metrics table definitions will be documented as part of Public PPR. +The same is defined in the amd_hsmp.h header. + + An example ========== diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h index 769b939444ae..fce22686c834 100644 --- a/arch/x86/include/uapi/asm/amd_hsmp.h +++ b/arch/x86/include/uapi/asm/amd_hsmp.h @@ -47,6 +47,9 @@ enum hsmp_message_ids { HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */ + HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */ + HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */ HSMP_MSG_ID_MAX, }; @@ -64,6 +67,14 @@ enum hsmp_msg_type { HSMP_GET = 1, }; +enum hsmp_proto_versions { + HSMP_PROTO_VER2 = 2, + HSMP_PROTO_VER3, + HSMP_PROTO_VER4, + HSMP_PROTO_VER5, + HSMP_PROTO_VER6 +}; + struct hsmp_msg_desc { int num_args; int response_sz; @@ -295,6 +306,104 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = { * input: args[0] = min df pstate[15:8] + max df pstate[7:0] */ {1, 0, HSMP_SET}, + + /* + * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1 + * output: args[0] = metrics table version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0 + */ + {0, 0, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2 + * output: args[0] = lower 32 bits of the address + * output: args[1] = upper 32 bits of the address + */ + {0, 2, HSMP_GET}, +}; + +/* Metrics table (supported only with proto version 6) */ +struct hsmp_metric_table { + __u32 accumulation_counter; + + /* TEMPERATURE */ + __u32 max_socket_temperature; + __u32 max_vr_temperature; + __u32 max_hbm_temperature; + __u64 max_socket_temperature_acc; + __u64 max_vr_temperature_acc; + __u64 max_hbm_temperature_acc; + + /* POWER */ + __u32 socket_power_limit; + __u32 max_socket_power_limit; + __u32 socket_power; + + /* ENERGY */ + __u64 timestamp; + __u64 socket_energy_acc; + __u64 ccd_energy_acc; + __u64 xcd_energy_acc; + __u64 aid_energy_acc; + __u64 hbm_energy_acc; + + /* FREQUENCY */ + __u32 cclk_frequency_limit; + __u32 gfxclk_frequency_limit; + __u32 fclk_frequency; + __u32 uclk_frequency; + __u32 socclk_frequency[4]; + __u32 vclk_frequency[4]; + __u32 dclk_frequency[4]; + __u32 lclk_frequency[4]; + __u64 gfxclk_frequency_acc[8]; + __u64 cclk_frequency_acc[96]; + + /* FREQUENCY RANGE */ + __u32 max_cclk_frequency; + __u32 min_cclk_frequency; + __u32 max_gfxclk_frequency; + __u32 min_gfxclk_frequency; + __u32 fclk_frequency_table[4]; + __u32 uclk_frequency_table[4]; + __u32 socclk_frequency_table[4]; + __u32 vclk_frequency_table[4]; + __u32 dclk_frequency_table[4]; + __u32 lclk_frequency_table[4]; + __u32 max_lclk_dpm_range; + __u32 min_lclk_dpm_range; + + /* XGMI */ + __u32 xgmi_width; + __u32 xgmi_bitrate; + __u64 xgmi_read_bandwidth_acc[8]; + __u64 xgmi_write_bandwidth_acc[8]; + + /* ACTIVITY */ + __u32 socket_c0_residency; + __u32 socket_gfx_busy; + __u32 dram_bandwidth_utilization; + __u64 socket_c0_residency_acc; + __u64 socket_gfx_busy_acc; + __u64 dram_bandwidth_acc; + __u32 max_dram_bandwidth; + __u64 dram_bandwidth_utilization_acc; + __u64 pcie_bandwidth_acc[4]; + + /* THROTTLERS */ + __u32 prochot_residency_acc; + __u32 ppt_residency_acc; + __u32 socket_thm_residency_acc; + __u32 vr_thm_residency_acc; + __u32 hbm_thm_residency_acc; + __u32 spare; + + /* New items at the end to maintain driver compatibility */ + __u32 gfxclk_frequency[8]; }; /* Reset to default packing */ diff --git a/drivers/platform/x86/amd/hsmp.c b/drivers/platform/x86/amd/hsmp.c index 99727cd705cf..5d8efff201d3 100644 --- a/drivers/platform/x86/amd/hsmp.c +++ b/drivers/platform/x86/amd/hsmp.c @@ -20,7 +20,7 @@ #include <linux/semaphore.h> #define DRIVER_NAME "amd_hsmp" -#define DRIVER_VERSION "1.0" +#define DRIVER_VERSION "2.0" /* HSMP Status / Error codes */ #define HSMP_STATUS_NOT_READY 0x00 @@ -49,9 +49,15 @@ #define HSMP_CDEV_NAME "hsmp_cdev" #define HSMP_DEVNODE_NAME "hsmp" +#define HSMP_METRICS_TABLE_NAME "metrics_bin" + +#define HSMP_ATTR_GRP_NAME_SIZE 10 struct hsmp_socket { + struct bin_attribute hsmp_attr; + void __iomem *metric_tbl_addr; struct semaphore hsmp_sem; + char name[HSMP_ATTR_GRP_NAME_SIZE]; u16 sock_ind; }; @@ -59,6 +65,7 @@ struct hsmp_plat_device { struct miscdevice hsmp_device; struct hsmp_socket *sock; struct device *dev; + u32 proto_ver; u16 num_sockets; }; @@ -330,9 +337,158 @@ static const struct file_operations hsmp_fops = { .compat_ioctl = hsmp_ioctl, }; +static ssize_t hsmp_metric_tbl_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct hsmp_socket *sock = bin_attr->private; + struct hsmp_message msg = { 0 }; + int ret; + + /* Do not support lseek, reads entire metric table */ + if (count < bin_attr->size) { + dev_err(plat_dev.dev, "Wrong buffer size\n"); + return -EINVAL; + } + + if (!sock) { + dev_err(plat_dev.dev, "Failed to read attribute private data\n"); + return -EINVAL; + } + + msg.msg_id = HSMP_GET_METRIC_TABLE; + msg.sock_ind = sock->sock_ind; + + ret = hsmp_send_message(&msg); + if (ret) + return ret; + memcpy(buf, sock->metric_tbl_addr, bin_attr->size); + + return bin_attr->size; +} + +static int hsmp_get_tbl_dram_base(u16 sock_ind) +{ + struct hsmp_socket *sock = &plat_dev.sock[sock_ind]; + struct hsmp_message msg = { 0 }; + phys_addr_t dram_addr; + int ret; + + msg.sock_ind = sock_ind; + msg.response_sz = hsmp_msg_desc_table[HSMP_GET_METRIC_TABLE_DRAM_ADDR].response_sz; + msg.msg_id = HSMP_GET_METRIC_TABLE_DRAM_ADDR; + + ret = hsmp_send_message(&msg); + if (ret) + return ret; + + /* + * calculate the metric table DRAM address from lower and upper 32 bits + * sent from SMU and ioremap it to virtual address. + */ + dram_addr = msg.args[0] | ((u64)(msg.args[1]) << 32); + if (!dram_addr) { + dev_err(plat_dev.dev, "Invalid dram address for metric table\n"); + return -ENOMEM; + } + sock->metric_tbl_addr = devm_ioremap(plat_dev.dev, dram_addr, + sizeof(struct hsmp_metric_table)); + if (!sock->metric_tbl_addr) { + dev_err(plat_dev.dev, "Failed to ioremap metric table addr\n"); + return -ENOMEM; + } + return 0; +} + +static umode_t hsmp_is_sock_attr_visible(struct kobject *kobj, + struct bin_attribute *battr, int id) +{ + if (plat_dev.proto_ver == HSMP_PROTO_VER6) + return battr->attr.mode; + else + return 0; +} + +static int hsmp_init_metric_tbl_bin_attr(struct bin_attribute **hattrs, u16 sock_ind) +{ + struct bin_attribute *hattr = &plat_dev.sock[sock_ind].hsmp_attr; + + sysfs_bin_attr_init(hattr); + hattr->attr.name = HSMP_METRICS_TABLE_NAME; + hattr->attr.mode = 0444; + hattr->read = hsmp_metric_tbl_read; + hattr->size = sizeof(struct hsmp_metric_table); + hattr->private = &plat_dev.sock[sock_ind]; + hattrs[0] = hattr; + + if (plat_dev.proto_ver == HSMP_PROTO_VER6) + return (hsmp_get_tbl_dram_base(sock_ind)); + else + return 0; +} + +/* One bin sysfs for metrics table*/ +#define NUM_HSMP_ATTRS 1 + +static int hsmp_create_sysfs_interface(void) +{ + const struct attribute_group **hsmp_attr_grps; + struct bin_attribute **hsmp_bin_attrs; + struct attribute_group *attr_grp; + int ret; + u8 i; + + hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group *) * + (plat_dev.num_sockets + 1), GFP_KERNEL); + if (!hsmp_attr_grps) + return -ENOMEM; + + /* Create a sysfs directory for each socket */ + for (i = 0; i < plat_dev.num_sockets; i++) { + attr_grp = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group), GFP_KERNEL); + if (!attr_grp) + return -ENOMEM; + + snprintf(plat_dev.sock[i].name, HSMP_ATTR_GRP_NAME_SIZE, "socket%u", i); + attr_grp->name = plat_dev.sock[i].name; + + /* Null terminated list of attributes */ + hsmp_bin_attrs = devm_kzalloc(plat_dev.dev, sizeof(struct bin_attribute *) * + (NUM_HSMP_ATTRS + 1), GFP_KERNEL); + if (!hsmp_bin_attrs) + return -ENOMEM; + + attr_grp->bin_attrs = hsmp_bin_attrs; + attr_grp->is_bin_visible = hsmp_is_sock_attr_visible; + hsmp_attr_grps[i] = attr_grp; + + /* Now create the leaf nodes */ + ret = hsmp_init_metric_tbl_bin_attr(hsmp_bin_attrs, i); + if (ret) + return ret; + } + return devm_device_add_groups(plat_dev.dev, hsmp_attr_grps); +} + +static int hsmp_cache_proto_ver(void) +{ + struct hsmp_message msg = { 0 }; + int ret; + + msg.msg_id = HSMP_GET_PROTO_VER; + msg.sock_ind = 0; + msg.response_sz = hsmp_msg_desc_table[HSMP_GET_PROTO_VER].response_sz; + + ret = hsmp_send_message(&msg); + if (!ret) + plat_dev.proto_ver = msg.args[0]; + + return ret; +} + static int hsmp_pltdrv_probe(struct platform_device *pdev) { - int i; + int ret, i; plat_dev.sock = devm_kzalloc(&pdev->dev, (plat_dev.num_sockets * sizeof(struct hsmp_socket)), @@ -353,6 +509,16 @@ static int hsmp_pltdrv_probe(struct platform_device *pdev) plat_dev.hsmp_device.nodename = HSMP_DEVNODE_NAME; plat_dev.hsmp_device.mode = 0644; + ret = hsmp_cache_proto_ver(); + if (ret) { + dev_err(plat_dev.dev, "Failed to read HSMP protocol version\n"); + return ret; + } + + ret = hsmp_create_sysfs_interface(); + if (ret) + dev_err(plat_dev.dev, "Failed to create HSMP sysfs interface\n"); + return misc_register(&plat_dev.hsmp_device); }