Message ID | 1423394932-2965-2-git-send-email-haggaie@mellanox.com (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
Le dimanche 08 février 2015 à 13:28 +0200, Haggai Eran a écrit : > From: Eli Cohen <eli@mellanox.com> > > Add extensible query device capabilities verb to allow adding new features. > ib_uverbs_ex_query_device is added and copy_query_dev_fields is used to copy > capability fields to be used by both ib_uverbs_query_device and > ib_uverbs_ex_query_device. > > Following the discussion about this patch [1], the code now validates the > command's comp_mask is zero, returning -EINVAL for unknown values, in order to > allow extending the verb in the future. > > The verb also checks the user-space provided response buffer size and only > fills in capabilities that will fit in the buffer. In attempt to follow the > spirit of presentation [2] by Tzahi Oved that was presented during OpenFabrics > Alliance International Developer Workshop 2013, the comp_mask bits will only > describe which fields are valid. Furthermore, fields that can simply be > cleared when they are not supported, do not require a comp_mask bit at all. > The verb returns a response_length field containing the actual number of bytes > written by the kernel, so that a newer version running on an older kernel can > tell which fields were actually returned. > > [1] [PATCH v1 0/5] IB/core: extended query device caps cleanup for v3.19 > http://thread.gmane.org/gmane.linux.kernel.api/7889/ > > [2] https://www.openfabrics.org/images/docs/2013_Dev_Workshop/Tues_0423/2013_Workshop_Tues_0830_Tzahi_Oved-verbs_extensions_ofa_2013-tzahio.pdf > > Cc: Yann Droneaud <ydroneaud@opteya.com> > Cc: Ira Weiny <ira.weiny@intel.com> > Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> > Signed-off-by: Eli Cohen <eli@mellanox.com> > Signed-off-by: Haggai Eran <haggaie@mellanox.com> > --- > drivers/infiniband/core/uverbs.h | 1 + > drivers/infiniband/core/uverbs_cmd.c | 131 +++++++++++++++++++++++----------- > drivers/infiniband/core/uverbs_main.c | 1 + > include/uapi/rdma/ib_user_verbs.h | 12 ++++ > 4 files changed, 104 insertions(+), 41 deletions(-) > > diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h > index 643c08a025a5..b716b0815644 100644 > --- a/drivers/infiniband/core/uverbs.h > +++ b/drivers/infiniband/core/uverbs.h > @@ -258,5 +258,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); > > IB_UVERBS_DECLARE_EX_CMD(create_flow); > IB_UVERBS_DECLARE_EX_CMD(destroy_flow); > +IB_UVERBS_DECLARE_EX_CMD(query_device); > > #endif /* UVERBS_H */ > diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c > index b7943ff16ed3..8f507538c42b 100644 > --- a/drivers/infiniband/core/uverbs_cmd.c > +++ b/drivers/infiniband/core/uverbs_cmd.c > @@ -400,6 +400,52 @@ err: > return ret; > } > > +static void copy_query_dev_fields(struct ib_uverbs_file *file, > + struct ib_uverbs_query_device_resp *resp, > + struct ib_device_attr *attr) > +{ > + resp->fw_ver = attr->fw_ver; > + resp->node_guid = file->device->ib_dev->node_guid; > + resp->sys_image_guid = attr->sys_image_guid; > + resp->max_mr_size = attr->max_mr_size; > + resp->page_size_cap = attr->page_size_cap; > + resp->vendor_id = attr->vendor_id; > + resp->vendor_part_id = attr->vendor_part_id; > + resp->hw_ver = attr->hw_ver; > + resp->max_qp = attr->max_qp; > + resp->max_qp_wr = attr->max_qp_wr; > + resp->device_cap_flags = attr->device_cap_flags; > + resp->max_sge = attr->max_sge; > + resp->max_sge_rd = attr->max_sge_rd; > + resp->max_cq = attr->max_cq; > + resp->max_cqe = attr->max_cqe; > + resp->max_mr = attr->max_mr; > + resp->max_pd = attr->max_pd; > + resp->max_qp_rd_atom = attr->max_qp_rd_atom; > + resp->max_ee_rd_atom = attr->max_ee_rd_atom; > + resp->max_res_rd_atom = attr->max_res_rd_atom; > + resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom; > + resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; > + resp->atomic_cap = attr->atomic_cap; > + resp->max_ee = attr->max_ee; > + resp->max_rdd = attr->max_rdd; > + resp->max_mw = attr->max_mw; > + resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; > + resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; > + resp->max_mcast_grp = attr->max_mcast_grp; > + resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; > + resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; > + resp->max_ah = attr->max_ah; > + resp->max_fmr = attr->max_fmr; > + resp->max_map_per_fmr = attr->max_map_per_fmr; > + resp->max_srq = attr->max_srq; > + resp->max_srq_wr = attr->max_srq_wr; > + resp->max_srq_sge = attr->max_srq_sge; > + resp->max_pkeys = attr->max_pkeys; > + resp->local_ca_ack_delay = attr->local_ca_ack_delay; > + resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt; > +} > + > ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, > const char __user *buf, > int in_len, int out_len) > @@ -420,47 +466,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, > return ret; > > memset(&resp, 0, sizeof resp); > - > - resp.fw_ver = attr.fw_ver; > - resp.node_guid = file->device->ib_dev->node_guid; > - resp.sys_image_guid = attr.sys_image_guid; > - resp.max_mr_size = attr.max_mr_size; > - resp.page_size_cap = attr.page_size_cap; > - resp.vendor_id = attr.vendor_id; > - resp.vendor_part_id = attr.vendor_part_id; > - resp.hw_ver = attr.hw_ver; > - resp.max_qp = attr.max_qp; > - resp.max_qp_wr = attr.max_qp_wr; > - resp.device_cap_flags = attr.device_cap_flags; > - resp.max_sge = attr.max_sge; > - resp.max_sge_rd = attr.max_sge_rd; > - resp.max_cq = attr.max_cq; > - resp.max_cqe = attr.max_cqe; > - resp.max_mr = attr.max_mr; > - resp.max_pd = attr.max_pd; > - resp.max_qp_rd_atom = attr.max_qp_rd_atom; > - resp.max_ee_rd_atom = attr.max_ee_rd_atom; > - resp.max_res_rd_atom = attr.max_res_rd_atom; > - resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; > - resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; > - resp.atomic_cap = attr.atomic_cap; > - resp.max_ee = attr.max_ee; > - resp.max_rdd = attr.max_rdd; > - resp.max_mw = attr.max_mw; > - resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; > - resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; > - resp.max_mcast_grp = attr.max_mcast_grp; > - resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; > - resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; > - resp.max_ah = attr.max_ah; > - resp.max_fmr = attr.max_fmr; > - resp.max_map_per_fmr = attr.max_map_per_fmr; > - resp.max_srq = attr.max_srq; > - resp.max_srq_wr = attr.max_srq_wr; > - resp.max_srq_sge = attr.max_srq_sge; > - resp.max_pkeys = attr.max_pkeys; > - resp.local_ca_ack_delay = attr.local_ca_ack_delay; > - resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; > + copy_query_dev_fields(file, &resp, &attr); > > if (copy_to_user((void __user *) (unsigned long) cmd.response, > &resp, sizeof resp)) > @@ -3287,3 +3293,46 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, > > return ret ? ret : in_len; > } > + > +int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, > + struct ib_udata *ucore, > + struct ib_udata *uhw) > +{ > + struct ib_uverbs_ex_query_device_resp resp; > + struct ib_uverbs_ex_query_device cmd; > + struct ib_device_attr attr; > + struct ib_device *device; > + int err; > + > + device = file->device->ib_dev; Please an empty line after, or may be, move this in device declaration. > + if (ucore->inlen < sizeof(cmd)) > + return -EINVAL; > + > + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); > + if (err) > + return err; > + > + if (cmd.comp_mask) > + return -EINVAL; > + > + if (cmd.reserved) > + return -EINVAL; > + > + resp.response_length = sizeof(resp); > + > + if (ucore->outlen < resp.response_length) > + return -ENOSPC; > + > + err = device->query_device(device, &attr); > + if (err) > + return err; > + > + copy_query_dev_fields(file, &resp.base, &attr); > + resp.comp_mask = 0; > + > + err = ib_copy_to_udata(ucore, &resp, resp.response_length); > + if (err) > + return err; > + > + return 0; > +} > diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c > index 5db1a8cc388d..259dcc7779f5 100644 > --- a/drivers/infiniband/core/uverbs_main.c > +++ b/drivers/infiniband/core/uverbs_main.c > @@ -123,6 +123,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, > struct ib_udata *uhw) = { > [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, > [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, > + [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, > }; > > static void ib_uverbs_add_one(struct ib_device *device); > diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h > index 867cc5084afb..f0f799afd856 100644 > --- a/include/uapi/rdma/ib_user_verbs.h > +++ b/include/uapi/rdma/ib_user_verbs.h > @@ -90,6 +90,7 @@ enum { > }; > > enum { > + IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, > IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, > IB_USER_VERBS_EX_CMD_DESTROY_FLOW, > }; > @@ -201,6 +202,17 @@ struct ib_uverbs_query_device_resp { > __u8 reserved[4]; > }; > > +struct ib_uverbs_ex_query_device { > + __u32 comp_mask; > + __u32 reserved; > +}; > + > +struct ib_uverbs_ex_query_device_resp { > + struct ib_uverbs_query_device_resp base; > + __u32 comp_mask; > + __u32 response_length; > +}; > + > struct ib_uverbs_query_port { > __u64 response; > __u8 port_num; Everything else seems OK for me. Thanks a lot the new patch with all the fixes. Reviewed-by: Yann Droneaud <ydroneaud@opteya.com> Regards.
> The verb also checks the user-space provided response buffer size and only > fills in capabilities that will fit in the buffer. In attempt to follow the > spirit of presentation [2] by Tzahi Oved that was presented during OpenFabrics > Alliance International Developer Workshop 2013, the comp_mask bits will only > describe which fields are valid. > + if (ucore->outlen < resp.response_length) > + return -ENOSPC; So is this really what we want? A future kernel that adds new fields will cause previously working userspace to get an error? Or do we expect userspace to retry with a bigger buffer on ENOSPC, without actually returning the required size back to userspace? Is there anything wrong with truncating the response to only include the fields that userspace asked for? Is it just that this is the initial implementation of the query_device_ex verb, so the current size is also the minimum size? If so I think we need at least a comment here, so that we remember to fix the ENOSPC code when adding more fields in the kernel. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 18/02/2015 08:21, Roland Dreier wrote: >> The verb also checks the user-space provided response buffer size and only >> fills in capabilities that will fit in the buffer. In attempt to follow the >> spirit of presentation [2] by Tzahi Oved that was presented during OpenFabrics >> Alliance International Developer Workshop 2013, the comp_mask bits will only >> describe which fields are valid. > >> + if (ucore->outlen < resp.response_length) >> + return -ENOSPC; > > So is this really what we want? A future kernel that adds new fields > will cause previously working userspace to get an error? No, the code is intended to only check for the legacy fields, comp_mask and response_length. At this point though, those are all the fields in the response struct, so I use sizeof. The next patch adds more fields to the response struct, and changes the value of response_length at this point to be the same as it was in this patch using offsetof: > --- a/drivers/infiniband/core/uverbs_cmd.c > +++ b/drivers/infiniband/core/uverbs_cmd.c > @@ -3318,7 +3318,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, > if (cmd.reserved) > return -EINVAL; > > - resp.response_length = sizeof(resp); > + resp.response_length = offsetof(typeof(resp), odp_caps); > > if (ucore->outlen < resp.response_length) > return -ENOSPC; ... > Is there anything wrong with truncating the response to only include > the fields that userspace asked for? Yann had strong objections to that implementation. The main issue is as I see it is that it can hide user-space bugs where user-space retrieves a partial field in the response struct. > Is it just that this is the > initial implementation of the query_device_ex verb, so the current > size is also the minimum size? Yes. > If so I think we need at least a > comment here, so that we remember to fix the ENOSPC code when adding > more fields in the kernel. Sure, I'll resend the patches with a comment. Regards, Haggai -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 643c08a025a5..b716b0815644 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -258,5 +258,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); +IB_UVERBS_DECLARE_EX_CMD(query_device); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b7943ff16ed3..8f507538c42b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -400,6 +400,52 @@ err: return ret; } +static void copy_query_dev_fields(struct ib_uverbs_file *file, + struct ib_uverbs_query_device_resp *resp, + struct ib_device_attr *attr) +{ + resp->fw_ver = attr->fw_ver; + resp->node_guid = file->device->ib_dev->node_guid; + resp->sys_image_guid = attr->sys_image_guid; + resp->max_mr_size = attr->max_mr_size; + resp->page_size_cap = attr->page_size_cap; + resp->vendor_id = attr->vendor_id; + resp->vendor_part_id = attr->vendor_part_id; + resp->hw_ver = attr->hw_ver; + resp->max_qp = attr->max_qp; + resp->max_qp_wr = attr->max_qp_wr; + resp->device_cap_flags = attr->device_cap_flags; + resp->max_sge = attr->max_sge; + resp->max_sge_rd = attr->max_sge_rd; + resp->max_cq = attr->max_cq; + resp->max_cqe = attr->max_cqe; + resp->max_mr = attr->max_mr; + resp->max_pd = attr->max_pd; + resp->max_qp_rd_atom = attr->max_qp_rd_atom; + resp->max_ee_rd_atom = attr->max_ee_rd_atom; + resp->max_res_rd_atom = attr->max_res_rd_atom; + resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom; + resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; + resp->atomic_cap = attr->atomic_cap; + resp->max_ee = attr->max_ee; + resp->max_rdd = attr->max_rdd; + resp->max_mw = attr->max_mw; + resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; + resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; + resp->max_mcast_grp = attr->max_mcast_grp; + resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; + resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; + resp->max_ah = attr->max_ah; + resp->max_fmr = attr->max_fmr; + resp->max_map_per_fmr = attr->max_map_per_fmr; + resp->max_srq = attr->max_srq; + resp->max_srq_wr = attr->max_srq_wr; + resp->max_srq_sge = attr->max_srq_sge; + resp->max_pkeys = attr->max_pkeys; + resp->local_ca_ack_delay = attr->local_ca_ack_delay; + resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt; +} + ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -420,47 +466,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return ret; memset(&resp, 0, sizeof resp); - - resp.fw_ver = attr.fw_ver; - resp.node_guid = file->device->ib_dev->node_guid; - resp.sys_image_guid = attr.sys_image_guid; - resp.max_mr_size = attr.max_mr_size; - resp.page_size_cap = attr.page_size_cap; - resp.vendor_id = attr.vendor_id; - resp.vendor_part_id = attr.vendor_part_id; - resp.hw_ver = attr.hw_ver; - resp.max_qp = attr.max_qp; - resp.max_qp_wr = attr.max_qp_wr; - resp.device_cap_flags = attr.device_cap_flags; - resp.max_sge = attr.max_sge; - resp.max_sge_rd = attr.max_sge_rd; - resp.max_cq = attr.max_cq; - resp.max_cqe = attr.max_cqe; - resp.max_mr = attr.max_mr; - resp.max_pd = attr.max_pd; - resp.max_qp_rd_atom = attr.max_qp_rd_atom; - resp.max_ee_rd_atom = attr.max_ee_rd_atom; - resp.max_res_rd_atom = attr.max_res_rd_atom; - resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; - resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; - resp.atomic_cap = attr.atomic_cap; - resp.max_ee = attr.max_ee; - resp.max_rdd = attr.max_rdd; - resp.max_mw = attr.max_mw; - resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; - resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; - resp.max_mcast_grp = attr.max_mcast_grp; - resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; - resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; - resp.max_ah = attr.max_ah; - resp.max_fmr = attr.max_fmr; - resp.max_map_per_fmr = attr.max_map_per_fmr; - resp.max_srq = attr.max_srq; - resp.max_srq_wr = attr.max_srq_wr; - resp.max_srq_sge = attr.max_srq_sge; - resp.max_pkeys = attr.max_pkeys; - resp.local_ca_ack_delay = attr.local_ca_ack_delay; - resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + copy_query_dev_fields(file, &resp, &attr); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -3287,3 +3293,46 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return ret ? ret : in_len; } + +int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_query_device_resp resp; + struct ib_uverbs_ex_query_device cmd; + struct ib_device_attr attr; + struct ib_device *device; + int err; + + device = file->device->ib_dev; + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + resp.response_length = sizeof(resp); + + if (ucore->outlen < resp.response_length) + return -ENOSPC; + + err = device->query_device(device, &attr); + if (err) + return err; + + copy_query_dev_fields(file, &resp.base, &attr); + resp.comp_mask = 0; + + err = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (err) + return err; + + return 0; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5db1a8cc388d..259dcc7779f5 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -123,6 +123,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, + [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 867cc5084afb..f0f799afd856 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -90,6 +90,7 @@ enum { }; enum { + IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, }; @@ -201,6 +202,17 @@ struct ib_uverbs_query_device_resp { __u8 reserved[4]; }; +struct ib_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +struct ib_uverbs_ex_query_device_resp { + struct ib_uverbs_query_device_resp base; + __u32 comp_mask; + __u32 response_length; +}; + struct ib_uverbs_query_port { __u64 response; __u8 port_num;