Message ID | 20230801111923.118268-3-przemyslaw.kitszel@intel.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | introduce DECLARE_FLEX() macro | expand |
From: Przemek Kitszel <przemyslaw.kitszel@intel.com> Date: Tue, 1 Aug 2023 13:19:23 +0200 > Use DECLARE_FLEX() macro for 1-elem flex array members of ice_switch.c > > Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> > --- > drivers/net/ethernet/intel/ice/ice_switch.c | 53 +++++---------------- > 1 file changed, 12 insertions(+), 41 deletions(-) > > diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c > index a7afb612fe32..41679cb6b548 100644 > --- a/drivers/net/ethernet/intel/ice/ice_switch.c > +++ b/drivers/net/ethernet/intel/ice/ice_switch.c > @@ -1812,15 +1812,11 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, > enum ice_sw_lkup_type lkup_type, > enum ice_adminq_opc opc) > { > - struct ice_aqc_alloc_free_res_elem *sw_buf; > + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, sw_buf, elem, 1); > + u16 buf_len = struct_size(sw_buf, elem, 1); > struct ice_aqc_res_elem *vsi_ele; > - u16 buf_len; > int status; > > - buf_len = struct_size(sw_buf, elem, 1); > - sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL); > - if (!sw_buf) > - return -ENOMEM; > sw_buf->num_elems = cpu_to_le16(1); > > if (lkup_type == ICE_SW_LKUP_MAC || > @@ -1840,8 +1836,7 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, > sw_buf->res_type = > cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); > } else { > - status = -EINVAL; > - goto ice_aq_alloc_free_vsi_list_exit; > + return -EINVAL; > } > > if (opc == ice_aqc_opc_free_res) bloat-o-meter results would be nice to have in the commitmsg. [...] Thanks, Olek
On 8/1/23 15:48, Alexander Lobakin wrote: > From: Przemek Kitszel <przemyslaw.kitszel@intel.com> > Date: Tue, 1 Aug 2023 13:19:23 +0200 > >> Use DECLARE_FLEX() macro for 1-elem flex array members of ice_switch.c >> >> Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> >> --- >> drivers/net/ethernet/intel/ice/ice_switch.c | 53 +++++---------------- >> 1 file changed, 12 insertions(+), 41 deletions(-) >> >> diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c >> index a7afb612fe32..41679cb6b548 100644 >> --- a/drivers/net/ethernet/intel/ice/ice_switch.c >> +++ b/drivers/net/ethernet/intel/ice/ice_switch.c >> @@ -1812,15 +1812,11 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, >> enum ice_sw_lkup_type lkup_type, >> enum ice_adminq_opc opc) >> { >> - struct ice_aqc_alloc_free_res_elem *sw_buf; >> + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, sw_buf, elem, 1); >> + u16 buf_len = struct_size(sw_buf, elem, 1); >> struct ice_aqc_res_elem *vsi_ele; >> - u16 buf_len; >> int status; >> >> - buf_len = struct_size(sw_buf, elem, 1); >> - sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL); >> - if (!sw_buf) >> - return -ENOMEM; >> sw_buf->num_elems = cpu_to_le16(1); >> >> if (lkup_type == ICE_SW_LKUP_MAC || >> @@ -1840,8 +1836,7 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, >> sw_buf->res_type = >> cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); >> } else { >> - status = -EINVAL; >> - goto ice_aq_alloc_free_vsi_list_exit; >> + return -EINVAL; >> } >> >> if (opc == ice_aqc_opc_free_res) > > bloat-o-meter results would be nice to have in the commitmsg. I will do next time, perhaps you could tell me if I get the results right here: ./scripts/bloat-o-meter ice_switch.o{ld,} add/remove: 2/2 grow/shrink: 3/5 up/down: 560/-483 (77) Function old new delta ice_create_vsi_list_rule - 241 +241 ice_remove_vsi_list_rule 139 270 +131 ice_add_adv_rule 6047 6139 +92 ice_add_sw_recipe 2892 2972 +80 __pfx_ice_create_vsi_list_rule - 16 +16 ice_alloc_recipe 124 113 -11 __pfx_ice_aq_alloc_free_vsi_list 16 - -16 ice_free_res_cntr 185 155 -30 ice_alloc_res_cntr 154 124 -30 ice_add_update_vsi_list 1037 994 -43 ice_add_vlan_internal 1027 953 -74 ice_aq_alloc_free_vsi_list 279 - -279 Total: Before=42183, After=42260, chg +0.18% My guess here is that compiler did different decisions about what to inline where, what is biggest difference. And biggest gain here is avoidance of heap allocs, perhaps that enables gcc to shuffle things a bit too. Another guess is that b-o-m ignores heap bloat, so slight growth is expected. Values reported for ice.ko are the same, with bigger base to compute the percent off. > [...] > > Thanks, > Olek Thank you too, also for our initial talk about on the topic.
From: Przemek Kitszel <przemyslaw.kitszel@intel.com> Date: Tue, 1 Aug 2023 16:36:57 +0200 > On 8/1/23 15:48, Alexander Lobakin wrote: >> From: Przemek Kitszel <przemyslaw.kitszel@intel.com> >> Date: Tue, 1 Aug 2023 13:19:23 +0200 [...] >>> - status = -EINVAL; >>> - goto ice_aq_alloc_free_vsi_list_exit; >>> + return -EINVAL; >>> } >>> if (opc == ice_aqc_opc_free_res) >> >> bloat-o-meter results would be nice to have in the commitmsg. > > I will do next time, perhaps you could tell me if I get the results > right here: > > ./scripts/bloat-o-meter ice_switch.o{ld,} > add/remove: 2/2 grow/shrink: 3/5 up/down: 560/-483 (77) > Function old new delta > ice_create_vsi_list_rule - 241 +241 > ice_remove_vsi_list_rule 139 270 +131 > ice_add_adv_rule 6047 6139 +92 > ice_add_sw_recipe 2892 2972 +80 > __pfx_ice_create_vsi_list_rule - 16 +16 > ice_alloc_recipe 124 113 -11 > __pfx_ice_aq_alloc_free_vsi_list 16 - -16 > ice_free_res_cntr 185 155 -30 > ice_alloc_res_cntr 154 124 -30 > ice_add_update_vsi_list 1037 994 -43 > ice_add_vlan_internal 1027 953 -74 > ice_aq_alloc_free_vsi_list 279 - -279 > Total: Before=42183, After=42260, chg +0.18% > > My guess here is that compiler did different decisions about what to > inline where, what is biggest difference. 77 bytes is very good result, because see below. > And biggest gain here is avoidance of heap allocs, perhaps that enables > gcc to shuffle things a bit too. Exactly, having the stack grown only by 77 bytes after avoiding -- how many? -- a lot of heap allocations sounds great. > Another guess is that b-o-m ignores heap bloat, so slight growth is > expected. BOM can't calculate any heap usage, it simply compares symbol sizes in object files. (BTW, passing /dev/null as the first "object file" is legit, in case you just want to see sorted symbol sizes in your module or vmlinux) > > Values reported for ice.ko are the same, with bigger base to compute the > percent off. > >> [...] >> >> Thanks, >> Olek > > Thank you too, also for our initial talk about on the topic. No problem, I really feel like this macro would be very useful. Thanks, Olek
On Tue, Aug 01, 2023 at 01:19:23PM +0200, Przemek Kitszel wrote: > Use DECLARE_FLEX() macro for 1-elem flex array members of ice_switch.c > > Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> > --- > drivers/net/ethernet/intel/ice/ice_switch.c | 53 +++++---------------- > 1 file changed, 12 insertions(+), 41 deletions(-) > > diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c > index a7afb612fe32..41679cb6b548 100644 > --- a/drivers/net/ethernet/intel/ice/ice_switch.c > +++ b/drivers/net/ethernet/intel/ice/ice_switch.c > @@ -1812,15 +1812,11 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, > enum ice_sw_lkup_type lkup_type, > enum ice_adminq_opc opc) > { > - struct ice_aqc_alloc_free_res_elem *sw_buf; > + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, sw_buf, elem, 1); > + u16 buf_len = struct_size(sw_buf, elem, 1); With the macro I suggested, I think this line can become: u16 buf_len = __builtin_object_size(sw_buf, 1); but either is fine. (N.B. the "1" here is a bitfield, not the "1" size above). -Kees
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index a7afb612fe32..41679cb6b548 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1812,15 +1812,11 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type, enum ice_adminq_opc opc) { - struct ice_aqc_alloc_free_res_elem *sw_buf; + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, sw_buf, elem, 1); + u16 buf_len = struct_size(sw_buf, elem, 1); struct ice_aqc_res_elem *vsi_ele; - u16 buf_len; int status; - buf_len = struct_size(sw_buf, elem, 1); - sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL); - if (!sw_buf) - return -ENOMEM; sw_buf->num_elems = cpu_to_le16(1); if (lkup_type == ICE_SW_LKUP_MAC || @@ -1840,8 +1836,7 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); } else { - status = -EINVAL; - goto ice_aq_alloc_free_vsi_list_exit; + return -EINVAL; } if (opc == ice_aqc_opc_free_res) @@ -1849,16 +1844,14 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL); if (status) - goto ice_aq_alloc_free_vsi_list_exit; + return status; if (opc == ice_aqc_opc_alloc_res) { vsi_ele = &sw_buf->elem[0]; *vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp); } -ice_aq_alloc_free_vsi_list_exit: - devm_kfree(ice_hw_to_dev(hw), sw_buf); - return status; + return 0; } /** @@ -2088,15 +2081,10 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, */ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) { - struct ice_aqc_alloc_free_res_elem *sw_buf; - u16 buf_len; + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, sw_buf, elem, 1); + u16 buf_len = struct_size(sw_buf, elem, 1); int status; - buf_len = struct_size(sw_buf, elem, 1); - sw_buf = kzalloc(buf_len, GFP_KERNEL); - if (!sw_buf) - return -ENOMEM; - sw_buf->num_elems = cpu_to_le16(1); sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE << ICE_AQC_RES_TYPE_S) | @@ -2105,7 +2093,6 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) ice_aqc_opc_alloc_res, NULL); if (!status) *rid = le16_to_cpu(sw_buf->elem[0].e.sw_resp); - kfree(sw_buf); return status; } @@ -4482,16 +4469,10 @@ int ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 *counter_id) { - struct ice_aqc_alloc_free_res_elem *buf; - u16 buf_len; + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, buf, elem, 1); + u16 buf_len = struct_size(buf, elem, 1); int status; - /* Allocate resource */ - buf_len = struct_size(buf, elem, 1); - buf = kzalloc(buf_len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - buf->num_elems = cpu_to_le16(num_items); buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & ICE_AQC_RES_TYPE_M) | alloc_shared); @@ -4499,12 +4480,9 @@ ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, status = ice_aq_alloc_free_res(hw, 1, buf, buf_len, ice_aqc_opc_alloc_res, NULL); if (status) - goto exit; + return status; *counter_id = le16_to_cpu(buf->elem[0].e.sw_resp); - -exit: - kfree(buf); return status; } @@ -4520,16 +4498,10 @@ int ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id) { - struct ice_aqc_alloc_free_res_elem *buf; - u16 buf_len; + DECLARE_FLEX(struct ice_aqc_alloc_free_res_elem *, buf, elem, 1); + u16 buf_len = struct_size(buf, elem, 1); int status; - /* Free resource */ - buf_len = struct_size(buf, elem, 1); - buf = kzalloc(buf_len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - buf->num_elems = cpu_to_le16(num_items); buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & ICE_AQC_RES_TYPE_M) | alloc_shared); @@ -4540,7 +4512,6 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, if (status) ice_debug(hw, ICE_DBG_SW, "counter resource could not be freed\n"); - kfree(buf); return status; }
Use DECLARE_FLEX() macro for 1-elem flex array members of ice_switch.c Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> --- drivers/net/ethernet/intel/ice/ice_switch.c | 53 +++++---------------- 1 file changed, 12 insertions(+), 41 deletions(-)