[v2] drm/i915: use static const array for PICK macro
diff mbox

Message ID 20171211124640.1010542-1-arnd@arndb.de
State New
Headers show

Commit Message

Arnd Bergmann Dec. 11, 2017, 12:46 p.m. UTC
The varargs macro trick in _PIPE3/_PHY3/_PORT3 was meant as an optimization
to shrink the i915 kernel module by around 1000 bytes. However, the
downside is a size regression with CONFIG_KASAN, as I found from stack size
warnings with gcc-7.0.1:

before:
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 176 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 224 bytes is larger than 100 bytes [-Werror=frame-larger-than=]

after:
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 1016 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 1960 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]

I also checked the module sizes with gcc-7.0.1 and got:

original:
   text	   data	    bss	    dec	    hex	filename
2380830	1155436	   4448	3540714	 3606ea	drivers/gpu/drm/i915/i915-kasan.o
1298054	 543692	   2884	1844630	 1c2596	drivers/gpu/drm/i915/i915-nokasan.o

after ce64645d86ac:
   text	   data	    bss	    dec	    hex	filename
2389515	1154476	   4448	3548439	 362517	drivers/gpu/drm/i915/i915-kasan.o
1299639	 543692	   2884	1846215	 1c2bc7	drivers/gpu/drm/i915/i915-nokasan.o

with this patch:
   text	   data	    bss	    dec	    hex	filename
2381275	1163884	   4448	3549607	 3629a7	drivers/gpu/drm/i915/i915-kasan.o
1296038	 543692	   2884	1842614	 1c1db6	drivers/gpu/drm/i915/i915-nokasan.o

This shows that ce64645d86ac actually grew .text both with and without kasan,
and my version gets most of it back at the expense of larger .data when
kasan is enabled.

Fixes: ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers")
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80114
Link: https://lkml.org/lkml/2017/3/20/1022
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
---
 drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

Comments

Chris Wilson Dec. 11, 2017, 6:40 p.m. UTC | #1
Quoting Chris Wilson (2017-12-11 12:51:42)
> Quoting Arnd Bergmann (2017-12-11 12:46:22)
> > The varargs macro trick in _PIPE3/_PHY3/_PORT3 was meant as an optimization
> > to shrink the i915 kernel module by around 1000 bytes. However, the
> > downside is a size regression with CONFIG_KASAN, as I found from stack size
> > warnings with gcc-7.0.1:
> > 
> > before:
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 176 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 224 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
> > 
> > after:
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 1016 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 1960 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
> > 
> > I also checked the module sizes and got with gcc-7.0.1
> > 
> > original:
> >    text    data     bss     dec     hex filename
> > 2380830 1155436    4448 3540714  3606ea drivers/gpu/drm/i915/i915-kasan.o
> > 1298054  543692    2884 1844630  1c2596 drivers/gpu/drm/i915/i915-nokasan.o
> > 
> > after ce64645d86ac:
> >    text    data     bss     dec     hex filename
> > 2389515 1154476    4448 3548439  362517 drivers/gpu/drm/i915/i915-kasan.o
> > 1299639  543692    2884 1846215  1c2bc7 drivers/gpu/drm/i915/i915-nokasan.o
> > 
> > with this patch:
> >    text    data     bss     dec     hex filename
> > 2381275 1163884    4448 3549607  3629a7 drivers/gpu/drm/i915/i915-kasan.o
> > 1296038  543692    2884 1842614  1c1db6 drivers/gpu/drm/i915/i915-nokasan.o
> > 
> > Actually showing a code size growth in .text both with and without kasan,
> > and my version gets most of it back at the expense of larger .data when
> > kasan is enabled.
> > 
> > Fixes: ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers")
> > Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80114
> > Link: https://lkml.org/lkml/2017/3/20/1022
> > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> > ---
> > v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
> >  1 file changed, 9 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index 09bf043c1c2e..36f4408503e1 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -139,7 +139,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
> >         return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
> >  }
> >  
> > -#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
> > +#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})
> 
> Is gcc smart enough for
>         if (__builtin_constant_p(__index)) {
>                 ((const u32 []){ __VA_ARGS__ })[__index];
>         } else {
>                 static const u32 __arr[] = { __VA_ARGS__ };
>                 __arr[__index];
>         }
> ?

Not really, we don't have enough constants for it to make a substantial
difference:

add/remove: 1/0 grow/shrink: 3/5 up/down: 617/-604 (13)
Function                                     old     new   delta
cnl_ddi_vswing_program.isra                    -     574    +574
bxt_ddi_phy_is_enabled                       220     241     +21
bxt_ddi_phy_set_signal_level                 537     556     +19
i9xx_get_pipe_config                        1474    1477      +3
bxt_ddi_phy_verify_state                     411     408      -3
_bxt_ddi_phy_init                            956     950      -6
vlv_display_power_well_init                  470     461      -9
bxt_ddi_pll_get_hw_state                     774     762     -12
cnl_ddi_vswing_sequence                     1166     592    -574
Total: Before=13461532, After=13461545, chg +0.00%

Of particular note, the size of __arr[] is not reduced, so gcc is either
already eliminating the static[] for a constant index, or not eliminating
the redundant branch here.
-Chris
Arnd Bergmann Jan. 16, 2018, 4:42 p.m. UTC | #2
On Mon, Dec 11, 2017 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Quoting Chris Wilson (2017-12-11 12:51:42)
>> Quoting Arnd Bergmann (2017-12-11 12:46:22)
>> > v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
>> > ---
>> >  drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
>> >  1 file changed, 9 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> > index 09bf043c1c2e..36f4408503e1 100644
>> > --- a/drivers/gpu/drm/i915/i915_reg.h
>> > +++ b/drivers/gpu/drm/i915/i915_reg.h
>> > @@ -139,7 +139,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>> >         return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
>> >  }
>> >
>> > -#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
>> > +#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})
>>
>> Is gcc smart enough for
>>         if (__builtin_context_p(__index)) {
>>                 ((const u32 []){ __VA_ARGS__ })[__index];
>>         } else {
>>                 static const u32 __arr[] = { __VA_ARGS__ };
>>                 __arr[__index];
>>         }
>> ?
>
> Not really, we don't have enough constants for it to make a substantial
> difference:
>
> add/remove: 1/0 grow/shrink: 3/5 up/down: 617/-604 (13)
> Function                                     old     new   delta
> cnl_ddi_vswing_program.isra                    -     574    +574
> bxt_ddi_phy_is_enabled                       220     241     +21
> bxt_ddi_phy_set_signal_level                 537     556     +19
> i9xx_get_pipe_config                        1474    1477      +3
> bxt_ddi_phy_verify_state                     411     408      -3
> _bxt_ddi_phy_init                            956     950      -6
> vlv_display_power_well_init                  470     461      -9
> bxt_ddi_pll_get_hw_state                     774     762     -12
> cnl_ddi_vswing_sequence                     1166     592    -574
> Total: Before=13461532, After=13461545, chg +0.00%
>
> Of particular note the size of __arr[] is not reduced, so gcc is already
> eliminating the static[] for constant index, or not eliminating the
> redundant branch here.

I noticed we never concluded here. Did you see anything wrong with my
workaround in the end or could we just apply it to avoid the stack
size regression?

       Arnd

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 09bf043c1c2e..36f4408503e1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -139,7 +139,7 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 	return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
 }
 
-#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
+#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})
 
 #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a)))
 #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b))
@@ -3097,10 +3097,10 @@  enum i915_power_well_id {
 /*
  * Clock control & power management
  */
-#define _DPLL_A (dev_priv->info.display_mmio_offset + 0x6014)
-#define _DPLL_B (dev_priv->info.display_mmio_offset + 0x6018)
-#define _CHV_DPLL_C (dev_priv->info.display_mmio_offset + 0x6030)
-#define DPLL(pipe) _MMIO_PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
+#define _DPLL_A			0x6014
+#define _DPLL_B			0x6018
+#define _CHV_DPLL_C		0x6030
+#define DPLL(pipe) _MMIO(dev_priv->info.display_mmio_offset + _PICK((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C))
 
 #define VGA0	_MMIO(0x6000)
 #define VGA1	_MMIO(0x6004)
@@ -3196,10 +3196,10 @@  enum i915_power_well_id {
 #define   SDVO_MULTIPLIER_SHIFT_HIRES		4
 #define   SDVO_MULTIPLIER_SHIFT_VGA		0
 
-#define _DPLL_A_MD (dev_priv->info.display_mmio_offset + 0x601c)
-#define _DPLL_B_MD (dev_priv->info.display_mmio_offset + 0x6020)
-#define _CHV_DPLL_C_MD (dev_priv->info.display_mmio_offset + 0x603c)
-#define DPLL_MD(pipe) _MMIO_PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
+#define _DPLL_A_MD				0x601c
+#define _DPLL_B_MD				0x6020
+#define _CHV_DPLL_C_MD				0x603c
+#define DPLL_MD(pipe) _MMIO(dev_priv->info.display_mmio_offset + _PICK((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD))
 
 /*
  * UDI pixel divider, controlling how many pixels are stuffed into a packet.