Message ID | 1491381329-3995-4-git-send-email-mark.cave-ayland@ilande.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 04/05/2017 05:35 AM, Mark Cave-Ayland wrote: > These aren't required since we can use the display width and height directly. > > Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> > --- > hw/display/cg3.c | 15 ++++++--------- > 1 file changed, 6 insertions(+), 9 deletions(-) > > diff --git a/hw/display/cg3.c b/hw/display/cg3.c > index b42f60e..178a6dd 100644 > --- a/hw/display/cg3.c > +++ b/hw/display/cg3.c > @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) > uint32_t *data; > uint32_t dval; > int x, y, y_start; > - unsigned int width, height; > ram_addr_t page, page_min, page_max; > > if (surface_bits_per_pixel(surface) != 32) { > return; > } > - width = s->width; > - height = s->height; > > y_start = -1; > page_min = -1; > @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) > data = (uint32_t *)surface_data(surface); > > memory_region_sync_dirty_bitmap(&s->vram_mem); > - for (y = 0; y < height; y++) { > + for (y = 0; y < s->height; y++) { > int update = s->full_update; > > - page = y * width; > - update |= memory_region_get_dirty(&s->vram_mem, page, width, > + page = y * s->width; > + update |= memory_region_get_dirty(&s->vram_mem, page, s->width, > DIRTY_MEMORY_VGA); > if (update) { > if (y_start < 0) { > @@ -127,7 +124,7 @@ static void cg3_update_display(void *opaque) > page_max = page; > } > > - for (x = 0; x < width; x++) { > + for (x = 0; x < s->width; x++) { > dval = *pix++; > dval = (s->r[dval] << 16) | (s->g[dval] << 8) | s->b[dval]; > *data++ = dval; > @@ -137,8 +134,8 @@ static void cg3_update_display(void *opaque) > dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start); > y_start = -1; > } > - pix += width; > - data += width; > + pix += s->width; > + data += s->width; > } > } > s->full_update = 0; >
On 04/05/2017 01:35 AM, Mark Cave-Ayland wrote: > These aren't required since we can use the display width and height directly. > > Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> > --- > hw/display/cg3.c | 15 ++++++--------- > 1 file changed, 6 insertions(+), 9 deletions(-) > > diff --git a/hw/display/cg3.c b/hw/display/cg3.c > index b42f60e..178a6dd 100644 > --- a/hw/display/cg3.c > +++ b/hw/display/cg3.c > @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) > uint32_t *data; > uint32_t dval; > int x, y, y_start; > - unsigned int width, height; > ram_addr_t page, page_min, page_max; > > if (surface_bits_per_pixel(surface) != 32) { > return; > } > - width = s->width; > - height = s->height; > > y_start = -1; > page_min = -1; > @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) > data = (uint32_t *)surface_data(surface); > > memory_region_sync_dirty_bitmap(&s->vram_mem); > - for (y = 0; y < height; y++) { > + for (y = 0; y < s->height; y++) { I suspect the generated code is much worse, since the compiler can no longer tell that the loop bounds are constant. You probably do want to keep width and height in local variables across this function. r~
On 15/04/17 11:54, Richard Henderson wrote: > On 04/05/2017 01:35 AM, Mark Cave-Ayland wrote: >> These aren't required since we can use the display width and height >> directly. >> >> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> >> --- >> hw/display/cg3.c | 15 ++++++--------- >> 1 file changed, 6 insertions(+), 9 deletions(-) >> >> diff --git a/hw/display/cg3.c b/hw/display/cg3.c >> index b42f60e..178a6dd 100644 >> --- a/hw/display/cg3.c >> +++ b/hw/display/cg3.c >> @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) >> uint32_t *data; >> uint32_t dval; >> int x, y, y_start; >> - unsigned int width, height; >> ram_addr_t page, page_min, page_max; >> >> if (surface_bits_per_pixel(surface) != 32) { >> return; >> } >> - width = s->width; >> - height = s->height; >> >> y_start = -1; >> page_min = -1; >> @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) >> data = (uint32_t *)surface_data(surface); >> >> memory_region_sync_dirty_bitmap(&s->vram_mem); >> - for (y = 0; y < height; y++) { >> + for (y = 0; y < s->height; y++) { > > I suspect the generated code is much worse, since the compiler can no > longer tell that the loop bounds are constant. > > You probably do want to keep width and height in local variables across > this function. Can you explain a bit more about this? My thoughts were that with optimisation enabled the compiler would assume s->width is constant throughout the loop by default (or at least in OpenBIOS I've had to explicitly mark variables as volatile to ensure the compiler refetches the original value instead of reusing a previous copy from the stack/registers)? ATB, Mark.
On 04/18/2017 07:38 AM, Mark Cave-Ayland wrote: > On 15/04/17 11:54, Richard Henderson wrote: > >> On 04/05/2017 01:35 AM, Mark Cave-Ayland wrote: >>> These aren't required since we can use the display width and height >>> directly. >>> >>> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> >>> --- >>> hw/display/cg3.c | 15 ++++++--------- >>> 1 file changed, 6 insertions(+), 9 deletions(-) >>> >>> diff --git a/hw/display/cg3.c b/hw/display/cg3.c >>> index b42f60e..178a6dd 100644 >>> --- a/hw/display/cg3.c >>> +++ b/hw/display/cg3.c >>> @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) >>> uint32_t *data; >>> uint32_t dval; >>> int x, y, y_start; >>> - unsigned int width, height; >>> ram_addr_t page, page_min, page_max; >>> >>> if (surface_bits_per_pixel(surface) != 32) { >>> return; >>> } >>> - width = s->width; >>> - height = s->height; >>> >>> y_start = -1; >>> page_min = -1; >>> @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) >>> data = (uint32_t *)surface_data(surface); >>> >>> memory_region_sync_dirty_bitmap(&s->vram_mem); >>> - for (y = 0; y < height; y++) { >>> + for (y = 0; y < s->height; y++) { >> >> I suspect the generated code is much worse, since the compiler can no >> longer tell that the loop bounds are constant. >> >> You probably do want to keep width and height in local variables across >> this function. > > Can you explain a bit more about this? My thoughts were that with > optimisation enabled the compiler would assume s->width is constant > throughout the loop by default (or at least in OpenBIOS I've had to > explicitly mark variables as volatile to ensure the compiler refetches > the original value instead of reusing a previous copy from the > stack/registers)? With data in memory, as for foo->bar, the compiler is obliged to consider the memory may be changed for e.g. an intervening function call baz(), when foo is "global", as it is here. You have many of those in those loops. r~
On 18/04/17 15:55, Richard Henderson wrote: > On 04/18/2017 07:38 AM, Mark Cave-Ayland wrote: >> On 15/04/17 11:54, Richard Henderson wrote: >> >>> On 04/05/2017 01:35 AM, Mark Cave-Ayland wrote: >>>> These aren't required since we can use the display width and height >>>> directly. >>>> >>>> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> >>>> --- >>>> hw/display/cg3.c | 15 ++++++--------- >>>> 1 file changed, 6 insertions(+), 9 deletions(-) >>>> >>>> diff --git a/hw/display/cg3.c b/hw/display/cg3.c >>>> index b42f60e..178a6dd 100644 >>>> --- a/hw/display/cg3.c >>>> +++ b/hw/display/cg3.c >>>> @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) >>>> uint32_t *data; >>>> uint32_t dval; >>>> int x, y, y_start; >>>> - unsigned int width, height; >>>> ram_addr_t page, page_min, page_max; >>>> >>>> if (surface_bits_per_pixel(surface) != 32) { >>>> return; >>>> } >>>> - width = s->width; >>>> - height = s->height; >>>> >>>> y_start = -1; >>>> page_min = -1; >>>> @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) >>>> data = (uint32_t *)surface_data(surface); >>>> >>>> memory_region_sync_dirty_bitmap(&s->vram_mem); >>>> - for (y = 0; y < height; y++) { >>>> + for (y = 0; y < s->height; y++) { >>> >>> I suspect the generated code is much worse, since the compiler can no >>> longer tell that the loop bounds are constant. >>> >>> You probably do want to keep width and height in local variables across >>> this function. >> >> Can you explain a bit more about this? My thoughts were that with >> optimisation enabled the compiler would assume s->width is constant >> throughout the loop by default (or at least in OpenBIOS I've had to >> explicitly mark variables as volatile to ensure the compiler refetches >> the original value instead of reusing a previous copy from the >> stack/registers)? > > With data in memory, as for foo->bar, the compiler is obliged to > consider the memory may be changed for e.g. an intervening function call > baz(), when foo is "global", as it is here. You have many of those in > those loops. Given that you know a lot more about this than I do, I'll drop this patch for now and send a v2. I see that TCX references s->width in a similar way, however I'm going to be AFK for a while and so I'll try and send a PR later today since this is a blocker on Gerd's VGA fixes patchset - TCX can be altered later in a similar way if it is deemed to give a noticeable performance benefit. ATB, Mark.
diff --git a/hw/display/cg3.c b/hw/display/cg3.c index b42f60e..178a6dd 100644 --- a/hw/display/cg3.c +++ b/hw/display/cg3.c @@ -93,14 +93,11 @@ static void cg3_update_display(void *opaque) uint32_t *data; uint32_t dval; int x, y, y_start; - unsigned int width, height; ram_addr_t page, page_min, page_max; if (surface_bits_per_pixel(surface) != 32) { return; } - width = s->width; - height = s->height; y_start = -1; page_min = -1; @@ -110,11 +107,11 @@ static void cg3_update_display(void *opaque) data = (uint32_t *)surface_data(surface); memory_region_sync_dirty_bitmap(&s->vram_mem); - for (y = 0; y < height; y++) { + for (y = 0; y < s->height; y++) { int update = s->full_update; - page = y * width; - update |= memory_region_get_dirty(&s->vram_mem, page, width, + page = y * s->width; + update |= memory_region_get_dirty(&s->vram_mem, page, s->width, DIRTY_MEMORY_VGA); if (update) { if (y_start < 0) { @@ -127,7 +124,7 @@ static void cg3_update_display(void *opaque) page_max = page; } - for (x = 0; x < width; x++) { + for (x = 0; x < s->width; x++) { dval = *pix++; dval = (s->r[dval] << 16) | (s->g[dval] << 8) | s->b[dval]; *data++ = dval; @@ -137,8 +134,8 @@ static void cg3_update_display(void *opaque) dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start); y_start = -1; } - pix += width; - data += width; + pix += s->width; + data += s->width; } } s->full_update = 0;
These aren't required since we can use the display width and height directly. Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> --- hw/display/cg3.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)