Message ID | 20190524072551.24429-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs | expand |
On Fri, 2019-05-24 at 08:25 +0100, Chris Wilson wrote: > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc > extension that clang refuses to support as handling memory layout is too > difficult for it. > > Move the on-stack VLA to the heap. IMHO using an upper bound would be much simpler. > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- > 1 file changed, 95 insertions(+), 51 deletions(-) > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > index e2ffb93a9..0a0032bff 100644 > --- a/benchmarks/gem_wsim.c > +++ b/benchmarks/gem_wsim.c > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) > return slice_mask; > } > > +static size_t sizeof_load_balance(int count) > +{ > + struct i915_context_engines_load_balance *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > +} > + > +static struct i915_context_engines_load_balance * > +alloc_load_balance(int count) > +{ > + return calloc(1, sizeof_load_balance(count)); > +} > + > +static size_t sizeof_param_engines(int count) > +{ > + struct i915_context_param_engines *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > +} > + > +static struct i915_context_param_engines * > +alloc_param_engines(int count) > +{ > + return calloc(1, sizeof_param_engines(count)); > +} > + > +static size_t sizeof_engines_bond(int count) > +{ > + struct i915_context_engines_bond *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > +} > + > +static struct i915_context_engines_bond * > +alloc_engines_bond(int count) > +{ > + return calloc(1, sizeof_engines_bond(count)); > +} > + > static int > 
prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > { > @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > } > > if (ctx->engine_map) { > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > - ctx->engine_map_count + 1); > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > - ctx->engine_map_count); > + struct i915_context_param_engines *set_engines = > + alloc_param_engines(ctx->engine_map_count + 1); > + struct i915_context_engines_load_balance *load_balance = > + alloc_load_balance(ctx->engine_map_count); > struct drm_i915_gem_context_param param = { > .ctx_id = ctx_id, > .param = I915_CONTEXT_PARAM_ENGINES, > - .size = sizeof(set_engines), > - .value = to_user_pointer(&set_engines), > + .size = sizeof_param_engines(ctx->engine_map_count + 1), > + .value = to_user_pointer(set_engines), > }; > + struct i915_context_engines_bond *last = NULL; > > if (ctx->wants_balance) { > - set_engines.extensions = > - to_user_pointer(&load_balance); > + set_engines->extensions = > + to_user_pointer(load_balance); > > - memset(&load_balance, 0, sizeof(load_balance)); > - load_balance.base.name = > + load_balance->base.name = > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > - load_balance.num_siblings = > + load_balance->num_siblings = > ctx->engine_map_count; > > for (j = 0; j < ctx->engine_map_count; j++) > - load_balance.engines[j] = > + load_balance->engines[j] = > get_engine(ctx->engine_map[j]); > - } else { > - set_engines.extensions = 0; > } > > /* Reserve slot for virtual engine. 
*/ > - set_engines.engines[0].engine_class = > + set_engines->engines[0].engine_class = > I915_ENGINE_CLASS_INVALID; > - set_engines.engines[0].engine_instance = > + set_engines->engines[0].engine_instance = > I915_ENGINE_CLASS_INVALID_NONE; > > for (j = 1; j <= ctx->engine_map_count; j++) > - set_engines.engines[j] = > + set_engines->engines[j] = > get_engine(ctx->engine_map[j - 1]); > > + last = NULL; > for (j = 0; j < ctx->bond_count; j++) { > unsigned long mask = ctx->bonds[j].mask; > - I915_DEFINE_CONTEXT_ENGINES_BOND(bond, > - __builtin_popcount(mask)); > - struct i915_context_engines_bond *p = NULL, *prev; > + struct i915_context_engines_bond *bond = > + alloc_engines_bond(__builtin_popcount(mask)); > unsigned int b, e; > > - prev = p; > - p = alloca(sizeof(bond)); > - assert(p); > - memset(p, 0, sizeof(bond)); > - > - if (j == 0) > - load_balance.base.next_extension = > - to_user_pointer(p); > - else if (j < (ctx->bond_count - 1)) > - prev->base.next_extension = > - to_user_pointer(p); > + bond->base.next_extension = to_user_pointer(last); > + bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > > - p->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > - p->virtual_index = 0; > - p->master = get_engine(ctx->bonds[j].master); > + bond->virtual_index = 0; > + bond->master = get_engine(ctx->bonds[j].master); > > for (b = 0, e = 0; mask; e++, mask >>= 1) { > unsigned int idx; > @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > if (!(mask & 1)) > continue; > > - idx = find_engine(&set_engines.engines[1], > + idx = find_engine(&set_engines->engines[1], > ctx->engine_map_count, > e); > - p->engines[b++] = > - set_engines.engines[1 + idx]; > + bond->engines[b++] = > + set_engines->engines[1 + idx]; > } > + > + last = bond; > } > + load_balance->base.next_extension = to_user_pointer(last); > > gem_context_set_param(fd, &param); > + > + while (last) { > + struct i915_context_engines_bond *next = > + 
from_user_pointer(last->base.next_extension); > + free(last); > + last = next; > + } > + free(load_balance); > + free(set_engines); > } else if (ctx->wants_balance) { > const unsigned int count = num_engines_in_class(VCS); > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > - count); > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > - count + 1); > + struct i915_context_engines_load_balance *load_balance = > + alloc_load_balance(count); > + struct i915_context_param_engines *set_engines = > + alloc_param_engines(count + 1); > struct drm_i915_gem_context_param param = { > .ctx_id = ctx_id, > .param = I915_CONTEXT_PARAM_ENGINES, > - .size = sizeof(set_engines), > - .value = to_user_pointer(&set_engines), > + .size = sizeof_param_engines(count + 1), > + .value = to_user_pointer(set_engines), > }; > > - set_engines.extensions = to_user_pointer(&load_balance); > + set_engines->extensions = to_user_pointer(load_balance); > > - set_engines.engines[0].engine_class = > + set_engines->engines[0].engine_class = > I915_ENGINE_CLASS_INVALID; > - set_engines.engines[0].engine_instance = > + set_engines->engines[0].engine_instance = > I915_ENGINE_CLASS_INVALID_NONE; > - fill_engines_class(&set_engines.engines[1], VCS); > + fill_engines_class(&set_engines->engines[1], VCS); > > - memset(&load_balance, 0, sizeof(load_balance)); > - load_balance.base.name = > + load_balance->base.name = > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > - load_balance.num_siblings = count; > + load_balance->num_siblings = count; > > - fill_engines_class(&load_balance.engines[0], VCS); > + fill_engines_class(&load_balance->engines[0], VCS); > > gem_context_set_param(fd, &param); > + > + free(set_engines); > + free(load_balance); > } > > if (wrk->sseu) {
On 24/05/2019 08:25, Chris Wilson wrote: > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc > extension that clang refuses to support as handling memory layout is too > difficult for it. > > Move the on-stack VLA to the heap. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- > 1 file changed, 95 insertions(+), 51 deletions(-) > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > index e2ffb93a9..0a0032bff 100644 > --- a/benchmarks/gem_wsim.c > +++ b/benchmarks/gem_wsim.c > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) > return slice_mask; > } > > +static size_t sizeof_load_balance(int count) > +{ > + struct i915_context_engines_load_balance *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); This seems wrong - it is bound to trigger. > + return sizeof(*ptr) + sizeof(ptr->engines[count]); So the size of an engine needs to be multiplied by count. > +} > + > +static struct i915_context_engines_load_balance * > +alloc_load_balance(int count) > +{ > + return calloc(1, sizeof_load_balance(count)); How about alloca so cleanup is simpler? Or is alloca also on the unpopular list? Or possibly what Simon suggested, just large temporary stack arrays would be enough and the easiest diff. Just with an assert that it fits. I can do that if you want? 
Regards, Tvrtko > +} > + > +static size_t sizeof_param_engines(int count) > +{ > + struct i915_context_param_engines *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > +} > + > +static struct i915_context_param_engines * > +alloc_param_engines(int count) > +{ > + return calloc(1, sizeof_param_engines(count)); > +} > + > +static size_t sizeof_engines_bond(int count) > +{ > + struct i915_context_engines_bond *ptr; > + > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > +} > + > +static struct i915_context_engines_bond * > +alloc_engines_bond(int count) > +{ > + return calloc(1, sizeof_engines_bond(count)); > +} > + > static int > prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > { > @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > } > > if (ctx->engine_map) { > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > - ctx->engine_map_count + 1); > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > - ctx->engine_map_count); > + struct i915_context_param_engines *set_engines = > + alloc_param_engines(ctx->engine_map_count + 1); > + struct i915_context_engines_load_balance *load_balance = > + alloc_load_balance(ctx->engine_map_count); > struct drm_i915_gem_context_param param = { > .ctx_id = ctx_id, > .param = I915_CONTEXT_PARAM_ENGINES, > - .size = sizeof(set_engines), > - .value = to_user_pointer(&set_engines), > + .size = sizeof_param_engines(ctx->engine_map_count + 1), > + .value = to_user_pointer(set_engines), > }; > + struct i915_context_engines_bond *last = NULL; > > if (ctx->wants_balance) { > - set_engines.extensions = > - to_user_pointer(&load_balance); > + set_engines->extensions = > + to_user_pointer(load_balance); > > - memset(&load_balance, 0, sizeof(load_balance)); > - load_balance.base.name = > + 
load_balance->base.name = > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > - load_balance.num_siblings = > + load_balance->num_siblings = > ctx->engine_map_count; > > for (j = 0; j < ctx->engine_map_count; j++) > - load_balance.engines[j] = > + load_balance->engines[j] = > get_engine(ctx->engine_map[j]); > - } else { > - set_engines.extensions = 0; > } > > /* Reserve slot for virtual engine. */ > - set_engines.engines[0].engine_class = > + set_engines->engines[0].engine_class = > I915_ENGINE_CLASS_INVALID; > - set_engines.engines[0].engine_instance = > + set_engines->engines[0].engine_instance = > I915_ENGINE_CLASS_INVALID_NONE; > > for (j = 1; j <= ctx->engine_map_count; j++) > - set_engines.engines[j] = > + set_engines->engines[j] = > get_engine(ctx->engine_map[j - 1]); > > + last = NULL; > for (j = 0; j < ctx->bond_count; j++) { > unsigned long mask = ctx->bonds[j].mask; > - I915_DEFINE_CONTEXT_ENGINES_BOND(bond, > - __builtin_popcount(mask)); > - struct i915_context_engines_bond *p = NULL, *prev; > + struct i915_context_engines_bond *bond = > + alloc_engines_bond(__builtin_popcount(mask)); > unsigned int b, e; > > - prev = p; > - p = alloca(sizeof(bond)); > - assert(p); > - memset(p, 0, sizeof(bond)); > - > - if (j == 0) > - load_balance.base.next_extension = > - to_user_pointer(p); > - else if (j < (ctx->bond_count - 1)) > - prev->base.next_extension = > - to_user_pointer(p); > + bond->base.next_extension = to_user_pointer(last); > + bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > > - p->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > - p->virtual_index = 0; > - p->master = get_engine(ctx->bonds[j].master); > + bond->virtual_index = 0; > + bond->master = get_engine(ctx->bonds[j].master); > > for (b = 0, e = 0; mask; e++, mask >>= 1) { > unsigned int idx; > @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > if (!(mask & 1)) > continue; > > - idx = find_engine(&set_engines.engines[1], > + idx = 
find_engine(&set_engines->engines[1], > ctx->engine_map_count, > e); > - p->engines[b++] = > - set_engines.engines[1 + idx]; > + bond->engines[b++] = > + set_engines->engines[1 + idx]; > } > + > + last = bond; > } > + load_balance->base.next_extension = to_user_pointer(last); > > gem_context_set_param(fd, &param); > + > + while (last) { > + struct i915_context_engines_bond *next = > + from_user_pointer(last->base.next_extension); > + free(last); > + last = next; > + } > + free(load_balance); > + free(set_engines); > } else if (ctx->wants_balance) { > const unsigned int count = num_engines_in_class(VCS); > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > - count); > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > - count + 1); > + struct i915_context_engines_load_balance *load_balance = > + alloc_load_balance(count); > + struct i915_context_param_engines *set_engines = > + alloc_param_engines(count + 1); > struct drm_i915_gem_context_param param = { > .ctx_id = ctx_id, > .param = I915_CONTEXT_PARAM_ENGINES, > - .size = sizeof(set_engines), > - .value = to_user_pointer(&set_engines), > + .size = sizeof_param_engines(count + 1), > + .value = to_user_pointer(set_engines), > }; > > - set_engines.extensions = to_user_pointer(&load_balance); > + set_engines->extensions = to_user_pointer(load_balance); > > - set_engines.engines[0].engine_class = > + set_engines->engines[0].engine_class = > I915_ENGINE_CLASS_INVALID; > - set_engines.engines[0].engine_instance = > + set_engines->engines[0].engine_instance = > I915_ENGINE_CLASS_INVALID_NONE; > - fill_engines_class(&set_engines.engines[1], VCS); > + fill_engines_class(&set_engines->engines[1], VCS); > > - memset(&load_balance, 0, sizeof(load_balance)); > - load_balance.base.name = > + load_balance->base.name = > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > - load_balance.num_siblings = count; > + load_balance->num_siblings = count; > > - fill_engines_class(&load_balance.engines[0], VCS); > + 
fill_engines_class(&load_balance->engines[0], VCS); > > gem_context_set_param(fd, &param); > + > + free(set_engines); > + free(load_balance); > } > > if (wrk->sseu) { >
On Fri, 2019-05-24 at 09:20 +0100, Tvrtko Ursulin wrote: > On 24/05/2019 08:25, Chris Wilson wrote: > > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc > > extension that clang refuses to support as handling memory layout is too > > difficult for it. > > > > Move the on-stack VLA to the heap. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- > > 1 file changed, 95 insertions(+), 51 deletions(-) > > > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > > index e2ffb93a9..0a0032bff 100644 > > --- a/benchmarks/gem_wsim.c > > +++ b/benchmarks/gem_wsim.c > > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) > > return slice_mask; > > } > > > > +static size_t sizeof_load_balance(int count) > > +{ > > + struct i915_context_engines_load_balance *ptr; > > + > > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > > This seems wrong - is bound to trigger. > > > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > > So size of of engine needs to be multiplied by count. > > > +} > > + > > +static struct i915_context_engines_load_balance * > > +alloc_load_balance(int count) > > +{ > > + return calloc(1, sizeof_load_balance(count)); > > How about alloca so cleanup is simpler? Or is alloca also on the > unpopular list? > > Or possibly what Simon suggested, just a large temporary stack arrays > would be enough and easiest diff. Just with an assert that it fits. > > I can do that if you want? I think Arek already has a patch for this. 
> Regards, > > Tvrtko > > > +} > > + > > +static size_t sizeof_param_engines(int count) > > +{ > > + struct i915_context_param_engines *ptr; > > + > > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > > +} > > + > > +static struct i915_context_param_engines * > > +alloc_param_engines(int count) > > +{ > > + return calloc(1, sizeof_param_engines(count)); > > +} > > + > > +static size_t sizeof_engines_bond(int count) > > +{ > > + struct i915_context_engines_bond *ptr; > > + > > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > > +} > > + > > +static struct i915_context_engines_bond * > > +alloc_engines_bond(int count) > > +{ > > + return calloc(1, sizeof_engines_bond(count)); > > +} > > + > > static int > > prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > > { > > @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > > } > > > > if (ctx->engine_map) { > > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > > - ctx->engine_map_count + 1); > > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > > - ctx->engine_map_count); > > + struct i915_context_param_engines *set_engines = > > + alloc_param_engines(ctx->engine_map_count + 1); > > + struct i915_context_engines_load_balance *load_balance = > > + alloc_load_balance(ctx->engine_map_count); > > struct drm_i915_gem_context_param param = { > > .ctx_id = ctx_id, > > .param = I915_CONTEXT_PARAM_ENGINES, > > - .size = sizeof(set_engines), > > - .value = to_user_pointer(&set_engines), > > + .size = sizeof_param_engines(ctx->engine_map_count + 1), > > + .value = to_user_pointer(set_engines), > > }; > > + struct i915_context_engines_bond *last = NULL; > > > > if (ctx->wants_balance) { > > - set_engines.extensions = > > - to_user_pointer(&load_balance); > > + set_engines->extensions = > 
> + to_user_pointer(load_balance); > > > > - memset(&load_balance, 0, sizeof(load_balance)); > > - load_balance.base.name = > > + load_balance->base.name = > > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > > - load_balance.num_siblings = > > + load_balance->num_siblings = > > ctx->engine_map_count; > > > > for (j = 0; j < ctx->engine_map_count; j++) > > - load_balance.engines[j] = > > + load_balance->engines[j] = > > get_engine(ctx->engine_map[j]); > > - } else { > > - set_engines.extensions = 0; > > } > > > > /* Reserve slot for virtual engine. */ > > - set_engines.engines[0].engine_class = > > + set_engines->engines[0].engine_class = > > I915_ENGINE_CLASS_INVALID; > > - set_engines.engines[0].engine_instance = > > + set_engines->engines[0].engine_instance = > > I915_ENGINE_CLASS_INVALID_NONE; > > > > for (j = 1; j <= ctx->engine_map_count; j++) > > - set_engines.engines[j] = > > + set_engines->engines[j] = > > get_engine(ctx->engine_map[j - 1]); > > > > + last = NULL; > > for (j = 0; j < ctx->bond_count; j++) { > > unsigned long mask = ctx->bonds[j].mask; > > - I915_DEFINE_CONTEXT_ENGINES_BOND(bond, > > - __builtin_popcount(mask)); > > - struct i915_context_engines_bond *p = NULL, *prev; > > + struct i915_context_engines_bond *bond = > > + alloc_engines_bond(__builtin_popcount(mask)); > > unsigned int b, e; > > > > - prev = p; > > - p = alloca(sizeof(bond)); > > - assert(p); > > - memset(p, 0, sizeof(bond)); > > - > > - if (j == 0) > > - load_balance.base.next_extension = > > - to_user_pointer(p); > > - else if (j < (ctx->bond_count - 1)) > > - prev->base.next_extension = > > - to_user_pointer(p); > > + bond->base.next_extension = to_user_pointer(last); > > + bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > > > > - p->base.name = I915_CONTEXT_ENGINES_EXT_BOND; > > - p->virtual_index = 0; > > - p->master = get_engine(ctx->bonds[j].master); > > + bond->virtual_index = 0; > > + bond->master = get_engine(ctx->bonds[j].master); > > > > for (b = 0, e = 0; mask; e++, mask 
>>= 1) { > > unsigned int idx; > > @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) > > if (!(mask & 1)) > > continue; > > > > - idx = find_engine(&set_engines.engines[1], > > + idx = find_engine(&set_engines->engines[1], > > ctx->engine_map_count, > > e); > > - p->engines[b++] = > > - set_engines.engines[1 + idx]; > > + bond->engines[b++] = > > + set_engines->engines[1 + idx]; > > } > > + > > + last = bond; > > } > > + load_balance->base.next_extension = to_user_pointer(last); > > > > gem_context_set_param(fd, ¶m); > > + > > + while (last) { > > + struct i915_context_engines_bond *next = > > + from_user_pointer(last->base.next_extension); > > + free(last); > > + last = next; > > + } > > + free(load_balance); > > + free(set_engines); > > } else if (ctx->wants_balance) { > > const unsigned int count = num_engines_in_class(VCS); > > - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, > > - count); > > - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, > > - count + 1); > > + struct i915_context_engines_load_balance *load_balance = > > + alloc_load_balance(count); > > + struct i915_context_param_engines *set_engines = > > + alloc_param_engines(count + 1); > > struct drm_i915_gem_context_param param = { > > .ctx_id = ctx_id, > > .param = I915_CONTEXT_PARAM_ENGINES, > > - .size = sizeof(set_engines), > > - .value = to_user_pointer(&set_engines), > > + .size = sizeof_param_engines(count + 1), > > + .value = to_user_pointer(set_engines), > > }; > > > > - set_engines.extensions = to_user_pointer(&load_balance); > > + set_engines->extensions = to_user_pointer(load_balance); > > > > - set_engines.engines[0].engine_class = > > + set_engines->engines[0].engine_class = > > I915_ENGINE_CLASS_INVALID; > > - set_engines.engines[0].engine_instance = > > + set_engines->engines[0].engine_instance = > > I915_ENGINE_CLASS_INVALID_NONE; > > - fill_engines_class(&set_engines.engines[1], VCS); > > + 
fill_engines_class(&set_engines->engines[1], VCS); > > > > - memset(&load_balance, 0, sizeof(load_balance)); > > - load_balance.base.name = > > + load_balance->base.name = > > I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; > > - load_balance.num_siblings = count; > > + load_balance->num_siblings = count; > > > > - fill_engines_class(&load_balance.engines[0], VCS); > > + fill_engines_class(&load_balance->engines[0], VCS); > > > > gem_context_set_param(fd, &param); > > + > > + free(set_engines); > > + free(load_balance); > > } > > > > if (wrk->sseu) { > > > _______________________________________________ > igt-dev mailing list > igt-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/igt-dev
Quoting Tvrtko Ursulin (2019-05-24 09:20:47) > > On 24/05/2019 08:25, Chris Wilson wrote: > > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc > > extension that clang refuses to support as handling memory layout is too > > difficult for it. > > > > Move the on-stack VLA to the heap. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- > > 1 file changed, 95 insertions(+), 51 deletions(-) > > > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > > index e2ffb93a9..0a0032bff 100644 > > --- a/benchmarks/gem_wsim.c > > +++ b/benchmarks/gem_wsim.c > > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) > > return slice_mask; > > } > > > > +static size_t sizeof_load_balance(int count) > > +{ > > + struct i915_context_engines_load_balance *ptr; > > + > > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > > This seems wrong - is bound to trigger. Why does it seem wrong? That's the calculation used previously, and the ptr->engines[] was meant to be packed in order for sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway, I threw it in there to check if the calculation was sane. > > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > > So size of of engine needs to be multiplied by count. (Just note this is the what the current VLA evaluates to :) > > +} > > + > > +static struct i915_context_engines_load_balance * > > +alloc_load_balance(int count) > > +{ > > + return calloc(1, sizeof_load_balance(count)); > > How about alloca so cleanup is simpler? Or is alloca also on the > unpopular list? I don't mind. Would shave a few lines indeed, but we need the memsets back. #define alloca0()? > Or possibly what Simon suggested, just a large temporary stack arrays > would be enough and easiest diff. Just with an assert that it fits. 
I don't think that is as clean for the long term. -Chris
On Fri, 2019-05-24 at 09:33 +0100, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2019-05-24 09:20:47) > > On 24/05/2019 08:25, Chris Wilson wrote: > > > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc > > > extension that clang refuses to support as handling memory layout is too > > > difficult for it. > > > > > > Move the on-stack VLA to the heap. > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > > --- > > > benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- > > > 1 file changed, 95 insertions(+), 51 deletions(-) > > > > > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > > > index e2ffb93a9..0a0032bff 100644 > > > --- a/benchmarks/gem_wsim.c > > > +++ b/benchmarks/gem_wsim.c > > > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) > > > return slice_mask; > > > } > > > > > > +static size_t sizeof_load_balance(int count) > > > +{ > > > + struct i915_context_engines_load_balance *ptr; > > > + > > > + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); > > > > This seems wrong - is bound to trigger. > > Why does it seem wrong? That's the calculation used previously, and the > ptr->engines[] was meant to be packed in order for > sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway, > I threw it in there to check if the calculation was sane. > > > > + return sizeof(*ptr) + sizeof(ptr->engines[count]); > > > > So size of of engine needs to be multiplied by count. > > (Just note this is the what the current VLA evaluates to :) > > > > +} > > > + > > > +static struct i915_context_engines_load_balance * > > > +alloc_load_balance(int count) > > > +{ > > > + return calloc(1, sizeof_load_balance(count)); > > > > How about alloca so cleanup is simpler? Or is alloca also on the > > unpopular list? > > I don't mind. Would shave a few lines indeed, but we need the memsets > back. 
#define alloca0()? > > > Or possibly what Simon suggested, just a large temporary stack arrays > > would be enough and easiest diff. Just with an assert that it fits. > > I don't think that is as clean for the long term. I don't understand the motivation here. Can you elaborate?
On 24/05/2019 09:33, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2019-05-24 09:20:47) >> >> On 24/05/2019 08:25, Chris Wilson wrote: >>> Apparently VLA structs (e.g. struct { int array[count] }) is a gcc >>> extension that clang refuses to support as handling memory layout is too >>> difficult for it. >>> >>> Move the on-stack VLA to the heap. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> --- >>> benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- >>> 1 file changed, 95 insertions(+), 51 deletions(-) >>> >>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c >>> index e2ffb93a9..0a0032bff 100644 >>> --- a/benchmarks/gem_wsim.c >>> +++ b/benchmarks/gem_wsim.c >>> @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) >>> return slice_mask; >>> } >>> >>> +static size_t sizeof_load_balance(int count) >>> +{ >>> + struct i915_context_engines_load_balance *ptr; >>> + >>> + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); >> >> This seems wrong - is bound to trigger. > > Why does it seem wrong? That's the calculation used previously, and the > ptr->engines[] was meant to be packed in order for > sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway, > I threw it in there to check if the calculation was sane. Because sizeof(ptr->engines[0]) == sizeof(ptr->engines[N]), since the code is not declaring N big array, just referencing the element N. So for more than one engine I expect it explodes. Unless I am way wrong.. I guess someone needs to run it.. :) >>> + return sizeof(*ptr) + sizeof(ptr->engines[count]); >> >> So size of of engine needs to be multiplied by count. 
> > (Just note this is the what the current VLA evaluates to :) > >>> +} >>> + >>> +static struct i915_context_engines_load_balance * >>> +alloc_load_balance(int count) >>> +{ >>> + return calloc(1, sizeof_load_balance(count)); >> >> How about alloca so cleanup is simpler? Or is alloca also on the >> unpopular list? > > I don't mind. Would shave a few lines indeed, but we need the memsets > back. #define alloca0()? And a helper macro to generically deal with struct header + engines array so it doesn't need to be repeated three times. Yadayada too much work.. :) ... >> Or possibly what Simon suggested, just a large temporary stack arrays >> would be enough and easiest diff. Just with an assert that it fits. > > I don't think that is as clean for the long term. ... this should be just fine for now so I'd vote for it. Regards, Tvrtko
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index e2ffb93a9..0a0032bff 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) return slice_mask; } +static size_t sizeof_load_balance(int count) +{ + struct i915_context_engines_load_balance *ptr; + + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); + return sizeof(*ptr) + sizeof(ptr->engines[count]); +} + +static struct i915_context_engines_load_balance * +alloc_load_balance(int count) +{ + return calloc(1, sizeof_load_balance(count)); +} + +static size_t sizeof_param_engines(int count) +{ + struct i915_context_param_engines *ptr; + + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); + return sizeof(*ptr) + sizeof(ptr->engines[count]); +} + +static struct i915_context_param_engines * +alloc_param_engines(int count) +{ + return calloc(1, sizeof_param_engines(count)); +} + +static size_t sizeof_engines_bond(int count) +{ + struct i915_context_engines_bond *ptr; + + assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0])); + return sizeof(*ptr) + sizeof(ptr->engines[count]); +} + +static struct i915_context_engines_bond * +alloc_engines_bond(int count) +{ + return calloc(1, sizeof_engines_bond(count)); +} + static int prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) { @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) } if (ctx->engine_map) { - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, - ctx->engine_map_count + 1); - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, - ctx->engine_map_count); + struct i915_context_param_engines *set_engines = + alloc_param_engines(ctx->engine_map_count + 1); + struct i915_context_engines_load_balance *load_balance = + alloc_load_balance(ctx->engine_map_count); struct drm_i915_gem_context_param param = { .ctx_id = ctx_id, .param = 
I915_CONTEXT_PARAM_ENGINES, - .size = sizeof(set_engines), - .value = to_user_pointer(&set_engines), + .size = sizeof_param_engines(ctx->engine_map_count + 1), + .value = to_user_pointer(set_engines), }; + struct i915_context_engines_bond *last = NULL; if (ctx->wants_balance) { - set_engines.extensions = - to_user_pointer(&load_balance); + set_engines->extensions = + to_user_pointer(load_balance); - memset(&load_balance, 0, sizeof(load_balance)); - load_balance.base.name = + load_balance->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; - load_balance.num_siblings = + load_balance->num_siblings = ctx->engine_map_count; for (j = 0; j < ctx->engine_map_count; j++) - load_balance.engines[j] = + load_balance->engines[j] = get_engine(ctx->engine_map[j]); - } else { - set_engines.extensions = 0; } /* Reserve slot for virtual engine. */ - set_engines.engines[0].engine_class = + set_engines->engines[0].engine_class = I915_ENGINE_CLASS_INVALID; - set_engines.engines[0].engine_instance = + set_engines->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE; for (j = 1; j <= ctx->engine_map_count; j++) - set_engines.engines[j] = + set_engines->engines[j] = get_engine(ctx->engine_map[j - 1]); + last = NULL; for (j = 0; j < ctx->bond_count; j++) { unsigned long mask = ctx->bonds[j].mask; - I915_DEFINE_CONTEXT_ENGINES_BOND(bond, - __builtin_popcount(mask)); - struct i915_context_engines_bond *p = NULL, *prev; + struct i915_context_engines_bond *bond = + alloc_engines_bond(__builtin_popcount(mask)); unsigned int b, e; - prev = p; - p = alloca(sizeof(bond)); - assert(p); - memset(p, 0, sizeof(bond)); - - if (j == 0) - load_balance.base.next_extension = - to_user_pointer(p); - else if (j < (ctx->bond_count - 1)) - prev->base.next_extension = - to_user_pointer(p); + bond->base.next_extension = to_user_pointer(last); + bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND; - p->base.name = I915_CONTEXT_ENGINES_EXT_BOND; - p->virtual_index = 0; - p->master = 
get_engine(ctx->bonds[j].master); + bond->virtual_index = 0; + bond->master = get_engine(ctx->bonds[j].master); for (b = 0, e = 0; mask; e++, mask >>= 1) { unsigned int idx; @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) if (!(mask & 1)) continue; - idx = find_engine(&set_engines.engines[1], + idx = find_engine(&set_engines->engines[1], ctx->engine_map_count, e); - p->engines[b++] = - set_engines.engines[1 + idx]; + bond->engines[b++] = + set_engines->engines[1 + idx]; } + + last = bond; } + load_balance->base.next_extension = to_user_pointer(last); gem_context_set_param(fd, &param); + + while (last) { + struct i915_context_engines_bond *next = + from_user_pointer(last->base.next_extension); + free(last); + last = next; + } + free(load_balance); + free(set_engines); } else if (ctx->wants_balance) { const unsigned int count = num_engines_in_class(VCS); - I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, - count); - I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, - count + 1); + struct i915_context_engines_load_balance *load_balance = + alloc_load_balance(count); + struct i915_context_param_engines *set_engines = + alloc_param_engines(count + 1); struct drm_i915_gem_context_param param = { .ctx_id = ctx_id, .param = I915_CONTEXT_PARAM_ENGINES, - .size = sizeof(set_engines), - .value = to_user_pointer(&set_engines), + .size = sizeof_param_engines(count + 1), + .value = to_user_pointer(set_engines), }; - set_engines.extensions = to_user_pointer(&load_balance); + set_engines->extensions = to_user_pointer(load_balance); - set_engines.engines[0].engine_class = + set_engines->engines[0].engine_class = I915_ENGINE_CLASS_INVALID; - set_engines.engines[0].engine_instance = + set_engines->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE; - fill_engines_class(&set_engines.engines[1], VCS); + fill_engines_class(&set_engines->engines[1], VCS); - memset(&load_balance, 0, sizeof(load_balance)); - load_balance.base.name 
= + load_balance->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; - load_balance.num_siblings = count; + load_balance->num_siblings = count; - fill_engines_class(&load_balance.engines[0], VCS); + fill_engines_class(&load_balance->engines[0], VCS); gem_context_set_param(fd, &param); + + free(set_engines); + free(load_balance); } if (wrk->sseu) {
Apparently VLA structs (e.g. struct { int array[count] }) is a gcc extension that clang refuses to support as handling memory layout is too difficult for it. Move the on-stack VLA to the heap. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 51 deletions(-)