Message ID | 20200617160120.16555-3-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | gem_wsim improvements | expand |
Quoting Tvrtko Ursulin (2020-06-17 17:01:12) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Add support for defining buffer object working sets and targetting them as > data dependencies. For more information please see the README file. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > benchmarks/gem_wsim.c | 453 +++++++++++++++++++++--- > benchmarks/wsim/README | 59 +++ > benchmarks/wsim/cloud-gaming-60fps.wsim | 11 + > benchmarks/wsim/composited-ui.wsim | 7 + > 4 files changed, 476 insertions(+), 54 deletions(-) > create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim > create mode 100644 benchmarks/wsim/composited-ui.wsim > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > index 02fe8f5a5e69..9e5bfe6a36d4 100644 > --- a/benchmarks/gem_wsim.c > +++ b/benchmarks/gem_wsim.c > @@ -88,14 +88,21 @@ enum w_type > LOAD_BALANCE, > BOND, > TERMINATE, > - SSEU > + SSEU, > + WORKINGSET, > +}; > + > +struct dep_entry { > + int target; > + bool write; > + int working_set; /* -1 = step dependecy, >= 0 working set id */ > }; > > struct deps > { > int nr; > bool submit_fence; > - int *list; > + struct dep_entry *list; > }; > > struct w_arg { > @@ -110,6 +117,14 @@ struct bond { > enum intel_engine_id master; > }; > > +struct working_set { > + int id; > + bool shared; > + unsigned int nr; > + uint32_t *handles; > + unsigned long *sizes; > +}; > + > struct workload; > > struct w_step > @@ -143,6 +158,7 @@ struct w_step > enum intel_engine_id bond_master; > }; > int sseu; > + struct working_set working_set; > }; > > /* Implementation details */ > @@ -193,6 +209,9 @@ struct workload > unsigned int nr_ctxs; > struct ctx *ctx_list; > > + struct working_set **working_sets; /* array indexed by set id */ > + int max_working_set_id; > + > int sync_timeline; > uint32_t sync_seqno; > > @@ -281,11 +300,120 @@ print_engine_calibrations(void) > printf("\n"); > } > > +static void add_dep(struct deps *deps, struct dep_entry entry) > +{ > + deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1)); > + igt_assert(deps->list); > + > + deps->list[deps->nr++] = entry; > +} > + > + > +static int > +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str) > +{ > + struct dep_entry entry = { .working_set = -1 }; > + bool submit_fence = false; > + char *s; > + > + switch (str[0]) { > + case '-': > + if (str[1] < '0' || str[1] > '9') > + return -1; > + > + entry.target = atoi(str); > + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) > + return -1; add_dep for N steps ago, using a handle. > + > + add_dep(&w->data_deps, entry); > + > + break; > + case 's': > + submit_fence = true; > + /* Fall-through. */ > + case 'f': > + /* Multiple fences not yet supported. */ > + igt_assert_eq(w->fence_deps.nr, 0); > + > + entry.target = atoi(++str); > + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) > + return -1; > + > + add_dep(&w->fence_deps, entry); > + > + w->fence_deps.submit_fence = submit_fence; add_dep for N steps ago, using the out-fence from that step [A post processing steps adds emit_fence to the earlier steps.] > + break; > + case 'w': > + entry.write = true; Got confused for a moment as I was expecting the submit_fence fallthrough pattern. > + /* Fall-through. */ > + case 'r': > + /* > + * [rw]N-<str> > + * r1-<str> or w2-<str>, where N is working set id. > + */ > + s = index(++str, '-'); > + if (!s) > + return -1; > + > + entry.working_set = atoi(str); if (entry.working_set < 0) return -1; > + > + if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s)) > + return -1; The new one... > +static int > +parse_working_set_deps(struct workload *wrk, > + struct deps *deps, > + struct dep_entry _entry, > + char *str) > +{ > + /* > + * 1 - target handle index in the specified working set. > + * 2-4 - range > + */ > + struct dep_entry entry = _entry; > + char *s; > + > + s = index(str, '-'); > + if (s) { > + int from, to; > + > + from = atoi(str); > + if (from < 0) > + return -1; > + > + to = atoi(++s); > + if (to <= 0) > + return -1; if to < from, we add nothing. Is that worth the error? > + > + for (entry.target = from; entry.target <= to; entry.target++) > + add_dep(deps, entry); > + } else { > + entry.target = atoi(str); > + if (entry.target < 0) > + return -1; > + > + add_dep(deps, entry); > + } > + > + return 0; > +} > + > + break; > + default: > + return -1; > + }; > + > + return 0; > +} > + > static int > parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) > { > char *desc = strdup(_desc); > char *token, *tctx = NULL, *tstart = desc; > + int ret = 0; > + > + if (!strcmp(_desc, "0")) > + goto out; Hang on, what this special case? > > igt_assert(desc); > igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr); > static void __attribute__((format(printf, 1, 2))) > @@ -624,6 +722,88 @@ static int parse_engine_map(struct w_step *step, const char *_str) > return 0; > } > > +static unsigned long parse_size(char *str) > +{ /* "1234567890[gGmMkK]" */ > + const unsigned int len = strlen(str); > + unsigned int mult = 1; > + > + if (len == 0) > + return 0; > + > + switch (str[len - 1]) { T? P? E? Let's plan ahead! :) > + case 'g': > + case 'G': > + mult *= 1024; > + /* Fall-throuogh. */ > + case 'm': > + case 'M': > + mult *= 1024; > + /* Fall-throuogh. */ > + case 'k': > + case 'K': > + mult *= 1024; > + > + str[len - 1] = 0; > + } > + > + return atol(str) * mult; Negatives? > +} > + > +static int add_buffers(struct working_set *set, char *str) > +{ > + /* > + * 4096 > + * 4k > + * 4m > + * 4g > + * 10n4k - 10 4k batches > + */ > + unsigned long *sizes, size; > + unsigned int add, i; > + char *n; > + > + n = index(str, 'n'); > + if (n) { > + *n = 0; > + add = atoi(str); > + if (!add) > + return -1; if (add <= 0) [int add goes without saying then] > + str = ++n; > + } else { > + add = 1; > + } > + > + size = parse_size(str); > + if (!size) > + return -1; > + > + sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes)); > + if (!sizes) > + return -1; > + > + for (i = 0; i < add; i++) > + sizes[set->nr + i] = size; > + > + set->nr += add; > + set->sizes = sizes; > + > + return 0; > +} > @@ -1003,6 +1209,51 @@ add_step: > } > } > > + /* > + * Check no duplicate working set ids. > + */ > + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { > + struct w_step *w2; > + > + if (w->type != WORKINGSET) > + continue; > + > + for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) { > + if (j == i) > + continue; > + if (w2->type != WORKINGSET) > + continue; > + > + check_arg(w->working_set.id == w2->working_set.id, > + "Duplicate working set id at %u!\n", j); > + } > + } > + > + /* > + * Allocate shared working sets. > + */ > + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { > + if (w->type == WORKINGSET && w->working_set.shared) > + allocate_working_set(&w->working_set); > + } > + > + wrk->max_working_set_id = -1; > + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { > + if (w->type == WORKINGSET && > + w->working_set.shared && > + w->working_set.id > wrk->max_working_set_id) > + wrk->max_working_set_id = w->working_set.id; > + } > + > + wrk->working_sets = calloc(wrk->max_working_set_id + 1, > + sizeof(*wrk->working_sets)); > + igt_assert(wrk->working_sets); > + > + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { > + if (w->type == WORKINGSET && w->working_set.shared) > + wrk->working_sets[w->working_set.id] = &w->working_set; > + } Ok, sharing works by reusing the same set of handles within the process. Is there room in the parser namespace for dmabuf sharing? -Chris
On 17/06/2020 17:57, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-06-17 17:01:12) >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Add support for defining buffer object working sets and targetting them as >> data dependencies. For more information please see the README file. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> benchmarks/gem_wsim.c | 453 +++++++++++++++++++++--- >> benchmarks/wsim/README | 59 +++ >> benchmarks/wsim/cloud-gaming-60fps.wsim | 11 + >> benchmarks/wsim/composited-ui.wsim | 7 + >> 4 files changed, 476 insertions(+), 54 deletions(-) >> create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim >> create mode 100644 benchmarks/wsim/composited-ui.wsim >> >> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c >> index 02fe8f5a5e69..9e5bfe6a36d4 100644 >> --- a/benchmarks/gem_wsim.c >> +++ b/benchmarks/gem_wsim.c >> @@ -88,14 +88,21 @@ enum w_type >> LOAD_BALANCE, >> BOND, >> TERMINATE, >> - SSEU >> + SSEU, >> + WORKINGSET, >> +}; >> + >> +struct dep_entry { >> + int target; >> + bool write; >> + int working_set; /* -1 = step dependecy, >= 0 working set id */ >> }; >> >> struct deps >> { >> int nr; >> bool submit_fence; >> - int *list; >> + struct dep_entry *list; >> }; >> >> struct w_arg { >> @@ -110,6 +117,14 @@ struct bond { >> enum intel_engine_id master; >> }; >> >> +struct working_set { >> + int id; >> + bool shared; >> + unsigned int nr; >> + uint32_t *handles; >> + unsigned long *sizes; >> +}; >> + >> struct workload; >> >> struct w_step >> @@ -143,6 +158,7 @@ struct w_step >> enum intel_engine_id bond_master; >> }; >> int sseu; >> + struct working_set working_set; >> }; >> >> /* Implementation details */ >> @@ -193,6 +209,9 @@ struct workload >> unsigned int nr_ctxs; >> struct ctx *ctx_list; >> >> + struct working_set **working_sets; /* array indexed by set id */ >> + int max_working_set_id; >> + >> int sync_timeline; >> uint32_t sync_seqno; >> >> @@ -281,11 +300,120 @@ print_engine_calibrations(void) >> printf("\n"); >> } >> >> +static void add_dep(struct deps *deps, struct dep_entry entry) >> +{ >> + deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1)); >> + igt_assert(deps->list); >> + >> + deps->list[deps->nr++] = entry; >> +} >> + >> + >> +static int >> +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str) >> +{ >> + struct dep_entry entry = { .working_set = -1 }; >> + bool submit_fence = false; >> + char *s; >> + >> + switch (str[0]) { >> + case '-': >> + if (str[1] < '0' || str[1] > '9') >> + return -1; >> + >> + entry.target = atoi(str); >> + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) >> + return -1; > > add_dep for N steps ago, using a handle. > >> + >> + add_dep(&w->data_deps, entry); >> + >> + break; >> + case 's': >> + submit_fence = true; >> + /* Fall-through. */ >> + case 'f': >> + /* Multiple fences not yet supported. */ >> + igt_assert_eq(w->fence_deps.nr, 0); >> + >> + entry.target = atoi(++str); >> + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) >> + return -1; >> + >> + add_dep(&w->fence_deps, entry); >> + >> + w->fence_deps.submit_fence = submit_fence; > > add_dep for N steps ago, using the out-fence from that step > [A post processing steps adds emit_fence to the earlier steps.] > >> + break; >> + case 'w': >> + entry.write = true; > > Got confused for a moment as I was expecting the submit_fence > fallthrough pattern. >> + /* Fall-through. */ >> + case 'r': >> + /* >> + * [rw]N-<str> >> + * r1-<str> or w2-<str>, where N is working set id. >> + */ >> + s = index(++str, '-'); >> + if (!s) >> + return -1; >> + >> + entry.working_set = atoi(str); > > if (entry.working_set < 0) > return -1; Yep. > >> + >> + if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s)) >> + return -1; > > The new one... > >> +static int >> +parse_working_set_deps(struct workload *wrk, >> + struct deps *deps, >> + struct dep_entry _entry, >> + char *str) >> +{ >> + /* >> + * 1 - target handle index in the specified working set. >> + * 2-4 - range >> + */ >> + struct dep_entry entry = _entry; >> + char *s; >> + >> + s = index(str, '-'); >> + if (s) { >> + int from, to; >> + >> + from = atoi(str); >> + if (from < 0) >> + return -1; >> + >> + to = atoi(++s); >> + if (to <= 0) >> + return -1; > > if to < from, we add nothing. Is that worth the error? Yep. > >> + >> + for (entry.target = from; entry.target <= to; entry.target++) >> + add_dep(deps, entry); >> + } else { >> + entry.target = atoi(str); >> + if (entry.target < 0) >> + return -1; >> + >> + add_dep(deps, entry); > > >> + } >> + >> + return 0; >> +} >> + >> + break; >> + default: >> + return -1; >> + }; >> + >> + return 0; >> +} >> + >> static int >> parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) >> { >> char *desc = strdup(_desc); >> char *token, *tctx = NULL, *tstart = desc; >> + int ret = 0; >> + >> + if (!strcmp(_desc, "0")) >> + goto out; > > Hang on, what this special case? For no dependencies. If I move the check to parse_dependency then dependency of "0/0/0/0" would be silently accepted. It wouldn't be a big deal, who cares, but I thought it is better to be more strict. > >> >> igt_assert(desc); >> igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr); >> static void __attribute__((format(printf, 1, 2))) >> @@ -624,6 +722,88 @@ static int parse_engine_map(struct w_step *step, const char *_str) >> return 0; >> } >> >> +static unsigned long parse_size(char *str) >> +{ > /* "1234567890[gGmMkK]" */ >> + const unsigned int len = strlen(str); >> + unsigned int mult = 1; >> + >> + if (len == 0) >> + return 0; >> + >> + switch (str[len - 1]) { > > T? P? E? Let's plan ahead! :) Error on unrecognized non-digit? Ok. > >> + case 'g': >> + case 'G': >> + mult *= 1024; >> + /* Fall-throuogh. */ >> + case 'm': >> + case 'M': >> + mult *= 1024; >> + /* Fall-throuogh. */ >> + case 'k': >> + case 'K': >> + mult *= 1024; >> + >> + str[len - 1] = 0; >> + } >> + >> + return atol(str) * mult; > > Negatives? Ok. > >> +} >> + >> +static int add_buffers(struct working_set *set, char *str) >> +{ >> + /* >> + * 4096 >> + * 4k >> + * 4m >> + * 4g >> + * 10n4k - 10 4k batches >> + */ >> + unsigned long *sizes, size; >> + unsigned int add, i; >> + char *n; >> + >> + n = index(str, 'n'); >> + if (n) { >> + *n = 0; >> + add = atoi(str); >> + if (!add) >> + return -1; > > if (add <= 0) [int add goes without saying then] Yep. > >> + str = ++n; >> + } else { >> + add = 1; >> + } >> + >> + size = parse_size(str); >> + if (!size) >> + return -1; >> + >> + sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes)); >> + if (!sizes) >> + return -1; >> + >> + for (i = 0; i < add; i++) >> + sizes[set->nr + i] = size; >> + >> + set->nr += add; >> + set->sizes = sizes; >> + >> + return 0; >> +} > >> @@ -1003,6 +1209,51 @@ add_step: >> } >> } >> >> + /* >> + * Check no duplicate working set ids. >> + */ >> + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> + struct w_step *w2; >> + >> + if (w->type != WORKINGSET) >> + continue; >> + >> + for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) { >> + if (j == i) >> + continue; >> + if (w2->type != WORKINGSET) >> + continue; >> + >> + check_arg(w->working_set.id == w2->working_set.id, >> + "Duplicate working set id at %u!\n", j); >> + } >> + } >> + >> + /* >> + * Allocate shared working sets. >> + */ >> + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> + if (w->type == WORKINGSET && w->working_set.shared) >> + allocate_working_set(&w->working_set); >> + } >> + >> + wrk->max_working_set_id = -1; >> + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> + if (w->type == WORKINGSET && >> + w->working_set.shared && >> + w->working_set.id > wrk->max_working_set_id) >> + wrk->max_working_set_id = w->working_set.id; >> + } >> + >> + wrk->working_sets = calloc(wrk->max_working_set_id + 1, >> + sizeof(*wrk->working_sets)); >> + igt_assert(wrk->working_sets); >> + >> + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> + if (w->type == WORKINGSET && w->working_set.shared) >> + wrk->working_sets[w->working_set.id] = &w->working_set; >> + } > > Ok, sharing works by reusing the same set of handles within the process. > > Is there room in the parser namespace for dmabuf sharing? Plenty of unused characters. :) Regards, Tvrtko
Quoting Tvrtko Ursulin (2020-06-18 10:05:56) > > On 17/06/2020 17:57, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2020-06-17 17:01:12) > >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >> Add support for defining buffer object working sets and targetting them as > >> data dependencies. For more information please see the README file. > >> > >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> --- > >> benchmarks/gem_wsim.c | 453 +++++++++++++++++++++--- > >> benchmarks/wsim/README | 59 +++ > >> benchmarks/wsim/cloud-gaming-60fps.wsim | 11 + > >> benchmarks/wsim/composited-ui.wsim | 7 + > >> 4 files changed, 476 insertions(+), 54 deletions(-) > >> create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim > >> create mode 100644 benchmarks/wsim/composited-ui.wsim > >> > >> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c > >> index 02fe8f5a5e69..9e5bfe6a36d4 100644 > >> --- a/benchmarks/gem_wsim.c > >> +++ b/benchmarks/gem_wsim.c > >> @@ -88,14 +88,21 @@ enum w_type > >> LOAD_BALANCE, > >> BOND, > >> TERMINATE, > >> - SSEU > >> + SSEU, > >> + WORKINGSET, > >> +}; > >> + > >> +struct dep_entry { > >> + int target; > >> + bool write; > >> + int working_set; /* -1 = step dependecy, >= 0 working set id */ > >> }; > >> > >> struct deps > >> { > >> int nr; > >> bool submit_fence; > >> - int *list; > >> + struct dep_entry *list; > >> }; > >> > >> struct w_arg { > >> @@ -110,6 +117,14 @@ struct bond { > >> enum intel_engine_id master; > >> }; > >> > >> +struct working_set { > >> + int id; > >> + bool shared; > >> + unsigned int nr; > >> + uint32_t *handles; > >> + unsigned long *sizes; > >> +}; > >> + > >> struct workload; > >> > >> struct w_step > >> @@ -143,6 +158,7 @@ struct w_step > >> enum intel_engine_id bond_master; > >> }; > >> int sseu; > >> + struct working_set working_set; > >> }; > >> > >> /* Implementation details */ > >> @@ -193,6 +209,9 @@ struct workload > >> unsigned int nr_ctxs; > >> struct ctx *ctx_list; > >> > >> + struct working_set **working_sets; /* array indexed by set id */ > >> + int max_working_set_id; > >> + > >> int sync_timeline; > >> uint32_t sync_seqno; > >> > >> @@ -281,11 +300,120 @@ print_engine_calibrations(void) > >> printf("\n"); > >> } > >> > >> +static void add_dep(struct deps *deps, struct dep_entry entry) > >> +{ > >> + deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1)); > >> + igt_assert(deps->list); > >> + > >> + deps->list[deps->nr++] = entry; > >> +} > >> + > >> + > >> +static int > >> +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str) > >> +{ > >> + struct dep_entry entry = { .working_set = -1 }; > >> + bool submit_fence = false; > >> + char *s; > >> + > >> + switch (str[0]) { > >> + case '-': > >> + if (str[1] < '0' || str[1] > '9') > >> + return -1; > >> + > >> + entry.target = atoi(str); > >> + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) > >> + return -1; > > > > add_dep for N steps ago, using a handle. > > > >> + > >> + add_dep(&w->data_deps, entry); > >> + > >> + break; > >> + case 's': > >> + submit_fence = true; > >> + /* Fall-through. */ > >> + case 'f': > >> + /* Multiple fences not yet supported. */ > >> + igt_assert_eq(w->fence_deps.nr, 0); > >> + > >> + entry.target = atoi(++str); > >> + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) > >> + return -1; > >> + > >> + add_dep(&w->fence_deps, entry); > >> + > >> + w->fence_deps.submit_fence = submit_fence; > > > > add_dep for N steps ago, using the out-fence from that step > > [A post processing steps adds emit_fence to the earlier steps.] > > > >> + break; > >> + case 'w': > >> + entry.write = true; > > > > Got confused for a moment as I was expecting the submit_fence > > fallthrough pattern. > >> + /* Fall-through. */ > >> + case 'r': > >> + /* > >> + * [rw]N-<str> > >> + * r1-<str> or w2-<str>, where N is working set id. > >> + */ > >> + s = index(++str, '-'); > >> + if (!s) > >> + return -1; > >> + > >> + entry.working_set = atoi(str); > > > > if (entry.working_set < 0) > > return -1; > > Yep. > > > > >> + > >> + if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s)) > >> + return -1; > > > > The new one... > > > >> +static int > >> +parse_working_set_deps(struct workload *wrk, > >> + struct deps *deps, > >> + struct dep_entry _entry, > >> + char *str) > >> +{ > >> + /* > >> + * 1 - target handle index in the specified working set. > >> + * 2-4 - range > >> + */ > >> + struct dep_entry entry = _entry; > >> + char *s; > >> + > >> + s = index(str, '-'); > >> + if (s) { > >> + int from, to; > >> + > >> + from = atoi(str); > >> + if (from < 0) > >> + return -1; > >> + > >> + to = atoi(++s); > >> + if (to <= 0) > >> + return -1; > > > > if to < from, we add nothing. Is that worth the error? > > Yep. > > > > >> + > >> + for (entry.target = from; entry.target <= to; entry.target++) > >> + add_dep(deps, entry); > >> + } else { > >> + entry.target = atoi(str); > >> + if (entry.target < 0) > >> + return -1; > >> + > >> + add_dep(deps, entry); > > > > > >> + } > >> + > >> + return 0; > >> +} > >> + > >> + break; > >> + default: > >> + return -1; > >> + }; > >> + > >> + return 0; > >> +} > >> + > >> static int > >> parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) > >> { > >> char *desc = strdup(_desc); > >> char *token, *tctx = NULL, *tstart = desc; > >> + int ret = 0; > >> + > >> + if (!strcmp(_desc, "0")) > >> + goto out; > > > > Hang on, what this special case? > > For no dependencies. > > If I move the check to parse_dependency then dependency of "0/0/0/0" > would be silently accepted. It wouldn't be a big deal, who cares, but I > thought it is better to be more strict. It was just not clear at this point what is being matched, what the meaning of 0 is. /* 0 refers to self, a degenerate dependency */ ? -Chris
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index 02fe8f5a5e69..9e5bfe6a36d4 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -88,14 +88,21 @@ enum w_type LOAD_BALANCE, BOND, TERMINATE, - SSEU + SSEU, + WORKINGSET, +}; + +struct dep_entry { + int target; + bool write; + int working_set; /* -1 = step dependecy, >= 0 working set id */ }; struct deps { int nr; bool submit_fence; - int *list; + struct dep_entry *list; }; struct w_arg { @@ -110,6 +117,14 @@ struct bond { enum intel_engine_id master; }; +struct working_set { + int id; + bool shared; + unsigned int nr; + uint32_t *handles; + unsigned long *sizes; +}; + struct workload; struct w_step @@ -143,6 +158,7 @@ struct w_step enum intel_engine_id bond_master; }; int sseu; + struct working_set working_set; }; /* Implementation details */ @@ -193,6 +209,9 @@ struct workload unsigned int nr_ctxs; struct ctx *ctx_list; + struct working_set **working_sets; /* array indexed by set id */ + int max_working_set_id; + int sync_timeline; uint32_t sync_seqno; @@ -281,11 +300,120 @@ print_engine_calibrations(void) printf("\n"); } +static void add_dep(struct deps *deps, struct dep_entry entry) +{ + deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1)); + igt_assert(deps->list); + + deps->list[deps->nr++] = entry; +} + +static int +parse_working_set_deps(struct workload *wrk, + struct deps *deps, + struct dep_entry _entry, + char *str) +{ + /* + * 1 - target handle index in the specified working set. + * 2-4 - range + */ + struct dep_entry entry = _entry; + char *s; + + s = index(str, '-'); + if (s) { + int from, to; + + from = atoi(str); + if (from < 0) + return -1; + + to = atoi(++s); + if (to <= 0) + return -1; + + for (entry.target = from; entry.target <= to; entry.target++) + add_dep(deps, entry); + } else { + entry.target = atoi(str); + if (entry.target < 0) + return -1; + + add_dep(deps, entry); + } + + return 0; +} + +static int +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str) +{ + struct dep_entry entry = { .working_set = -1 }; + bool submit_fence = false; + char *s; + + switch (str[0]) { + case '-': + if (str[1] < '0' || str[1] > '9') + return -1; + + entry.target = atoi(str); + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) + return -1; + + add_dep(&w->data_deps, entry); + + break; + case 's': + submit_fence = true; + /* Fall-through. */ + case 'f': + /* Multiple fences not yet supported. */ + igt_assert_eq(w->fence_deps.nr, 0); + + entry.target = atoi(++str); + if (entry.target > 0 || ((int)nr_steps + entry.target) < 0) + return -1; + + add_dep(&w->fence_deps, entry); + + w->fence_deps.submit_fence = submit_fence; + break; + case 'w': + entry.write = true; + /* Fall-through. */ + case 'r': + /* + * [rw]N-<str> + * r1-<str> or w2-<str>, where N is working set id. + */ + s = index(++str, '-'); + if (!s) + return -1; + + entry.working_set = atoi(str); + + if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s)) + return -1; + + break; + default: + return -1; + }; + + return 0; +} + static int parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) { char *desc = strdup(_desc); char *token, *tctx = NULL, *tstart = desc; + int ret = 0; + + if (!strcmp(_desc, "0")) + goto out; igt_assert(desc); igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr); @@ -293,47 +421,17 @@ parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) w->data_deps.list == w->fence_deps.list); while ((token = strtok_r(tstart, "/", &tctx)) != NULL) { - bool submit_fence = false; - char *str = token; - struct deps *deps; - int dep; - tstart = NULL; - if (str[0] == '-' || (str[0] >= '0' && str[0] <= '9')) { - deps = &w->data_deps; - } else { - if (str[0] == 's') - submit_fence = true; - else if (str[0] != 'f') - return -1; - - deps = &w->fence_deps; - str++; - } - - dep = atoi(str); - if (dep > 0 || ((int)nr_steps + dep) < 0) { - if (deps->list) - free(deps->list); - return -1; - } - - if (dep < 0) { - deps->nr++; - /* Multiple fences not yet supported. */ - igt_assert(deps->nr == 1 || deps != &w->fence_deps); - deps->list = realloc(deps->list, - sizeof(*deps->list) * deps->nr); - igt_assert(deps->list); - deps->list[deps->nr - 1] = dep; - deps->submit_fence = submit_fence; - } + ret = parse_dependency(nr_steps, w, token); + if (ret) + break; } +out: free(desc); - return 0; + return ret; } static void __attribute__((format(printf, 1, 2))) @@ -624,6 +722,88 @@ static int parse_engine_map(struct w_step *step, const char *_str) return 0; } +static unsigned long parse_size(char *str) +{ + const unsigned int len = strlen(str); + unsigned int mult = 1; + + if (len == 0) + return 0; + + switch (str[len - 1]) { + case 'g': + case 'G': + mult *= 1024; + /* Fall-throuogh. */ + case 'm': + case 'M': + mult *= 1024; + /* Fall-throuogh. */ + case 'k': + case 'K': + mult *= 1024; + + str[len - 1] = 0; + } + + return atol(str) * mult; +} + +static int add_buffers(struct working_set *set, char *str) +{ + /* + * 4096 + * 4k + * 4m + * 4g + * 10n4k - 10 4k batches + */ + unsigned long *sizes, size; + unsigned int add, i; + char *n; + + n = index(str, 'n'); + if (n) { + *n = 0; + add = atoi(str); + if (!add) + return -1; + str = ++n; + } else { + add = 1; + } + + size = parse_size(str); + if (!size) + return -1; + + sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes)); + if (!sizes) + return -1; + + for (i = 0; i < add; i++) + sizes[set->nr + i] = size; + + set->nr += add; + set->sizes = sizes; + + return 0; +} + +static int parse_working_set(struct working_set *set, char *str) +{ + char *token, *tctx = NULL, *tstart = str; + + while ((token = strtok_r(tstart, "/", &tctx))) { + tstart = NULL; + + if (add_buffers(set, token)) + return -1; + } + + return 0; +} + static uint64_t engine_list_mask(const char *_str) { uint64_t mask = 0; @@ -644,6 +824,8 @@ static uint64_t engine_list_mask(const char *_str) return mask; } +static void allocate_working_set(struct working_set *set); + #define int_field(_STEP_, _FIELD_, _COND_, _ERR_) \ if ((field = strtok_r(fstart, ".", &fctx))) { \ tmp = atoi(field); \ @@ -661,7 +843,7 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w) char *desc = strdup(arg->desc); char *_token, *token, *tctx = NULL, *tstart = desc; char *field, *fctx = NULL, *fstart; - struct w_step step, *steps = NULL; + struct w_step step, *w, *steps = NULL; unsigned int valid; int i, j, tmp; @@ -851,6 +1033,28 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w) step.type = BOND; goto add_step; + } else if (!strcmp(field, "w") || !strcmp(field, "W")) { + unsigned int nr = 0; + + step.working_set.shared = field[0] == 'W'; + + while ((field = strtok_r(fstart, ".", &fctx))) { + tmp = atoi(field); + if (nr == 0) { + step.working_set.id = tmp; + } else { + tmp = parse_working_set(&step.working_set, + field); + check_arg(tmp < 0, + "Invalid working set at step %u!\n", + nr_steps); + } + + nr++; + } + + step.type = WORKINGSET; + goto add_step; } if (!field) { @@ -975,6 +1179,8 @@ add_step: wrk->steps = steps; wrk->prio = arg->prio; wrk->sseu = arg->sseu; + wrk->max_working_set_id = -1; + wrk->working_sets = NULL; free(desc); @@ -984,7 +1190,7 @@ add_step: */ for (i = 0; i < nr_steps; i++) { for (j = 0; j < steps[i].fence_deps.nr; j++) { - tmp = steps[i].idx + steps[i].fence_deps.list[j]; + tmp = steps[i].idx + steps[i].fence_deps.list[j].target; check_arg(tmp < 0 || tmp >= i || (steps[tmp].type != BATCH && steps[tmp].type != SW_FENCE), @@ -1003,6 +1209,51 @@ add_step: } } + /* + * Check no duplicate working set ids. + */ + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + struct w_step *w2; + + if (w->type != WORKINGSET) + continue; + + for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) { + if (j == i) + continue; + if (w2->type != WORKINGSET) + continue; + + check_arg(w->working_set.id == w2->working_set.id, + "Duplicate working set id at %u!\n", j); + } + } + + /* + * Allocate shared working sets. + */ + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == WORKINGSET && w->working_set.shared) + allocate_working_set(&w->working_set); + } + + wrk->max_working_set_id = -1; + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == WORKINGSET && + w->working_set.shared && + w->working_set.id > wrk->max_working_set_id) + wrk->max_working_set_id = w->working_set.id; + } + + wrk->working_sets = calloc(wrk->max_working_set_id + 1, + sizeof(*wrk->working_sets)); + igt_assert(wrk->working_sets); + + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == WORKINGSET && w->working_set.shared) + wrk->working_sets[w->working_set.id] = &w->working_set; + } + return wrk; } @@ -1024,6 +1275,18 @@ clone_workload(struct workload *_wrk) memcpy(wrk->steps, _wrk->steps, sizeof(struct w_step) * wrk->nr_steps); + wrk->max_working_set_id = _wrk->max_working_set_id; + if (wrk->max_working_set_id >= 0) { + wrk->working_sets = calloc(wrk->max_working_set_id + 1, + sizeof(*wrk->working_sets)); + igt_assert(wrk->working_sets); + + memcpy(wrk->working_sets, + _wrk->working_sets, + (wrk->max_working_set_id + 1) * + sizeof(*wrk->working_sets)); + } + /* Check if we need a sw sync timeline. */ for (i = 0; i < wrk->nr_steps; i++) { if (wrk->steps[i].type == SW_FENCE) { @@ -1226,17 +1489,36 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) igt_assert(j < nr_obj); for (i = 0; i < w->data_deps.nr; i++) { - igt_assert(w->data_deps.list[i] <= 0); - if (w->data_deps.list[i]) { - int dep_idx = w->idx + w->data_deps.list[i]; + struct dep_entry *entry = &w->data_deps.list[i]; + uint32_t dep_handle; + + if (entry->working_set == -1) { + int dep_idx = w->idx + entry->target; + igt_assert(entry->target <= 0); igt_assert(dep_idx >= 0 && dep_idx < w->idx); igt_assert(wrk->steps[dep_idx].type == BATCH); - w->obj[j].handle = wrk->steps[dep_idx].obj[0].handle; - j++; - igt_assert(j < nr_obj); + dep_handle = wrk->steps[dep_idx].obj[0].handle; + } else { + struct working_set *set; + + igt_assert(entry->working_set <= + wrk->max_working_set_id); + + set = wrk->working_sets[entry->working_set]; + + igt_assert(set->nr); + igt_assert(entry->target < set->nr); + igt_assert(set->sizes[entry->target]); + + dep_handle = set->handles[entry->target]; } + + w->obj[j].flags = entry->write ? EXEC_OBJECT_WRITE : 0; + w->obj[j].handle = dep_handle; + j++; + igt_assert(j < nr_obj); } if (w->unbound_duration) @@ -1395,11 +1677,23 @@ static size_t sizeof_engines_bond(int count) engines[count]); } +static void allocate_working_set(struct working_set *set) +{ + unsigned int i; + + set->handles = calloc(set->nr, sizeof(*set->handles)); + igt_assert(set->handles); + + for (i = 0; i < set->nr; i++) + set->handles[i] = gem_create(fd, set->sizes[i]); +} + #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); }) static int prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) { + struct working_set **sets; uint32_t share_vm = 0; int max_ctx = -1; struct w_step *w; @@ -1634,6 +1928,51 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) } } + /* + * Allocate working sets. + */ + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == WORKINGSET && !w->working_set.shared) + allocate_working_set(&w->working_set); + } + + /* + * Map of working set ids. + */ + wrk->max_working_set_id = -1; + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == WORKINGSET && + w->working_set.id > wrk->max_working_set_id) + wrk->max_working_set_id = w->working_set.id; + } + + sets = wrk->working_sets; + wrk->working_sets = calloc(wrk->max_working_set_id + 1, + sizeof(*wrk->working_sets)); + igt_assert(wrk->working_sets); + + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + struct working_set *set; + + if (w->type != WORKINGSET) + continue; + + if (!w->working_set.shared) { + set = &w->working_set; + } else { + igt_assert(sets); + + set = sets[w->working_set.id]; + igt_assert(set->shared); + igt_assert(set->sizes); + } + + wrk->working_sets[w->working_set.id] = set; + } + + if (sets) + free(sets); + /* * Allocate batch buffers. */ @@ -1704,7 +2043,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine, 2 * sizeof(uint32_t)); for (i = 0; i < w->fence_deps.nr; i++) { - int tgt = w->idx + w->fence_deps.list[i]; + int tgt = w->idx + w->fence_deps.list[i].target; /* TODO: fence merging needed to support multiple inputs */ igt_assert(i == 0); @@ -1735,14 +2074,18 @@ static void sync_deps(struct workload *wrk, struct w_step *w) unsigned int i; for (i = 0; i < w->data_deps.nr; i++) { + struct dep_entry *entry = &w->data_deps.list[i]; int dep_idx; - igt_assert(w->data_deps.list[i] <= 0); + if (entry->working_set == -1) + continue; + + igt_assert(entry->target <= 0); - if (!w->data_deps.list[i]) + if (!entry->target) continue; - dep_idx = w->idx + w->data_deps.list[i]; + dep_idx = w->idx + entry->target; igt_assert(dep_idx >= 0 && dep_idx < w->idx); igt_assert(wrk->steps[dep_idx].type == BATCH); @@ -1842,11 +2185,6 @@ static void *run_workload(void *data) MI_BATCH_BUFFER_END; __sync_synchronize(); continue; - } else if (w->type == PREEMPTION || - w->type == ENGINE_MAP || - w->type == LOAD_BALANCE || - w->type == BOND) { - continue; } else if (w->type == SSEU) { if (w->sseu != wrk->ctx_list[w->context * 2].sseu) { wrk->ctx_list[w->context * 2].sseu = @@ -1854,6 +2192,13 @@ static void *run_workload(void *data) w->sseu); } continue; + } else if (w->type == PREEMPTION || + w->type == ENGINE_MAP || + w->type == LOAD_BALANCE || + w->type == BOND || + w->type == WORKINGSET) { + /* No action for these at execution time. */ + continue; } if (do_sleep || w->type == PERIOD) { diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README index 9f770217f075..3d9143226740 100644 --- a/benchmarks/wsim/README +++ b/benchmarks/wsim/README @@ -8,6 +8,7 @@ M.<uint>.<str>[|<str>]... P|S|X.<uint>.<int> d|p|s|t|q|a|T.<int>,... b.<uint>.<str>[|<str>].<str> +w|W.<uint>.<str>[/<str>]... f For duration a range can be given from which a random value will be picked @@ -32,6 +33,8 @@ Additional workload steps are also supported: 'P' - Context priority. 'S' - Context SSEU configuration. 'T' - Terminate an infinite batch. + 'w' - Working set. (See Working sets section.) + 'W' - Shared working set. 'X' - Context preemption control. Engine ids: DEFAULT, RCS, BCS, VCS, VCS1, VCS2, VECS @@ -275,3 +278,59 @@ for the render engine. Slice mask of -1 has a special meaning of "all slices". Otherwise any integer can be specifying as the slice mask, but beware any apart from 1 and -1 can make the workload not portable between different GPUs. + +Working sets +------------ + +When used plainly workload steps can create implicit data dependencies by +relatively referencing another workload steps of a batch buffer type. Fourth +field contains the relative data dependncy. For example: + + 1.RCS.1000.0.0 + 1.BCS.1000.-1.0 + +This means the second batch buffer will be marked as having a read data +dependency on the first one. (The shared buffer is always marked as written to +by the dependency target buffer.) This will cause a serialization between the +two batch buffers. + +Working sets are used where more complex data dependencies are required. Each +working set has an id, a list of buffers, and can either be local to the +workload or shared within the cloned workloads (-c command line option). + +Lower-case 'w' command defines a local working set while upper-case 'W' defines +a shared version. Syntax is as follows: + + w.<id>.<size>[/<size>]... + +For size a byte size can be given, or suffix 'k', 'm' or 'g' can be used (case +insensitive). Prefix in the format of "<int>n<size>" can also be given to create +multiple objects of the same size. + +Examples: + + w.1.4k - Working set 1 with a single 4KiB object in it. + W.2.2M/32768 - Working set 2 with one 2MiB and one 32768 byte object. + w.3.10n4k/2n20000 - Working set 3 with ten 4KiB and two 20000 byte objects. + +Working set objects can be referenced as data dependency targets using the new +'r'/'w' syntax. Simple example: + + w.1.4k + W.2.1m + 1.RCS.1000.r1-0/w2-0.0 + 1.BCS.1000.r2-0.0 + +In this example the RCS batch is reading from working set 1 object 0 and writing +to working set 2 object 0. BCS batch is reading from working set 2 object 0. + +Because working set 2 is of a shared type, should two instances of the same +workload be executed (-c 2) then the 1MiB buffer would be shared and written +and read by both clients creating a serialization point. + +Apart from single objects, ranges can also be given as depenencies: + + w.1.10n4k + 1.RCS.1000.r1-0-9.0 + +Here the RCS batch has a read dependency on working set 1 objects 0 to 9. diff --git a/benchmarks/wsim/cloud-gaming-60fps.wsim b/benchmarks/wsim/cloud-gaming-60fps.wsim new file mode 100644 index 000000000000..9e48bbc2f617 --- /dev/null +++ b/benchmarks/wsim/cloud-gaming-60fps.wsim @@ -0,0 +1,11 @@ +w.1.10n8m +w.2.3n16m +1.RCS.500-1500.r1-0-4/w2-0.0 +1.RCS.500-1500.r1-5-9/w2-1.0 +1.RCS.500-1500.r2-0-1/w2-2.0 +M.2.VCS +B.2 +3.RCS.500-1500.r2-2.0 +2.DEFAULT.2000-4000.-1.0 +4.VCS1.250-750.-1.1 +p.16667 diff --git a/benchmarks/wsim/composited-ui.wsim b/benchmarks/wsim/composited-ui.wsim new file mode 100644 index 000000000000..4164f8bf7393 --- /dev/null +++ b/benchmarks/wsim/composited-ui.wsim @@ -0,0 +1,7 @@ +w.1.10n8m/3n16m +W.2.16m +1.RCS.200-600.r1-0-4/w1-10.0 +1.RCS.200-600.r1-5-9/w1-11.0 +1.RCS.400-800.r1-10-11/w1-12.0 +3.BCS.200-800.r1-12/w2-0.1 +p.16667