[i-g-t,02/10] gem_wsim: Buffer objects working sets and complex dependencies
diff mbox series

Message ID 20200617160120.16555-3-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • gem_wsim improvements
Related show

Commit Message

Tvrtko Ursulin June 17, 2020, 4:01 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add support for defining buffer object working sets and targetting them as
data dependencies. For more information please see the README file.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c                   | 453 +++++++++++++++++++++---
 benchmarks/wsim/README                  |  59 +++
 benchmarks/wsim/cloud-gaming-60fps.wsim |  11 +
 benchmarks/wsim/composited-ui.wsim      |   7 +
 4 files changed, 476 insertions(+), 54 deletions(-)
 create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim
 create mode 100644 benchmarks/wsim/composited-ui.wsim

Comments

Chris Wilson June 17, 2020, 4:57 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-06-17 17:01:12)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Add support for defining buffer object working sets and targetting them as
> data dependencies. For more information please see the README file.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  benchmarks/gem_wsim.c                   | 453 +++++++++++++++++++++---
>  benchmarks/wsim/README                  |  59 +++
>  benchmarks/wsim/cloud-gaming-60fps.wsim |  11 +
>  benchmarks/wsim/composited-ui.wsim      |   7 +
>  4 files changed, 476 insertions(+), 54 deletions(-)
>  create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim
>  create mode 100644 benchmarks/wsim/composited-ui.wsim
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index 02fe8f5a5e69..9e5bfe6a36d4 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -88,14 +88,21 @@ enum w_type
>         LOAD_BALANCE,
>         BOND,
>         TERMINATE,
> -       SSEU
> +       SSEU,
> +       WORKINGSET,
> +};
> +
> +struct dep_entry {
> +       int target;
> +       bool write;
> +       int working_set; /* -1 = step dependecy, >= 0 working set id */
>  };
>  
>  struct deps
>  {
>         int nr;
>         bool submit_fence;
> -       int *list;
> +       struct dep_entry *list;
>  };
>  
>  struct w_arg {
> @@ -110,6 +117,14 @@ struct bond {
>         enum intel_engine_id master;
>  };
>  
> +struct working_set {
> +       int id;
> +       bool shared;
> +       unsigned int nr;
> +       uint32_t *handles;
> +       unsigned long *sizes;
> +};
> +
>  struct workload;
>  
>  struct w_step
> @@ -143,6 +158,7 @@ struct w_step
>                         enum intel_engine_id bond_master;
>                 };
>                 int sseu;
> +               struct working_set working_set;
>         };
>  
>         /* Implementation details */
> @@ -193,6 +209,9 @@ struct workload
>         unsigned int nr_ctxs;
>         struct ctx *ctx_list;
>  
> +       struct working_set **working_sets; /* array indexed by set id */
> +       int max_working_set_id;
> +
>         int sync_timeline;
>         uint32_t sync_seqno;
>  
> @@ -281,11 +300,120 @@ print_engine_calibrations(void)
>         printf("\n");
>  }
>  
> +static void add_dep(struct deps *deps, struct dep_entry entry)
> +{
> +       deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1));
> +       igt_assert(deps->list);
> +
> +       deps->list[deps->nr++] = entry;
> +}
> +
> +
> +static int
> +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str)
> +{
> +       struct dep_entry entry = { .working_set = -1 };
> +       bool submit_fence = false;
> +       char *s;
> +
> +       switch (str[0]) {
> +       case '-':
> +               if (str[1] < '0' || str[1] > '9')
> +                       return -1;
> +
> +               entry.target = atoi(str);
> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
> +                       return -1;

add_dep for N steps ago, using a handle.

> +
> +               add_dep(&w->data_deps, entry);
> +
> +               break;
> +       case 's':
> +               submit_fence = true;
> +               /* Fall-through. */
> +       case 'f':
> +               /* Multiple fences not yet supported. */
> +               igt_assert_eq(w->fence_deps.nr, 0);
> +
> +               entry.target = atoi(++str);
> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
> +                       return -1;
> +
> +               add_dep(&w->fence_deps, entry);
> +
> +               w->fence_deps.submit_fence = submit_fence;

add_dep for N steps ago, using the out-fence from that step
[A post processing steps adds emit_fence to the earlier steps.]

> +               break;
> +       case 'w':
> +               entry.write = true;

Got confused for a moment as I was expecting the submit_fence
fallthrough pattern.
> +               /* Fall-through. */
> +       case 'r':
> +               /*
> +                * [rw]N-<str>
> +                * r1-<str> or w2-<str>, where N is working set id.
> +                */
> +               s = index(++str, '-');
> +               if (!s)
> +                       return -1;
> +
> +               entry.working_set = atoi(str);

if (entry.working_set < 0)
	return -1;

> +
> +               if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s))
> +                       return -1;

The new one...

> +static int
> +parse_working_set_deps(struct workload *wrk,
> +                      struct deps *deps,
> +                      struct dep_entry _entry,
> +                      char *str)
> +{
> +       /*
> +        * 1 - target handle index in the specified working set.
> +        * 2-4 - range
> +        */
> +       struct dep_entry entry = _entry;
> +       char *s;
> +
> +       s = index(str, '-');
> +       if (s) {
> +               int from, to;
> +
> +               from = atoi(str);
> +               if (from < 0)
> +                       return -1;
> +
> +               to = atoi(++s);
> +               if (to <= 0)
> +                       return -1;

if to < from, we add nothing. Is that worth the error?

> +
> +               for (entry.target = from; entry.target <= to; entry.target++)
> +                       add_dep(deps, entry);
> +       } else {
> +               entry.target = atoi(str);
> +               if (entry.target < 0)
> +                       return -1;
> +
> +               add_dep(deps, entry);


> +       }
> +
> +       return 0;
> +}
> +
> +               break;
> +       default:
> +               return -1;
> +       };
> +
> +       return 0;
> +}
> +
>  static int
>  parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
>  {
>         char *desc = strdup(_desc);
>         char *token, *tctx = NULL, *tstart = desc;
> +       int ret = 0;
> +
> +       if (!strcmp(_desc, "0"))
> +               goto out;

Hang on, what this special case?

>  
>         igt_assert(desc);
>         igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr);
>  static void __attribute__((format(printf, 1, 2)))
> @@ -624,6 +722,88 @@ static int parse_engine_map(struct w_step *step, const char *_str)
>         return 0;
>  }
>  
> +static unsigned long parse_size(char *str)
> +{
/* "1234567890[gGmMkK]" */
> +       const unsigned int len = strlen(str);
> +       unsigned int mult = 1;
> +
> +       if (len == 0)
> +               return 0;
> +
> +       switch (str[len - 1]) {

T? P? E? Let's plan ahead! :)

> +       case 'g':
> +       case 'G':
> +               mult *= 1024;
> +               /* Fall-throuogh. */
> +       case 'm':
> +       case 'M':
> +               mult *= 1024;
> +               /* Fall-throuogh. */
> +       case 'k':
> +       case 'K':
> +               mult *= 1024;
> +
> +               str[len - 1] = 0;
> +       }
> +
> +       return atol(str) * mult;

Negatives?

> +}
> +
> +static int add_buffers(struct working_set *set, char *str)
> +{
> +       /*
> +        * 4096
> +        * 4k
> +        * 4m
> +        * 4g
> +        * 10n4k - 10 4k batches
> +        */
> +       unsigned long *sizes, size;
> +       unsigned int add, i;
> +       char *n;
> +
> +       n = index(str, 'n');
> +       if (n) {
> +               *n = 0;
> +               add = atoi(str);
> +               if (!add)
> +                       return -1;

if (add <= 0) [int add goes without saying then]

> +               str = ++n;
> +       } else {
> +               add = 1;
> +       }
> +
> +       size = parse_size(str);
> +       if (!size)
> +               return -1;
> +
> +       sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes));
> +       if (!sizes)
> +               return -1;
> +
> +       for (i = 0; i < add; i++)
> +               sizes[set->nr + i] = size;
> +
> +       set->nr += add;
> +       set->sizes = sizes;
> +
> +       return 0;
> +}

> @@ -1003,6 +1209,51 @@ add_step:
>                 }
>         }
>  
> +       /*
> +        * Check no duplicate working set ids.
> +        */
> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> +               struct w_step *w2;
> +
> +               if (w->type != WORKINGSET)
> +                       continue;
> +
> +               for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) {
> +                       if (j == i)
> +                               continue;
> +                       if (w2->type != WORKINGSET)
> +                               continue;
> +
> +                       check_arg(w->working_set.id == w2->working_set.id,
> +                                 "Duplicate working set id at %u!\n", j);
> +               }
> +       }
> +
> +       /*
> +        * Allocate shared working sets.
> +        */
> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> +               if (w->type == WORKINGSET && w->working_set.shared)
> +                       allocate_working_set(&w->working_set);
> +       }
> +
> +       wrk->max_working_set_id = -1;
> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> +               if (w->type == WORKINGSET &&
> +                   w->working_set.shared &&
> +                   w->working_set.id > wrk->max_working_set_id)
> +                       wrk->max_working_set_id = w->working_set.id;
> +       }
> +
> +       wrk->working_sets = calloc(wrk->max_working_set_id + 1,
> +                                  sizeof(*wrk->working_sets));
> +       igt_assert(wrk->working_sets);
> +
> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> +               if (w->type == WORKINGSET && w->working_set.shared)
> +                       wrk->working_sets[w->working_set.id] = &w->working_set;
> +       }

Ok, sharing works by reusing the same set of handles within the process.

Is there room in the parser namespace for dmabuf sharing?
-Chris
Tvrtko Ursulin June 18, 2020, 9:05 a.m. UTC | #2
On 17/06/2020 17:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-06-17 17:01:12)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add support for defining buffer object working sets and targetting them as
>> data dependencies. For more information please see the README file.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   benchmarks/gem_wsim.c                   | 453 +++++++++++++++++++++---
>>   benchmarks/wsim/README                  |  59 +++
>>   benchmarks/wsim/cloud-gaming-60fps.wsim |  11 +
>>   benchmarks/wsim/composited-ui.wsim      |   7 +
>>   4 files changed, 476 insertions(+), 54 deletions(-)
>>   create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim
>>   create mode 100644 benchmarks/wsim/composited-ui.wsim
>>
>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>> index 02fe8f5a5e69..9e5bfe6a36d4 100644
>> --- a/benchmarks/gem_wsim.c
>> +++ b/benchmarks/gem_wsim.c
>> @@ -88,14 +88,21 @@ enum w_type
>>          LOAD_BALANCE,
>>          BOND,
>>          TERMINATE,
>> -       SSEU
>> +       SSEU,
>> +       WORKINGSET,
>> +};
>> +
>> +struct dep_entry {
>> +       int target;
>> +       bool write;
>> +       int working_set; /* -1 = step dependecy, >= 0 working set id */
>>   };
>>   
>>   struct deps
>>   {
>>          int nr;
>>          bool submit_fence;
>> -       int *list;
>> +       struct dep_entry *list;
>>   };
>>   
>>   struct w_arg {
>> @@ -110,6 +117,14 @@ struct bond {
>>          enum intel_engine_id master;
>>   };
>>   
>> +struct working_set {
>> +       int id;
>> +       bool shared;
>> +       unsigned int nr;
>> +       uint32_t *handles;
>> +       unsigned long *sizes;
>> +};
>> +
>>   struct workload;
>>   
>>   struct w_step
>> @@ -143,6 +158,7 @@ struct w_step
>>                          enum intel_engine_id bond_master;
>>                  };
>>                  int sseu;
>> +               struct working_set working_set;
>>          };
>>   
>>          /* Implementation details */
>> @@ -193,6 +209,9 @@ struct workload
>>          unsigned int nr_ctxs;
>>          struct ctx *ctx_list;
>>   
>> +       struct working_set **working_sets; /* array indexed by set id */
>> +       int max_working_set_id;
>> +
>>          int sync_timeline;
>>          uint32_t sync_seqno;
>>   
>> @@ -281,11 +300,120 @@ print_engine_calibrations(void)
>>          printf("\n");
>>   }
>>   
>> +static void add_dep(struct deps *deps, struct dep_entry entry)
>> +{
>> +       deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1));
>> +       igt_assert(deps->list);
>> +
>> +       deps->list[deps->nr++] = entry;
>> +}
>> +
>> +
>> +static int
>> +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str)
>> +{
>> +       struct dep_entry entry = { .working_set = -1 };
>> +       bool submit_fence = false;
>> +       char *s;
>> +
>> +       switch (str[0]) {
>> +       case '-':
>> +               if (str[1] < '0' || str[1] > '9')
>> +                       return -1;
>> +
>> +               entry.target = atoi(str);
>> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
>> +                       return -1;
> 
> add_dep for N steps ago, using a handle.
> 
>> +
>> +               add_dep(&w->data_deps, entry);
>> +
>> +               break;
>> +       case 's':
>> +               submit_fence = true;
>> +               /* Fall-through. */
>> +       case 'f':
>> +               /* Multiple fences not yet supported. */
>> +               igt_assert_eq(w->fence_deps.nr, 0);
>> +
>> +               entry.target = atoi(++str);
>> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
>> +                       return -1;
>> +
>> +               add_dep(&w->fence_deps, entry);
>> +
>> +               w->fence_deps.submit_fence = submit_fence;
> 
> add_dep for N steps ago, using the out-fence from that step
> [A post processing steps adds emit_fence to the earlier steps.]
> 
>> +               break;
>> +       case 'w':
>> +               entry.write = true;
> 
> Got confused for a moment as I was expecting the submit_fence
> fallthrough pattern.
>> +               /* Fall-through. */
>> +       case 'r':
>> +               /*
>> +                * [rw]N-<str>
>> +                * r1-<str> or w2-<str>, where N is working set id.
>> +                */
>> +               s = index(++str, '-');
>> +               if (!s)
>> +                       return -1;
>> +
>> +               entry.working_set = atoi(str);
> 
> if (entry.working_set < 0)
> 	return -1;

Yep.

> 
>> +
>> +               if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s))
>> +                       return -1;
> 
> The new one...
> 
>> +static int
>> +parse_working_set_deps(struct workload *wrk,
>> +                      struct deps *deps,
>> +                      struct dep_entry _entry,
>> +                      char *str)
>> +{
>> +       /*
>> +        * 1 - target handle index in the specified working set.
>> +        * 2-4 - range
>> +        */
>> +       struct dep_entry entry = _entry;
>> +       char *s;
>> +
>> +       s = index(str, '-');
>> +       if (s) {
>> +               int from, to;
>> +
>> +               from = atoi(str);
>> +               if (from < 0)
>> +                       return -1;
>> +
>> +               to = atoi(++s);
>> +               if (to <= 0)
>> +                       return -1;
> 
> if to < from, we add nothing. Is that worth the error?

Yep.

> 
>> +
>> +               for (entry.target = from; entry.target <= to; entry.target++)
>> +                       add_dep(deps, entry);
>> +       } else {
>> +               entry.target = atoi(str);
>> +               if (entry.target < 0)
>> +                       return -1;
>> +
>> +               add_dep(deps, entry);
> 
> 
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +               break;
>> +       default:
>> +               return -1;
>> +       };
>> +
>> +       return 0;
>> +}
>> +
>>   static int
>>   parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
>>   {
>>          char *desc = strdup(_desc);
>>          char *token, *tctx = NULL, *tstart = desc;
>> +       int ret = 0;
>> +
>> +       if (!strcmp(_desc, "0"))
>> +               goto out;
> 
> Hang on, what this special case?

For no dependencies.

If I move the check to parse_dependency then dependency of "0/0/0/0" 
would be silently accepted. It wouldn't be a big deal, who cares, but I 
thought it is better to be more strict.

> 
>>   
>>          igt_assert(desc);
>>          igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr);
>>   static void __attribute__((format(printf, 1, 2)))
>> @@ -624,6 +722,88 @@ static int parse_engine_map(struct w_step *step, const char *_str)
>>          return 0;
>>   }
>>   
>> +static unsigned long parse_size(char *str)
>> +{
> /* "1234567890[gGmMkK]" */
>> +       const unsigned int len = strlen(str);
>> +       unsigned int mult = 1;
>> +
>> +       if (len == 0)
>> +               return 0;
>> +
>> +       switch (str[len - 1]) {
> 
> T? P? E? Let's plan ahead! :)

Error on unrecognized non-digit? Ok.

> 
>> +       case 'g':
>> +       case 'G':
>> +               mult *= 1024;
>> +               /* Fall-throuogh. */
>> +       case 'm':
>> +       case 'M':
>> +               mult *= 1024;
>> +               /* Fall-throuogh. */
>> +       case 'k':
>> +       case 'K':
>> +               mult *= 1024;
>> +
>> +               str[len - 1] = 0;
>> +       }
>> +
>> +       return atol(str) * mult;
> 
> Negatives?

Ok.

> 
>> +}
>> +
>> +static int add_buffers(struct working_set *set, char *str)
>> +{
>> +       /*
>> +        * 4096
>> +        * 4k
>> +        * 4m
>> +        * 4g
>> +        * 10n4k - 10 4k batches
>> +        */
>> +       unsigned long *sizes, size;
>> +       unsigned int add, i;
>> +       char *n;
>> +
>> +       n = index(str, 'n');
>> +       if (n) {
>> +               *n = 0;
>> +               add = atoi(str);
>> +               if (!add)
>> +                       return -1;
> 
> if (add <= 0) [int add goes without saying then]

Yep.

> 
>> +               str = ++n;
>> +       } else {
>> +               add = 1;
>> +       }
>> +
>> +       size = parse_size(str);
>> +       if (!size)
>> +               return -1;
>> +
>> +       sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes));
>> +       if (!sizes)
>> +               return -1;
>> +
>> +       for (i = 0; i < add; i++)
>> +               sizes[set->nr + i] = size;
>> +
>> +       set->nr += add;
>> +       set->sizes = sizes;
>> +
>> +       return 0;
>> +}
> 
>> @@ -1003,6 +1209,51 @@ add_step:
>>                  }
>>          }
>>   
>> +       /*
>> +        * Check no duplicate working set ids.
>> +        */
>> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> +               struct w_step *w2;
>> +
>> +               if (w->type != WORKINGSET)
>> +                       continue;
>> +
>> +               for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) {
>> +                       if (j == i)
>> +                               continue;
>> +                       if (w2->type != WORKINGSET)
>> +                               continue;
>> +
>> +                       check_arg(w->working_set.id == w2->working_set.id,
>> +                                 "Duplicate working set id at %u!\n", j);
>> +               }
>> +       }
>> +
>> +       /*
>> +        * Allocate shared working sets.
>> +        */
>> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> +               if (w->type == WORKINGSET && w->working_set.shared)
>> +                       allocate_working_set(&w->working_set);
>> +       }
>> +
>> +       wrk->max_working_set_id = -1;
>> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> +               if (w->type == WORKINGSET &&
>> +                   w->working_set.shared &&
>> +                   w->working_set.id > wrk->max_working_set_id)
>> +                       wrk->max_working_set_id = w->working_set.id;
>> +       }
>> +
>> +       wrk->working_sets = calloc(wrk->max_working_set_id + 1,
>> +                                  sizeof(*wrk->working_sets));
>> +       igt_assert(wrk->working_sets);
>> +
>> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> +               if (w->type == WORKINGSET && w->working_set.shared)
>> +                       wrk->working_sets[w->working_set.id] = &w->working_set;
>> +       }
> 
> Ok, sharing works by reusing the same set of handles within the process.
> 
> Is there room in the parser namespace for dmabuf sharing?

Plenty of unused characters. :)

Regards,

Tvrtko
Chris Wilson June 18, 2020, 9:22 a.m. UTC | #3
Quoting Tvrtko Ursulin (2020-06-18 10:05:56)
> 
> On 17/06/2020 17:57, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-06-17 17:01:12)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Add support for defining buffer object working sets and targetting them as
> >> data dependencies. For more information please see the README file.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> ---
> >>   benchmarks/gem_wsim.c                   | 453 +++++++++++++++++++++---
> >>   benchmarks/wsim/README                  |  59 +++
> >>   benchmarks/wsim/cloud-gaming-60fps.wsim |  11 +
> >>   benchmarks/wsim/composited-ui.wsim      |   7 +
> >>   4 files changed, 476 insertions(+), 54 deletions(-)
> >>   create mode 100644 benchmarks/wsim/cloud-gaming-60fps.wsim
> >>   create mode 100644 benchmarks/wsim/composited-ui.wsim
> >>
> >> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> >> index 02fe8f5a5e69..9e5bfe6a36d4 100644
> >> --- a/benchmarks/gem_wsim.c
> >> +++ b/benchmarks/gem_wsim.c
> >> @@ -88,14 +88,21 @@ enum w_type
> >>          LOAD_BALANCE,
> >>          BOND,
> >>          TERMINATE,
> >> -       SSEU
> >> +       SSEU,
> >> +       WORKINGSET,
> >> +};
> >> +
> >> +struct dep_entry {
> >> +       int target;
> >> +       bool write;
> >> +       int working_set; /* -1 = step dependecy, >= 0 working set id */
> >>   };
> >>   
> >>   struct deps
> >>   {
> >>          int nr;
> >>          bool submit_fence;
> >> -       int *list;
> >> +       struct dep_entry *list;
> >>   };
> >>   
> >>   struct w_arg {
> >> @@ -110,6 +117,14 @@ struct bond {
> >>          enum intel_engine_id master;
> >>   };
> >>   
> >> +struct working_set {
> >> +       int id;
> >> +       bool shared;
> >> +       unsigned int nr;
> >> +       uint32_t *handles;
> >> +       unsigned long *sizes;
> >> +};
> >> +
> >>   struct workload;
> >>   
> >>   struct w_step
> >> @@ -143,6 +158,7 @@ struct w_step
> >>                          enum intel_engine_id bond_master;
> >>                  };
> >>                  int sseu;
> >> +               struct working_set working_set;
> >>          };
> >>   
> >>          /* Implementation details */
> >> @@ -193,6 +209,9 @@ struct workload
> >>          unsigned int nr_ctxs;
> >>          struct ctx *ctx_list;
> >>   
> >> +       struct working_set **working_sets; /* array indexed by set id */
> >> +       int max_working_set_id;
> >> +
> >>          int sync_timeline;
> >>          uint32_t sync_seqno;
> >>   
> >> @@ -281,11 +300,120 @@ print_engine_calibrations(void)
> >>          printf("\n");
> >>   }
> >>   
> >> +static void add_dep(struct deps *deps, struct dep_entry entry)
> >> +{
> >> +       deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1));
> >> +       igt_assert(deps->list);
> >> +
> >> +       deps->list[deps->nr++] = entry;
> >> +}
> >> +
> >> +
> >> +static int
> >> +parse_dependency(unsigned int nr_steps, struct w_step *w, char *str)
> >> +{
> >> +       struct dep_entry entry = { .working_set = -1 };
> >> +       bool submit_fence = false;
> >> +       char *s;
> >> +
> >> +       switch (str[0]) {
> >> +       case '-':
> >> +               if (str[1] < '0' || str[1] > '9')
> >> +                       return -1;
> >> +
> >> +               entry.target = atoi(str);
> >> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
> >> +                       return -1;
> > 
> > add_dep for N steps ago, using a handle.
> > 
> >> +
> >> +               add_dep(&w->data_deps, entry);
> >> +
> >> +               break;
> >> +       case 's':
> >> +               submit_fence = true;
> >> +               /* Fall-through. */
> >> +       case 'f':
> >> +               /* Multiple fences not yet supported. */
> >> +               igt_assert_eq(w->fence_deps.nr, 0);
> >> +
> >> +               entry.target = atoi(++str);
> >> +               if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
> >> +                       return -1;
> >> +
> >> +               add_dep(&w->fence_deps, entry);
> >> +
> >> +               w->fence_deps.submit_fence = submit_fence;
> > 
> > add_dep for N steps ago, using the out-fence from that step
> > [A post processing steps adds emit_fence to the earlier steps.]
> > 
> >> +               break;
> >> +       case 'w':
> >> +               entry.write = true;
> > 
> > Got confused for a moment as I was expecting the submit_fence
> > fallthrough pattern.
> >> +               /* Fall-through. */
> >> +       case 'r':
> >> +               /*
> >> +                * [rw]N-<str>
> >> +                * r1-<str> or w2-<str>, where N is working set id.
> >> +                */
> >> +               s = index(++str, '-');
> >> +               if (!s)
> >> +                       return -1;
> >> +
> >> +               entry.working_set = atoi(str);
> > 
> > if (entry.working_set < 0)
> >       return -1;
> 
> Yep.
> 
> > 
> >> +
> >> +               if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s))
> >> +                       return -1;
> > 
> > The new one...
> > 
> >> +static int
> >> +parse_working_set_deps(struct workload *wrk,
> >> +                      struct deps *deps,
> >> +                      struct dep_entry _entry,
> >> +                      char *str)
> >> +{
> >> +       /*
> >> +        * 1 - target handle index in the specified working set.
> >> +        * 2-4 - range
> >> +        */
> >> +       struct dep_entry entry = _entry;
> >> +       char *s;
> >> +
> >> +       s = index(str, '-');
> >> +       if (s) {
> >> +               int from, to;
> >> +
> >> +               from = atoi(str);
> >> +               if (from < 0)
> >> +                       return -1;
> >> +
> >> +               to = atoi(++s);
> >> +               if (to <= 0)
> >> +                       return -1;
> > 
> > if to < from, we add nothing. Is that worth the error?
> 
> Yep.
> 
> > 
> >> +
> >> +               for (entry.target = from; entry.target <= to; entry.target++)
> >> +                       add_dep(deps, entry);
> >> +       } else {
> >> +               entry.target = atoi(str);
> >> +               if (entry.target < 0)
> >> +                       return -1;
> >> +
> >> +               add_dep(deps, entry);
> > 
> > 
> >> +       }
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +               break;
> >> +       default:
> >> +               return -1;
> >> +       };
> >> +
> >> +       return 0;
> >> +}
> >> +
> >>   static int
> >>   parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
> >>   {
> >>          char *desc = strdup(_desc);
> >>          char *token, *tctx = NULL, *tstart = desc;
> >> +       int ret = 0;
> >> +
> >> +       if (!strcmp(_desc, "0"))
> >> +               goto out;
> > 
> > Hang on, what this special case?
> 
> For no dependencies.
> 
> If I move the check to parse_dependency then dependency of "0/0/0/0" 
> would be silently accepted. It wouldn't be a big deal, who cares, but I 
> thought it is better to be more strict.

It was just not clear at this point what is being matched, what the
meaning of 0 is.

/* 0 refers to self, a degenerate dependency */
?
-Chris

Patch
diff mbox series

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 02fe8f5a5e69..9e5bfe6a36d4 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -88,14 +88,21 @@  enum w_type
 	LOAD_BALANCE,
 	BOND,
 	TERMINATE,
-	SSEU
+	SSEU,
+	WORKINGSET,
+};
+
+struct dep_entry {
+	int target;
+	bool write;
+	int working_set; /* -1 = step dependecy, >= 0 working set id */
 };
 
 struct deps
 {
 	int nr;
 	bool submit_fence;
-	int *list;
+	struct dep_entry *list;
 };
 
 struct w_arg {
@@ -110,6 +117,14 @@  struct bond {
 	enum intel_engine_id master;
 };
 
+struct working_set {
+	int id;
+	bool shared;
+	unsigned int nr;
+	uint32_t *handles;
+	unsigned long *sizes;
+};
+
 struct workload;
 
 struct w_step
@@ -143,6 +158,7 @@  struct w_step
 			enum intel_engine_id bond_master;
 		};
 		int sseu;
+		struct working_set working_set;
 	};
 
 	/* Implementation details */
@@ -193,6 +209,9 @@  struct workload
 	unsigned int nr_ctxs;
 	struct ctx *ctx_list;
 
+	struct working_set **working_sets; /* array indexed by set id */
+	int max_working_set_id;
+
 	int sync_timeline;
 	uint32_t sync_seqno;
 
@@ -281,11 +300,120 @@  print_engine_calibrations(void)
 	printf("\n");
 }
 
+static void add_dep(struct deps *deps, struct dep_entry entry)
+{
+	deps->list = realloc(deps->list, sizeof(*deps->list) * (deps->nr + 1));
+	igt_assert(deps->list);
+
+	deps->list[deps->nr++] = entry;
+}
+
+static int
+parse_working_set_deps(struct workload *wrk,
+		       struct deps *deps,
+		       struct dep_entry _entry,
+		       char *str)
+{
+	/*
+	 * 1 - target handle index in the specified working set.
+	 * 2-4 - range
+	 */
+	struct dep_entry entry = _entry;
+	char *s;
+
+	s = index(str, '-');
+	if (s) {
+		int from, to;
+
+		from = atoi(str);
+		if (from < 0)
+			return -1;
+
+		to = atoi(++s);
+		if (to <= 0)
+			return -1;
+
+		for (entry.target = from; entry.target <= to; entry.target++)
+			add_dep(deps, entry);
+	} else {
+		entry.target = atoi(str);
+		if (entry.target < 0)
+			return -1;
+
+		add_dep(deps, entry);
+	}
+
+	return 0;
+}
+
+static int
+parse_dependency(unsigned int nr_steps, struct w_step *w, char *str)
+{
+	struct dep_entry entry = { .working_set = -1 };
+	bool submit_fence = false;
+	char *s;
+
+	switch (str[0]) {
+	case '-':
+		if (str[1] < '0' || str[1] > '9')
+			return -1;
+
+		entry.target = atoi(str);
+		if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
+			return -1;
+
+		add_dep(&w->data_deps, entry);
+
+		break;
+	case 's':
+		submit_fence = true;
+		/* Fall-through. */
+	case 'f':
+		/* Multiple fences not yet supported. */
+		igt_assert_eq(w->fence_deps.nr, 0);
+
+		entry.target = atoi(++str);
+		if (entry.target > 0 || ((int)nr_steps + entry.target) < 0)
+			return -1;
+
+		add_dep(&w->fence_deps, entry);
+
+		w->fence_deps.submit_fence = submit_fence;
+		break;
+	case 'w':
+		entry.write = true;
+		/* Fall-through. */
+	case 'r':
+		/*
+		 * [rw]N-<str>
+		 * r1-<str> or w2-<str>, where N is working set id.
+		 */
+		s = index(++str, '-');
+		if (!s)
+			return -1;
+
+		entry.working_set = atoi(str);
+
+		if (parse_working_set_deps(w->wrk, &w->data_deps, entry, ++s))
+			return -1;
+
+		break;
+	default:
+		return -1;
+	};
+
+	return 0;
+}
+
 static int
 parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
 {
 	char *desc = strdup(_desc);
 	char *token, *tctx = NULL, *tstart = desc;
+	int ret = 0;
+
+	if (!strcmp(_desc, "0"))
+		goto out;
 
 	igt_assert(desc);
 	igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr);
@@ -293,47 +421,17 @@  parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
 		   w->data_deps.list == w->fence_deps.list);
 
 	while ((token = strtok_r(tstart, "/", &tctx)) != NULL) {
-		bool submit_fence = false;
-		char *str = token;
-		struct deps *deps;
-		int dep;
-
 		tstart = NULL;
 
-		if (str[0] == '-' || (str[0] >= '0' && str[0] <= '9')) {
-			deps = &w->data_deps;
-		} else {
-			if (str[0] == 's')
-				submit_fence = true;
-			else if (str[0] != 'f')
-				return -1;
-
-			deps = &w->fence_deps;
-			str++;
-		}
-
-		dep = atoi(str);
-		if (dep > 0 || ((int)nr_steps + dep) < 0) {
-			if (deps->list)
-				free(deps->list);
-			return -1;
-		}
-
-		if (dep < 0) {
-			deps->nr++;
-			/* Multiple fences not yet supported. */
-			igt_assert(deps->nr == 1 || deps != &w->fence_deps);
-			deps->list = realloc(deps->list,
-					     sizeof(*deps->list) * deps->nr);
-			igt_assert(deps->list);
-			deps->list[deps->nr - 1] = dep;
-			deps->submit_fence = submit_fence;
-		}
+		ret = parse_dependency(nr_steps, w, token);
+		if (ret)
+			break;
 	}
 
+out:
 	free(desc);
 
-	return 0;
+	return ret;
 }
 
 static void __attribute__((format(printf, 1, 2)))
@@ -624,6 +722,88 @@  static int parse_engine_map(struct w_step *step, const char *_str)
 	return 0;
 }
 
+static unsigned long parse_size(char *str)
+{
+	const unsigned int len = strlen(str);
+	unsigned int mult = 1;
+
+	if (len == 0)
+		return 0;
+
+	switch (str[len - 1]) {
+	case 'g':
+	case 'G':
+		mult *= 1024;
+		/* Fall-throuogh. */
+	case 'm':
+	case 'M':
+		mult *= 1024;
+		/* Fall-throuogh. */
+	case 'k':
+	case 'K':
+		mult *= 1024;
+
+		str[len - 1] = 0;
+	}
+
+	return atol(str) * mult;
+}
+
+static int add_buffers(struct working_set *set, char *str)
+{
+	/*
+	 * 4096
+	 * 4k
+	 * 4m
+	 * 4g
+	 * 10n4k - 10 4k batches
+	 */
+	unsigned long *sizes, size;
+	unsigned int add, i;
+	char *n;
+
+	n = index(str, 'n');
+	if (n) {
+		*n = 0;
+		add = atoi(str);
+		if (!add)
+			return -1;
+		str = ++n;
+	} else {
+		add = 1;
+	}
+
+	size = parse_size(str);
+	if (!size)
+		return -1;
+
+	sizes = realloc(set->sizes, (set->nr + add) * sizeof(*sizes));
+	if (!sizes)
+		return -1;
+
+	for (i = 0; i < add; i++)
+		sizes[set->nr + i] = size;
+
+	set->nr += add;
+	set->sizes = sizes;
+
+	return 0;
+}
+
+static int parse_working_set(struct working_set *set, char *str)
+{
+	char *token, *tctx = NULL, *tstart = str;
+
+	while ((token = strtok_r(tstart, "/", &tctx))) {
+		tstart = NULL;
+
+		if (add_buffers(set, token))
+			return -1;
+	}
+
+	return 0;
+}
+
 static uint64_t engine_list_mask(const char *_str)
 {
 	uint64_t mask = 0;
@@ -644,6 +824,8 @@  static uint64_t engine_list_mask(const char *_str)
 	return mask;
 }
 
+static void allocate_working_set(struct working_set *set);
+
 #define int_field(_STEP_, _FIELD_, _COND_, _ERR_) \
 	if ((field = strtok_r(fstart, ".", &fctx))) { \
 		tmp = atoi(field); \
@@ -661,7 +843,7 @@  parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
 	char *desc = strdup(arg->desc);
 	char *_token, *token, *tctx = NULL, *tstart = desc;
 	char *field, *fctx = NULL, *fstart;
-	struct w_step step, *steps = NULL;
+	struct w_step step, *w, *steps = NULL;
 	unsigned int valid;
 	int i, j, tmp;
 
@@ -851,6 +1033,28 @@  parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
 
 				step.type = BOND;
 				goto add_step;
+			} else if (!strcmp(field, "w") || !strcmp(field, "W")) {
+				unsigned int nr = 0;
+
+				step.working_set.shared = field[0] == 'W';
+
+				while ((field = strtok_r(fstart, ".", &fctx))) {
+					tmp = atoi(field);
+					if (nr == 0) {
+						step.working_set.id = tmp;
+					} else {
+						tmp = parse_working_set(&step.working_set,
+									field);
+						check_arg(tmp < 0,
+							  "Invalid working set at step %u!\n",
+							  nr_steps);
+					}
+
+					nr++;
+				}
+
+				step.type = WORKINGSET;
+				goto add_step;
 			}
 
 			if (!field) {
@@ -975,6 +1179,8 @@  add_step:
 	wrk->steps = steps;
 	wrk->prio = arg->prio;
 	wrk->sseu = arg->sseu;
+	wrk->max_working_set_id = -1;
+	wrk->working_sets = NULL;
 
 	free(desc);
 
@@ -984,7 +1190,7 @@  add_step:
 	 */
 	for (i = 0; i < nr_steps; i++) {
 		for (j = 0; j < steps[i].fence_deps.nr; j++) {
-			tmp = steps[i].idx + steps[i].fence_deps.list[j];
+			tmp = steps[i].idx + steps[i].fence_deps.list[j].target;
 			check_arg(tmp < 0 || tmp >= i ||
 				  (steps[tmp].type != BATCH &&
 				   steps[tmp].type != SW_FENCE),
@@ -1003,6 +1209,51 @@  add_step:
 		}
 	}
 
+	/*
+	 * Check no duplicate working set ids.
+	 */
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		struct w_step *w2;
+
+		if (w->type != WORKINGSET)
+			continue;
+
+		for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) {
+			if (j == i)
+				continue;
+			if (w2->type != WORKINGSET)
+				continue;
+
+			check_arg(w->working_set.id == w2->working_set.id,
+				  "Duplicate working set id at %u!\n", j);
+		}
+	}
+
+	/*
+	 * Allocate shared working sets.
+	 */
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == WORKINGSET && w->working_set.shared)
+			allocate_working_set(&w->working_set);
+	}
+
+	wrk->max_working_set_id = -1;
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == WORKINGSET &&
+		    w->working_set.shared &&
+		    w->working_set.id > wrk->max_working_set_id)
+			wrk->max_working_set_id = w->working_set.id;
+	}
+
+	wrk->working_sets = calloc(wrk->max_working_set_id + 1,
+				   sizeof(*wrk->working_sets));
+	igt_assert(wrk->working_sets);
+
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == WORKINGSET && w->working_set.shared)
+			wrk->working_sets[w->working_set.id] = &w->working_set;
+	}
+
 	return wrk;
 }
 
@@ -1024,6 +1275,18 @@  clone_workload(struct workload *_wrk)
 
 	memcpy(wrk->steps, _wrk->steps, sizeof(struct w_step) * wrk->nr_steps);
 
+	wrk->max_working_set_id = _wrk->max_working_set_id;
+	if (wrk->max_working_set_id >= 0) {
+		wrk->working_sets = calloc(wrk->max_working_set_id + 1,
+					sizeof(*wrk->working_sets));
+		igt_assert(wrk->working_sets);
+
+		memcpy(wrk->working_sets,
+		       _wrk->working_sets,
+		       (wrk->max_working_set_id + 1) *
+		       sizeof(*wrk->working_sets));
+	}
+
 	/* Check if we need a sw sync timeline. */
 	for (i = 0; i < wrk->nr_steps; i++) {
 		if (wrk->steps[i].type == SW_FENCE) {
@@ -1226,17 +1489,36 @@  alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 	igt_assert(j < nr_obj);
 
 	for (i = 0; i < w->data_deps.nr; i++) {
-		igt_assert(w->data_deps.list[i] <= 0);
-		if (w->data_deps.list[i]) {
-			int dep_idx = w->idx + w->data_deps.list[i];
+		struct dep_entry *entry = &w->data_deps.list[i];
+		uint32_t dep_handle;
+
+		if (entry->working_set == -1) {
+			int dep_idx = w->idx + entry->target;
 
+			igt_assert(entry->target <= 0);
 			igt_assert(dep_idx >= 0 && dep_idx < w->idx);
 			igt_assert(wrk->steps[dep_idx].type == BATCH);
 
-			w->obj[j].handle = wrk->steps[dep_idx].obj[0].handle;
-			j++;
-			igt_assert(j < nr_obj);
+			dep_handle = wrk->steps[dep_idx].obj[0].handle;
+		} else {
+			struct working_set *set;
+
+			igt_assert(entry->working_set <=
+				   wrk->max_working_set_id);
+
+			set = wrk->working_sets[entry->working_set];
+
+			igt_assert(set->nr);
+			igt_assert(entry->target < set->nr);
+			igt_assert(set->sizes[entry->target]);
+
+			dep_handle = set->handles[entry->target];
 		}
+
+		w->obj[j].flags = entry->write ? EXEC_OBJECT_WRITE : 0;
+		w->obj[j].handle = dep_handle;
+		j++;
+		igt_assert(j < nr_obj);
 	}
 
 	if (w->unbound_duration)
@@ -1395,11 +1677,23 @@  static size_t sizeof_engines_bond(int count)
 			engines[count]);
 }
 
+static void allocate_working_set(struct working_set *set)
+{
+	unsigned int i;
+
+	set->handles = calloc(set->nr, sizeof(*set->handles));
+	igt_assert(set->handles);
+
+	for (i = 0; i < set->nr; i++)
+		set->handles[i] = gem_create(fd, set->sizes[i]);
+}
+
 #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
 
 static int
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
+	struct working_set **sets;
 	uint32_t share_vm = 0;
 	int max_ctx = -1;
 	struct w_step *w;
@@ -1634,6 +1928,51 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 		}
 	}
 
+	/*
+	 * Allocate working sets.
+	 */
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == WORKINGSET && !w->working_set.shared)
+			allocate_working_set(&w->working_set);
+	}
+
+	/*
+	 * Map of working set ids.
+	 */
+	wrk->max_working_set_id = -1;
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == WORKINGSET &&
+		    w->working_set.id > wrk->max_working_set_id)
+			wrk->max_working_set_id = w->working_set.id;
+	}
+
+	sets = wrk->working_sets;
+	wrk->working_sets = calloc(wrk->max_working_set_id + 1,
+				   sizeof(*wrk->working_sets));
+	igt_assert(wrk->working_sets);
+
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		struct working_set *set;
+
+		if (w->type != WORKINGSET)
+			continue;
+
+		if (!w->working_set.shared) {
+			set = &w->working_set;
+		} else {
+			igt_assert(sets);
+
+			set = sets[w->working_set.id];
+			igt_assert(set->shared);
+			igt_assert(set->sizes);
+		}
+
+		wrk->working_sets[w->working_set.id] = set;
+	}
+
+	if (sets)
+		free(sets);
+
 	/*
 	 * Allocate batch buffers.
 	 */
@@ -1704,7 +2043,7 @@  do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
 		      2 * sizeof(uint32_t));
 
 	for (i = 0; i < w->fence_deps.nr; i++) {
-		int tgt = w->idx + w->fence_deps.list[i];
+		int tgt = w->idx + w->fence_deps.list[i].target;
 
 		/* TODO: fence merging needed to support multiple inputs */
 		igt_assert(i == 0);
@@ -1735,14 +2074,18 @@  static void sync_deps(struct workload *wrk, struct w_step *w)
 	unsigned int i;
 
 	for (i = 0; i < w->data_deps.nr; i++) {
+		struct dep_entry *entry = &w->data_deps.list[i];
 		int dep_idx;
 
-		igt_assert(w->data_deps.list[i] <= 0);
+		if (entry->working_set == -1)
+			continue;
+
+		igt_assert(entry->target <= 0);
 
-		if (!w->data_deps.list[i])
+		if (!entry->target)
 			continue;
 
-		dep_idx = w->idx + w->data_deps.list[i];
+		dep_idx = w->idx + entry->target;
 
 		igt_assert(dep_idx >= 0 && dep_idx < w->idx);
 		igt_assert(wrk->steps[dep_idx].type == BATCH);
@@ -1842,11 +2185,6 @@  static void *run_workload(void *data)
 					MI_BATCH_BUFFER_END;
 				__sync_synchronize();
 				continue;
-			} else if (w->type == PREEMPTION ||
-				   w->type == ENGINE_MAP ||
-				   w->type == LOAD_BALANCE ||
-				   w->type == BOND) {
-				continue;
 			} else if (w->type == SSEU) {
 				if (w->sseu != wrk->ctx_list[w->context * 2].sseu) {
 					wrk->ctx_list[w->context * 2].sseu =
@@ -1854,6 +2192,13 @@  static void *run_workload(void *data)
 							     w->sseu);
 				}
 				continue;
+			} else if (w->type == PREEMPTION ||
+				   w->type == ENGINE_MAP ||
+				   w->type == LOAD_BALANCE ||
+				   w->type == BOND ||
+				   w->type == WORKINGSET) {
+				   /* No action for these at execution time. */
+				continue;
 			}
 
 			if (do_sleep || w->type == PERIOD) {
diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
index 9f770217f075..3d9143226740 100644
--- a/benchmarks/wsim/README
+++ b/benchmarks/wsim/README
@@ -8,6 +8,7 @@  M.<uint>.<str>[|<str>]...
 P|S|X.<uint>.<int>
 d|p|s|t|q|a|T.<int>,...
 b.<uint>.<str>[|<str>].<str>
+w|W.<uint>.<str>[/<str>]...
 f
 
 For duration a range can be given from which a random value will be picked
@@ -32,6 +33,8 @@  Additional workload steps are also supported:
  'P' - Context priority.
  'S' - Context SSEU configuration.
  'T' - Terminate an infinite batch.
+ 'w' - Working set. (See Working sets section.)
+ 'W' - Shared working set.
  'X' - Context preemption control.
 
 Engine ids: DEFAULT, RCS, BCS, VCS, VCS1, VCS2, VECS
@@ -275,3 +278,59 @@  for the render engine.
 Slice mask of -1 has a special meaning of "all slices". Otherwise any integer
 can be specifying as the slice mask, but beware any apart from 1 and -1 can make
 the workload not portable between different GPUs.
+
+Working sets
+------------
+
+When used plainly workload steps can create implicit data dependencies by
+relatively referencing another workload steps of a batch buffer type. Fourth
+field contains the relative data dependncy. For example:
+
+  1.RCS.1000.0.0
+  1.BCS.1000.-1.0
+
+This means the second batch buffer will be marked as having a read data
+dependency on the first one. (The shared buffer is always marked as written to
+by the dependency target buffer.) This will cause a serialization between the
+two batch buffers.
+
+Working sets are used where more complex data dependencies are required. Each
+working set has an id, a list of buffers, and can either be local to the
+workload or shared within the cloned workloads (-c command line option).
+
+Lower-case 'w' command defines a local working set while upper-case 'W' defines
+a shared version. Syntax is as follows:
+
+  w.<id>.<size>[/<size>]...
+
+For size a byte size can be given, or suffix 'k', 'm' or 'g' can be used (case
+insensitive). Prefix in the format of "<int>n<size>" can also be given to create
+multiple objects of the same size.
+
+Examples:
+
+  w.1.4k - Working set 1 with a single 4KiB object in it.
+  W.2.2M/32768 - Working set 2 with one 2MiB and one 32768 byte object.
+  w.3.10n4k/2n20000 - Working set 3 with ten 4KiB and two 20000 byte objects.
+
+Working set objects can be referenced as data dependency targets using the new
+'r'/'w' syntax. Simple example:
+
+  w.1.4k
+  W.2.1m
+  1.RCS.1000.r1-0/w2-0.0
+  1.BCS.1000.r2-0.0
+
+In this example the RCS batch is reading from working set 1 object 0 and writing
+to working set 2 object 0. BCS batch is reading from working set 2 object 0.
+
+Because working set 2 is of a shared type, should two instances of the same
+workload be executed (-c 2) then the 1MiB buffer would be shared and written
+and read by both clients creating a serialization point.
+
+Apart from single objects, ranges can also be given as depenencies:
+
+  w.1.10n4k
+  1.RCS.1000.r1-0-9.0
+
+Here the RCS batch has a read dependency on working set 1 objects 0 to 9.
diff --git a/benchmarks/wsim/cloud-gaming-60fps.wsim b/benchmarks/wsim/cloud-gaming-60fps.wsim
new file mode 100644
index 000000000000..9e48bbc2f617
--- /dev/null
+++ b/benchmarks/wsim/cloud-gaming-60fps.wsim
@@ -0,0 +1,11 @@ 
+w.1.10n8m
+w.2.3n16m
+1.RCS.500-1500.r1-0-4/w2-0.0
+1.RCS.500-1500.r1-5-9/w2-1.0
+1.RCS.500-1500.r2-0-1/w2-2.0
+M.2.VCS
+B.2
+3.RCS.500-1500.r2-2.0
+2.DEFAULT.2000-4000.-1.0
+4.VCS1.250-750.-1.1
+p.16667
diff --git a/benchmarks/wsim/composited-ui.wsim b/benchmarks/wsim/composited-ui.wsim
new file mode 100644
index 000000000000..4164f8bf7393
--- /dev/null
+++ b/benchmarks/wsim/composited-ui.wsim
@@ -0,0 +1,7 @@ 
+w.1.10n8m/3n16m
+W.2.16m
+1.RCS.200-600.r1-0-4/w1-10.0
+1.RCS.200-600.r1-5-9/w1-11.0
+1.RCS.400-800.r1-10-11/w1-12.0
+3.BCS.200-800.r1-12/w2-0.1
+p.16667