Message ID | 20240821114100.2261167-3-dtatulea@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vdpa/mlx5: Optimize MKEY operations | expand |
On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: > > Use the async interface to issue MTT MKEY creation. > Extra care is taken at the allocation of FW input commands > due to the MTT tables having variable sizes depending on > MR. > > The indirect MKEY is still created synchronously at the > end as the direct MKEYs need to be filled in. > > This makes create_user_mr() 3-5x faster, depending on > the size of the MR. > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com> > --- > drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++------- > 1 file changed, 96 insertions(+), 22 deletions(-) > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c > index 4758914ccf86..66e6a15f823f 100644 > --- a/drivers/vdpa/mlx5/core/mr.c > +++ b/drivers/vdpa/mlx5/core/mr.c > @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) > } > } > > -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) > +struct mlx5_create_mkey_mem { > + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; > + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; > + DECLARE_FLEX_ARRAY(__be64, mtt); I may be missing something obvious, but why do we need DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in special cases like uapi headers and we can use "__be64 mtt[]" here. 
> +}; > + > +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, > + struct mlx5_vdpa_direct_mr *mr, > + struct mlx5_create_mkey_mem *mem) > { > - int inlen; > + void *in = &mem->in; > void *mkc; > - void *in; > - int err; > - > - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); > - in = kvzalloc(inlen, GFP_KERNEL); > - if (!in) > - return -ENOMEM; > > MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); > mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); > @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct > MLX5_SET(create_mkey_in, in, translations_octword_actual_size, > get_octo_len(mr->end - mr->start, mr->log_size)); > populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); > - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); > - kvfree(in); > - if (err) { > - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); > - return err; > - } > > - return 0; > + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); > + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); > +} > + > +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, > + struct mlx5_vdpa_direct_mr *mr, > + struct mlx5_create_mkey_mem *mem) > +{ > + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); > + > + mr->mr = mlx5_idx_to_mkey(mkey_index); > } > > static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) > { > + if (!mr->mr) > + return; > + > mlx5_vdpa_destroy_mkey(mvdev, mr->mr); > } > > @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms) > return 16 * ALIGN(nklms, 4); > } > > +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) > +{ > + struct mlx5_vdpa_async_cmd *cmds = NULL; > + struct mlx5_vdpa_direct_mr *dmr; > + int err = 0; > + int i = 0; > + > + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); > + if (!cmds) > + return -ENOMEM; Nit: this could 
benefit from Scope-based Cleanup Helpers [1], as it would make clear that memory is always removed at the end of the function & could prevent errors if the function is modified in the future. I'm a big fan of them so my opinion may be biased though :). It would be great to be able to remove the array members with that too, but I think the kernel doesn't have any facility for that. > + > + list_for_each_entry(dmr, &mr->head, list) { > + struct mlx5_create_mkey_mem *cmd_mem; > + int mttlen, mttcount; > + > + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16); I don't get the roundup here, I guess it is so the driver does not send potentially uninitialized memory to the device? Maybe the 16 should be a macro? > + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); > + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); > + if (!cmd_mem) { > + err = -ENOMEM; > + goto done; > + } > + > + cmds[i].out = cmd_mem->out; > + cmds[i].outlen = sizeof(cmd_mem->out); > + cmds[i].in = cmd_mem->in; > + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); > + > + fill_create_direct_mr(mvdev, dmr, cmd_mem); > + > + i++; > + } > + > + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); > + if (err) { > + > + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); > + goto done; > + } > + > + i = 0; > + list_for_each_entry(dmr, &mr->head, list) { > + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; > + struct mlx5_create_mkey_mem *cmd_mem; > + > + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); > + > + if (!cmd->err) { > + create_direct_mr_end(mvdev, dmr, cmd_mem); The caller function doesn't trust the result if we return an error. Why not use the previous loop to call create_direct_mr_end? Am I missing any side effect? > + } else { > + err = err ? 
err : cmd->err; > + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", > + dmr->start, dmr->end, cmd->err); > + } > + } > + > +done: > + for (i = i-1; i >= 0; i--) { > + struct mlx5_create_mkey_mem *cmd_mem; > + > + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); > + kvfree(cmd_mem); > + } > + > + kvfree(cmds); > + return err; > +} > + > static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) > { > int inlen; > @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr > goto err_map; > } > > - err = create_direct_mr(mvdev, mr); > - if (err) > - goto err_direct; > - > return 0; > > -err_direct: > - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); > err_map: > sg_free_table(&mr->sg_head); > return err; > @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, > if (err) > goto err_chain; > > + err = create_direct_keys(mvdev, mr); > + if (err) > + goto err_chain; > + > /* Create the memory key that defines the guests's address space. This > * memory key refers to the direct keys that contain the MTT > * translations > -- > 2.45.1 >
On Thu, Aug 29, 2024 at 3:54 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: > > > > On 29.08.24 15:10, Eugenio Perez Martin wrote: > > On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: > >> > >> Use the async interface to issue MTT MKEY creation. > >> Extra care is taken at the allocation of FW input commands > >> due to the MTT tables having variable sizes depending on > >> MR. > >> > >> The indirect MKEY is still created synchronously at the > >> end as the direct MKEYs need to be filled in. > >> > >> This makes create_user_mr() 3-5x faster, depending on > >> the size of the MR. > >> > >> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> > >> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com> > >> --- > >> drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++------- > >> 1 file changed, 96 insertions(+), 22 deletions(-) > >> > >> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c > >> index 4758914ccf86..66e6a15f823f 100644 > >> --- a/drivers/vdpa/mlx5/core/mr.c > >> +++ b/drivers/vdpa/mlx5/core/mr.c > >> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) > >> } > >> } > >> > >> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) > >> +struct mlx5_create_mkey_mem { > >> + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; > >> + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; > >> + DECLARE_FLEX_ARRAY(__be64, mtt); > > > > I may be missing something obvious, but why do we need > > DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in > > special cases like uapi headers and we can use "__be64 mtt[]" here. > > > checkpatch.pl was complaining about it because in my initial version I > used the "[0]" version of zero length based arrays. > > My impression was that DECLARE_FLEX_ARRAY is preferred option because it > triggers a compiler error if the zero lenth array is not at the end of > the struct. 
But on closer inspection I see that using the right C99 > empty brackets notation is enough to trigger this error. > DECLARE_FLEX_ARRAY seems to be useful for the union case. > > I will change it in a v2. > > >> +}; > >> + > >> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, > >> + struct mlx5_vdpa_direct_mr *mr, > >> + struct mlx5_create_mkey_mem *mem) > >> { > >> - int inlen; > >> + void *in = &mem->in; > >> void *mkc; > >> - void *in; > >> - int err; > >> - > >> - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); > >> - in = kvzalloc(inlen, GFP_KERNEL); > >> - if (!in) > >> - return -ENOMEM; > >> > >> MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); > >> mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); > >> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct > >> MLX5_SET(create_mkey_in, in, translations_octword_actual_size, > >> get_octo_len(mr->end - mr->start, mr->log_size)); > >> populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); > >> - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); > >> - kvfree(in); > >> - if (err) { > >> - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); > >> - return err; > >> - } > >> > >> - return 0; > >> + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); > >> + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); > >> +} > >> + > >> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, > >> + struct mlx5_vdpa_direct_mr *mr, > >> + struct mlx5_create_mkey_mem *mem) > >> +{ > >> + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); > >> + > >> + mr->mr = mlx5_idx_to_mkey(mkey_index); > >> } > >> > >> static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) > >> { > >> + if (!mr->mr) > >> + return; > >> + > >> mlx5_vdpa_destroy_mkey(mvdev, mr->mr); > >> } > >> > >> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms) > 
>> return 16 * ALIGN(nklms, 4); > >> } > >> > >> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) > >> +{ > >> + struct mlx5_vdpa_async_cmd *cmds = NULL; > >> + struct mlx5_vdpa_direct_mr *dmr; > >> + int err = 0; > >> + int i = 0; > >> + > >> + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); > >> + if (!cmds) > >> + return -ENOMEM; > > > > Nit: this could benefit from Scope-based Cleanup Helpers [1], as it > > would make clear that memory is always removed at the end of the > > function & could prevent errors if the function is modified in the > > future. I'm a big fan of them so my opinion may be biased though :). > > > > It would be great to be able to remove the array members with that > > too, but I think the kernel doesn't have any facility for that. > > > I didn't know about those. It sounds like a good idea! I will try > to use them in v2. > > >> + > >> + list_for_each_entry(dmr, &mr->head, list) { > >> + struct mlx5_create_mkey_mem *cmd_mem; > >> + int mttlen, mttcount; > >> + > >> + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16); > > > > I don't get the roundup here, I guess it is so the driver does not > > send potentially uninitialized memory to the device? Maybe the 16 > > should be a macro? > > > The roundup is a hw requirement. A define would be a good idea. Will add > it. 
> > >> + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); > >> + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); > >> + if (!cmd_mem) { > >> + err = -ENOMEM; > >> + goto done; > >> + } > >> + > >> + cmds[i].out = cmd_mem->out; > >> + cmds[i].outlen = sizeof(cmd_mem->out); > >> + cmds[i].in = cmd_mem->in; > >> + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); > >> + > >> + fill_create_direct_mr(mvdev, dmr, cmd_mem); > >> + > >> + i++; > >> + } > >> + > >> + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); > >> + if (err) { > >> + > >> + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); > >> + goto done; > >> + } > >> + > >> + i = 0; > >> + list_for_each_entry(dmr, &mr->head, list) { > >> + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; > >> + struct mlx5_create_mkey_mem *cmd_mem; > >> + > >> + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); > >> + > >> + if (!cmd->err) { > >> + create_direct_mr_end(mvdev, dmr, cmd_mem); > > > > The caller function doesn't trust the result if we return an error. > > Why not use the previous loop to call create_direct_mr_end? Am I > > missing any side effect? > > > Which previous loop? We have the mkey value only after the command has > been executed. Ok, now I see what I proposed didn't make sense, thanks! > I added the if here (instead of always calling > create_direct_mr_end()) just to make things more explicit for the > reader. > > >> + } else { > >> + err = err ? 
err : cmd->err; > >> + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", > >> + dmr->start, dmr->end, cmd->err); > >> + } > >> + } > >> + > >> +done: > >> + for (i = i-1; i >= 0; i--) { > >> + struct mlx5_create_mkey_mem *cmd_mem; > >> + > >> + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); > >> + kvfree(cmd_mem); > >> + } > >> + > >> + kvfree(cmds); > >> + return err; > >> +} > >> + > >> static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) > >> { > >> int inlen; > >> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr > >> goto err_map; > >> } > >> > >> - err = create_direct_mr(mvdev, mr); > >> - if (err) > >> - goto err_direct; > >> - > >> return 0; > >> > >> -err_direct: > >> - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); > >> err_map: > >> sg_free_table(&mr->sg_head); > >> return err; > >> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, > >> if (err) > >> goto err_chain; > >> > >> + err = create_direct_keys(mvdev, mr); > >> + if (err) > >> + goto err_chain; > >> + > >> /* Create the memory key that defines the guests's address space. This > >> * memory key refers to the direct keys that contain the MTT > >> * translations > >> -- > >> 2.45.1 > >> > > >
On 29.08.24 17:15, Eugenio Perez Martin wrote: > On Thu, Aug 29, 2024 at 3:54 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: >> >> >> >> On 29.08.24 15:10, Eugenio Perez Martin wrote: >>> On Wed, Aug 21, 2024 at 1:41 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: >>>> >>>> Use the async interface to issue MTT MKEY creation. >>>> Extra care is taken at the allocation of FW input commands >>>> due to the MTT tables having variable sizes depending on >>>> MR. >>>> >>>> The indirect MKEY is still created synchronously at the >>>> end as the direct MKEYs need to be filled in. >>>> >>>> This makes create_user_mr() 3-5x faster, depending on >>>> the size of the MR. >>>> >>>> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> >>>> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com> >>>> --- >>>> drivers/vdpa/mlx5/core/mr.c | 118 +++++++++++++++++++++++++++++------- >>>> 1 file changed, 96 insertions(+), 22 deletions(-) >>>> >>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c >>>> index 4758914ccf86..66e6a15f823f 100644 >>>> --- a/drivers/vdpa/mlx5/core/mr.c >>>> +++ b/drivers/vdpa/mlx5/core/mr.c >>>> @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) >>>> } >>>> } >>>> >>>> -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) >>>> +struct mlx5_create_mkey_mem { >>>> + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; >>>> + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; >>>> + DECLARE_FLEX_ARRAY(__be64, mtt); >>> >>> I may be missing something obvious, but why do we need >>> DECLARE_FLEX_ARRAY here? My understanding is that it is only needed in >>> special cases like uapi headers and we can use "__be64 mtt[]" here. >>> >> checkpatch.pl was complaining about it because in my initial version I >> used the "[0]" version of zero length based arrays. 
>> >> My impression was that DECLARE_FLEX_ARRAY is preferred option because it >> triggers a compiler error if the zero lenth array is not at the end of >> the struct. But on closer inspection I see that using the right C99 >> empty brackets notation is enough to trigger this error. >> DECLARE_FLEX_ARRAY seems to be useful for the union case. >> >> I will change it in a v2. >> >>>> +}; >>>> + >>>> +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, >>>> + struct mlx5_vdpa_direct_mr *mr, >>>> + struct mlx5_create_mkey_mem *mem) >>>> { >>>> - int inlen; >>>> + void *in = &mem->in; >>>> void *mkc; >>>> - void *in; >>>> - int err; >>>> - >>>> - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); >>>> - in = kvzalloc(inlen, GFP_KERNEL); >>>> - if (!in) >>>> - return -ENOMEM; >>>> >>>> MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); >>>> mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); >>>> @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct >>>> MLX5_SET(create_mkey_in, in, translations_octword_actual_size, >>>> get_octo_len(mr->end - mr->start, mr->log_size)); >>>> populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); >>>> - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); >>>> - kvfree(in); >>>> - if (err) { >>>> - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); >>>> - return err; >>>> - } >>>> >>>> - return 0; >>>> + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); >>>> + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); >>>> +} >>>> + >>>> +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, >>>> + struct mlx5_vdpa_direct_mr *mr, >>>> + struct mlx5_create_mkey_mem *mem) >>>> +{ >>>> + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); >>>> + >>>> + mr->mr = mlx5_idx_to_mkey(mkey_index); >>>> } >>>> >>>> static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr 
*mr) >>>> { >>>> + if (!mr->mr) >>>> + return; >>>> + >>>> mlx5_vdpa_destroy_mkey(mvdev, mr->mr); >>>> } >>>> >>>> @@ -179,6 +187,74 @@ static int klm_byte_size(int nklms) >>>> return 16 * ALIGN(nklms, 4); >>>> } >>>> >>>> +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) >>>> +{ >>>> + struct mlx5_vdpa_async_cmd *cmds = NULL; >>>> + struct mlx5_vdpa_direct_mr *dmr; >>>> + int err = 0; >>>> + int i = 0; >>>> + >>>> + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); >>>> + if (!cmds) >>>> + return -ENOMEM; >>> >>> Nit: this could benefit from Scope-based Cleanup Helpers [1], as it >>> would make clear that memory is always removed at the end of the >>> function & could prevent errors if the function is modified in the >>> future. I'm a big fan of them so my opinion may be biased though :). >>> >>> It would be great to be able to remove the array members with that >>> too, but I think the kernel doesn't have any facility for that. >>> >> I didn't know about those. It sounds like a good idea! I will try >> to use them in v2. >> I tried for this patch: doing the DECLARE_FREE only for the cmds array is only more confusing because the goto done still has to happen due to cmd_mem elements being of different size. I preferred not to mix and match. I also tried adding a ctor/dtor with DECLARE_CLASS that wraps both of these resources: the problem there is that the ctor can't fail so the allocation still has to happen in this function but we still need an extra struct to hold the cmds + size. Overall the code was introducing more confusion. However, it works very well for the next patch so I used it there in v2. In the future I will look at introducing more of these, they are interesting helpers. 
Thanks, Dragos >>>> + >>>> + list_for_each_entry(dmr, &mr->head, list) { >>>> + struct mlx5_create_mkey_mem *cmd_mem; >>>> + int mttlen, mttcount; >>>> + >>>> + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16); >>> >>> I don't get the roundup here, I guess it is so the driver does not >>> send potentially uninitialized memory to the device? Maybe the 16 >>> should be a macro? >>> >> The roundup is a hw requirement. A define would be a good idea. Will add >> it. >> >>>> + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); >>>> + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); >>>> + if (!cmd_mem) { >>>> + err = -ENOMEM; >>>> + goto done; >>>> + } >>>> + >>>> + cmds[i].out = cmd_mem->out; >>>> + cmds[i].outlen = sizeof(cmd_mem->out); >>>> + cmds[i].in = cmd_mem->in; >>>> + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); >>>> + >>>> + fill_create_direct_mr(mvdev, dmr, cmd_mem); >>>> + >>>> + i++; >>>> + } >>>> + >>>> + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); >>>> + if (err) { >>>> + >>>> + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); >>>> + goto done; >>>> + } >>>> + >>>> + i = 0; >>>> + list_for_each_entry(dmr, &mr->head, list) { >>>> + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; >>>> + struct mlx5_create_mkey_mem *cmd_mem; >>>> + >>>> + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); >>>> + >>>> + if (!cmd->err) { >>>> + create_direct_mr_end(mvdev, dmr, cmd_mem); >>> >>> The caller function doesn't trust the result if we return an error. >>> Why not use the previous loop to call create_direct_mr_end? Am I >>> missing any side effect? >>> >> Which previous loop? We have the mkey value only after the command has >> been executed. > > Ok, now I see what I proposed didn't make sense, thanks! > >> I added the if here (instead of always calling >> create_direct_mr_end()) just to make things more explicit for the >> reader. >> >>>> + } else { >>>> + err = err ? 
err : cmd->err; >>>> + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", >>>> + dmr->start, dmr->end, cmd->err); >>>> + } >>>> + } >>>> + >>>> +done: >>>> + for (i = i-1; i >= 0; i--) { >>>> + struct mlx5_create_mkey_mem *cmd_mem; >>>> + >>>> + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); >>>> + kvfree(cmd_mem); >>>> + } >>>> + >>>> + kvfree(cmds); >>>> + return err; >>>> +} >>>> + >>>> static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) >>>> { >>>> int inlen; >>>> @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr >>>> goto err_map; >>>> } >>>> >>>> - err = create_direct_mr(mvdev, mr); >>>> - if (err) >>>> - goto err_direct; >>>> - >>>> return 0; >>>> >>>> -err_direct: >>>> - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); >>>> err_map: >>>> sg_free_table(&mr->sg_head); >>>> return err; >>>> @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, >>>> if (err) >>>> goto err_chain; >>>> >>>> + err = create_direct_keys(mvdev, mr); >>>> + if (err) >>>> + goto err_chain; >>>> + >>>> /* Create the memory key that defines the guests's address space. This >>>> * memory key refers to the direct keys that contain the MTT >>>> * translations >>>> -- >>>> 2.45.1 >>>> >>> >> >
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4758914ccf86..66e6a15f823f 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -49,17 +49,18 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) } } -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +struct mlx5_create_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; + DECLARE_FLEX_ARRAY(__be64, mtt); +}; + +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) { - int inlen; + void *in = &mem->in; void *mkc; - void *in; - int err; - - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -76,18 +77,25 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(mr->end - mr->start, mr->log_size)); populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); - kvfree(in); - if (err) { - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); - return err; - } - return 0; + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); +} + +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); + + mr->mr = mlx5_idx_to_mkey(mkey_index); } static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { + if (!mr->mr) + return; + mlx5_vdpa_destroy_mkey(mvdev, mr->mr); } @@ -179,6 
+187,74 @@ static int klm_byte_size(int nklms) return 16 * ALIGN(nklms, 4); } +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_vdpa_async_cmd *cmds = NULL; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_create_mkey_mem *cmd_mem; + int mttlen, mttcount; + + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, 16); + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); + if (!cmd_mem) { + err = -ENOMEM; + goto done; + } + + cmds[i].out = cmd_mem->out; + cmds[i].outlen = sizeof(cmd_mem->out); + cmds[i].in = cmd_mem->in; + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); + + fill_create_direct_mr(mvdev, dmr, cmd_mem); + + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); + goto done; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); + + if (!cmd->err) { + create_direct_mr_end(mvdev, dmr, cmd_mem); + } else { + err = err ? 
err : cmd->err; + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + +done: + for (i = i-1; i >= 0; i--) { + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); + kvfree(cmd_mem); + } + + kvfree(cmds); + return err; +} + static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { int inlen; @@ -279,14 +355,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr goto err_map; } - err = create_direct_mr(mvdev, mr); - if (err) - goto err_direct; - return 0; -err_direct: - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); err_map: sg_free_table(&mr->sg_head); return err; @@ -401,6 +471,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_chain; + err = create_direct_keys(mvdev, mr); + if (err) + goto err_chain; + /* Create the memory key that defines the guests's address space. This * memory key refers to the direct keys that contain the MTT * translations