Message ID | 1564477010-29804-11-git-send-email-oulijun@huawei.com |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Series | Updates for 5.3-rc2 |
On Tue, Jul 30, 2019 at 04:56:47PM +0800, Lijun Ou wrote:
> From: Lang Cheng <chenglang@huawei.com>
>
> For hns_roce_v2_query_qp and hns_roce_v2_modify_qp,
> we can use stack memory to create qp context data.
> Make the code simpler.
>
> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 +++++++++++++-----------------
>  1 file changed, 27 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 1186e34..07ddfae 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -4288,22 +4288,19 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
>  {
>      struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
>      struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
> -    struct hns_roce_v2_qp_context *context;
> -    struct hns_roce_v2_qp_context *qpc_mask;
> +    struct hns_roce_v2_qp_context ctx[2];
> +    struct hns_roce_v2_qp_context *context = ctx;
> +    struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
>      struct device *dev = hr_dev->dev;
>      int ret;
>
> -    context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
> -    if (!context)
> -        return -ENOMEM;
> -
> -    qpc_mask = context + 1;
>      /*
>       * In v2 engine, software pass context and context mask to hardware
>       * when modifying qp. If software need modify some fields in context,
>       * we should set all bits of the relevant fields in context mask to
>       * 0 at the same time, else set them to 0x1.
>       */
> +    memset(context, 0, sizeof(*context));

"struct hns_roce_v2_qp_context ctx[2] = {};" will do the trick.

Thanks
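A minimal sketch of how the suggested initialization would read inside hns_roce_v2_modify_qp (assuming the surrounding code stays as in the patch above; this is a sketch, not the committed version):

    struct hns_roce_v2_qp_context ctx[2] = {};  /* context and mask both start zeroed */
    struct hns_roce_v2_qp_context *context = ctx;
    struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;

    /* The explicit memset(context, 0, ...) becomes unnecessary; only the mask
     * still needs its all-ones fill.
     */
    memset(qpc_mask, 0xff, sizeof(*qpc_mask));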
On 2019/7/30 21:40, Leon Romanovsky wrote:
> On Tue, Jul 30, 2019 at 04:56:47PM +0800, Lijun Ou wrote:
>> From: Lang Cheng <chenglang@huawei.com>
>>
>> For hns_roce_v2_query_qp and hns_roce_v2_modify_qp,
>> we can use stack memory to create qp context data.
>> Make the code simpler.
>>
>> [...]
>>
>> +    memset(context, 0, sizeof(*context));
>
> "struct hns_roce_v2_qp_context ctx[2] = {};" will do the trick.
>
> Thanks

In this case, the mask is actually written twice. If we do this, will it bring extra overhead when modifying the QP?
On Wed, Jul 31, 2019 at 10:43:01AM +0800, oulijun wrote:
> On 2019/7/30 21:40, Leon Romanovsky wrote:
> > On Tue, Jul 30, 2019 at 04:56:47PM +0800, Lijun Ou wrote:
> > > From: Lang Cheng <chenglang@huawei.com>
> > >
> > > For hns_roce_v2_query_qp and hns_roce_v2_modify_qp,
> > > we can use stack memory to create qp context data.
> > > Make the code simpler.
> > >
> > > [...]
> > >
> > > +    memset(context, 0, sizeof(*context));
> > "struct hns_roce_v2_qp_context ctx[2] = {};" will do the trick.
> >
> > Thanks
>
> In this case, the mask is actually written twice. If we do this, will it bring extra overhead when modifying the QP?

I don't understand the first part of your sentence, and "no" to the second part.

Thanks
On Wed, Jul 31, 2019 at 05:16:38PM +0800, c00284828 wrote:
> On 2019/7/31 15:49, Leon Romanovsky wrote:
> > On Wed, Jul 31, 2019 at 10:43:01AM +0800, oulijun wrote:
> > > On 2019/7/30 21:40, Leon Romanovsky wrote:
> > > > "struct hns_roce_v2_qp_context ctx[2] = {};" will do the trick.
> > > In this case, the mask is actually written twice. If we do this, will it bring extra overhead when modifying the QP?
> > I don't understand the first part of your sentence, and "no" to the second part.
> >
> > Thanks
>
> +    struct hns_roce_v2_qp_context ctx[2];
> +    struct hns_roce_v2_qp_context *context = ctx;
> +    struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
> ...
> +    memset(context, 0, sizeof(*context));
>      memset(qpc_mask, 0xff, sizeof(*qpc_mask));
>
> "ctx[2] = {}" does look simpler, just like the other function in this patch.
> But ctx[1] will be written with 0 before being written with 0xff; is that faster than two memsets?

Are you seriously talking about this micro-optimization while you have a mailbox allocation in your modify_qp function?

Thanks
On Wed, Jul 31, 2019 at 12:59:01PM +0300, Leon Romanovsky wrote:
> On Wed, Jul 31, 2019 at 05:16:38PM +0800, c00284828 wrote:
> > On 2019/7/31 15:49, Leon Romanovsky wrote:
> > > [...]
> >
> > +    struct hns_roce_v2_qp_context ctx[2];
> > +    struct hns_roce_v2_qp_context *context = ctx;
> > +    struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
> > ...
> > +    memset(context, 0, sizeof(*context));
> >      memset(qpc_mask, 0xff, sizeof(*qpc_mask));
> >
> > "ctx[2] = {}" does look simpler, just like the other function in this patch.
> > But ctx[1] will be written with 0 before being written with 0xff; is that faster than two memsets?
>
> Are you seriously talking about this micro-optimization while you have a mailbox allocation in your modify_qp function?

In any event, the compiler eliminates duplicate stores in a lot of cases like this.

Jason
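A small stand-alone sketch of the point about duplicate stores (hypothetical code, not from the driver; struct qpc and build_ctx_and_mask() are stand-ins for illustration). The zero written to the second array element is a dead store, because memset() overwrites every byte of it before anything reads it, so an optimizing compiler is generally free to drop that store:

    #include <string.h>

    /* Stand-in for struct hns_roce_v2_qp_context; the real layout does not matter here. */
    struct qpc { unsigned int raw[64]; };

    void build_ctx_and_mask(struct qpc *out_ctx, struct qpc *out_mask)
    {
            struct qpc ctx[2] = {};         /* zeroes both the context and the mask */
            struct qpc *context = ctx;
            struct qpc *qpc_mask = ctx + 1;

            /* The zero store to ctx[1] above is dead: every byte of the mask is
             * rewritten here before it is ever read, so the compiler can elide it.
             */
            memset(qpc_mask, 0xff, sizeof(*qpc_mask));

            *out_ctx = *context;            /* the context stays all-zero until fields are set */
            *out_mask = *qpc_mask;
    }

Either way, the cost is negligible next to the mailbox allocation that modify_qp already performs.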
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1186e34..07ddfae 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4288,22 +4288,19 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 {
     struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
     struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-    struct hns_roce_v2_qp_context *context;
-    struct hns_roce_v2_qp_context *qpc_mask;
+    struct hns_roce_v2_qp_context ctx[2];
+    struct hns_roce_v2_qp_context *context = ctx;
+    struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
     struct device *dev = hr_dev->dev;
     int ret;
 
-    context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
-    if (!context)
-        return -ENOMEM;
-
-    qpc_mask = context + 1;
     /*
      * In v2 engine, software pass context and context mask to hardware
      * when modifying qp. If software need modify some fields in context,
      * we should set all bits of the relevant fields in context mask to
      * 0 at the same time, else set them to 0x1.
      */
+    memset(context, 0, sizeof(*context));
     memset(qpc_mask, 0xff, sizeof(*qpc_mask));
 
     ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
                                      new_state, context, qpc_mask);
@@ -4349,8 +4346,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                    V2_QPC_BYTE_60_QP_ST_S, 0);
 
     /* SW pass context to HW */
-    ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state,
-                                context, hr_qp);
+    ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, ctx, hr_qp);
     if (ret) {
         dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
         goto out;
@@ -4378,7 +4374,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
     }
 
 out:
-    kfree(context);
     return ret;
 }
 
@@ -4429,16 +4424,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 {
     struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
     struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-    struct hns_roce_v2_qp_context *context;
+    struct hns_roce_v2_qp_context context = {};
     struct device *dev = hr_dev->dev;
     int tmp_qp_state;
     int state;
     int ret;
 
-    context = kzalloc(sizeof(*context), GFP_KERNEL);
-    if (!context)
-        return -ENOMEM;
-
     memset(qp_attr, 0, sizeof(*qp_attr));
     memset(qp_init_attr, 0, sizeof(*qp_init_attr));
 
@@ -4450,14 +4441,14 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
         goto done;
     }
 
-    ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context);
+    ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
     if (ret) {
         dev_err(dev, "query qpc error\n");
         ret = -EINVAL;
         goto out;
     }
 
-    state = roce_get_field(context->byte_60_qpst_tempid,
+    state = roce_get_field(context.byte_60_qpst_tempid,
                            V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
     tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
     if (tmp_qp_state == -1) {
@@ -4467,7 +4458,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
     }
     hr_qp->state = (u8)tmp_qp_state;
     qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
-    qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc,
+    qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context.byte_24_mtu_tc,
                                                     V2_QPC_BYTE_24_MTU_M,
                                                     V2_QPC_BYTE_24_MTU_S);
     qp_attr->path_mig_state = IB_MIG_ARMED;
@@ -4475,20 +4466,20 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
     if (hr_qp->ibqp.qp_type == IB_QPT_UD)
         qp_attr->qkey = V2_QKEY_VAL;
 
-    qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn,
+    qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn,
                                      V2_QPC_BYTE_108_RX_REQ_EPSN_M,
                                      V2_QPC_BYTE_108_RX_REQ_EPSN_S);
-    qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn,
+    qp_attr->sq_psn = (u32)roce_get_field(context.byte_172_sq_psn,
                                           V2_QPC_BYTE_172_SQ_CUR_PSN_M,
                                           V2_QPC_BYTE_172_SQ_CUR_PSN_S);
-    qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err,
+    qp_attr->dest_qp_num = (u8)roce_get_field(context.byte_56_dqpn_err,
                                               V2_QPC_BYTE_56_DQPN_M,
                                               V2_QPC_BYTE_56_DQPN_S);
-    qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en,
+    qp_attr->qp_access_flags = ((roce_get_bit(context.byte_76_srqn_op_en,
                                               V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) |
-                               ((roce_get_bit(context->byte_76_srqn_op_en,
+                               ((roce_get_bit(context.byte_76_srqn_op_en,
                                               V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) |
-                               ((roce_get_bit(context->byte_76_srqn_op_en,
+                               ((roce_get_bit(context.byte_76_srqn_op_en,
                                               V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
 
     if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
@@ -4497,43 +4488,43 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
             rdma_ah_retrieve_grh(&qp_attr->ah_attr);
 
         rdma_ah_set_sl(&qp_attr->ah_attr,
-                       roce_get_field(context->byte_28_at_fl,
+                       roce_get_field(context.byte_28_at_fl,
                                       V2_QPC_BYTE_28_SL_M,
                                       V2_QPC_BYTE_28_SL_S));
-        grh->flow_label = roce_get_field(context->byte_28_at_fl,
+        grh->flow_label = roce_get_field(context.byte_28_at_fl,
                                          V2_QPC_BYTE_28_FL_M,
                                          V2_QPC_BYTE_28_FL_S);
-        grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx,
+        grh->sgid_index = roce_get_field(context.byte_20_smac_sgid_idx,
                                          V2_QPC_BYTE_20_SGID_IDX_M,
                                          V2_QPC_BYTE_20_SGID_IDX_S);
-        grh->hop_limit = roce_get_field(context->byte_24_mtu_tc,
+        grh->hop_limit = roce_get_field(context.byte_24_mtu_tc,
                                         V2_QPC_BYTE_24_HOP_LIMIT_M,
                                         V2_QPC_BYTE_24_HOP_LIMIT_S);
-        grh->traffic_class = roce_get_field(context->byte_24_mtu_tc,
+        grh->traffic_class = roce_get_field(context.byte_24_mtu_tc,
                                             V2_QPC_BYTE_24_TC_M,
                                             V2_QPC_BYTE_24_TC_S);
 
-        memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw));
+        memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
     }
 
     qp_attr->port_num = hr_qp->port + 1;
     qp_attr->sq_draining = 0;
-    qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl,
+    qp_attr->max_rd_atomic = 1 << roce_get_field(context.byte_208_irrl,
                                                  V2_QPC_BYTE_208_SR_MAX_M,
                                                  V2_QPC_BYTE_208_SR_MAX_S);
-    qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq,
+    qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq,
                                                       V2_QPC_BYTE_140_RR_MAX_M,
                                                       V2_QPC_BYTE_140_RR_MAX_S);
-    qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn,
+    qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_M,
                                                 V2_QPC_BYTE_80_MIN_RNR_TIME_S);
-    qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl,
+    qp_attr->timeout = (u8)roce_get_field(context.byte_28_at_fl,
                                           V2_QPC_BYTE_28_AT_M,
                                           V2_QPC_BYTE_28_AT_S);
-    qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn,
+    qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
                                         V2_QPC_BYTE_212_RETRY_CNT_M,
                                         V2_QPC_BYTE_212_RETRY_CNT_S);
-    qp_attr->rnr_retry = context->rq_rnr_timer;
+    qp_attr->rnr_retry = context.rq_rnr_timer;
 
 done:
     qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4552,7 +4543,6 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 out:
     mutex_unlock(&hr_qp->mutex);
-    kfree(context);
     return ret;
 }