[RFC,V2,for-next] RDMA/hns: Add UD support for hip08
diff mbox series

Message ID 1571474772-2212-1-git-send-email-oulijun@huawei.com
State New
Delegated to: Doug Ledford
Headers show
Series
  • [RFC,V2,for-next] RDMA/hns: Add UD support for hip08
Related show

Commit Message

oulijun Oct. 19, 2019, 8:46 a.m. UTC
This patch aims to let hip08 support communication of Unreliable
Datagram.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
V1->V2:
1. Use CAP_NET_RAW for limit the creation UD queue
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 ++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_qp.c    |  7 +++++++
 2 files changed, 17 insertions(+), 7 deletions(-)

Comments

Jason Gunthorpe Oct. 21, 2019, 2:13 p.m. UTC | #1
On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
> index bd78ff9..722cc5f 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
>  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
>  							(hr_qp->sq.max_gs - 2));
>  
> +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
> +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
> +						       hr_qp->sq.max_gs);
> +
>  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
>  		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
>  			dev_err(hr_dev->dev,
> @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
>  	int ret;
>  
>  	switch (init_attr->qp_type) {
> +	case IB_QPT_UD:
> +		if (!capable(CAP_NET_RAW))
> +			return -EPERM;

This needs a big comment explaining why this HW requires it.

Jason
oulijun Oct. 21, 2019, 2:20 p.m. UTC | #2
在 2019/10/21 22:13, Jason Gunthorpe 写道:
> On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
>> index bd78ff9..722cc5f 100644
>> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
>> @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
>>  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
>>  							(hr_qp->sq.max_gs - 2));
>>  
>> +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
>> +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
>> +						       hr_qp->sq.max_gs);
>> +
>>  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
>>  		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
>>  			dev_err(hr_dev->dev,
>> @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
>>  	int ret;
>>  
>>  	switch (init_attr->qp_type) {
>> +	case IB_QPT_UD:
>> +		if (!capable(CAP_NET_RAW))
>> +			return -EPERM;
> This needs a big comment explaining why this HW requires it.
>
> Jason
>
Add the detail comments for HW limit?
> .
>
Doug Ledford Oct. 21, 2019, 2:58 p.m. UTC | #3
On Mon, 2019-10-21 at 22:20 +0800, oulijun wrote:
> 在 2019/10/21 22:13, Jason Gunthorpe 写道:
> > On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
> > > index bd78ff9..722cc5f 100644
> > > +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> > > @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct
> > > hns_roce_dev *hr_dev,
> > >  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > >sq.wqe_cnt *
> > >  							(hr_qp-
> > > >sq.max_gs - 2));
> > >  
> > > +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
> > > +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > >sq.wqe_cnt *
> > > +						       hr_qp-
> > > >sq.max_gs);
> > > +
> > >  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision ==
> > > 0x20)) {
> > >  		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
> > >  			dev_err(hr_dev->dev,
> > > @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct
> > > ib_pd *pd,
> > >  	int ret;
> > >  
> > >  	switch (init_attr->qp_type) {
> > > +	case IB_QPT_UD:
> > > +		if (!capable(CAP_NET_RAW))
> > > +			return -EPERM;
> > This needs a big comment explaining why this HW requires it.
> > 
> > Jason
> > 
> Add the detail comments for HW limit?

I can add those comments while taking the pactch.  Plus we need to add a
fallthrough annotation at the same place.  I'll fix it up and unfreeze
the hns queue.
Doug Ledford Oct. 21, 2019, 4:45 p.m. UTC | #4
On Mon, 2019-10-21 at 10:58 -0400, Doug Ledford wrote:
> On Mon, 2019-10-21 at 22:20 +0800, oulijun wrote:
> > 在 2019/10/21 22:13, Jason Gunthorpe 写道:
> > > On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
> > > > index bd78ff9..722cc5f 100644
> > > > +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> > > > @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct
> > > > hns_roce_dev *hr_dev,
> > > >  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > > > sq.wqe_cnt *
> > > >  							(hr_qp-
> > > > > sq.max_gs - 2));
> > > >  
> > > > +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
> > > > +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > > > sq.wqe_cnt *
> > > > +						       hr_qp-
> > > > > sq.max_gs);
> > > > +
> > > >  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision 
> > > > ==
> > > > 0x20)) {
> > > >  		if (hr_qp->sge.sge_cnt > hr_dev-
> > > > >caps.max_extend_sg) {
> > > >  			dev_err(hr_dev->dev,
> > > > @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct
> > > > ib_pd *pd,
> > > >  	int ret;
> > > >  
> > > >  	switch (init_attr->qp_type) {
> > > > +	case IB_QPT_UD:
> > > > +		if (!capable(CAP_NET_RAW))
> > > > +			return -EPERM;
> > > This needs a big comment explaining why this HW requires it.
> > > 
> > > Jason
> > > 
> > Add the detail comments for HW limit?
> 
> I can add those comments while taking the pactch.  Plus we need to add
> a
> fallthrough annotation at the same place.  I'll fix it up and unfreeze
> the hns queue.
> 

Does this meet people's approval?

        switch (init_attr->qp_type) {
        case IB_QPT_UD:
                /*
                 * DO NOT REMOVE!
                 * The HNS RoCE hardware has a security vulnerability.
                 * Normally, UD packet routing is achieved using nothing
                 * but the ib_ah struct, which contains the src gid in the
                 * sgid_attr element.  Th src gid is sufficient for the
                 * hardware to know if any vlan tag is needed, as well as
                 * any priority tag.  In the case of HNS RoCE, the vlan
                 * tag is passed to the hardware along with the src gid.
                 * This allows a situation where a malicious user could
                 * intentionally send packets with a gid that belongs to
                 * vlan A, but direct the packets to go out to vlan B
                 * instead.
                 * Because the ability to send out packets with arbitrary
                 * headers is reserved for CAP_NET_RAW, and because UD
                 * queue pairs can be tricked into doing that, make all
                 * UD queue pairs on this hardware require CAP_NET_RAW.
                 */
                if (!capable(CAP_NET_RAW))
                        return -EPERM;
                /* fallthrough */
        case IB_QPT_RC: {
oulijun Oct. 22, 2019, 1:07 a.m. UTC | #5
在 2019/10/21 22:58, Doug Ledford 写道:
> On Mon, 2019-10-21 at 22:20 +0800, oulijun wrote:
>> 在 2019/10/21 22:13, Jason Gunthorpe 写道:
>>> On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
>>>> index bd78ff9..722cc5f 100644
>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
>>>> @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct
>>>> hns_roce_dev *hr_dev,
>>>>  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
>>>>> sq.wqe_cnt *
>>>>  							(hr_qp-
>>>>> sq.max_gs - 2));
>>>>  
>>>> +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
>>>> +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
>>>>> sq.wqe_cnt *
>>>> +						       hr_qp-
>>>>> sq.max_gs);
>>>> +
>>>>  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision ==
>>>> 0x20)) {
>>>>  		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
>>>>  			dev_err(hr_dev->dev,
>>>> @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct
>>>> ib_pd *pd,
>>>>  	int ret;
>>>>  
>>>>  	switch (init_attr->qp_type) {
>>>> +	case IB_QPT_UD:
>>>> +		if (!capable(CAP_NET_RAW))
>>>> +			return -EPERM;
>>> This needs a big comment explaining why this HW requires it.
>>>
>>> Jason
>>>
>> Add the detail comments for HW limit?
> I can add those comments while taking the pactch.  Plus we need to add a
> fallthrough annotation at the same place.  I'll fix it up and unfreeze
> the hns queue.
>
Thanks
Leon Romanovsky Oct. 22, 2019, 5:44 a.m. UTC | #6
On Mon, Oct 21, 2019 at 12:45:56PM -0400, Doug Ledford wrote:
> On Mon, 2019-10-21 at 10:58 -0400, Doug Ledford wrote:
> > On Mon, 2019-10-21 at 22:20 +0800, oulijun wrote:
> > > 在 2019/10/21 22:13, Jason Gunthorpe 写道:
> > > > On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
> > > > > index bd78ff9..722cc5f 100644
> > > > > +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> > > > > @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct
> > > > > hns_roce_dev *hr_dev,
> > > > >  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > > > > sq.wqe_cnt *
> > > > >  							(hr_qp-
> > > > > > sq.max_gs - 2));
> > > > >
> > > > > +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
> > > > > +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
> > > > > > sq.wqe_cnt *
> > > > > +						       hr_qp-
> > > > > > sq.max_gs);
> > > > > +
> > > > >  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision
> > > > > ==
> > > > > 0x20)) {
> > > > >  		if (hr_qp->sge.sge_cnt > hr_dev-
> > > > > >caps.max_extend_sg) {
> > > > >  			dev_err(hr_dev->dev,
> > > > > @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct
> > > > > ib_pd *pd,
> > > > >  	int ret;
> > > > >
> > > > >  	switch (init_attr->qp_type) {
> > > > > +	case IB_QPT_UD:
> > > > > +		if (!capable(CAP_NET_RAW))
> > > > > +			return -EPERM;
> > > > This needs a big comment explaining why this HW requires it.
> > > >
> > > > Jason
> > > >
> > > Add the detail comments for HW limit?
> >
> > I can add those comments while taking the pactch.  Plus we need to add
> > a
> > fallthrough annotation at the same place.  I'll fix it up and unfreeze
> > the hns queue.
> >
>
> Does this meet people's approval?

It is much more detailed than I would imagine, Thanks.

>
>         switch (init_attr->qp_type) {
>         case IB_QPT_UD:
>                 /*
>                  * DO NOT REMOVE!
>                  * The HNS RoCE hardware has a security vulnerability.
>                  * Normally, UD packet routing is achieved using nothing
>                  * but the ib_ah struct, which contains the src gid in the
>                  * sgid_attr element.  Th src gid is sufficient for the
>                  * hardware to know if any vlan tag is needed, as well as
>                  * any priority tag.  In the case of HNS RoCE, the vlan
>                  * tag is passed to the hardware along with the src gid.
>                  * This allows a situation where a malicious user could
>                  * intentionally send packets with a gid that belongs to
>                  * vlan A, but direct the packets to go out to vlan B
>                  * instead.
>                  * Because the ability to send out packets with arbitrary
>                  * headers is reserved for CAP_NET_RAW, and because UD
>                  * queue pairs can be tricked into doing that, make all
>                  * UD queue pairs on this hardware require CAP_NET_RAW.
>                  */
>                 if (!capable(CAP_NET_RAW))
>                         return -EPERM;
>                 /* fallthrough */
>         case IB_QPT_RC: {
>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
Weihang Li Nov. 13, 2019, 6:47 a.m. UTC | #7
On 2019/10/22 0:45, Doug Ledford wrote:
> On Mon, 2019-10-21 at 10:58 -0400, Doug Ledford wrote:
>> On Mon, 2019-10-21 at 22:20 +0800, oulijun wrote:
>>> 在 2019/10/21 22:13, Jason Gunthorpe 写道:
>>>> On Sat, Oct 19, 2019 at 04:46:12PM +0800, Lijun Ou wrote:
>>>>> index bd78ff9..722cc5f 100644
>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
>>>>> @@ -377,6 +377,10 @@ static int hns_roce_set_user_sq_size(struct
>>>>> hns_roce_dev *hr_dev,
>>>>>  		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
>>>>>> sq.wqe_cnt *
>>>>>  							(hr_qp-
>>>>>> sq.max_gs - 2));
>>>>>  
>>>>> +	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
>>>>> +		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp-
>>>>>> sq.wqe_cnt *
>>>>> +						       hr_qp-
>>>>>> sq.max_gs);
>>>>> +
>>>>>  	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision 
>>>>> ==
>>>>> 0x20)) {
>>>>>  		if (hr_qp->sge.sge_cnt > hr_dev-
>>>>>> caps.max_extend_sg) {
>>>>>  			dev_err(hr_dev->dev,
>>>>> @@ -1022,6 +1026,9 @@ struct ib_qp *hns_roce_create_qp(struct
>>>>> ib_pd *pd,
>>>>>  	int ret;
>>>>>  
>>>>>  	switch (init_attr->qp_type) {
>>>>> +	case IB_QPT_UD:
>>>>> +		if (!capable(CAP_NET_RAW))
>>>>> +			return -EPERM;
>>>> This needs a big comment explaining why this HW requires it.
>>>>
>>>> Jason
>>>>
>>> Add the detail comments for HW limit?
>>
>> I can add those comments while taking the pactch.  Plus we need to add
>> a
>> fallthrough annotation at the same place.  I'll fix it up and unfreeze
>> the hns queue.
>>
> 
> Does this meet people's approval?
> 
>         switch (init_attr->qp_type) {
>         case IB_QPT_UD:
>                 /*
>                  * DO NOT REMOVE!
>                  * The HNS RoCE hardware has a security vulnerability.
>                  * Normally, UD packet routing is achieved using nothing
>                  * but the ib_ah struct, which contains the src gid in the
>                  * sgid_attr element.  Th src gid is sufficient for the
>                  * hardware to know if any vlan tag is needed, as well as
>                  * any priority tag.  In the case of HNS RoCE, the vlan
>                  * tag is passed to the hardware along with the src gid.
>                  * This allows a situation where a malicious user could
>                  * intentionally send packets with a gid that belongs to
>                  * vlan A, but direct the packets to go out to vlan B
>                  * instead.
>                  * Because the ability to send out packets with arbitrary
>                  * headers is reserved for CAP_NET_RAW, and because UD
>                  * queue pairs can be tricked into doing that, make all
>                  * UD queue pairs on this hardware require CAP_NET_RAW.
>                  */
>                 if (!capable(CAP_NET_RAW))
>                         return -EPERM;
>                 /* fallthrough */
>         case IB_QPT_RC: {
> 

Hi Doug,

To avoid the potential risk of vlan config issue, we decide to remove this patch.
Thanks a lot for the work of you, Jason and Leon on this issue.

Weihang

Patch
diff mbox series

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 7a89d66..e320ab0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -296,7 +296,7 @@  static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 		tmp_len = 0;
 
 		/* Corresponding to the QP type, wqe process separately */
-		if (ibqp->qp_type == IB_QPT_GSI) {
+		if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) {
 			ud_sq_wqe = wqe;
 			memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
 
@@ -417,8 +417,7 @@  static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			roce_set_field(ud_sq_wqe->byte_48,
 				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
 				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
-				       hns_get_gid_index(hr_dev, qp->phy_port,
-							 ah->av.gid_index));
+				       ah->av.gid_index);
 
 			memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
 			       GID_LEN_V2);
@@ -3217,7 +3216,8 @@  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
 			    struct hns_roce_v2_qp_context *context,
 			    struct hns_roce_v2_qp_context *qpc_mask)
 {
-	if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+	if (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+	    hr_qp->ibqp.qp_type == IB_QPT_UD)
 		roce_set_field(context->byte_4_sqpn_tst,
 			       V2_QPC_BYTE_4_SGE_SHIFT_M,
 			       V2_QPC_BYTE_4_SGE_SHIFT_S,
@@ -3737,8 +3737,9 @@  static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 	roce_set_field(context->byte_20_smac_sgid_idx,
 		       V2_QPC_BYTE_20_SGE_HOP_NUM_M,
 		       V2_QPC_BYTE_20_SGE_HOP_NUM_S,
-		       ((ibqp->qp_type == IB_QPT_GSI) ||
-		       hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
+		       ((ibqp->qp_type == IB_QPT_GSI ||
+			ibqp->qp_type == IB_QPT_UD) ||
+			hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
 		       hr_dev->caps.wqe_sge_hop_num : 0);
 	roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
 		       V2_QPC_BYTE_20_SGE_HOP_NUM_M,
@@ -4652,7 +4653,9 @@  static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
 
-	if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
+	if ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+	     hr_qp->ibqp.qp_type == IB_QPT_UD) &&
+	    hr_qp->state != IB_QPS_RESET) {
 		/* Modify qp to reset before destroying qp */
 		ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
 					    hr_qp->state, IB_QPS_RESET);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index bd78ff9..722cc5f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -377,6 +377,10 @@  static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
 							(hr_qp->sq.max_gs - 2));
 
+	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
+		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+						       hr_qp->sq.max_gs);
+
 	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
 		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
 			dev_err(hr_dev->dev,
@@ -1022,6 +1026,9 @@  struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 	int ret;
 
 	switch (init_attr->qp_type) {
+	case IB_QPT_UD:
+		if (!capable(CAP_NET_RAW))
+			return -EPERM;
 	case IB_QPT_RC: {
 		hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
 		if (!hr_qp)