diff mbox series

[v4,06/12] net: mana: Define data structures for protection domain and memory registration

Message ID 1655345240-26411-7-git-send-email-longli@linuxonhyperv.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series Introduce Microsoft Azure Network Adapter (MANA) RDMA driver | expand

Commit Message

Long Li June 16, 2022, 2:07 a.m. UTC
From: Ajay Sharma <sharmaajay@microsoft.com>

The MANA hardware support protection domain and memory registration for use
in RDMA environment. Add those definitions and expose them for use by the
RDMA driver.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
---
Change log:
v3: format/coding style changes

 drivers/net/ethernet/microsoft/mana/gdma.h    | 146 +++++++++++++++++-
 .../net/ethernet/microsoft/mana/gdma_main.c   |  27 ++--
 drivers/net/ethernet/microsoft/mana/mana_en.c |  18 ++-
 3 files changed, 168 insertions(+), 23 deletions(-)

Comments

Dexuan Cui July 11, 2022, 1:29 a.m. UTC | #1
> From: longli@linuxonhyperv.com <longli@linuxonhyperv.com>
> Sent: Wednesday, June 15, 2022 7:07 PM
> 
> The MANA hardware support protection domain and memory registration for
s/support/supports
 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> b/drivers/net/ethernet/microsoft/mana/gdma.h
> index f945755760dc..b1bec8ab5695 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> @@ -27,6 +27,10 @@ enum gdma_request_type {
>  	GDMA_CREATE_DMA_REGION		= 25,
>  	GDMA_DMA_REGION_ADD_PAGES	= 26,
>  	GDMA_DESTROY_DMA_REGION		= 27,
> +	GDMA_CREATE_PD			= 29,
> +	GDMA_DESTROY_PD			= 30,
> +	GDMA_CREATE_MR			= 31,
> +	GDMA_DESTROY_MR			= 32,
These are not used in this patch. They're used in the 12th 
patch for the first time. Can we move these to that patch?

>  #define GDMA_RESOURCE_DOORBELL_PAGE	27
> @@ -59,6 +63,8 @@ enum {
>  	GDMA_DEVICE_MANA	= 2,
>  };
> 
> +typedef u64 gdma_obj_handle_t;
> +
>  struct gdma_resource {
>  	/* Protect the bitmap */
>  	spinlock_t lock;
> @@ -192,7 +198,7 @@ struct gdma_mem_info {
>  	u64 length;
> 
>  	/* Allocated by the PF driver */
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
The old name "gdma_region" is shorter and it has "gdma"
rather than "dma". 

The new name is longer. When one starts to read the code for
the first time, I feel that "dma_region_handle" might be confusing
as it's similar to "dma_handle" (which is the DMA address returned
by dma_alloc_coherent()). "dma_region_handle" is an integer
rather than a memory address. 

You use the new name probably because there is a "mr_handle"
in the 12th patch. I prefer the old name, though the new name is
also ok to me. If you decide to use the new name, it would be
great if this patch could be split into two patches: one for the
renaming only, and the other for the real changes.

>  #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
> @@ -599,7 +605,7 @@ struct gdma_create_queue_req {
>  	u32 reserved1;
>  	u32 pdid;
>  	u32 doolbell_id;
> -	u64 gdma_region;
> +	gdma_obj_handle_t gdma_region;
If we decide to use the new name "dma_region_handle", should
we change the field/param names in the below structs and
functions as well (this may not be a complete list)?
  struct mana_ib_wq
  struct mana_ib_cq
  mana_ib_gd_create_dma_region
  mana_ib_gd_destroy_dma_region

>  	u32 reserved2;
>  	u32 queue_size;
>  	u32 log2_throttle_limit;
> @@ -626,6 +632,28 @@ struct gdma_disable_queue_req {
>  	u32 alloc_res_id_on_creation;
>  }; /* HW DATA */
> 
> +enum atb_page_size {
> +	ATB_PAGE_SIZE_4K,
> +	ATB_PAGE_SIZE_8K,
> +	ATB_PAGE_SIZE_16K,
> +	ATB_PAGE_SIZE_32K,
> +	ATB_PAGE_SIZE_64K,
> +	ATB_PAGE_SIZE_128K,
> +	ATB_PAGE_SIZE_256K,
> +	ATB_PAGE_SIZE_512K,
> +	ATB_PAGE_SIZE_1M,
> +	ATB_PAGE_SIZE_2M,
> +	ATB_PAGE_SIZE_MAX,
> +};
> +
> +enum gdma_mr_access_flags {
> +	GDMA_ACCESS_FLAG_LOCAL_READ = (1 << 0),
> +	GDMA_ACCESS_FLAG_LOCAL_WRITE = (1 << 1),
> +	GDMA_ACCESS_FLAG_REMOTE_READ = (1 << 2),
> +	GDMA_ACCESS_FLAG_REMOTE_WRITE = (1 << 3),
> +	GDMA_ACCESS_FLAG_REMOTE_ATOMIC = (1 << 4),
> +};
It would be better to use BIT_ULL(0), BIT_ULL(1), etc.
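
For illustration, a minimal sketch of the reviewer's suggestion, using the
BIT_ULL() macro from <linux/bits.h> (the flag names are taken from the
patch; this is not the final code):

  enum gdma_mr_access_flags {
          GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0),
          GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1),
          GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2),
          GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3),
          GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4),
  };

This keeps the same bit values while making the bit positions explicit.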

>  /* GDMA_CREATE_DMA_REGION */
>  struct gdma_create_dma_region_req {
>  	struct gdma_req_hdr hdr;
> @@ -652,14 +680,14 @@ struct gdma_create_dma_region_req {
> 
>  struct gdma_create_dma_region_resp {
>  	struct gdma_resp_hdr hdr;
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
>  }; /* HW DATA */
> 
>  /* GDMA_DMA_REGION_ADD_PAGES */
>  struct gdma_dma_region_add_pages_req {
>  	struct gdma_req_hdr hdr;
> 
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
> 
>  	u32 page_addr_list_len;
>  	u32 reserved3;
> @@ -671,9 +699,114 @@ struct gdma_dma_region_add_pages_req {
>  struct gdma_destroy_dma_region_req {
>  	struct gdma_req_hdr hdr;
> 
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
>  }; /* HW DATA */
> 
> +enum gdma_pd_flags {
> +	GDMA_PD_FLAG_ALLOW_GPA_MR = (1 << 0),
> +	GDMA_PD_FLAG_ALLOW_FMR_MR = (1 << 1),
> +};
Use BIT_ULL(0), BIT_ULL(1) ?

> +struct gdma_create_pd_req {
> +	struct gdma_req_hdr hdr;
> +	enum gdma_pd_flags flags;
> +	u32 reserved;
> +};
> +
> +struct gdma_create_pd_resp {
> +	struct gdma_resp_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +	u32 pd_id;
> +	u32 reserved;
> +};
> +
> +struct gdma_destroy_pd_req {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +};
> +
> +struct gdma_destory_pd_resp {
> +	struct gdma_resp_hdr hdr;
> +};
> +
> +enum gdma_mr_type {
> +	/* Guest Physical Address - MRs of this type allow access
> +	 * to any DMA-mapped memory using bus-logical address
> +	 */
> +	GDMA_MR_TYPE_GPA = 1,
> +
> +	/* Guest Virtual Address - MRs of this type allow access
> +	 * to memory mapped by PTEs associated with this MR using a virtual
> +	 * address that is set up in the MST
> +	 */
> +	GDMA_MR_TYPE_GVA,
> +
> +	/* Fast Memory Register - Like GVA but the MR is initially put in the
> +	 * FREE state (as opposed to Valid), and the specified number of
> +	 * PTEs are reserved for future fast memory reservations.
> +	 */
> +	GDMA_MR_TYPE_FMR,
> +};
> +
> +struct gdma_create_mr_params {
> +	gdma_obj_handle_t pd_handle;
> +	enum gdma_mr_type mr_type;
> +	union {
> +		struct {
> +			gdma_obj_handle_t dma_region_handle;
> +			u64 virtual_address;
> +			enum gdma_mr_access_flags access_flags;
> +		} gva;
Add an empty line to make it more readable?

> +		struct {
> +			enum gdma_mr_access_flags access_flags;
> +		} gpa;
Add an empty line?

> +		struct {
> +			enum atb_page_size page_size;
> +			u32  reserved_pte_count;
> +		} fmr;
> +	};
> +};

The definition of struct gdma_create_mr_params is not naturally aligned.
This can potentially cause issues.

According to my test, sizeof(struct gdma_create_mr_params) is 40 bytes,
meaning the compiler adds two "hidden" fields:

struct gdma_create_mr_params {
        gdma_obj_handle_t pd_handle;                        // offset = 0
        enum gdma_mr_type mr_type;                        // offset = 8
+       u32 hidden_field_a;
        union {                                            // offset = 0x10
                struct {
                        gdma_obj_handle_t dma_region_handle;     // offset = 0x10
                        u64 virtual_address;                     // offset = 0x18
                        enum gdma_mr_access_flags access_flags;  // offset = 0x20
+                       u32 hidden_field_b;
                } gva;

We'll run into trouble some day if the Linux VF driver or the host PF
driver adds something like __attribute__((packed)).

Can we work with the host team to improve the definition? If it's
hard/impossible to change the PF driver side definition, both sides
should at least explicitly define the two hidden fields as reserved fields.

BTW, can we assume the size of "enum" is 4 bytes? I prefer using u32
explicitly when a struct is used to talk to the PF driver or the device.

If we decide to use "enum", I suggest we add 
BUILD_BUG_ON(sizeof(struct gdma_create_mr_params) != 40)
to make sure the assumption is true.
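
Putting those suggestions together, a sketch of what the struct could look
like with the padding made explicit, the enum fields replaced by u32, and a
build-time size check (the names "reserved1"/"reserved2" are illustrative,
not from the patch):

  struct gdma_create_mr_params {
          u64 pd_handle;                          /* offset 0x00 */
          u32 mr_type;                            /* offset 0x08, enum gdma_mr_type */
          u32 reserved1;                          /* offset 0x0c, was hidden padding */
          union {                                 /* offset 0x10 */
                  struct {
                          u64 dma_region_handle;
                          u64 virtual_address;
                          u32 access_flags;       /* enum gdma_mr_access_flags */
                          u32 reserved2;          /* was hidden padding */
                  } gva;

                  struct {
                          u32 access_flags;
                  } gpa;

                  struct {
                          u32 page_size;          /* enum atb_page_size */
                          u32 reserved_pte_count;
                  } fmr;
          };
  };

  /* In a function, e.g. where the struct is filled in: */
  BUILD_BUG_ON(sizeof(struct gdma_create_mr_params) != 40);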

BTW, Haiyang added "/* HW DATA */" to other definitions,
e.g. gdma_create_queue_resp. Can you please add the same comment
for consistency?

> +struct gdma_create_mr_request {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +	enum gdma_mr_type mr_type;
> +	u32 reserved;
> +
> +	union {
> +		struct {
> +			enum gdma_mr_access_flags access_flags;
> +		} gpa;
> +
> +		struct {
> +			gdma_obj_handle_t dma_region_handle;
> +			u64 virtual_address;
> +			enum gdma_mr_access_flags access_flags;

Similarly, there is a hidden u32 field here. We should explicitly define it.

> +		} gva;
Can we use the same order of "gva; gpa" used in
struct gdma_create_mr_params?

> +		struct {
> +			enum atb_page_size page_size;
> +			u32 reserved_pte_count;
> +		} fmr;
> +	};
> +};

Add BUILD_BUG_ON(sizeof(struct gdma_create_mr_request) != 80) ?
Add /* HW DATA */ ?

> +struct gdma_create_mr_response {
> +	struct gdma_resp_hdr hdr;
> +	gdma_obj_handle_t mr_handle;
> +	u32 lkey;
> +	u32 rkey;
> +};
> +
> +struct gdma_destroy_mr_request {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t mr_handle;
> +};
> +
> +struct gdma_destroy_mr_response {
> +	struct gdma_resp_hdr hdr;
> +};
> +

None of the new defines are really used in this patch:

+enum atb_page_size {
+enum gdma_mr_access_flags {
+enum gdma_pd_flags {
+struct gdma_create_pd_req {
+struct gdma_create_pd_resp {
+struct gdma_destroy_pd_req {
+struct gdma_destory_pd_resp {
+enum gdma_mr_type {
+struct gdma_create_mr_params {
+struct gdma_create_mr_request {
+struct gdma_create_mr_response {
+struct gdma_destroy_mr_request {
+struct gdma_destroy_mr_response

The new defines are used in the 12th patch for the first time.
Can we move these to that patch or at least move these defines
to before the 12th patch?
Ajay Sharma July 13, 2022, 4:39 a.m. UTC | #2
Please see inline.

-----Original Message-----
From: Dexuan Cui <decui@microsoft.com> 
Sent: Sunday, July 10, 2022 8:29 PM
To: Long Li <longli@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S. Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>; edumazet@google.com; shiraz.saleem@intel.com; Ajay Sharma <sharmaajay@microsoft.com>
Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org
Subject: RE: [Patch v4 06/12] net: mana: Define data structures for protection domain and memory registration

> From: longli@linuxonhyperv.com <longli@linuxonhyperv.com>
> Sent: Wednesday, June 15, 2022 7:07 PM
> 
> The MANA hardware support protection domain and memory registration 
> for
s/support/supports
 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> b/drivers/net/ethernet/microsoft/mana/gdma.h
> index f945755760dc..b1bec8ab5695 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> @@ -27,6 +27,10 @@ enum gdma_request_type {
>  	GDMA_CREATE_DMA_REGION		= 25,
>  	GDMA_DMA_REGION_ADD_PAGES	= 26,
>  	GDMA_DESTROY_DMA_REGION		= 27,
> +	GDMA_CREATE_PD			= 29,
> +	GDMA_DESTROY_PD			= 30,
> +	GDMA_CREATE_MR			= 31,
> +	GDMA_DESTROY_MR			= 32,
These are not used in this patch. They're used in the 12th patch for the first time. Can we move these to that patch?

>  #define GDMA_RESOURCE_DOORBELL_PAGE	27
> @@ -59,6 +63,8 @@ enum {
>  	GDMA_DEVICE_MANA	= 2,
>  };
> 
> +typedef u64 gdma_obj_handle_t;
> +
>  struct gdma_resource {
>  	/* Protect the bitmap */
>  	spinlock_t lock;
> @@ -192,7 +198,7 @@ struct gdma_mem_info {
>  	u64 length;
> 
>  	/* Allocated by the PF driver */
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
The old name "gdma_region" is shorter and it has "gdma"
rather than "dma". 

The new name is longer. When one starts to read the code for the first time, I feel that "dma_region_handle" might be confusing as it's similar to "dma_handle" (which is the DMA address returned by dma_alloc_coherent()). "dma_region_handle" is an integer rather than a memory address. 

You use the new name probably because there is a "mr_handle"
in the 12th patch. I prefer the old name, though the new name is also ok to me. If you decide to use the new name, it would be great if this patch could be split into two patches: one for the renaming only, and the other for the real changes.

>  #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
> @@ -599,7 +605,7 @@ struct gdma_create_queue_req {
>  	u32 reserved1;
>  	u32 pdid;
>  	u32 doolbell_id;
> -	u64 gdma_region;
> +	gdma_obj_handle_t gdma_region;
If we decide to use the new name "dma_region_handle", should we change the field/param names in the below structs and functions as well (this may not be a complete list)?
  struct mana_ib_wq
  struct mana_ib_cq
  mana_ib_gd_create_dma_region
  mana_ib_gd_destroy_dma_region

>  	u32 reserved2;
>  	u32 queue_size;
>  	u32 log2_throttle_limit;
> @@ -626,6 +632,28 @@ struct gdma_disable_queue_req {
>  	u32 alloc_res_id_on_creation;
>  }; /* HW DATA */
> 
> +enum atb_page_size {
> +	ATB_PAGE_SIZE_4K,
> +	ATB_PAGE_SIZE_8K,
> +	ATB_PAGE_SIZE_16K,
> +	ATB_PAGE_SIZE_32K,
> +	ATB_PAGE_SIZE_64K,
> +	ATB_PAGE_SIZE_128K,
> +	ATB_PAGE_SIZE_256K,
> +	ATB_PAGE_SIZE_512K,
> +	ATB_PAGE_SIZE_1M,
> +	ATB_PAGE_SIZE_2M,
> +	ATB_PAGE_SIZE_MAX,
> +};
> +
> +enum gdma_mr_access_flags {
> +	GDMA_ACCESS_FLAG_LOCAL_READ = (1 << 0),
> +	GDMA_ACCESS_FLAG_LOCAL_WRITE = (1 << 1),
> +	GDMA_ACCESS_FLAG_REMOTE_READ = (1 << 2),
> +	GDMA_ACCESS_FLAG_REMOTE_WRITE = (1 << 3),
> +	GDMA_ACCESS_FLAG_REMOTE_ATOMIC = (1 << 4),
> +};
It would be better to use BIT_ULL(0), BIT_ULL(1), etc.
Agreed, updated in the new patch.

>  /* GDMA_CREATE_DMA_REGION */
>  struct gdma_create_dma_region_req {
>  	struct gdma_req_hdr hdr;
> @@ -652,14 +680,14 @@ struct gdma_create_dma_region_req {
> 
>  struct gdma_create_dma_region_resp {
>  	struct gdma_resp_hdr hdr;
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
>  }; /* HW DATA */
> 
>  /* GDMA_DMA_REGION_ADD_PAGES */
>  struct gdma_dma_region_add_pages_req {
>  	struct gdma_req_hdr hdr;
> 
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
> 
>  	u32 page_addr_list_len;
>  	u32 reserved3;
> @@ -671,9 +699,114 @@ struct gdma_dma_region_add_pages_req {
>  struct gdma_destroy_dma_region_req {
>  	struct gdma_req_hdr hdr;
> 
> -	u64 gdma_region;
> +	gdma_obj_handle_t dma_region_handle;
>  }; /* HW DATA */
> 
> +enum gdma_pd_flags {
> +	GDMA_PD_FLAG_ALLOW_GPA_MR = (1 << 0),
> +	GDMA_PD_FLAG_ALLOW_FMR_MR = (1 << 1),
> +};
Use BIT_ULL(0), BIT_ULL(1) ?
Agreed and updated the patch

> +struct gdma_create_pd_req {
> +	struct gdma_req_hdr hdr;
> +	enum gdma_pd_flags flags;
> +	u32 reserved;
> +};
> +
> +struct gdma_create_pd_resp {
> +	struct gdma_resp_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +	u32 pd_id;
> +	u32 reserved;
> +};
> +
> +struct gdma_destroy_pd_req {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +};
> +
> +struct gdma_destory_pd_resp {
> +	struct gdma_resp_hdr hdr;
> +};
> +
> +enum gdma_mr_type {
> +	/* Guest Physical Address - MRs of this type allow access
> +	 * to any DMA-mapped memory using bus-logical address
> +	 */
> +	GDMA_MR_TYPE_GPA = 1,
> +
> +	/* Guest Virtual Address - MRs of this type allow access
> +	 * to memory mapped by PTEs associated with this MR using a virtual
> +	 * address that is set up in the MST
> +	 */
> +	GDMA_MR_TYPE_GVA,
> +
> +	/* Fast Memory Register - Like GVA but the MR is initially put in the
> +	 * FREE state (as opposed to Valid), and the specified number of
> +	 * PTEs are reserved for future fast memory reservations.
> +	 */
> +	GDMA_MR_TYPE_FMR,
> +};
> +
> +struct gdma_create_mr_params {
> +	gdma_obj_handle_t pd_handle;
> +	enum gdma_mr_type mr_type;
> +	union {
> +		struct {
> +			gdma_obj_handle_t dma_region_handle;
> +			u64 virtual_address;
> +			enum gdma_mr_access_flags access_flags;
> +		} gva;
Add an empty line to make it more readable?
Done.
> +		struct {
> +			enum gdma_mr_access_flags access_flags;
> +		} gpa;
Add an empty line?

> +		struct {
> +			enum atb_page_size page_size;
> +			u32  reserved_pte_count;
> +		} fmr;
> +	};
> +};

The definition of struct gdma_create_mr_params is not naturally aligned.
This can potentially cause issues.
This is a union, so the biggest element is word-aligned. Since this is not passed to the hw, I feel it should be fine.

According to my test, sizeof(struct gdma_create_mr_params) is 40 bytes, meaning the compiler adds two "hidden" fields:

struct gdma_create_mr_params {
        gdma_obj_handle_t pd_handle;                        // offset = 0
        enum gdma_mr_type mr_type;                        // offset = 8
+       u32 hidden_field_a;
        union {                                            // offset = 0x10
                struct {
                        gdma_obj_handle_t dma_region_handle;     // offset = 0x10
                        u64 virtual_address;                     // offset = 0x18
                        enum gdma_mr_access_flags access_flags;  // offset = 0x20
+                       u32 hidden_field_b;
                } gva;

We'll run into trouble some day if the Linux VF driver or the host PF driver adds something like __attribute__((packed)).

Can we work with the host team to improve the definition? If it's hard/impossible to change the PF driver side definition, both sides should at least explicitly define the two hidden fields as reserved fields.

BTW, can we assume the size of "enum" is 4 bytes? I prefer using u32 explicitly when a struct is used to talk to the PF driver or the device.

If we decide to use "enum", I suggest we add BUILD_BUG_ON(sizeof(struct gdma_create_mr_params) != 40) to make sure the assumption is true.

BTW, Haiyang added "/* HW DATA */" to other definitions, e.g. gdma_create_queue_resp. Can you please add the same comment for consistency?

> +struct gdma_create_mr_request {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t pd_handle;
> +	enum gdma_mr_type mr_type;
> +	u32 reserved;
> +
> +	union {
> +		struct {
> +			enum gdma_mr_access_flags access_flags;
> +		} gpa;
> +
> +		struct {
> +			gdma_obj_handle_t dma_region_handle;
> +			u64 virtual_address;
> +			enum gdma_mr_access_flags access_flags;

Similarly, there is a hidden u32 field here. We should explicitly define it.

> +		} gva;
Can we use the same order of "gva; gpa" used in struct gdma_create_mr_params?
Done, although it shouldn't matter in the union case.

> +		struct {
> +			enum atb_page_size page_size;
> +			u32 reserved_pte_count;
> +		} fmr;
> +	};
> +};

Add BUILD_BUG_ON(sizeof(struct gdma_create_mr_request) != 80) ?
Add /* HW DATA */ ?

> +struct gdma_create_mr_response {
> +	struct gdma_resp_hdr hdr;
> +	gdma_obj_handle_t mr_handle;
> +	u32 lkey;
> +	u32 rkey;
> +};
> +
> +struct gdma_destroy_mr_request {
> +	struct gdma_req_hdr hdr;
> +	gdma_obj_handle_t mr_handle;
> +};
> +
> +struct gdma_destroy_mr_response {
> +	struct gdma_resp_hdr hdr;
> +};
> +

None of the new defines are really used in this patch:

+enum atb_page_size {
+enum gdma_mr_access_flags {
+enum gdma_pd_flags {
+struct gdma_create_pd_req {
+struct gdma_create_pd_resp {
+struct gdma_destroy_pd_req {
+struct gdma_destory_pd_resp {
+enum gdma_mr_type {
+struct gdma_create_mr_params {
+struct gdma_create_mr_request {
+struct gdma_create_mr_response {
+struct gdma_destroy_mr_request {
+struct gdma_destroy_mr_response

The new defines are used in the 12th patch for the first time.
Can we move these to that patch or at least move these defines to before the 12th patch?
Dexuan Cui July 13, 2022, 7:22 a.m. UTC | #3
> From: Ajay Sharma <sharmaajay@microsoft.com>
> Sent: Tuesday, July 12, 2022 9:39 PM
> To: Dexuan Cui <decui@microsoft.com>; Long Li <longli@microsoft.com>; KY
>  ...
> > The definition of struct gdma_create_mr_params is not naturally aligned.
> > This can potentially cause issues.
> This is a union, so the biggest element is word-aligned. Since this is
> not passed to the hw, I feel it should be fine.

Ajay, you're right. I didn't realize struct gdma_create_mr_params is not really
passed to the PF driver or the device. Please ignore my comments on
struct gdma_create_mr_params. Sorry for the confusion!

> > BTW, Haiyang added "/* HW DATA */ " to other definitions, e.g.
> > gdma_create_queue_resp. Can you please add the same comment for
> > consistency?
It's still recommended that we add the tag "/* HW DATA */" to new definitions
that are passed to the PF driver or the device.

> > +struct gdma_create_mr_request {
> > +	struct gdma_req_hdr hdr;
> > +	gdma_obj_handle_t pd_handle;
> > +	enum gdma_mr_type mr_type;
> > +	u32 reserved;
> > +
> > +	union {
> > +		struct {
> > +			enum gdma_mr_access_flags access_flags;
> > +		} gpa;
> > +
> > +		struct {
> > +			gdma_obj_handle_t dma_region_handle;
> > +			u64 virtual_address;
> > +			enum gdma_mr_access_flags access_flags;
> 
> Similarly, there is a hidden u32 field here. We should explicitly define it.

The issue with struct gdma_create_mr_request is valid, since it's
passed to the PF driver. We should explicitly define the hidden field.
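
A sketch of that fix for the on-the-wire struct, with the hidden tail
padding in "gva" made explicit and the 80-byte layout checked at build
time (the name "reserved2" is illustrative, not from the patch):

  struct gdma_create_mr_request {
          struct gdma_req_hdr hdr;
          u64 pd_handle;
          u32 mr_type;                    /* enum gdma_mr_type */
          u32 reserved;

          union {
                  struct {
                          u64 dma_region_handle;
                          u64 virtual_address;
                          u32 access_flags;       /* enum gdma_mr_access_flags */
                          u32 reserved2;          /* was hidden padding */
                  } gva;

                  struct {
                          u32 access_flags;
                  } gpa;

                  struct {
                          u32 page_size;          /* enum atb_page_size */
                          u32 reserved_pte_count;
                  } fmr;
          };
  }; /* HW DATA */

  /* In a function, e.g. before mana_gd_send_request() is called: */
  BUILD_BUG_ON(sizeof(struct gdma_create_mr_request) != 80);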
Jason Gunthorpe July 20, 2022, 11:43 p.m. UTC | #4
On Mon, Jul 11, 2022 at 01:29:08AM +0000, Dexuan Cui wrote:
> > From: longli@linuxonhyperv.com <longli@linuxonhyperv.com>
> > Sent: Wednesday, June 15, 2022 7:07 PM
> > 
> > The MANA hardware support protection domain and memory registration for
> s/support/supports
>  
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> > b/drivers/net/ethernet/microsoft/mana/gdma.h
> > index f945755760dc..b1bec8ab5695 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> > @@ -27,6 +27,10 @@ enum gdma_request_type {
> >  	GDMA_CREATE_DMA_REGION		= 25,
> >  	GDMA_DMA_REGION_ADD_PAGES	= 26,
> >  	GDMA_DESTROY_DMA_REGION		= 27,
> > +	GDMA_CREATE_PD			= 29,
> > +	GDMA_DESTROY_PD			= 30,
> > +	GDMA_CREATE_MR			= 31,
> > +	GDMA_DESTROY_MR			= 32,
> These are not used in this patch. They're used in the 12th 
> patch for the first time. Can we move these to that patch?

This looks like RDMA code anyhow, why is it under net/ethernet?

Jason
Long Li July 21, 2022, 12:15 a.m. UTC | #5
> Subject: Re: [Patch v4 06/12] net: mana: Define data structures for protection
> domain and memory registration
> 
> On Mon, Jul 11, 2022 at 01:29:08AM +0000, Dexuan Cui wrote:
> > > From: longli@linuxonhyperv.com <longli@linuxonhyperv.com>
> > > Sent: Wednesday, June 15, 2022 7:07 PM
> > >
> > > The MANA hardware support protection domain and memory
> registration
> > > for
> > s/support/supports
> >
> > > diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> > > b/drivers/net/ethernet/microsoft/mana/gdma.h
> > > index f945755760dc..b1bec8ab5695 100644
> > > --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> > > +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> > > @@ -27,6 +27,10 @@ enum gdma_request_type {
> > >  	GDMA_CREATE_DMA_REGION		= 25,
> > >  	GDMA_DMA_REGION_ADD_PAGES	= 26,
> > >  	GDMA_DESTROY_DMA_REGION		= 27,
> > > +	GDMA_CREATE_PD			= 29,
> > > +	GDMA_DESTROY_PD			= 30,
> > > +	GDMA_CREATE_MR			= 31,
> > > +	GDMA_DESTROY_MR			= 32,
> > These are not used in this patch. They're used in the 12th patch for
> > the first time. Can we move these to that patch?
> 
> This looks like RDMA code anyhow, why is it under net/ethernet?
> 
> Jason

This header file belongs to the GDMA layer (as its filename implies). It's the hardware communication layer used by both the Ethernet and RDMA drivers to communicate with the hardware.

Some of the RDMA functionality is implemented at the GDMA layer in the PF driver running on the host, so the message definitions are also there.

Long
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index f945755760dc..b1bec8ab5695 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -27,6 +27,10 @@  enum gdma_request_type {
 	GDMA_CREATE_DMA_REGION		= 25,
 	GDMA_DMA_REGION_ADD_PAGES	= 26,
 	GDMA_DESTROY_DMA_REGION		= 27,
+	GDMA_CREATE_PD			= 29,
+	GDMA_DESTROY_PD			= 30,
+	GDMA_CREATE_MR			= 31,
+	GDMA_DESTROY_MR			= 32,
 };
 
 #define GDMA_RESOURCE_DOORBELL_PAGE	27
@@ -59,6 +63,8 @@  enum {
 	GDMA_DEVICE_MANA	= 2,
 };
 
+typedef u64 gdma_obj_handle_t;
+
 struct gdma_resource {
 	/* Protect the bitmap */
 	spinlock_t lock;
@@ -192,7 +198,7 @@  struct gdma_mem_info {
 	u64 length;
 
 	/* Allocated by the PF driver */
-	u64 gdma_region;
+	gdma_obj_handle_t dma_region_handle;
 };
 
 #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
@@ -599,7 +605,7 @@  struct gdma_create_queue_req {
 	u32 reserved1;
 	u32 pdid;
 	u32 doolbell_id;
-	u64 gdma_region;
+	gdma_obj_handle_t gdma_region;
 	u32 reserved2;
 	u32 queue_size;
 	u32 log2_throttle_limit;
@@ -626,6 +632,28 @@  struct gdma_disable_queue_req {
 	u32 alloc_res_id_on_creation;
 }; /* HW DATA */
 
+enum atb_page_size {
+	ATB_PAGE_SIZE_4K,
+	ATB_PAGE_SIZE_8K,
+	ATB_PAGE_SIZE_16K,
+	ATB_PAGE_SIZE_32K,
+	ATB_PAGE_SIZE_64K,
+	ATB_PAGE_SIZE_128K,
+	ATB_PAGE_SIZE_256K,
+	ATB_PAGE_SIZE_512K,
+	ATB_PAGE_SIZE_1M,
+	ATB_PAGE_SIZE_2M,
+	ATB_PAGE_SIZE_MAX,
+};
+
+enum gdma_mr_access_flags {
+	GDMA_ACCESS_FLAG_LOCAL_READ = (1 << 0),
+	GDMA_ACCESS_FLAG_LOCAL_WRITE = (1 << 1),
+	GDMA_ACCESS_FLAG_REMOTE_READ = (1 << 2),
+	GDMA_ACCESS_FLAG_REMOTE_WRITE = (1 << 3),
+	GDMA_ACCESS_FLAG_REMOTE_ATOMIC = (1 << 4),
+};
+
 /* GDMA_CREATE_DMA_REGION */
 struct gdma_create_dma_region_req {
 	struct gdma_req_hdr hdr;
@@ -652,14 +680,14 @@  struct gdma_create_dma_region_req {
 
 struct gdma_create_dma_region_resp {
 	struct gdma_resp_hdr hdr;
-	u64 gdma_region;
+	gdma_obj_handle_t dma_region_handle;
 }; /* HW DATA */
 
 /* GDMA_DMA_REGION_ADD_PAGES */
 struct gdma_dma_region_add_pages_req {
 	struct gdma_req_hdr hdr;
 
-	u64 gdma_region;
+	gdma_obj_handle_t dma_region_handle;
 
 	u32 page_addr_list_len;
 	u32 reserved3;
@@ -671,9 +699,114 @@  struct gdma_dma_region_add_pages_req {
 struct gdma_destroy_dma_region_req {
 	struct gdma_req_hdr hdr;
 
-	u64 gdma_region;
+	gdma_obj_handle_t dma_region_handle;
 }; /* HW DATA */
 
+enum gdma_pd_flags {
+	GDMA_PD_FLAG_ALLOW_GPA_MR = (1 << 0),
+	GDMA_PD_FLAG_ALLOW_FMR_MR = (1 << 1),
+};
+
+struct gdma_create_pd_req {
+	struct gdma_req_hdr hdr;
+	enum gdma_pd_flags flags;
+	u32 reserved;
+};
+
+struct gdma_create_pd_resp {
+	struct gdma_resp_hdr hdr;
+	gdma_obj_handle_t pd_handle;
+	u32 pd_id;
+	u32 reserved;
+};
+
+struct gdma_destroy_pd_req {
+	struct gdma_req_hdr hdr;
+	gdma_obj_handle_t pd_handle;
+};
+
+struct gdma_destory_pd_resp {
+	struct gdma_resp_hdr hdr;
+};
+
+enum gdma_mr_type {
+	/* Guest Physical Address - MRs of this type allow access
+	 * to any DMA-mapped memory using bus-logical address
+	 */
+	GDMA_MR_TYPE_GPA = 1,
+
+	/* Guest Virtual Address - MRs of this type allow access
+	 * to memory mapped by PTEs associated with this MR using a virtual
+	 * address that is set up in the MST
+	 */
+	GDMA_MR_TYPE_GVA,
+
+	/* Fast Memory Register - Like GVA but the MR is initially put in the
+	 * FREE state (as opposed to Valid), and the specified number of
+	 * PTEs are reserved for future fast memory reservations.
+	 */
+	GDMA_MR_TYPE_FMR,
+};
+
+struct gdma_create_mr_params {
+	gdma_obj_handle_t pd_handle;
+	enum gdma_mr_type mr_type;
+	union {
+		struct {
+			gdma_obj_handle_t dma_region_handle;
+			u64 virtual_address;
+			enum gdma_mr_access_flags access_flags;
+		} gva;
+		struct {
+			enum gdma_mr_access_flags access_flags;
+		} gpa;
+		struct {
+			enum atb_page_size page_size;
+			u32  reserved_pte_count;
+		} fmr;
+	};
+};
+
+struct gdma_create_mr_request {
+	struct gdma_req_hdr hdr;
+	gdma_obj_handle_t pd_handle;
+	enum gdma_mr_type mr_type;
+	u32 reserved;
+
+	union {
+		struct {
+			enum gdma_mr_access_flags access_flags;
+		} gpa;
+
+		struct {
+			gdma_obj_handle_t dma_region_handle;
+			u64 virtual_address;
+			enum gdma_mr_access_flags access_flags;
+		} gva;
+
+		struct {
+			enum atb_page_size page_size;
+			u32 reserved_pte_count;
+		} fmr;
+	};
+};
+
+struct gdma_create_mr_response {
+	struct gdma_resp_hdr hdr;
+	gdma_obj_handle_t mr_handle;
+	u32 lkey;
+	u32 rkey;
+};
+
+struct gdma_destroy_mr_request {
+	struct gdma_req_hdr hdr;
+	gdma_obj_handle_t mr_handle;
+};
+
+struct gdma_destroy_mr_response {
+	struct gdma_resp_hdr hdr;
+};
+
 int mana_gd_verify_vf_version(struct pci_dev *pdev);
 
 int mana_gd_register_device(struct gdma_dev *gd);
@@ -705,4 +838,7 @@  int mana_gd_allocate_doorbell_page(struct gdma_context *gc, int *doorbell_page);
 
 int mana_gd_destroy_doorbell_page(struct gdma_context *gc, int doorbell_page);
 
+int mana_gd_destroy_dma_region(struct gdma_context *gc,
+			       gdma_obj_handle_t dma_region_handle);
+
 #endif /* _GDMA_H */
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 0c38c9a539f9..60cc1270b7d5 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -226,7 +226,7 @@  static int mana_gd_create_hw_eq(struct gdma_context *gc,
 	req.type = queue->type;
 	req.pdid = queue->gdma_dev->pdid;
 	req.doolbell_id = queue->gdma_dev->doorbell;
-	req.gdma_region = queue->mem_info.gdma_region;
+	req.gdma_region = queue->mem_info.dma_region_handle;
 	req.queue_size = queue->queue_size;
 	req.log2_throttle_limit = queue->eq.log2_throttle_limit;
 	req.eq_pci_msix_index = queue->eq.msix_index;
@@ -240,7 +240,7 @@  static int mana_gd_create_hw_eq(struct gdma_context *gc,
 
 	queue->id = resp.queue_index;
 	queue->eq.disable_needed = true;
-	queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+	queue->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
 	return 0;
 }
 
@@ -694,24 +694,30 @@  int mana_gd_create_hwc_queue(struct gdma_dev *gd,
 	return err;
 }
 
-static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region)
+int mana_gd_destroy_dma_region(struct gdma_context *gc,
+			       gdma_obj_handle_t dma_region_handle)
 {
 	struct gdma_destroy_dma_region_req req = {};
 	struct gdma_general_resp resp = {};
 	int err;
 
-	if (gdma_region == GDMA_INVALID_DMA_REGION)
-		return;
+	if (dma_region_handle == GDMA_INVALID_DMA_REGION)
+		return 0;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req),
 			     sizeof(resp));
-	req.gdma_region = gdma_region;
+	req.dma_region_handle = dma_region_handle;
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
-	if (err || resp.hdr.status)
+	if (err || resp.hdr.status) {
 		dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
 			err, resp.hdr.status);
+		return -EPROTO;
+	}
+
+	return 0;
 }
+EXPORT_SYMBOL(mana_gd_destroy_dma_region);
 
 static int mana_gd_create_dma_region(struct gdma_dev *gd,
 				     struct gdma_mem_info *gmi)
@@ -756,14 +762,15 @@  static int mana_gd_create_dma_region(struct gdma_dev *gd,
 	if (err)
 		goto out;
 
-	if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) {
+	if (resp.hdr.status ||
+	    resp.dma_region_handle == GDMA_INVALID_DMA_REGION) {
 		dev_err(gc->dev, "Failed to create DMA region: 0x%x\n",
 			resp.hdr.status);
 		err = -EPROTO;
 		goto out;
 	}
 
-	gmi->gdma_region = resp.gdma_region;
+	gmi->dma_region_handle = resp.dma_region_handle;
 out:
 	kfree(req);
 	return err;
@@ -886,7 +893,7 @@  void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
 		return;
 	}
 
-	mana_gd_destroy_dma_region(gc, gmi->gdma_region);
+	mana_gd_destroy_dma_region(gc, gmi->dma_region_handle);
 	mana_gd_free_memory(gmi);
 	kfree(queue);
 }
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 23e7e423a544..c18c358607a7 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1382,10 +1382,10 @@  static int mana_create_txq(struct mana_port_context *apc,
 		memset(&wq_spec, 0, sizeof(wq_spec));
 		memset(&cq_spec, 0, sizeof(cq_spec));
 
-		wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region;
+		wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle;
 		wq_spec.queue_size = txq->gdma_sq->queue_size;
 
-		cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
+		cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
 		cq_spec.queue_size = cq->gdma_cq->queue_size;
 		cq_spec.modr_ctx_id = 0;
 		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
@@ -1400,8 +1400,10 @@  static int mana_create_txq(struct mana_port_context *apc,
 		txq->gdma_sq->id = wq_spec.queue_index;
 		cq->gdma_cq->id = cq_spec.queue_index;
 
-		txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
-		cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+		txq->gdma_sq->mem_info.dma_region_handle =
+			GDMA_INVALID_DMA_REGION;
+		cq->gdma_cq->mem_info.dma_region_handle =
+			GDMA_INVALID_DMA_REGION;
 
 		txq->gdma_txq_id = txq->gdma_sq->id;
 
@@ -1612,10 +1614,10 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	memset(&wq_spec, 0, sizeof(wq_spec));
 	memset(&cq_spec, 0, sizeof(cq_spec));
-	wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region;
+	wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle;
 	wq_spec.queue_size = rxq->gdma_rq->queue_size;
 
-	cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
+	cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
 	cq_spec.queue_size = cq->gdma_cq->queue_size;
 	cq_spec.modr_ctx_id = 0;
 	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
@@ -1628,8 +1630,8 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	rxq->gdma_rq->id = wq_spec.queue_index;
 	cq->gdma_cq->id = cq_spec.queue_index;
 
-	rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
-	cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+	rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+	cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
 
 	rxq->gdma_id = rxq->gdma_rq->id;
 	cq->gdma_id = cq->gdma_cq->id;