diff mbox series

[V4,08/13] hyperv/vmbus: Initialize VMbus ring buffer for Isolation VM

Message ID 20210827172114.414281-9-ltykernel@gmail.com (mailing list archive)
State Superseded
Headers show
Series x86/Hyper-V: Add Hyper-V Isolation VM support | expand

Commit Message

Tianyu Lan Aug. 27, 2021, 5:21 p.m. UTC
From: Tianyu Lan <Tianyu.Lan@microsoft.com>

VMbus ring buffer are shared with host and it's need to
be accessed via extra address space of Isolation VM with
AMD SNP support. This patch is to map the ring buffer
address in extra address space via vmap_pfn(). Hyperv set
memory host visibility hvcall smears data in the ring buffer
and so reset the ring buffer memory to zero after mapping.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
Change since v3:
	* Remove hv_ringbuffer_post_init(), merge map
	operation for Isolation VM into hv_ringbuffer_init()
	* Call hv_ringbuffer_init() after __vmbus_establish_gpadl().
---
 drivers/hv/Kconfig       |  1 +
 drivers/hv/channel.c     | 19 +++++++-------
 drivers/hv/ring_buffer.c | 56 ++++++++++++++++++++++++++++++----------
 3 files changed, 54 insertions(+), 22 deletions(-)

Comments

Michael Kelley (LINUX) Sept. 2, 2021, 12:23 a.m. UTC | #1
From: Tianyu Lan <ltykernel@gmail.com> Sent: Friday, August 27, 2021 10:21 AM
> 

Subject tag should be "Drivers: hv: vmbus: "

> VMbus ring buffer are shared with host and it's need to
> be accessed via extra address space of Isolation VM with
> AMD SNP support. This patch is to map the ring buffer
> address in extra address space via vmap_pfn(). Hyperv set
> memory host visibility hvcall smears data in the ring buffer
> and so reset the ring buffer memory to zero after mapping.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
> Change since v3:
> 	* Remove hv_ringbuffer_post_init(), merge map
> 	operation for Isolation VM into hv_ringbuffer_init()
> 	* Call hv_ringbuffer_init() after __vmbus_establish_gpadl().
> ---
>  drivers/hv/Kconfig       |  1 +
>  drivers/hv/channel.c     | 19 +++++++-------
>  drivers/hv/ring_buffer.c | 56 ++++++++++++++++++++++++++++++----------
>  3 files changed, 54 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index d1123ceb38f3..dd12af20e467 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -8,6 +8,7 @@ config HYPERV
>  		|| (ARM64 && !CPU_BIG_ENDIAN))
>  	select PARAVIRT
>  	select X86_HV_CALLBACK_VECTOR if X86
> +	select VMAP_PFN
>  	help
>  	  Select this option to run Linux as a Hyper-V client operating
>  	  system.
> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
> index 82650beb3af0..81f8629e4491 100644
> --- a/drivers/hv/channel.c
> +++ b/drivers/hv/channel.c
> @@ -679,15 +679,6 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
>  	if (!newchannel->max_pkt_size)
>  		newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE;
> 
> -	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0);
> -	if (err)
> -		goto error_clean_ring;
> -
> -	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
> -				 recv_pages, newchannel->max_pkt_size);
> -	if (err)
> -		goto error_clean_ring;
> -
>  	/* Establish the gpadl for the ring buffer */
>  	newchannel->ringbuffer_gpadlhandle = 0;
> 
> @@ -699,6 +690,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
>  	if (err)
>  		goto error_clean_ring;
> 
> +	err = hv_ringbuffer_init(&newchannel->outbound,
> +				 page, send_pages, 0);
> +	if (err)
> +		goto error_free_gpadl;
> +
> +	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
> +				 recv_pages, newchannel->max_pkt_size);
> +	if (err)
> +		goto error_free_gpadl;
> +
>  	/* Create and init the channel open message */
>  	open_info = kzalloc(sizeof(*open_info) +
>  			   sizeof(struct vmbus_channel_open_channel),
> diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
> index 2aee356840a2..24d64d18eb65 100644
> --- a/drivers/hv/ring_buffer.c
> +++ b/drivers/hv/ring_buffer.c
> @@ -17,6 +17,8 @@
>  #include <linux/vmalloc.h>
>  #include <linux/slab.h>
>  #include <linux/prefetch.h>
> +#include <linux/io.h>
> +#include <asm/mshyperv.h>
> 
>  #include "hyperv_vmbus.h"
> 
> @@ -183,8 +185,10 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
>  int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
>  		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
>  {
> -	int i;
>  	struct page **pages_wraparound;
> +	unsigned long *pfns_wraparound;
> +	u64 pfn;
> +	int i;
> 
>  	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
> 
> @@ -192,23 +196,49 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
>  	 * First page holds struct hv_ring_buffer, do wraparound mapping for
>  	 * the rest.
>  	 */
> -	pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
> -				   GFP_KERNEL);
> -	if (!pages_wraparound)
> -		return -ENOMEM;
> +	if (hv_isolation_type_snp()) {
> +		pfn = page_to_pfn(pages) +
> +			HVPFN_DOWN(ms_hyperv.shared_gpa_boundary);

Use PFN_DOWN, not HVPFN_DOWN.  This is all done in units of guest page
size, not Hyper-V page size.

> 
> -	pages_wraparound[0] = pages;
> -	for (i = 0; i < 2 * (page_cnt - 1); i++)
> -		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
> +		pfns_wraparound = kcalloc(page_cnt * 2 - 1,
> +			sizeof(unsigned long), GFP_KERNEL);
> +		if (!pfns_wraparound)
> +			return -ENOMEM;
> 
> -	ring_info->ring_buffer = (struct hv_ring_buffer *)
> -		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
> +		pfns_wraparound[0] = pfn;
> +		for (i = 0; i < 2 * (page_cnt - 1); i++)
> +			pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1;
> 
> -	kfree(pages_wraparound);
> +		ring_info->ring_buffer = (struct hv_ring_buffer *)
> +			vmap_pfn(pfns_wraparound, page_cnt * 2 - 1,
> +				 PAGE_KERNEL);
> +		kfree(pfns_wraparound);
> 
> +		if (!ring_info->ring_buffer)
> +			return -ENOMEM;
> +
> +		/* Zero ring buffer after setting memory host visibility. */
> +		memset(ring_info->ring_buffer, 0x00,
> +			HV_HYP_PAGE_SIZE * page_cnt);

The page_cnt parameter is in units of the guest page size.  So this
should use PAGE_SIZE, not HV_HYP_PAGE_SIZE.

> +	} else {
> +		pages_wraparound = kcalloc(page_cnt * 2 - 1,
> +					   sizeof(struct page *),
> +					   GFP_KERNEL);
> +
> +		pages_wraparound[0] = pages;
> +		for (i = 0; i < 2 * (page_cnt - 1); i++)
> +			pages_wraparound[i + 1] =
> +				&pages[i % (page_cnt - 1) + 1];
> +
> +		ring_info->ring_buffer = (struct hv_ring_buffer *)
> +			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
> +				PAGE_KERNEL);
> +
> +		kfree(pages_wraparound);
> +		if (!ring_info->ring_buffer)
> +			return -ENOMEM;
> +	}

With this patch, the code is a big "if" statement with two halves -- one
when SNP isolation is in effect, and the other when not.  The SNP isolation
case does the work using PFNs with the shared_gpa_boundary added,
while the other case does the same work but using struct page.  Perhaps
I'm missing something, but can both halves be combined and always
do the work using PFNs?  The only difference is whether to add the
shared_gpa_boundary, and whether to zero the memory when done.
So get the starting PFN, then have an "if" statement for whether to
add the shared_gpa_boundary.  Then everything else is the same.
At the end, use an "if" statement to decide whether to zero the
memory.  It would really be better to have the logic in this algorithm
coded only once.

> 
> -	if (!ring_info->ring_buffer)
> -		return -ENOMEM;
> 
>  	ring_info->ring_buffer->read_index =
>  		ring_info->ring_buffer->write_index = 0;
> --
> 2.25.1
Tianyu Lan Sept. 2, 2021, 1:35 p.m. UTC | #2
On 9/2/2021 8:23 AM, Michael Kelley wrote:
>> +	} else {
>> +		pages_wraparound = kcalloc(page_cnt * 2 - 1,
>> +					   sizeof(struct page *),
>> +					   GFP_KERNEL);
>> +
>> +		pages_wraparound[0] = pages;
>> +		for (i = 0; i < 2 * (page_cnt - 1); i++)
>> +			pages_wraparound[i + 1] =
>> +				&pages[i % (page_cnt - 1) + 1];
>> +
>> +		ring_info->ring_buffer = (struct hv_ring_buffer *)
>> +			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
>> +				PAGE_KERNEL);
>> +
>> +		kfree(pages_wraparound);
>> +		if (!ring_info->ring_buffer)
>> +			return -ENOMEM;
>> +	}
> With this patch, the code is a big "if" statement with two halves -- one
> when SNP isolation is in effect, and the other when not.  The SNP isolation
> case does the work using PFNs with the shared_gpa_boundary added,
> while the other case does the same work but using struct page.  Perhaps
> I'm missing something, but can both halves be combined and always
> do the work using PFNs?  The only difference is whether to add the
> shared_gpa_boundary, and whether to zero the memory when done.
> So get the starting PFN, then have an "if" statement for whether to
> add the shared_gpa_boundary.  Then everything else is the same.
> At the end, use an "if" statement to decide whether to zero the
> memory.  It would really be better to have the logic in this algorithm
> coded only once.
> 

Hi Michael:
	I have tried this before. But vmap_pfn() only works for those pfns out 
of normal memory. Please see vmap_pfn_apply() for detail and
return error when the PFN is valid.
Michael Kelley (LINUX) Sept. 2, 2021, 4:14 p.m. UTC | #3
From: Tianyu Lan <ltykernel@gmail.com> Sent: Thursday, September 2, 2021 6:36 AM
> 
> On 9/2/2021 8:23 AM, Michael Kelley wrote:
> >> +	} else {
> >> +		pages_wraparound = kcalloc(page_cnt * 2 - 1,
> >> +					   sizeof(struct page *),
> >> +					   GFP_KERNEL);
> >> +
> >> +		pages_wraparound[0] = pages;
> >> +		for (i = 0; i < 2 * (page_cnt - 1); i++)
> >> +			pages_wraparound[i + 1] =
> >> +				&pages[i % (page_cnt - 1) + 1];
> >> +
> >> +		ring_info->ring_buffer = (struct hv_ring_buffer *)
> >> +			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
> >> +				PAGE_KERNEL);
> >> +
> >> +		kfree(pages_wraparound);
> >> +		if (!ring_info->ring_buffer)
> >> +			return -ENOMEM;
> >> +	}
> > With this patch, the code is a big "if" statement with two halves -- one
> > when SNP isolation is in effect, and the other when not.  The SNP isolation
> > case does the work using PFNs with the shared_gpa_boundary added,
> > while the other case does the same work but using struct page.  Perhaps
> > I'm missing something, but can both halves be combined and always
> > do the work using PFNs?  The only difference is whether to add the
> > shared_gpa_boundary, and whether to zero the memory when done.
> > So get the starting PFN, then have an "if" statement for whether to
> > add the shared_gpa_boundary.  Then everything else is the same.
> > At the end, use an "if" statement to decide whether to zero the
> > memory.  It would really be better to have the logic in this algorithm
> > coded only once.
> >
> 
> Hi Michael:
> 	I have tried this before. But vmap_pfn() only works for those pfns out
> of normal memory. Please see vmap_pfn_apply() for detail and
> return error when the PFN is valid.
> 

Indeed.  This ties into the discussion with Christoph about coming up
with generalized helper functions to assist in handling the
shared_gpa_boundary.   Having a single implementation here in
hv_ringbuffer_init() would be a good goal as well.

Michael
diff mbox series

Patch

diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index d1123ceb38f3..dd12af20e467 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -8,6 +8,7 @@  config HYPERV
 		|| (ARM64 && !CPU_BIG_ENDIAN))
 	select PARAVIRT
 	select X86_HV_CALLBACK_VECTOR if X86
+	select VMAP_PFN
 	help
 	  Select this option to run Linux as a Hyper-V client operating
 	  system.
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 82650beb3af0..81f8629e4491 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -679,15 +679,6 @@  static int __vmbus_open(struct vmbus_channel *newchannel,
 	if (!newchannel->max_pkt_size)
 		newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE;
 
-	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0);
-	if (err)
-		goto error_clean_ring;
-
-	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
-				 recv_pages, newchannel->max_pkt_size);
-	if (err)
-		goto error_clean_ring;
-
 	/* Establish the gpadl for the ring buffer */
 	newchannel->ringbuffer_gpadlhandle = 0;
 
@@ -699,6 +690,16 @@  static int __vmbus_open(struct vmbus_channel *newchannel,
 	if (err)
 		goto error_clean_ring;
 
+	err = hv_ringbuffer_init(&newchannel->outbound,
+				 page, send_pages, 0);
+	if (err)
+		goto error_free_gpadl;
+
+	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
+				 recv_pages, newchannel->max_pkt_size);
+	if (err)
+		goto error_free_gpadl;
+
 	/* Create and init the channel open message */
 	open_info = kzalloc(sizeof(*open_info) +
 			   sizeof(struct vmbus_channel_open_channel),
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 2aee356840a2..24d64d18eb65 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -17,6 +17,8 @@ 
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
+#include <linux/io.h>
+#include <asm/mshyperv.h>
 
 #include "hyperv_vmbus.h"
 
@@ -183,8 +185,10 @@  void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
 {
-	int i;
 	struct page **pages_wraparound;
+	unsigned long *pfns_wraparound;
+	u64 pfn;
+	int i;
 
 	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
 
@@ -192,23 +196,49 @@  int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 	 * First page holds struct hv_ring_buffer, do wraparound mapping for
 	 * the rest.
 	 */
-	pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
-				   GFP_KERNEL);
-	if (!pages_wraparound)
-		return -ENOMEM;
+	if (hv_isolation_type_snp()) {
+		pfn = page_to_pfn(pages) +
+			HVPFN_DOWN(ms_hyperv.shared_gpa_boundary);
 
-	pages_wraparound[0] = pages;
-	for (i = 0; i < 2 * (page_cnt - 1); i++)
-		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+		pfns_wraparound = kcalloc(page_cnt * 2 - 1,
+			sizeof(unsigned long), GFP_KERNEL);
+		if (!pfns_wraparound)
+			return -ENOMEM;
 
-	ring_info->ring_buffer = (struct hv_ring_buffer *)
-		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+		pfns_wraparound[0] = pfn;
+		for (i = 0; i < 2 * (page_cnt - 1); i++)
+			pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1;
 
-	kfree(pages_wraparound);
+		ring_info->ring_buffer = (struct hv_ring_buffer *)
+			vmap_pfn(pfns_wraparound, page_cnt * 2 - 1,
+				 PAGE_KERNEL);
+		kfree(pfns_wraparound);
 
+		if (!ring_info->ring_buffer)
+			return -ENOMEM;
+
+		/* Zero ring buffer after setting memory host visibility. */
+		memset(ring_info->ring_buffer, 0x00,
+			HV_HYP_PAGE_SIZE * page_cnt);
+	} else {
+		pages_wraparound = kcalloc(page_cnt * 2 - 1,
+					   sizeof(struct page *),
+					   GFP_KERNEL);
+
+		pages_wraparound[0] = pages;
+		for (i = 0; i < 2 * (page_cnt - 1); i++)
+			pages_wraparound[i + 1] =
+				&pages[i % (page_cnt - 1) + 1];
+
+		ring_info->ring_buffer = (struct hv_ring_buffer *)
+			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
+				PAGE_KERNEL);
+
+		kfree(pages_wraparound);
+		if (!ring_info->ring_buffer)
+			return -ENOMEM;
+	}
 
-	if (!ring_info->ring_buffer)
-		return -ENOMEM;
 
 	ring_info->ring_buffer->read_index =
 		ring_info->ring_buffer->write_index = 0;