
[1/1] Check emulator_read_write_onepage will not access beyond one page size

Message ID 1372735891-25529-1-git-send-email-user@zzy-Lenovo (mailing list archive)
State New, archived

Commit Message

Zhouyi Zhou July 2, 2013, 3:31 a.m. UTC
From: Zhouyi Zhou <yizhouzhou@ict.ac.cn>

Currently the callers of emulator_read_write_onepage do not check the value range
of the argument "bytes". If bytes is greater than the guest page size, many 
unexpected things will happen.

This patch performs a check on the access size.    

Tested on my x86_64 machine
Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
---
 arch/x86/include/asm/kvm_host.h |    6 ++++++
 arch/x86/kvm/paging_tmpl.h      |    4 ++++
 arch/x86/kvm/x86.c              |    8 ++++++++
 3 files changed, 18 insertions(+)

1.7.10.4


Comments

Gleb Natapov July 2, 2013, 8:30 a.m. UTC | #1
On Tue, Jul 02, 2013 at 11:31:31AM +0800, Zhouyi Zhou wrote:
> From: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> 
> Currently the callers of emulator_read_write_onepage do not check the value range
> of the argument "bytes". If bytes is greater than the guest page size, many 
> unexpected things will happen.
> 
The caller of emulator_read_write_onepage() explicitly checks that the bytes
do not cross a page boundary. Since emulator_read_write() should not
be called to write more than 4k at a time, this is enough. But even if
it is, kvm_write_guest() makes sure that each write does not cross a page
boundary either.
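From memory (a simplified sketch, not verbatim from the tree), the relevant
part of emulator_read_write() is:

	/* Crossing a page boundary? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		int now = -addr & ~PAGE_MASK;	/* bytes left in the first page */

		rc = emulator_read_write_onepage(addr, val, now, exception,
						 vcpu, ops);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		addr += now;
		val += now;
		bytes -= now;
	}
	rc = emulator_read_write_onepage(addr, val, bytes, exception,
					 vcpu, ops);

so as long as the total access is <= 4k, each emulator_read_write_onepage()
call is already confined to a single page.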

> This patch performs a check on the access size.    
> 
> Tested on my x86_64 machine
What is tested? The patch fixes nothing. Can you trigger this WARN_ON()?
How?

> Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> ---
>  arch/x86/include/asm/kvm_host.h |    6 ++++++
>  arch/x86/kvm/paging_tmpl.h      |    4 ++++
>  arch/x86/kvm/x86.c              |    8 ++++++++
>  3 files changed, 18 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..e28e6fe 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -239,6 +239,11 @@ struct kvm_pio_request {
>  	int size;
>  };
>  
> +enum guest_page_size {
> +	GUEST_PGSZ_4K = 0x0,
> +	GUEST_PGSZ_2M = 0x1,
> +	GUEST_PGSZ_4M = 0x2,
> +};
>  /*
>   * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
>   * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
> @@ -289,6 +294,7 @@ struct kvm_mmu {
>  	bool nx;
>  
>  	u64 pdptrs[4]; /* pae */
> +	enum guest_page_size last_guest_page_size; /* last guest page size in walk*/
>  };
>  
>  enum pmc_type {
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index da20860..6eb0471 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -244,6 +244,10 @@ retry_walk:
>  	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
>  	if (real_gpa == UNMAPPED_GVA)
>  		return 0;
> +	
> +	vcpu->arch.walk_mmu->last_guest_page_size
> +	  = !is_large_pte(pte) ? GUEST_PGSZ_4K :
> +	  ((!is_long_mode(vcpu) && !is_pae(vcpu)) ? GUEST_PGSZ_4M : GUEST_PGSZ_2M);
>  
>  	walker->gfn = real_gpa >> PAGE_SHIFT;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e8ba99c..de196f7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4130,6 +4130,8 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
>  	bool write = ops->write;
>  	struct kvm_mmio_fragment *frag;
>  
> +	vcpu->arch.walk_mmu->last_guest_page_size = GUEST_PGSZ_4K;
> +
>  	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>  
>  	if (ret < 0)
> @@ -4139,7 +4141,12 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
>  	if (ret)
>  		goto mmio;
>  
> +	WARN_ON((vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_4K) ?
> +		(((gpa&(PAGE_SIZE-1)) + bytes) > PAGE_SIZE) :
> +		(vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_2M) ?
> +		(((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE) :
> +		(((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE*2));
> +
>  	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
>  		return X86EMUL_CONTINUE;
>  
> -- 
> 1.7.10.4

--
			Gleb.
Zhouyi Zhou July 2, 2013, 9:31 a.m. UTC | #2
Thanks for reviewing

On Tue, Jul 2, 2013 at 4:30 PM, Gleb Natapov <gleb@redhat.com> wrote:
>
> On Tue, Jul 02, 2013 at 11:31:31AM +0800, Zhouyi Zhou wrote:
> > From: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> >
> > Currently the callers of emulator_read_write_onepage do not check the value range
> > of the argument "bytes". If bytes is greater than the guest page size, many
> > unexpected things will happen.
> >
> The caller of emulator_read_write_onepage() explicitly checks that bytes
> do not cross page boundaries. Since emulator_read_write() should not
> be called to write more then 4k at a time this is enough. But even if
Yes, the current Linux kernel will not trigger this marginal condition, so my
patch is just a suggestion :-)
> it is kvm_write_guest() makes sure that each write does not cross page
> boundary too.
kvm_write_guest() increases the guest physical page number but not the guest
virtual page; gfn++ will point elsewhere in the guest.
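As far as I can see, kvm_write_guest() does roughly the following (a
simplified sketch from my reading of virt/kvm/kvm_main.c, not verbatim):

	gfn_t gfn = gpa >> PAGE_SHIFT;
	int offset = offset_in_page(gpa);
	int seg;

	while ((seg = next_segment(len, offset)) != 0) {
		/* next_segment() never returns more than the rest of the page */
		ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}

so ++gfn walks consecutive guest physical pages, which need not be
contiguous in the guest virtual address space.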

>
> > This patch performs a check on the access size.
> >
> > Tested on my x86_64 machine
> What is tested? The patch fixes nothing. Can you trigger this WARN_ON()?
> How?
My research kernel originally used emulator_read_write() to read and write a
lot of bytes (I have since replaced emulator_read_write() with
kvm_read_guest_virt()). The official Linux kernel has nowhere to trigger this
WARN_ON(). Could this patch serve as a future safeguard?
>
> > Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    6 ++++++
> >  arch/x86/kvm/paging_tmpl.h      |    4 ++++
> >  arch/x86/kvm/x86.c              |    8 ++++++++
> >  3 files changed, 18 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 3741c65..e28e6fe 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -239,6 +239,11 @@ struct kvm_pio_request {
> >       int size;
> >  };
> >
> > +enum guest_page_size {
> > +     GUEST_PGSZ_4K = 0x0,
> > +     GUEST_PGSZ_2M = 0x1,
> > +     GUEST_PGSZ_4M = 0x2,
> > +};
> >  /*
> >   * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
> >   * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
> > @@ -289,6 +294,7 @@ struct kvm_mmu {
> >       bool nx;
> >
> >       u64 pdptrs[4]; /* pae */
> > +     enum guest_page_size last_guest_page_size; /* last guest page size in walk*/
> >  };
> >
> >  enum pmc_type {
> > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> > index da20860..6eb0471 100644
> > --- a/arch/x86/kvm/paging_tmpl.h
> > +++ b/arch/x86/kvm/paging_tmpl.h
> > @@ -244,6 +244,10 @@ retry_walk:
> >       real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
> >       if (real_gpa == UNMAPPED_GVA)
> >               return 0;
> > +
> > +     vcpu->arch.walk_mmu->last_guest_page_size
> > +       = !is_large_pte(pte) ? GUEST_PGSZ_4K :
> > +       ((!is_long_mode(vcpu) && !is_pae(vcpu)) ? GUEST_PGSZ_4M : GUEST_PGSZ_2M);
> >
> >       walker->gfn = real_gpa >> PAGE_SHIFT;
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index e8ba99c..de196f7 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -4130,6 +4130,8 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> >       bool write = ops->write;
> >       struct kvm_mmio_fragment *frag;
> >
> > +     vcpu->arch.walk_mmu->last_guest_page_size = GUEST_PGSZ_4K;
> > +
> >       ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> >
> >       if (ret < 0)
> > @@ -4139,7 +4141,12 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> >       if (ret)
> >               goto mmio;
> >
> > +     WARN_ON((vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_4K) ?
> > +             (((gpa&(PAGE_SIZE-1)) + bytes) > PAGE_SIZE) :
> > +             (vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_2M) ?
> > +             (((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE) :
> > +             (((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE*2));
> > +
> >       if (ops->read_write_emulate(vcpu, gpa, val, bytes))
> >               return X86EMUL_CONTINUE;
> >
> > --
> > 1.7.10.4
>
> --
>                         Gleb.
Thanks again
Zhouyi
Gleb Natapov July 2, 2013, 10:11 a.m. UTC | #3
On Tue, Jul 02, 2013 at 05:31:09PM +0800, Zhouyi Zhou wrote:
> Thanks for reviewing
> 
> On Tue, Jul 2, 2013 at 4:30 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >
> > On Tue, Jul 02, 2013 at 11:31:31AM +0800, Zhouyi Zhou wrote:
> > > From: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> > >
> > > Currently the callers of emulator_read_write_onepage do not check the value range
> > > of the argument "bytes". If bytes is greater than the guest page size, many
> > > unexpected things will happen.
> > >
> > The caller of emulator_read_write_onepage() explicitly checks that bytes
> > do not cross page boundaries. Since emulator_read_write() should not
> > be called to write more then 4k at a time this is enough. But even if
> Yes, current Linux kernel will not trigger this marginal condition, so my patch
> is just a suggestion :-)
It is not just by chance that it does not trigger it; this is how it is
supposed to work. Even if we want to protect the API from being abused, your
patch is incorrect. Just put BUG_ON(bytes > PAGE_SIZE) in emulator_read_write(),
as simple as that.
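Something like (untested):

	static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
				       unsigned long addr, void *val,
				       unsigned int bytes,
				       struct x86_exception *exception,
				       const struct read_write_emulator_ops *ops)
	{
		...
		BUG_ON(bytes > PAGE_SIZE);	/* emulator never issues larger accesses */
		...
	}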
 
> > it is kvm_write_guest() makes sure that each write does not cross page
> > boundary too.
> kvm_write_guest increase the guest physical page number but not guest virtual
> page, gfn++ will point to else where in guest.
> 
The important point is that the guest will not be able to overwrite random
host memory.

> >
> > > This patch performs a check on the access size.
> > >
> > > Tested on my x86_64 machine
> > What is tested? The patch fixes nothing. Can you trigger this WARN_ON()?
> > How?
> My research kernel originally use emulator_read_write to read write
> lot of bytes,(now
> I replace emulator_read_write with kvm_read_guest_virt)
> official linux kernel has nowhere to trigger this WARN_ON. Can this patch
> serves as a future safeguard?
emulator_read_write() should be used by the emulator only. Even then, if you
wanted it to handle bigger writes, you should have fixed
emulator_read_write() to call emulator_read_write_onepage() in a loop.
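i.e. something along these lines (untested sketch, read-side result handling
omitted):

	while (bytes) {
		unsigned int now = min_t(unsigned int, bytes,
					 PAGE_SIZE - offset_in_page(addr));

		rc = emulator_read_write_onepage(addr, val, now, exception,
						 vcpu, ops);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		addr += now;
		val += now;
		bytes -= now;
	}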

> >
> > > Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> > > ---
> > >  arch/x86/include/asm/kvm_host.h |    6 ++++++
> > >  arch/x86/kvm/paging_tmpl.h      |    4 ++++
> > >  arch/x86/kvm/x86.c              |    8 ++++++++
> > >  3 files changed, 18 insertions(+)
> > >
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index 3741c65..e28e6fe 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -239,6 +239,11 @@ struct kvm_pio_request {
> > >       int size;
> > >  };
> > >
> > > +enum guest_page_size {
> > > +     GUEST_PGSZ_4K = 0x0,
> > > +     GUEST_PGSZ_2M = 0x1,
> > > +     GUEST_PGSZ_4M = 0x2,
> > > +};
> > >  /*
> > >   * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
> > >   * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
> > > @@ -289,6 +294,7 @@ struct kvm_mmu {
> > >       bool nx;
> > >
> > >       u64 pdptrs[4]; /* pae */
> > > +     enum guest_page_size last_guest_page_size; /* last guest page size in walk*/
> > >  };
> > >
> > >  enum pmc_type {
> > > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> > > index da20860..6eb0471 100644
> > > --- a/arch/x86/kvm/paging_tmpl.h
> > > +++ b/arch/x86/kvm/paging_tmpl.h
> > > @@ -244,6 +244,10 @@ retry_walk:
> > >       real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
> > >       if (real_gpa == UNMAPPED_GVA)
> > >               return 0;
> > > +
> > > +     vcpu->arch.walk_mmu->last_guest_page_size
> > > +       = !is_large_pte(pte) ? GUEST_PGSZ_4K :
> > > +       ((!is_long_mode(vcpu) && !is_pae(vcpu)) ? GUEST_PGSZ_4M : GUEST_PGSZ_2M);
> > >
> > >       walker->gfn = real_gpa >> PAGE_SHIFT;
> > >
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index e8ba99c..de196f7 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -4130,6 +4130,8 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> > >       bool write = ops->write;
> > >       struct kvm_mmio_fragment *frag;
> > >
> > > +     vcpu->arch.walk_mmu->last_guest_page_size = GUEST_PGSZ_4K;
> > > +
> > >       ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> > >
> > >       if (ret < 0)
> > > @@ -4139,7 +4141,12 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> > >       if (ret)
> > >               goto mmio;
> > >
> > > +     WARN_ON((vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_4K) ?
> > > +             (((gpa&(PAGE_SIZE-1)) + bytes) > PAGE_SIZE) :
> > > +             (vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_2M) ?
> > > +             (((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE) :
> > > +             (((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE*2));
> > > +
> > >       if (ops->read_write_emulate(vcpu, gpa, val, bytes))
> > >               return X86EMUL_CONTINUE;
> > >
> > > --
> > > 1.7.10.4
> >
> > --
> >                         Gleb.
> Thanks again
> Zhouyi

--
			Gleb.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c65..e28e6fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -239,6 +239,11 @@  struct kvm_pio_request {
 	int size;
 };
 
+enum guest_page_size {
+	GUEST_PGSZ_4K = 0x0,
+	GUEST_PGSZ_2M = 0x1,
+	GUEST_PGSZ_4M = 0x2,
+};
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -289,6 +294,7 @@  struct kvm_mmu {
 	bool nx;
 
 	u64 pdptrs[4]; /* pae */
+	enum guest_page_size last_guest_page_size; /* last guest page size in walk*/
 };
 
 enum pmc_type {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860..6eb0471 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -244,6 +244,10 @@  retry_walk:
 	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
 	if (real_gpa == UNMAPPED_GVA)
 		return 0;
+	
+	vcpu->arch.walk_mmu->last_guest_page_size
+	  = !is_large_pte(pte) ? GUEST_PGSZ_4K :
+	  ((!is_long_mode(vcpu) && !is_pae(vcpu)) ? GUEST_PGSZ_4M : GUEST_PGSZ_2M);
 
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c..de196f7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4130,6 +4130,8 @@  static int emulator_read_write_onepage(unsigned long addr, void *val,
 	bool write = ops->write;
 	struct kvm_mmio_fragment *frag;
 
+	vcpu->arch.walk_mmu->last_guest_page_size = GUEST_PGSZ_4K;
+
 	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
 
 	if (ret < 0)
@@ -4139,7 +4141,12 @@  static int emulator_read_write_onepage(unsigned long addr, void *val,
 	if (ret)
 		goto mmio;
 
+	WARN_ON((vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_4K) ?
+		(((gpa&(PAGE_SIZE-1)) + bytes) > PAGE_SIZE) :
+		(vcpu->arch.walk_mmu->last_guest_page_size == GUEST_PGSZ_2M) ?
+		(((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE) :
+		(((gpa&(PAGE_SIZE-1)) + bytes) > HPAGE_SIZE*2));
+
 	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
--