Message ID | 20180901124811.591511876@intel.com |
---|---|
State | New, archived |
Series | introduce /proc/PID/idle_bitmap |
On 1 Sep 2018, at 13:28, Fengguang Wu <fengguang.wu@intel.com> wrote:

> +static ssize_t ept_idle_read(struct file *file, char *buf,
> +                             size_t count, loff_t *ppos)
> +{
> +        struct task_struct *task = file->private_data;
> +        struct ept_idle_ctrl *eic;
> +        unsigned long hva_start = *ppos << BITMAP_BYTE2PVA_SHIFT;
> +        unsigned long hva_end = hva_start + (count << BITMAP_BYTE2PVA_SHIFT);
> +        int ret;
> +
> +        if (*ppos % IDLE_BITMAP_CHUNK_SIZE ||
> +            count % IDLE_BITMAP_CHUNK_SIZE)
> +                return -EINVAL;
> +
> +        eic = kzalloc(sizeof(*eic), GFP_KERNEL);
> +        if (!eic)
> +                return -EBUSY;
> +
> +        eic->buf = buf;
> +        eic->buf_size = count;
> +        eic->kvm = task_kvm(task);
> +        if (!eic->kvm) {
> +                ret = -EINVAL;
> +                goto out_free;
> +        }

I think you need to increment the refcount while using kvm, otherwise
kvm can be destroyed from another thread while you're walking it.

-Nikita

> +
> +        ret = ept_idle_walk_hva_range(eic, hva_start, hva_end);
> +        if (ret)
> +                goto out_free;
> +
> +        ret = eic->bytes_copied;
> +        *ppos += ret;
> +out_free:
> +        kfree(eic);
> +
> +        return ret;
> +}
kvm_get_kvm() kvm_put_kvm()

-----Original Message-----
From: Nikita Leshenko [mailto:nikita.leshchenko@oracle.com]
Sent: Tuesday, September 4, 2018 3:57 PM
To: Wu, Fengguang <fengguang.wu@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>; Linux Memory Management List <linux-mm@kvack.org>; Peng, DongX <dongx.peng@intel.com>; Liu, Jingqi <jingqi.liu@intel.com>; Dong, Eddie <eddie.dong@intel.com>; Hansen, Dave <dave.hansen@intel.com>; Huang, Ying <ying.huang@intel.com>; Brendan Gregg <bgregg@netflix.com>; kvm@vger.kernel.org; LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC][PATCH 3/5] [PATCH 3/5] kvm-ept-idle: HVA indexed EPT read

On 1 Sep 2018, at 13:28, Fengguang Wu <fengguang.wu@intel.com> wrote:
[...]
> +        eic->kvm = task_kvm(task);
> +        if (!eic->kvm) {
> +                ret = -EINVAL;
> +                goto out_free;
> +        }

I think you need to increment the refcount while using kvm, otherwise kvm can be destroyed from another thread while you're walking it.

-Nikita
[...]
Yeah, thanks! We are currently restructuring the related functions and
will add these calls once the walk-order and hole issues are sorted out.

Thanks,
Fengguang

On Tue, Sep 04, 2018 at 04:12:00PM +0800, Peng Dong wrote:
> kvm_get_kvm() kvm_put_kvm()
>
> -----Original Message-----
> From: Nikita Leshenko [mailto:nikita.leshchenko@oracle.com]
> Subject: Re: [RFC][PATCH 3/5] [PATCH 3/5] kvm-ept-idle: HVA indexed EPT read
>
> On 1 Sep 2018, at 13:28, Fengguang Wu <fengguang.wu@intel.com> wrote:
> [...]
> I think you need to increment the refcount while using kvm, otherwise
> kvm can be destroyed from another thread while you're walking it.
>
> -Nikita
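For concreteness, here is a minimal sketch of how the helpers named above, kvm_get_kvm()/kvm_put_kvm(), could be folded into ept_idle_read(). This is an illustration of the reviewer's suggestion, not the restructured code Fengguang refers to, and it assumes the pointer returned by task_kvm() is still reachable at the moment the reference is taken:

        eic->kvm = task_kvm(task);
        if (!eic->kvm) {
                ret = -EINVAL;
                goto out_free;
        }
        /* Pin the VM so it cannot be destroyed while its memslots are walked. */
        kvm_get_kvm(eic->kvm);

        ret = ept_idle_walk_hva_range(eic, hva_start, hva_end);

        /* Drop the reference taken above, whether or not the walk succeeded. */
        kvm_put_kvm(eic->kvm);

        if (ret)
                goto out_free;

        ret = eic->bytes_copied;
        *ppos += ret;
out_free:
        kfree(eic);

        return ret;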
diff --git a/arch/x86/kvm/ept_idle.c b/arch/x86/kvm/ept_idle.c
new file mode 100644
index 000000000000..5b97dd01011b
--- /dev/null
+++ b/arch/x86/kvm/ept_idle.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/bitmap.h>
+
+#include "ept_idle.h"
+
+
+// mindless copy from kvm_handle_hva_range().
+// TODO: handle order and hole.
+static int ept_idle_walk_hva_range(struct ept_idle_ctrl *eic,
+                                   unsigned long start,
+                                   unsigned long end)
+{
+        struct kvm_memslots *slots;
+        struct kvm_memory_slot *memslot;
+        int ret = 0;
+
+        slots = kvm_memslots(eic->kvm);
+        kvm_for_each_memslot(memslot, slots) {
+                unsigned long hva_start, hva_end;
+                gfn_t gfn_start, gfn_end;
+
+                hva_start = max(start, memslot->userspace_addr);
+                hva_end = min(end, memslot->userspace_addr +
+                                   (memslot->npages << PAGE_SHIFT));
+                if (hva_start >= hva_end)
+                        continue;
+                /*
+                 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+                 */
+                gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+                gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+                ret = ept_idle_walk_gfn_range(eic, gfn_start, gfn_end);
+                if (ret)
+                        return ret;
+        }
+
+        return ret;
+}
+
+static ssize_t ept_idle_read(struct file *file, char *buf,
+                             size_t count, loff_t *ppos)
+{
+        struct task_struct *task = file->private_data;
+        struct ept_idle_ctrl *eic;
+        unsigned long hva_start = *ppos << BITMAP_BYTE2PVA_SHIFT;
+        unsigned long hva_end = hva_start + (count << BITMAP_BYTE2PVA_SHIFT);
+        int ret;
+
+        if (*ppos % IDLE_BITMAP_CHUNK_SIZE ||
+            count % IDLE_BITMAP_CHUNK_SIZE)
+                return -EINVAL;
+
+        eic = kzalloc(sizeof(*eic), GFP_KERNEL);
+        if (!eic)
+                return -EBUSY;
+
+        eic->buf = buf;
+        eic->buf_size = count;
+        eic->kvm = task_kvm(task);
+        if (!eic->kvm) {
+                ret = -EINVAL;
+                goto out_free;
+        }
+
+        ret = ept_idle_walk_hva_range(eic, hva_start, hva_end);
+        if (ret)
+                goto out_free;
+
+        ret = eic->bytes_copied;
+        *ppos += ret;
+out_free:
+        kfree(eic);
+
+        return ret;
+}
+
+static int ept_idle_open(struct inode *inode, struct file *file)
+{
+        if (!try_module_get(THIS_MODULE))
+                return -EBUSY;
+
+        return 0;
+}
+
+static int ept_idle_release(struct inode *inode, struct file *file)
+{
+        module_put(THIS_MODULE);
+        return 0;
+}
+
+extern struct file_operations proc_ept_idle_operations;
+
+static int ept_idle_entry(void)
+{
+        proc_ept_idle_operations.owner = THIS_MODULE;
+        proc_ept_idle_operations.read = ept_idle_read;
+        proc_ept_idle_operations.open = ept_idle_open;
+        proc_ept_idle_operations.release = ept_idle_release;
+
+        return 0;
+}
+
+static void ept_idle_exit(void)
+{
+        memset(&proc_ept_idle_operations, 0, sizeof(proc_ept_idle_operations));
+}
+
+MODULE_LICENSE("GPL");
+module_init(ept_idle_entry);
+module_exit(ept_idle_exit);
diff --git a/arch/x86/kvm/ept_idle.h b/arch/x86/kvm/ept_idle.h
new file mode 100644
index 000000000000..e0b9dcecf50b
--- /dev/null
+++ b/arch/x86/kvm/ept_idle.h
@@ -0,0 +1,24 @@
+#ifndef _EPT_IDLE_H
+#define _EPT_IDLE_H
+
+#define IDLE_BITMAP_CHUNK_SIZE          sizeof(u64)
+#define IDLE_BITMAP_CHUNK_BITS          (IDLE_BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
+
+#define BITMAP_BYTE2PVA_SHIFT           (3 + PAGE_SHIFT)
+
+#define EPT_IDLE_KBUF_FULL              1
+#define EPT_IDLE_KBUF_BYTES             8000
+#define EPT_IDLE_KBUF_BITS              (EPT_IDLE_KBUF_BYTES * 8)
+
+struct ept_idle_ctrl {
+        struct kvm *kvm;
+
+        u64 kbuf[EPT_IDLE_KBUF_BITS / IDLE_BITMAP_CHUNK_BITS];
+        int bits_read;
+
+        void __user *buf;
+        int buf_size;
+        int bytes_copied;
+};
+
+#endif
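To make the read interface above concrete, here is a hypothetical userspace sketch. It relies only on what the patch defines: the file offset and count must be multiples of IDLE_BITMAP_CHUNK_SIZE (8 bytes), and a byte offset maps to a host virtual address via BITMAP_BYTE2PVA_SHIFT (one bitmap byte covers 8 pages). The /proc/<pid>/idle_bitmap path comes from the series title rather than this patch, 4K pages are assumed, and the meaning of individual bits is deliberately not interpreted here, since the bitmap-filling code is not part of this patch:

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define PAGE_SHIFT              12                      /* assumption: 4K pages */
#define BITMAP_BYTE2PVA_SHIFT   (3 + PAGE_SHIFT)        /* 1 bitmap byte = 8 pages of VA */
#define CHUNK                   sizeof(uint64_t)        /* IDLE_BITMAP_CHUNK_SIZE */

int main(int argc, char **argv)
{
        char path[64];
        unsigned long va, base;
        uint64_t chunk;
        off_t off;
        int fd;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <pid> <start VA in hex>\n", argv[0]);
                return 1;
        }
        va = strtoul(argv[2], NULL, 16);

        /* Hypothetical path, per the series title "introduce /proc/PID/idle_bitmap". */
        snprintf(path, sizeof(path), "/proc/%s/idle_bitmap", argv[1]);
        fd = open(path, O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Byte offset covering va, rounded down to the 8-byte chunk alignment
         * that ept_idle_read() enforces on *ppos and count. */
        off = (va >> BITMAP_BYTE2PVA_SHIFT) & ~(CHUNK - 1);

        /* One 8-byte chunk covers 64 pages starting at off << BITMAP_BYTE2PVA_SHIFT. */
        if (pread(fd, &chunk, sizeof(chunk), off) == (ssize_t)sizeof(chunk)) {
                base = (unsigned long)off << BITMAP_BYTE2PVA_SHIFT;
                printf("bitmap chunk for VA %#lx-%#lx: %#018" PRIx64 "\n",
                       base, base + (64UL << PAGE_SHIFT) - 1, chunk);
        }
        close(fd);
        return 0;
}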