Message ID | 1516280198-25386-3-git-send-email-imbrenda@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 01/18/2018 01:56 PM, Claudio Imbrenda wrote: > This is a fix for several issues that were found in the original code > for storage attributes migration. > > Now no bitmap is allocated to keep track of dirty storage attributes; > the extra bits of the per-memslot bitmap that are always present anyway > are now used for this purpose. > > The code has also been refactored a little to improve readability. > > Fixes: 190df4a212a ("KVM: s390: CMMA tracking, ESSA emulation, migration mode") > Fixes: 4036e3874a1 ("KVM: s390: ioctls to get and set guest storage attributes") > Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> [..] > + /* mark all the pages in active slots as dirty */ > + for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { > + sl = slots->memslots + slotnr; > /> > - * Get the last slot. They should be sorted by base_gfn, so the > - * last slot is also the one at the end of the address space. > - * We have verified above that at least one slot is present. Can you please rebase on top of 4.15-rc8 (or master)? This code already has some fixes. [...] 
> +static unsigned long kvm_s390_next_dirty_cmma(struct kvm *kvm, > + unsigned long cur) > +{ > + struct kvm_memslots *slots = kvm_memslots(kvm); > + struct kvm_memory_slot *sl; > + unsigned long ofs; > + int slotidx; > + > + slotidx = gfn_to_memslot_approx(kvm, cur); > + sl = slots->memslots + slotidx; > + > + if (sl->base_gfn + sl->npages <= cur) { > + slotidx--; > + /* If we are above the highest slot, wrap around */ > + if (slotidx < 0) > + slotidx = slots->used_slots - 1; > + > + sl = slots->memslots + slotidx; > + cur = sl->base_gfn; > + } > + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, cur - sl->base_gfn); > + while ((slotidx > 0) && (ofs >= sl->npages)) { > + slotidx--; > + sl = slots->memslots + slotidx; > + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, 0); > + } > + return sl->base_gfn + ofs; > +} > + > +static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, > + u8 *res, unsigned long bufsize) > +{ > + unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; > + struct kvm_memslots *slots = kvm_memslots(kvm); > + struct kvm_memory_slot *sl; > + > + cur_gfn = kvm_s390_next_dirty_cmma(kvm, args->start_gfn); > + sl = gfn_to_memslot(kvm, cur_gfn); > + args->count = 0; > + args->start_gfn = cur_gfn; > + if (!sl) > + return 0; > + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); > + mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; > + > + while (args->count < bufsize) { > + hva = gfn_to_hva(kvm, cur_gfn); > + if (kvm_is_error_hva(hva)) > + break; > + /* decrement only if we actually flipped the bit to 0 */ > + if (test_and_clear_bit(cur_gfn - sl->base_gfn, cmma_bitmap(sl))) > + atomic64_dec(&kvm->arch.cmma_dirty_pages); > + if (get_pgste(kvm->mm, hva, &pgstev) < 0) > + pgstev = 0; > + /* save the value */ > + res[args->count++] = (pgstev >> 24) & 0x43; > + /* > + * if the next bit is too far away, stop. 
> + * if we reached the previous "next", find the next one > + */ > + if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) > + break; > + if (cur_gfn == next_gfn) > + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); > + /* reached the end of memory or of the buffer, stop */ > + if ((next_gfn >= mem_end) || > + (next_gfn - args->start_gfn >= bufsize)) > + break; > + cur_gfn++; > + if (cur_gfn - sl->base_gfn >= sl->npages) { > + sl = gfn_to_memslot(kvm, cur_gfn); > + if (!sl) > + break; > + } > + } I have to say that this code does not get easier to understand, I fear that I won't be able to review this in time for 4.15.
On 01/18/2018 01:56 PM, Claudio Imbrenda wrote: > This is a fix for several issues that were found in the original code > for storage attributes migration. > > Now no bitmap is allocated to keep track of dirty storage attributes; > the extra bits of the per-memslot bitmap that are always present anyway > are now used for this purpose. > > The code has also been refactored a little to improve readability. > > Fixes: 190df4a212a ("KVM: s390: CMMA tracking, ESSA emulation, migration mode") > Fixes: 4036e3874a1 ("KVM: s390: ioctls to get and set guest storage attributes") > Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> > --- > arch/s390/include/asm/kvm_host.h | 9 +- > arch/s390/kvm/kvm-s390.c | 265 ++++++++++++++++++++++----------------- > arch/s390/kvm/priv.c | 26 ++-- > 3 files changed, 168 insertions(+), 132 deletions(-) > > diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h > index 819838c..2ca73b0 100644 > --- a/arch/s390/include/asm/kvm_host.h > +++ b/arch/s390/include/asm/kvm_host.h > @@ -763,12 +763,6 @@ struct kvm_s390_vsie { > struct page *pages[KVM_MAX_VCPUS]; > }; > > -struct kvm_s390_migration_state { > - unsigned long bitmap_size; /* in bits (number of guest pages) */ > - atomic64_t dirty_pages; /* number of dirty pages */ > - unsigned long *pgste_bitmap; > -}; > - > struct kvm_arch{ > void *sca; > int use_esca; > @@ -796,7 +790,8 @@ struct kvm_arch{ > struct kvm_s390_vsie vsie; > u8 epdx; > u64 epoch; > - struct kvm_s390_migration_state *migration_state; > + atomic_t migration_mode; > + atomic64_t cmma_dirty_pages; > /* subset of available cpu features enabled by user space */ > DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); > struct kvm_s390_gisa *gisa; > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 5a69334..85448a2 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -770,53 +770,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, 
int req) > */ > static int kvm_s390_vm_start_migration(struct kvm *kvm) > { > - struct kvm_s390_migration_state *mgs; > - struct kvm_memory_slot *ms; > - /* should be the only one */ > struct kvm_memslots *slots; > - unsigned long ram_pages; > + struct kvm_memory_slot *sl; > + unsigned long ram_pages = 0; > int slotnr; > > /* migration mode already enabled */ > - if (kvm->arch.migration_state) > + if (atomic_read(&kvm->arch.migration_mode)) > return 0; Being atomic does not help here, because you just use read and write (and not an atomic op). We are protected by a mutex, though. So maybe just use a normal bool variable and add a comment. Please also have a look at KVM: s390: add proper locking for CMMA migration bitmap which changes the locking. Do we want to keep some of these changes (e.g. use the slots_lock)? Or do we want to schedule that fix for stable and use your patches only for 4.16 and later? [...] > + atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); > + atomic_set(&kvm->arch.migration_mode, 1); > + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); > return 0; > } > > @@ -826,19 +810,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) > */ > static int kvm_s390_vm_stop_migration(struct kvm *kvm) > { > - struct kvm_s390_migration_state *mgs; > - > /* migration mode already disabled */ > - if (!kvm->arch.migration_state) > + if (!atomic_read(&kvm->arch.migration_mode)) > return 0; > - mgs = kvm->arch.migration_state; > - kvm->arch.migration_state = NULL; > - > - if (kvm->arch.use_cmma) { > + atomic_set(&kvm->arch.migration_mode, 0); > + if (kvm->arch.use_cmma) > kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); > - vfree(mgs->pgste_bitmap); > - } > - kfree(mgs); > return 0; > } > > @@ -868,7 +845,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm, > static int kvm_s390_vm_get_migration(struct kvm *kvm, > struct kvm_device_attr *attr) > { > - u64 mig = (kvm->arch.migration_state != NULL); > + u64 mig = 
atomic_read(&kvm->arch.migration_mode); > > if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) > return -ENXIO; > @@ -1543,6 +1520,108 @@ static int gfn_to_memslot_approx(struct kvm *kvm, gfn_t gfn) > return start; > } > > +static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, > + u8 *res, unsigned long bufsize) > +{ > + unsigned long pgstev, cur_gfn, hva; > + > + cur_gfn = args->start_gfn; > + args->count = 0; > + while (args->count < bufsize) { > + hva = gfn_to_hva(kvm, cur_gfn); > + /* > + * We return an error if the first value was invalid, but we > + * return successfully if at least one value was copied. > + */ > + if (kvm_is_error_hva(hva)) > + return args->count ? 0 : -EFAULT; > + if (get_pgste(kvm->mm, hva, &pgstev) < 0) > + pgstev = 0; > + res[args->count++] = (pgstev >> 24) & 0x43; > + cur_gfn++; > + } > + > + return 0; > +} > + > +static unsigned long kvm_s390_next_dirty_cmma(struct kvm *kvm, > + unsigned long cur) > +{ > + struct kvm_memslots *slots = kvm_memslots(kvm); > + struct kvm_memory_slot *sl; > + unsigned long ofs; > + int slotidx; > + > + slotidx = gfn_to_memslot_approx(kvm, cur); > + sl = slots->memslots + slotidx; > + > + if (sl->base_gfn + sl->npages <= cur) { > + slotidx--; > + /* If we are above the highest slot, wrap around */ > + if (slotidx < 0) > + slotidx = slots->used_slots - 1; > + > + sl = slots->memslots + slotidx; > + cur = sl->base_gfn; > + } > + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, cur - sl->base_gfn); > + while ((slotidx > 0) && (ofs >= sl->npages)) { > + slotidx--; > + sl = slots->memslots + slotidx; > + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, 0); > + } > + return sl->base_gfn + ofs; > +} > + > +static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, > + u8 *res, unsigned long bufsize) > +{ > + unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; > + struct kvm_memslots *slots = kvm_memslots(kvm); > + struct kvm_memory_slot *sl; > + > + cur_gfn = 
kvm_s390_next_dirty_cmma(kvm, args->start_gfn); > + sl = gfn_to_memslot(kvm, cur_gfn); > + args->count = 0; > + args->start_gfn = cur_gfn; > + if (!sl) > + return 0; > + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); > + mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; > + > + while (args->count < bufsize) { > + hva = gfn_to_hva(kvm, cur_gfn); > + if (kvm_is_error_hva(hva)) > + break; > + /* decrement only if we actually flipped the bit to 0 */ > + if (test_and_clear_bit(cur_gfn - sl->base_gfn, cmma_bitmap(sl))) > + atomic64_dec(&kvm->arch.cmma_dirty_pages); > + if (get_pgste(kvm->mm, hva, &pgstev) < 0) > + pgstev = 0; > + /* save the value */ > + res[args->count++] = (pgstev >> 24) & 0x43; > + /* > + * if the next bit is too far away, stop. > + * if we reached the previous "next", find the next one > + */ > + if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) > + break; > + if (cur_gfn == next_gfn) > + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); > + /* reached the end of memory or of the buffer, stop */ > + if ((next_gfn >= mem_end) || > + (next_gfn - args->start_gfn >= bufsize)) > + break; > + cur_gfn++; > + if (cur_gfn - sl->base_gfn >= sl->npages) { > + sl = gfn_to_memslot(kvm, cur_gfn); > + if (!sl) > + break; > + } > + } > + return 0; > +} > + > /* > * This function searches for the next page with dirty CMMA attributes, and > * saves the attributes in the buffer up to either the end of the buffer or > @@ -1554,97 +1633,53 @@ static int gfn_to_memslot_approx(struct kvm *kvm, gfn_t gfn) > static int kvm_s390_get_cmma_bits(struct kvm *kvm, > struct kvm_s390_cmma_log *args) > { > - struct kvm_s390_migration_state *s = kvm->arch.migration_state; > - unsigned long bufsize, hva, pgstev, i, next, cur; > - int srcu_idx, peek, r = 0, rr; > - u8 *res; > + unsigned long bufsize; > + int srcu_idx, peek, migr, ret; > + u8 *values; > > - cur = args->start_gfn; > - i = next = pgstev = 0; > + migr = atomic_read(&kvm->arch.migration_mode); 
> > if (unlikely(!kvm->arch.use_cmma)) > return -ENXIO; > /* Invalid/unsupported flags were specified */ > - if (args->flags & ~KVM_S390_CMMA_PEEK) > + if (unlikely(args->flags & ~KVM_S390_CMMA_PEEK)) > return -EINVAL; > /* Migration mode query, and we are not doing a migration */ > peek = !!(args->flags & KVM_S390_CMMA_PEEK); > - if (!peek && !s) > + if (unlikely(!peek && !migr)) > return -EINVAL; > /* CMMA is disabled or was not used, or the buffer has length zero */ > bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); > - if (!bufsize || !kvm->mm->context.use_cmma) { > + if (unlikely(!bufsize || !kvm->mm->context.use_cmma)) { > memset(args, 0, sizeof(*args)); > return 0; > } > - > - if (!peek) { > - /* We are not peeking, and there are no dirty pages */ > - if (!atomic64_read(&s->dirty_pages)) { > - memset(args, 0, sizeof(*args)); > - return 0; > - } > - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, > - args->start_gfn); > - if (cur >= s->bitmap_size) /* nothing found, loop back */ > - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); > - if (cur >= s->bitmap_size) { /* again! 
(very unlikely) */ > - memset(args, 0, sizeof(*args)); > - return 0; > - } > - next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); > + /* We are not peeking, and there are no dirty pages */ > + if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { > + memset(args, 0, sizeof(*args)); > + return 0; > } > > - res = vmalloc(bufsize); > - if (!res) > + values = vmalloc(bufsize); > + if (!values) > return -ENOMEM; > > - args->start_gfn = cur; > - > down_read(&kvm->mm->mmap_sem); > srcu_idx = srcu_read_lock(&kvm->srcu); > - while (i < bufsize) { > - hva = gfn_to_hva(kvm, cur); > - if (kvm_is_error_hva(hva)) { > - r = -EFAULT; > - break; > - } > - /* decrement only if we actually flipped the bit to 0 */ > - if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) > - atomic64_dec(&s->dirty_pages); > - r = get_pgste(kvm->mm, hva, &pgstev); > - if (r < 0) > - pgstev = 0; > - /* save the value */ > - res[i++] = (pgstev >> 24) & 0x43; > - /* > - * if the next bit is too far away, stop. > - * if we reached the previous "next", find the next one > - */ > - if (!peek) { > - if (next > cur + KVM_S390_MAX_BIT_DISTANCE) > - break; > - if (cur == next) > - next = find_next_bit(s->pgste_bitmap, > - s->bitmap_size, cur + 1); > - /* reached the end of the bitmap or of the buffer, stop */ > - if ((next >= s->bitmap_size) || > - (next >= args->start_gfn + bufsize)) > - break; > - } > - cur++; > - } > + if (peek) > + ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); > + else > + ret = kvm_s390_get_cmma(kvm, args, values, bufsize); > srcu_read_unlock(&kvm->srcu, srcu_idx); > up_read(&kvm->mm->mmap_sem); > - args->count = i; > - args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; > > - rr = copy_to_user((void __user *)args->values, res, args->count); > - if (rr) > - r = -EFAULT; > + args->remaining = migr ? 
atomic64_read(&kvm->arch.cmma_dirty_pages) : 0; > > - vfree(res); > - return r; > + if (copy_to_user((void __user *)args->values, values, args->count)) > + ret = -EFAULT; > + > + vfree(values); > + return ret; > } > > /* > @@ -2088,10 +2123,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) > kvm_s390_destroy_adapters(kvm); > kvm_s390_clear_float_irqs(kvm); > kvm_s390_vsie_destroy(kvm); > - if (kvm->arch.migration_state) { > - vfree(kvm->arch.migration_state->pgste_bitmap); > - kfree(kvm->arch.migration_state); > - } > KVM_EVENT(3, "vm 0x%pK destroyed", kvm); > } > > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c > index 572496c..7ae4893 100644 > --- a/arch/s390/kvm/priv.c > +++ b/arch/s390/kvm/priv.c > @@ -954,9 +954,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) > return 0; > } > > -static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) > +/* > + * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu) > + */ > +static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) > { > - struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state; > int r1, r2, nappended, entries; > unsigned long gfn, hva, res, pgstev, ptev; > unsigned long *cbrlo; > @@ -1007,9 +1009,11 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) > } > > if (orc) { > - /* increment only if we are really flipping the bit to 1 */ > - if (!test_and_set_bit(gfn, ms->pgste_bitmap)) > - atomic64_inc(&ms->dirty_pages); > + struct kvm_memory_slot *s = gfn_to_memslot(vcpu->kvm, gfn); > + > + /* Increment only if we are really flipping the bit */ > + if (s && !test_and_set_bit(gfn - s->base_gfn, cmma_bitmap(s))) > + atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages); > } > > return nappended; > @@ -1038,7 +1042,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) > : ESSA_SET_STABLE_IF_RESIDENT)) > return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); > > - if (likely(!vcpu->kvm->arch.migration_state)) { > + if 
(likely(!atomic_read(&vcpu->kvm->arch.migration_mode))) { > /* > * CMMA is enabled in the KVM settings, but is disabled in > * the SIE block and in the mm_context, and we are not doing > @@ -1066,10 +1070,16 @@ static int handle_essa(struct kvm_vcpu *vcpu) > /* Retry the ESSA instruction */ > kvm_s390_retry_instr(vcpu); > } else { > - /* Account for the possible extra cbrl entry */ > - i = do_essa(vcpu, orc); > + int srcu_idx; > + > + down_read(&vcpu->kvm->mm->mmap_sem); > + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); > + i = __do_essa(vcpu, orc); > + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); > + up_read(&vcpu->kvm->mm->mmap_sem); > if (i < 0) > return i; > + /* Account for the possible extra cbrl entry */ > entries += i; > } > vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ >
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 819838c..2ca73b0 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -763,12 +763,6 @@ struct kvm_s390_vsie { struct page *pages[KVM_MAX_VCPUS]; }; -struct kvm_s390_migration_state { - unsigned long bitmap_size; /* in bits (number of guest pages) */ - atomic64_t dirty_pages; /* number of dirty pages */ - unsigned long *pgste_bitmap; -}; - struct kvm_arch{ void *sca; int use_esca; @@ -796,7 +790,8 @@ struct kvm_arch{ struct kvm_s390_vsie vsie; u8 epdx; u64 epoch; - struct kvm_s390_migration_state *migration_state; + atomic_t migration_mode; + atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); struct kvm_s390_gisa *gisa; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5a69334..85448a2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -770,53 +770,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) */ static int kvm_s390_vm_start_migration(struct kvm *kvm) { - struct kvm_s390_migration_state *mgs; - struct kvm_memory_slot *ms; - /* should be the only one */ struct kvm_memslots *slots; - unsigned long ram_pages; + struct kvm_memory_slot *sl; + unsigned long ram_pages = 0; int slotnr; /* migration mode already enabled */ - if (kvm->arch.migration_state) + if (atomic_read(&kvm->arch.migration_mode)) return 0; - slots = kvm_memslots(kvm); if (!slots || !slots->used_slots) return -EINVAL; - mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); - if (!mgs) - return -ENOMEM; - kvm->arch.migration_state = mgs; - - if (kvm->arch.use_cmma) { + if (!kvm->arch.use_cmma) { + atomic_set(&kvm->arch.migration_mode, 1); + return 0; + } + /* mark all the pages in active slots as dirty */ + for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { + sl = slots->memslots + slotnr; /* - * Get the last slot. 
They should be sorted by base_gfn, so the - * last slot is also the one at the end of the address space. - * We have verified above that at least one slot is present. + * The second half of the bitmap is only used on x86, + * and would be wasted otherwise, so we put it to good + * use here to keep track of the state of the storage + * attributes. */ - ms = slots->memslots + slots->used_slots - 1; - /* round up so we only use full longs */ - ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); - /* allocate enough bytes to store all the bits */ - mgs->pgste_bitmap = vmalloc(ram_pages / 8); - if (!mgs->pgste_bitmap) { - kfree(mgs); - kvm->arch.migration_state = NULL; - return -ENOMEM; - } - - mgs->bitmap_size = ram_pages; - atomic64_set(&mgs->dirty_pages, ram_pages); - /* mark all the pages in active slots as dirty */ - for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { - ms = slots->memslots + slotnr; - bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); - } - - kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); + memset(cmma_bitmap(sl), 0xff, kvm_dirty_bitmap_bytes(sl)); + ram_pages += sl->npages; } + atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); + atomic_set(&kvm->arch.migration_mode, 1); + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); return 0; } @@ -826,19 +810,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) */ static int kvm_s390_vm_stop_migration(struct kvm *kvm) { - struct kvm_s390_migration_state *mgs; - /* migration mode already disabled */ - if (!kvm->arch.migration_state) + if (!atomic_read(&kvm->arch.migration_mode)) return 0; - mgs = kvm->arch.migration_state; - kvm->arch.migration_state = NULL; - - if (kvm->arch.use_cmma) { + atomic_set(&kvm->arch.migration_mode, 0); + if (kvm->arch.use_cmma) kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); - vfree(mgs->pgste_bitmap); - } - kfree(mgs); return 0; } @@ -868,7 +845,7 @@ static int kvm_s390_vm_set_migration(struct kvm 
*kvm, static int kvm_s390_vm_get_migration(struct kvm *kvm, struct kvm_device_attr *attr) { - u64 mig = (kvm->arch.migration_state != NULL); + u64 mig = atomic_read(&kvm->arch.migration_mode); if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) return -ENXIO; @@ -1543,6 +1520,108 @@ static int gfn_to_memslot_approx(struct kvm *kvm, gfn_t gfn) return start; } +static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, + u8 *res, unsigned long bufsize) +{ + unsigned long pgstev, cur_gfn, hva; + + cur_gfn = args->start_gfn; + args->count = 0; + while (args->count < bufsize) { + hva = gfn_to_hva(kvm, cur_gfn); + /* + * We return an error if the first value was invalid, but we + * return successfully if at least one value was copied. + */ + if (kvm_is_error_hva(hva)) + return args->count ? 0 : -EFAULT; + if (get_pgste(kvm->mm, hva, &pgstev) < 0) + pgstev = 0; + res[args->count++] = (pgstev >> 24) & 0x43; + cur_gfn++; + } + + return 0; +} + +static unsigned long kvm_s390_next_dirty_cmma(struct kvm *kvm, + unsigned long cur) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *sl; + unsigned long ofs; + int slotidx; + + slotidx = gfn_to_memslot_approx(kvm, cur); + sl = slots->memslots + slotidx; + + if (sl->base_gfn + sl->npages <= cur) { + slotidx--; + /* If we are above the highest slot, wrap around */ + if (slotidx < 0) + slotidx = slots->used_slots - 1; + + sl = slots->memslots + slotidx; + cur = sl->base_gfn; + } + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, cur - sl->base_gfn); + while ((slotidx > 0) && (ofs >= sl->npages)) { + slotidx--; + sl = slots->memslots + slotidx; + ofs = find_next_bit(cmma_bitmap(sl), sl->npages, 0); + } + return sl->base_gfn + ofs; +} + +static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, + u8 *res, unsigned long bufsize) +{ + unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *sl; + + 
cur_gfn = kvm_s390_next_dirty_cmma(kvm, args->start_gfn); + sl = gfn_to_memslot(kvm, cur_gfn); + args->count = 0; + args->start_gfn = cur_gfn; + if (!sl) + return 0; + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); + mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; + + while (args->count < bufsize) { + hva = gfn_to_hva(kvm, cur_gfn); + if (kvm_is_error_hva(hva)) + break; + /* decrement only if we actually flipped the bit to 0 */ + if (test_and_clear_bit(cur_gfn - sl->base_gfn, cmma_bitmap(sl))) + atomic64_dec(&kvm->arch.cmma_dirty_pages); + if (get_pgste(kvm->mm, hva, &pgstev) < 0) + pgstev = 0; + /* save the value */ + res[args->count++] = (pgstev >> 24) & 0x43; + /* + * if the next bit is too far away, stop. + * if we reached the previous "next", find the next one + */ + if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) + break; + if (cur_gfn == next_gfn) + next_gfn = kvm_s390_next_dirty_cmma(kvm, cur_gfn + 1); + /* reached the end of memory or of the buffer, stop */ + if ((next_gfn >= mem_end) || + (next_gfn - args->start_gfn >= bufsize)) + break; + cur_gfn++; + if (cur_gfn - sl->base_gfn >= sl->npages) { + sl = gfn_to_memslot(kvm, cur_gfn); + if (!sl) + break; + } + } + return 0; +} + /* * This function searches for the next page with dirty CMMA attributes, and * saves the attributes in the buffer up to either the end of the buffer or @@ -1554,97 +1633,53 @@ static int gfn_to_memslot_approx(struct kvm *kvm, gfn_t gfn) static int kvm_s390_get_cmma_bits(struct kvm *kvm, struct kvm_s390_cmma_log *args) { - struct kvm_s390_migration_state *s = kvm->arch.migration_state; - unsigned long bufsize, hva, pgstev, i, next, cur; - int srcu_idx, peek, r = 0, rr; - u8 *res; + unsigned long bufsize; + int srcu_idx, peek, migr, ret; + u8 *values; - cur = args->start_gfn; - i = next = pgstev = 0; + migr = atomic_read(&kvm->arch.migration_mode); if (unlikely(!kvm->arch.use_cmma)) return -ENXIO; /* Invalid/unsupported flags were specified */ - if 
(args->flags & ~KVM_S390_CMMA_PEEK) + if (unlikely(args->flags & ~KVM_S390_CMMA_PEEK)) return -EINVAL; /* Migration mode query, and we are not doing a migration */ peek = !!(args->flags & KVM_S390_CMMA_PEEK); - if (!peek && !s) + if (unlikely(!peek && !migr)) return -EINVAL; /* CMMA is disabled or was not used, or the buffer has length zero */ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); - if (!bufsize || !kvm->mm->context.use_cmma) { + if (unlikely(!bufsize || !kvm->mm->context.use_cmma)) { memset(args, 0, sizeof(*args)); return 0; } - - if (!peek) { - /* We are not peeking, and there are no dirty pages */ - if (!atomic64_read(&s->dirty_pages)) { - memset(args, 0, sizeof(*args)); - return 0; - } - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, - args->start_gfn); - if (cur >= s->bitmap_size) /* nothing found, loop back */ - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); - if (cur >= s->bitmap_size) { /* again! (very unlikely) */ - memset(args, 0, sizeof(*args)); - return 0; - } - next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); + /* We are not peeking, and there are no dirty pages */ + if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { + memset(args, 0, sizeof(*args)); + return 0; } - res = vmalloc(bufsize); - if (!res) + values = vmalloc(bufsize); + if (!values) return -ENOMEM; - args->start_gfn = cur; - down_read(&kvm->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - while (i < bufsize) { - hva = gfn_to_hva(kvm, cur); - if (kvm_is_error_hva(hva)) { - r = -EFAULT; - break; - } - /* decrement only if we actually flipped the bit to 0 */ - if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) - atomic64_dec(&s->dirty_pages); - r = get_pgste(kvm->mm, hva, &pgstev); - if (r < 0) - pgstev = 0; - /* save the value */ - res[i++] = (pgstev >> 24) & 0x43; - /* - * if the next bit is too far away, stop. 
- * if we reached the previous "next", find the next one - */ - if (!peek) { - if (next > cur + KVM_S390_MAX_BIT_DISTANCE) - break; - if (cur == next) - next = find_next_bit(s->pgste_bitmap, - s->bitmap_size, cur + 1); - /* reached the end of the bitmap or of the buffer, stop */ - if ((next >= s->bitmap_size) || - (next >= args->start_gfn + bufsize)) - break; - } - cur++; - } + if (peek) + ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); + else + ret = kvm_s390_get_cmma(kvm, args, values, bufsize); srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(&kvm->mm->mmap_sem); - args->count = i; - args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; - rr = copy_to_user((void __user *)args->values, res, args->count); - if (rr) - r = -EFAULT; + args->remaining = migr ? atomic64_read(&kvm->arch.cmma_dirty_pages) : 0; - vfree(res); - return r; + if (copy_to_user((void __user *)args->values, values, args->count)) + ret = -EFAULT; + + vfree(values); + return ret; } /* @@ -2088,10 +2123,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - if (kvm->arch.migration_state) { - vfree(kvm->arch.migration_state->pgste_bitmap); - kfree(kvm->arch.migration_state); - } KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 572496c..7ae4893 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -954,9 +954,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return 0; } -static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) +/* + * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu) + */ +static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) { - struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state; int r1, r2, nappended, entries; unsigned long gfn, hva, res, pgstev, ptev; unsigned long *cbrlo; @@ -1007,9 +1009,11 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int 
orc) } if (orc) { - /* increment only if we are really flipping the bit to 1 */ - if (!test_and_set_bit(gfn, ms->pgste_bitmap)) - atomic64_inc(&ms->dirty_pages); + struct kvm_memory_slot *s = gfn_to_memslot(vcpu->kvm, gfn); + + /* Increment only if we are really flipping the bit */ + if (s && !test_and_set_bit(gfn - s->base_gfn, cmma_bitmap(s))) + atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages); } return nappended; @@ -1038,7 +1042,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) : ESSA_SET_STABLE_IF_RESIDENT)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (likely(!vcpu->kvm->arch.migration_state)) { + if (likely(!atomic_read(&vcpu->kvm->arch.migration_mode))) { /* * CMMA is enabled in the KVM settings, but is disabled in * the SIE block and in the mm_context, and we are not doing @@ -1066,10 +1070,16 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* Retry the ESSA instruction */ kvm_s390_retry_instr(vcpu); } else { - /* Account for the possible extra cbrl entry */ - i = do_essa(vcpu, orc); + int srcu_idx; + + down_read(&vcpu->kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + i = __do_essa(vcpu, orc); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + up_read(&vcpu->kvm->mm->mmap_sem); if (i < 0) return i; + /* Account for the possible extra cbrl entry */ entries += i; } vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
This is a fix for several issues that were found in the original code for storage attributes migration. Now no bitmap is allocated to keep track of dirty storage attributes; the extra bits of the per-memslot bitmap that are always present anyway are now used for this purpose. The code has also been refactored a little to improve readability. Fixes: 190df4a212a ("KVM: s390: CMMA tracking, ESSA emulation, migration mode") Fixes: 4036e3874a1 ("KVM: s390: ioctls to get and set guest storage attributes") Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> --- arch/s390/include/asm/kvm_host.h | 9 +- arch/s390/kvm/kvm-s390.c | 265 ++++++++++++++++++++++----------------- arch/s390/kvm/priv.c | 26 ++-- 3 files changed, 168 insertions(+), 132 deletions(-)