[RFCv2,4/6] pseries: Implement HPT resizing

Message ID	1454049744-19131-5-git-send-email-david@gibson.dropbear.id.au (mailing list archive)
State	New, archived
Headers	show Return-Path: <qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org> From: David Gibson <david@gibson.dropbear.id.au> To: benh@kernel.crashing.org, paulus@samba.org, aik@ozlabs.ru Date: Fri, 29 Jan 2016 17:42:22 +1100 Message-Id: <1454049744-19131-5-git-send-email-david@gibson.dropbear.id.au> In-Reply-To: <1454049744-19131-1-git-send-email-david@gibson.dropbear.id.au> References: <1454049744-19131-1-git-send-email-david@gibson.dropbear.id.au> Cc: lvivier@redhat.com, thuth@redhat.com, mdroth@linux.vnet.ibm.com, qemu-devel@nongnu.org, agraf@suse.de, qemu-ppc@nongnu.org, David Gibson <david@gibson.dropbear.id.au> Subject: [Qemu-devel] [RFCv2 4/6] pseries: Implement HPT resizing Precedence: list Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b7bd1c1..ddd8b99 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -90,8 +90,6 @@ #define PHANDLE_XICP 0x00001111 -#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs, Error **errp) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f285d34..2345196 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1,4 +1,5 @@ #include "sysemu/sysemu.h" +#include "qemu/error-report.h" #include "cpu.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" @@ -331,20 +332,290 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_SUCCESS; } +struct sPAPRPendingHPT { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(sPAPRPendingHPT *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + sPAPRPendingHPT *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt != pending) { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + goto out; + } + + pending->complete = true; + +out: + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(sPAPRMachineState *spapr) +{ + sPAPRPendingHPT *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if 
(!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong flags = args[0]; - target_ulong shift = args[1]; + int shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { return H_AUTHORITY; } trace_spapr_h_resize_hpt_prepare(flags, shift); - return H_HARDWARE; + + if (flags != 0) { + return H_PARAMETER; + } + + if (shift && ((shift < 18) || (shift > 46))) { + return H_PARAMETER; + } + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + /* We only allow the guest to allocate an HPT one order above what + * we'd normally give them (to stop a small guest claiming a huge + * chunk of resources in the HPT) */ + if (shift > (spapr_hpt_shift_for_ramsize(get_current_ram_size()) + 1)) { + return H_RESOURCE; + } + + pending = g_malloc0(sizeof(*pending)); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void 
*htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HASH_PTE_SIZE_64/2, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, uint64_t token, + void *old, uint64_t oldsize, + void *new, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_load_hpte0(cpu, token, slot); + target_ulong pte1; + uint64_t avpn; + unsigned shift, spshift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_load_hpte1(cpu, token, slot); + + shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1, &spshift); + assert(shift); /* H_ENTER should never have allowed a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << shift; + } + + hash = vsid ^ (offset >> shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (shift < 23) { + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + 
assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new, new_pteg, slot); + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old, uint64_t oldsize, + void *new, uint64_t newsize) +{ + CPUPPCState *env = &cpu->env; + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + assert(env->external_htab == old); + + for (pteg = 0; pteg < n_ptegs; pteg++) { + uint64_t token = ppc_hash64_start_access(cpu, pteg * HPTES_PER_GROUP); + + if (!token) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, token, old, oldsize, new, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_stop_access(token); + return rc; + } + } + ppc_hash64_stop_access(token); + } + + return H_SUCCESS; +} + +static void pivot_hpt(void *arg) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = arg; + CPUPPCState *env = &POWERPC_CPU(cs)->env; + + cpu_synchronize_state(cs); + env->external_htab = spapr->htab; + env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; + env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | + (spapr->htab_shift - 18); } static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, @@ -354,13 +625,52 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, { target_ulong flags = args[0]; target_ulong shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; + int rc; + size_t newsize; if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { return H_AUTHORITY; } trace_spapr_h_resize_hpt_commit(flags, shift); - return H_HARDWARE; + + if 
(flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + CPUState *cs; + + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + CPU_FOREACH(cs) { + run_on_cpu(cs, pivot_hpt, cs); + } + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; } static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr, diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 6136d45..64fd76e 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -12,6 +12,7 @@ struct sPAPRPHBState; struct sPAPRNVRAM; typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; typedef struct sPAPREventLogEntry sPAPREventLogEntry; +typedef struct sPAPRPendingHPT sPAPRPendingHPT; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL #define SPAPR_ENTRY_POINT 0x100 @@ -61,6 +62,8 @@ struct sPAPRMachineState { sPAPRResizeHPT resize_hpt; void *htab; uint32_t htab_shift; + sPAPRPendingHPT *pending_hpt; /* in-progress resize */ + hwaddr rma_size; int vrma_adjust; hwaddr fdt_addr, rtas_addr; @@ -640,4 +643,6 @@ int spapr_rng_populate_dt(void *fdt); */ #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 +#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) + #endif /* !defined (__HW_SPAPR_H__) */ diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h index ab0f86b..1b598a5 100644 --- a/target-ppc/mmu-hash64.h +++ b/target-ppc/mmu-hash64.h @@ -59,11 +59,15 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu, #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) +#define 
HPTE64_V_SSIZE SLB_VSID_B +#define HPTE64_V_SSIZE_256M SLB_VSID_B_256M +#define HPTE64_V_SSIZE_1T SLB_VSID_B_1T #define HPTE64_V_SSIZE_SHIFT 62 #define HPTE64_V_AVPN_SHIFT 7 #define HPTE64_V_AVPN 0x3fffffffffffff80ULL #define HPTE64_V_AVPN_VAL(x) (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT) #define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff80ULL)) +#define HPTE64_V_BOLTED 0x0000000000000010ULL #define HPTE64_V_LARGE 0x0000000000000004ULL #define HPTE64_V_SECONDARY 0x0000000000000002ULL #define HPTE64_V_VALID 0x0000000000000001ULL

[RFCv2,4/6] pseries: Implement HPT resizing

Commit Message

Patch