From patchwork Thu Jan 10 00:20:40 2013
X-Patchwork-Submitter: Nicolas Pitre
X-Patchwork-Id: 1957901
From: Nicolas Pitre
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH 05/16] ARM: bL_head: vlock-based first man election
Date: Wed, 09 Jan 2013 19:20:40 -0500
Message-id: <1357777251-13541-6-git-send-email-nicolas.pitre@linaro.org>
In-reply-to: <1357777251-13541-1-git-send-email-nicolas.pitre@linaro.org>
References: <1357777251-13541-1-git-send-email-nicolas.pitre@linaro.org>

From: Dave Martin

Instead of requiring the first man to be elected in advance (which
can be suboptimal in some situations), this patch uses a per-cluster
mutex to co-ordinate selection of the first man.

This should also make it more feasible to reuse this code path for
asynchronous cluster resume (as in CPUidle scenarios).
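For illustration, the resulting election flow can be sketched in C
roughly as follows. This is a sketch only: the struct layout, the
CLUSTER_* value and do_cluster_setup() are hypothetical stand-ins,
and the authoritative implementation is the assembly added to
bL_head.S below (vlock's real interface lives in vlock.h):

	#define CLUSTER_UP 1		/* illustrative value only */

	struct vlock;			/* opaque; real layout is in vlock.h */
	extern int  vlock_trylock(struct vlock *lock, unsigned int cpu);
					/* returns 0 when the lock is won */
	extern void vlock_unlock(struct vlock *lock);

	struct bL_cluster_sync {	/* stand-in for the real sync struct */
		volatile unsigned char cluster;	/* CLUSTER_* state byte */
	};

	extern void do_cluster_setup(struct bL_cluster_sync *c);  /* hypothetical */

	void bl_first_man_election(struct vlock *lock, unsigned int cpu,
				   struct bL_cluster_sync *c)
	{
		if (vlock_trylock(lock, cpu) != 0) {
			/* Lost the election: wait (WFE-based in the real
			 * code) for the first man to set the cluster up. */
			while (c->cluster != CLUSTER_UP)
				;
			return;
		}

		if (c->cluster != CLUSTER_UP) {
			/* Won with the cluster still down: wait out any
			 * teardown, then bring the cluster up. */
			do_cluster_setup(c);
			c->cluster = CLUSTER_UP;
		}
		vlock_unlock(lock);
	}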
Signed-off-by: Dave Martin
Signed-off-by: Nicolas Pitre
---
 arch/arm/common/Makefile  |  2 +-
 arch/arm/common/bL_head.S | 91 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 50880c494f..894c2ddf9b 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -15,4 +15,4 @@ obj-$(CONFIG_PCI_HOST_ITE8152)	+= it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_FIQ_GLUE)		+= fiq_glue.o fiq_glue_setup.o
 obj-$(CONFIG_FIQ_DEBUGGER)	+= fiq_debugger.o
-obj-$(CONFIG_BIG_LITTLE)	+= bL_head.o bL_entry.o
+obj-$(CONFIG_BIG_LITTLE)	+= bL_head.o bL_entry.o vlock.o
diff --git a/arch/arm/common/bL_head.S b/arch/arm/common/bL_head.S
index f7a64ac127..e70dd432e8 100644
--- a/arch/arm/common/bL_head.S
+++ b/arch/arm/common/bL_head.S
@@ -16,6 +16,8 @@
 #include <linux/linkage.h>
 #include <asm/bL_entry.h>
 
+#include "vlock.h"
+
 .if BL_SYNC_CLUSTER_CPUS
 .error "cpus must be the first member of struct bL_cluster_sync_struct"
 .endif
@@ -64,10 +66,11 @@ ENTRY(bL_entry_point)
 	 * position independent way.
 	 */
 	adr	r5, 3f
-	ldmia	r5, {r6, r7, r8}
+	ldmia	r5, {r6, r7, r8, r11}
 	add	r6, r5, r6		@ r6 = bL_entry_vectors
 	ldr	r7, [r5, r7]		@ r7 = bL_power_up_setup_phys
 	add	r8, r5, r8		@ r8 = bL_sync
+	add	r11, r5, r11		@ r11 = first_man_locks
 
 	mov	r0, #BL_SYNC_CLUSTER_SIZE
 	mla	r8, r0, r10, r8		@ r8 = bL_sync cluster base
@@ -83,11 +86,25 @@ ENTRY(bL_entry_point)
 	@ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
 	@ state, because there is at least one active CPU (this CPU).
 
-	@ Check if the cluster has been set up yet:
+	mov	r0, #.Lvlock_size
+	mla	r11, r0, r10, r11	@ r11 = cluster first man lock
+	mov	r0, r11
+	mov	r1, r9			@ cpu
+	bl	vlock_trylock
+
+	cmp	r0, #0			@ failed to get the lock?
+	bne	cluster_setup_wait	@ wait for cluster setup if so
+
 	ldrb	r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
-	cmp	r0, #CLUSTER_UP
-	beq	cluster_already_up
+	cmp	r0, #CLUSTER_UP		@ cluster already up?
+	bne	cluster_setup		@ if not, set up the cluster
+
+	@ Otherwise, release the first man lock and skip setup:
+	mov	r0, r11
+	bl	vlock_unlock
+	b	cluster_setup_complete
 
+cluster_setup:
 	@ Signal that the cluster is being brought up:
 	mov	r0, #INBOUND_COMING_UP
 	strb	r0, [r8, #BL_SYNC_CLUSTER_INBOUND]
@@ -102,26 +119,47 @@ ENTRY(bL_entry_point)
 cluster_teardown_wait:
 	ldrb	r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
 	cmp	r0, #CLUSTER_GOING_DOWN
-	wfeeq
-	beq	cluster_teardown_wait
+	bne	first_man_setup
+	wfe
+	b	cluster_teardown_wait
+
+first_man_setup:
+	@ If the outbound gave up before teardown started, skip cluster setup:
 
-	@ power_up_setup is responsible for setting up the cluster:
+	cmp	r0, #CLUSTER_UP
+	beq	cluster_setup_leave
+
+	@ power_up_setup is now responsible for setting up the cluster:
 	cmp	r7, #0
 	mov	r0, #1		@ second (cluster) affinity level
 	blxne	r7		@ Call power_up_setup if defined
 
+	dsb
+	mov	r0, #CLUSTER_UP
+	strb	r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+
+cluster_setup_leave:
 	@ Leave the cluster setup critical section:
-	dsb
 	mov	r0, #INBOUND_NOT_COMING_UP
 	strb	r0, [r8, #BL_SYNC_CLUSTER_INBOUND]
 	dsb
 	sev
 
-cluster_already_up:
+	mov	r0, r11
+	bl	vlock_unlock
+	b	cluster_setup_complete
+
+	@ In the contended case, non-first men wait here for cluster setup
+	@ to complete:
+cluster_setup_wait:
+	ldrb	r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+	cmp	r0, #CLUSTER_UP
+	wfene
+	bne	cluster_setup_wait
+
+cluster_setup_complete:
 	@ If a platform-specific CPU setup hook is needed, it is
 	@ called from here.
@@ -150,11 +188,40 @@ bL_entry_gated:
 
 3:	.word	bL_entry_vectors - .
 	.word	bL_power_up_setup_phys - 3b
 	.word	bL_sync - 3b
+	.word	first_man_locks - 3b
 
 ENDPROC(bL_entry_point)
 
 	.bss
-	.align	5
+
+	@ Magic to size and align the first-man vlock structures
+	@ so that each does not cross a 1KB boundary.
+	@ We also must ensure that none of these shares a cacheline with
+	@ any data which might be accessed through the cache.
+
+	.equ	.Log2, 0
+	.rept	11
+	.if	(1 << .Log2) < VLOCK_SIZE
+	.equ	.Log2, .Log2 + 1
+	.endif
+	.endr
+	.if	.Log2 > 10
+	.error "vlock struct is too large for guaranteed barrierless access ordering"
+	.endif
+	.equ	.Lvlock_size, 1 << .Log2
+
+	@ The presence of two .align directives here is deliberate: we must
+	@ align to whichever of the two boundaries is larger:
+	.align	__CACHE_WRITEBACK_ORDER
+	.align	.Log2
+first_man_locks:
+	.rept	BL_NR_CLUSTERS
+	.space	.Lvlock_size
+	.endr
+	.size	first_man_locks, . - first_man_locks
+	.type	first_man_locks, #object
+
+	.align	__CACHE_WRITEBACK_ORDER
 	.type	bL_entry_vectors, #object
 ENTRY(bL_entry_vectors)
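The polling loops added above (cluster_teardown_wait and
cluster_setup_wait) both follow the usual WFE/SEV idiom: a writer
updates the cluster state byte and then issues DSB; SEV, while a
waiter re-checks the byte and sleeps in WFE between checks. A
minimal C sketch of the two waits, assuming wfe() wraps the WFE
instruction and reusing the illustrative bL_cluster_sync type and
CLUSTER_UP value from the sketch in the commit message:

	#define CLUSTER_GOING_DOWN 2	/* illustrative value only */

	extern void wfe(void);		/* assumed WFE wrapper */

	/* First man: wait for any outbound teardown to complete
	 * (cluster_teardown_wait in the assembly above). */
	static void wait_for_teardown(struct bL_cluster_sync *c)
	{
		while (c->cluster == CLUSTER_GOING_DOWN)
			wfe();		/* woken by the outbound's DSB+SEV */

		/* If the outbound gave up before teardown started, the
		 * byte is back to CLUSTER_UP and first_man_setup skips
		 * straight to cluster_setup_leave. */
	}

	/* Losing CPUs: wait for the first man to publish CLUSTER_UP
	 * (cluster_setup_wait in the assembly above). */
	static void wait_for_cluster_up(struct bL_cluster_sync *c)
	{
		while (c->cluster != CLUSTER_UP)
			wfe();		/* woken by the first man's DSB+SEV */
	}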
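The .Log2 assembler loop in the .bss section above simply rounds
VLOCK_SIZE up to the next power of two and rejects anything over 1KB
(order 10), so that a suitably aligned lock can never straddle a 1KB
boundary. For illustration, the same computation in C (hypothetical
function; VLOCK_SIZE really comes from vlock.h):

	/* Returns the rounded slot size, or 0 for the .error case. */
	static unsigned int vlock_slot_size(unsigned int vlock_size)
	{
		unsigned int order = 0;			/* .Log2 */

		while ((1u << order) < vlock_size)	/* round up to a
							   power of two */
			order++;

		if (order > 10)		/* "vlock struct is too large for
					    guaranteed barrierless access
					    ordering" */
			return 0;

		return 1u << order;			/* .Lvlock_size */
	}

Each lock then occupies a power-of-two slot, and the two consecutive
.align directives align first_man_locks to whichever is larger of
the slot size and the writeback cacheline size, so no lock crosses a
1KB boundary or shares a cacheline with other cached data.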