diff mbox

[v2,3/3] ath10k: enable pci soc powersaving

Message ID 1431941898-13591-4-git-send-email-michal.kazior@tieto.com (mailing list archive)
State Not Applicable
Delegated to: Kalle Valo
Headers show

Commit Message

Michal Kazior May 18, 2015, 9:38 a.m. UTC
By using SOC_WAKE register it is possible to bring
down power consumption of QCA61X4 from 36mA to
16mA when associated and idle.

Currently the sleep threshold/grace period is at a
very conservative value of 60ms.

Contrary to QCA61X4 the QCA988X firmware doesn't
have Rx/beacon filtering available for client mode
and SWBA events are used for beaconing in AP/IBSS
so the SoC needs to be woken up at least every
~100ms in most cases. This means that QCA988X
is at a disadvantage and the power consumption
won't drop as much as for QCA61X4.

Due to putting irq-safe spinlocks on every MMIO
read/write it is expected this can cause a little
performance regression on some systems. I haven't
done any thorough measurements but some of my
tests don't show any extreme degradation.

The patch removes some explicit pci_wake calls
that were added in 320e14b8db51aa ("ath10k: fix
some pci wake/sleep issues"). This is safe because
all MMIO accesses are now wrapped and the device
is woken up automatically if necessary.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
---

Notes:
    v2:
     * remove unnecessary hif.h include [Peter]

 drivers/net/wireless/ath/ath10k/debug.h |   1 +
 drivers/net/wireless/ath/ath10k/pci.c   | 304 +++++++++++++++++++++++---------
 drivers/net/wireless/ath/ath10k/pci.h   |  91 +++++-----
 3 files changed, 261 insertions(+), 135 deletions(-)
diff mbox

Patch

diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a12b8323f9f1..53bd6a19eab6 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -36,6 +36,7 @@  enum ath10k_debug_mask {
 	ATH10K_DBG_REGULATORY	= 0x00000800,
 	ATH10K_DBG_TESTMODE	= 0x00001000,
 	ATH10K_DBG_WMI_PRINT	= 0x00002000,
+	ATH10K_DBG_PCI_PS	= 0x00004000,
 	ATH10K_DBG_ANY		= 0xffffffff,
 };
 
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 8be07c653b2d..17a060e8efa2 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -330,6 +330,205 @@  static const struct service_to_pipe target_service_to_ce_map_wlan[] = {
 	},
 };
 
+static bool ath10k_pci_is_awake(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	u32 val = ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+			   RTC_STATE_ADDRESS);
+
+	return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
+}
+
+static void __ath10k_pci_wake(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+	lockdep_assert_held(&ar_pci->ps_lock);
+
+	ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake reg refcount %lu awake %d\n",
+		   ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+	iowrite32(PCIE_SOC_WAKE_V_MASK,
+		  ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+		  PCIE_SOC_WAKE_ADDRESS);
+}
+
+static void __ath10k_pci_sleep(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+	lockdep_assert_held(&ar_pci->ps_lock);
+
+	ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep reg refcount %lu awake %d\n",
+		   ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+	iowrite32(PCIE_SOC_WAKE_RESET,
+		  ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+		  PCIE_SOC_WAKE_ADDRESS);
+	ar_pci->ps_awake = false;
+}
+
+static int ath10k_pci_wake_wait(struct ath10k *ar)
+{
+	int tot_delay = 0;
+	int curr_delay = 5;
+
+	while (tot_delay < PCIE_WAKE_TIMEOUT) {
+		if (ath10k_pci_is_awake(ar))
+			return 0;
+
+		udelay(curr_delay);
+		tot_delay += curr_delay;
+
+		if (curr_delay < 50)
+			curr_delay += 5;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int ath10k_pci_wake(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+	ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake refcount %lu awake %d\n",
+		   ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+	/* This function can be called very frequently. To avoid excessive
+	 * CPU stalls for MMIO reads use a cache var to hold the device state.
+	 */
+	if (!ar_pci->ps_awake) {
+		__ath10k_pci_wake(ar);
+
+		ret = ath10k_pci_wake_wait(ar);
+		if (ret == 0)
+			ar_pci->ps_awake = true;
+	}
+
+	if (ret == 0) {
+		ar_pci->ps_wake_refcount++;
+		WARN_ON(ar_pci->ps_wake_refcount == 0);
+	}
+
+	spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+
+	return ret;
+}
+
+static void ath10k_pci_sleep(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+	ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep refcount %lu awake %d\n",
+		   ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+	if (WARN_ON(ar_pci->ps_wake_refcount == 0))
+		goto skip;
+
+	ar_pci->ps_wake_refcount--;
+
+	mod_timer(&ar_pci->ps_timer, jiffies +
+		  msecs_to_jiffies(ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC));
+
+skip:
+	spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_ps_timer(unsigned long ptr)
+{
+	struct ath10k *ar = (void *)ptr;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+	ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps timer refcount %lu awake %d\n",
+		   ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+	if (ar_pci->ps_wake_refcount > 0)
+		goto skip;
+
+	__ath10k_pci_sleep(ar);
+
+skip:
+	spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_sleep_sync(struct ath10k *ar)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	unsigned long flags;
+
+	del_timer_sync(&ar_pci->ps_timer);
+
+	spin_lock_irqsave(&ar_pci->ps_lock, flags);
+	WARN_ON(ar_pci->ps_wake_refcount > 0);
+	__ath10k_pci_sleep(ar);
+	spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	int ret;
+
+	ret = ath10k_pci_wake(ar);
+	if (ret) {
+		ath10k_warn(ar, "failed to wake target for write32 of 0x%08x at 0x%08x: %d\n",
+			    value, offset, ret);
+		return;
+	}
+
+	iowrite32(value, ar_pci->mem + offset);
+	ath10k_pci_sleep(ar);
+}
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	u32 val;
+	int ret;
+
+	ret = ath10k_pci_wake(ar);
+	if (ret) {
+		ath10k_warn(ar, "failed to wake target for read32 at 0x%08x: %d\n",
+			    offset, ret);
+		return 0xffffffff;
+	}
+
+	val = ioread32(ar_pci->mem + offset);
+	ath10k_pci_sleep(ar);
+
+	return val;
+}
+
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
+{
+	return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+	ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
+}
+
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
+{
+	return ath10k_pci_read32(ar, PCIE_LOCAL_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+	ath10k_pci_write32(ar, PCIE_LOCAL_BASE_ADDRESS + addr, val);
+}
+
 static bool ath10k_pci_irq_pending(struct ath10k *ar)
 {
 	u32 cause;
@@ -793,60 +992,6 @@  static int ath10k_pci_diag_write32(struct ath10k *ar, u32 address, u32 value)
 	return ath10k_pci_diag_write_mem(ar, address, &val, sizeof(val));
 }
 
-static bool ath10k_pci_is_awake(struct ath10k *ar)
-{
-	u32 val = ath10k_pci_reg_read32(ar, RTC_STATE_ADDRESS);
-
-	return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
-}
-
-static int ath10k_pci_wake_wait(struct ath10k *ar)
-{
-	int tot_delay = 0;
-	int curr_delay = 5;
-
-	while (tot_delay < PCIE_WAKE_TIMEOUT) {
-		if (ath10k_pci_is_awake(ar))
-			return 0;
-
-		udelay(curr_delay);
-		tot_delay += curr_delay;
-
-		if (curr_delay < 50)
-			curr_delay += 5;
-	}
-
-	return -ETIMEDOUT;
-}
-
-/* The rule is host is forbidden from accessing device registers while it's
- * asleep. Currently ath10k_pci_wake() and ath10k_pci_sleep() calls aren't
- * balanced and the device is kept awake all the time. This is intended for a
- * simpler solution for the following problems:
- *
- *   * device can enter sleep during s2ram without the host knowing,
- *
- *   * irq handlers access registers which is a problem if other device asserts
- *     a shared irq line when ath10k is between hif_power_down() and
- *     hif_power_up().
- *
- * FIXME: If power consumption is a concern (and there are *real* gains) then a
- * refcounted wake/sleep needs to be implemented.
- */
-
-static int ath10k_pci_wake(struct ath10k *ar)
-{
-	ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
-			       PCIE_SOC_WAKE_V_MASK);
-	return ath10k_pci_wake_wait(ar);
-}
-
-static void ath10k_pci_sleep(struct ath10k *ar)
-{
-	ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
-			       PCIE_SOC_WAKE_RESET);
-}
-
 /* Called by lower (CE) layer when a send to Target completes. */
 static void ath10k_pci_ce_send_done(struct ath10k_ce_pipe *ce_state)
 {
@@ -1348,6 +1493,9 @@  static void ath10k_pci_flush(struct ath10k *ar)
 
 static void ath10k_pci_hif_stop(struct ath10k *ar)
 {
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	unsigned long flags;
+
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n");
 
 	/* Most likely the device has HTT Rx ring configured. The only way to
@@ -1366,6 +1514,10 @@  static void ath10k_pci_hif_stop(struct ath10k *ar)
 	ath10k_pci_irq_disable(ar);
 	ath10k_pci_irq_sync(ar);
 	ath10k_pci_flush(ar);
+
+	spin_lock_irqsave(&ar_pci->ps_lock, flags);
+	WARN_ON(ar_pci->ps_wake_refcount > 0);
+	spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
 }
 
 static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar,
@@ -1990,12 +2142,6 @@  static int ath10k_pci_hif_power_up(struct ath10k *ar)
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");
 
-	ret = ath10k_pci_wake(ar);
-	if (ret) {
-		ath10k_err(ar, "failed to wake up target: %d\n", ret);
-		return ret;
-	}
-
 	pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
 				  &ar_pci->link_ctl);
 	pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
@@ -2047,7 +2193,6 @@  err_ce:
 	ath10k_pci_ce_deinit(ar);
 
 err_sleep:
-	ath10k_pci_sleep(ar);
 	return ret;
 }
 
@@ -2064,7 +2209,12 @@  static void ath10k_pci_hif_power_down(struct ath10k *ar)
 
 static int ath10k_pci_hif_suspend(struct ath10k *ar)
 {
-	ath10k_pci_sleep(ar);
+	/* The grace timer can still be counting down and ar->ps_awake be true.
+	 * It is known that the device may be asleep after resuming regardless
+	 * of the SoC powersave state before suspending. Hence make sure the
+	 * device is asleep before proceeding.
+	 */
+	ath10k_pci_sleep_sync(ar);
 
 	return 0;
 }
@@ -2074,13 +2224,6 @@  static int ath10k_pci_hif_resume(struct ath10k *ar)
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	struct pci_dev *pdev = ar_pci->pdev;
 	u32 val;
-	int ret;
-
-	ret = ath10k_pci_wake(ar);
-	if (ret) {
-		ath10k_err(ar, "failed to wake device up on resume: %d\n", ret);
-		return ret;
-	}
 
 	/* Suspend/Resume resets the PCI configuration space, so we have to
 	 * re-disable the RETRY_TIMEOUT register (0x41) to keep PCI Tx retries
@@ -2091,7 +2234,7 @@  static int ath10k_pci_hif_resume(struct ath10k *ar)
 	if ((val & 0x0000ff00) != 0)
 		pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);
 
-	return ret;
+	return 0;
 }
 #endif
 
@@ -2185,13 +2328,6 @@  static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg)
 {
 	struct ath10k *ar = arg;
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-	int ret;
-
-	ret = ath10k_pci_wake(ar);
-	if (ret) {
-		ath10k_warn(ar, "failed to wake device up on irq: %d\n", ret);
-		return IRQ_NONE;
-	}
 
 	if (ar_pci->num_msi_intrs == 0) {
 		if (!ath10k_pci_irq_pending(ar))
@@ -2638,8 +2774,12 @@  static int ath10k_pci_probe(struct pci_dev *pdev,
 			  pdev->subsystem_vendor, pdev->subsystem_device);
 
 	spin_lock_init(&ar_pci->ce_lock);
+	spin_lock_init(&ar_pci->ps_lock);
+
 	setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry,
 		    (unsigned long)ar);
+	setup_timer(&ar_pci->ps_timer, ath10k_pci_ps_timer,
+		    (unsigned long)ar);
 
 	ret = ath10k_pci_claim(ar);
 	if (ret) {
@@ -2647,12 +2787,6 @@  static int ath10k_pci_probe(struct pci_dev *pdev,
 		goto err_core_destroy;
 	}
 
-	ret = ath10k_pci_wake(ar);
-	if (ret) {
-		ath10k_err(ar, "failed to wake up: %d\n", ret);
-		goto err_release;
-	}
-
 	ret = ath10k_pci_alloc_pipes(ar);
 	if (ret) {
 		ath10k_err(ar, "failed to allocate copy engine pipes: %d\n",
@@ -2716,9 +2850,6 @@  err_free_pipes:
 	ath10k_pci_free_pipes(ar);
 
 err_sleep:
-	ath10k_pci_sleep(ar);
-
-err_release:
 	ath10k_pci_release(ar);
 
 err_core_destroy:
@@ -2748,6 +2879,7 @@  static void ath10k_pci_remove(struct pci_dev *pdev)
 	ath10k_pci_deinit_irq(ar);
 	ath10k_pci_ce_deinit(ar);
 	ath10k_pci_free_pipes(ar);
+	ath10k_pci_sleep_sync(ar);
 	ath10k_pci_release(ar);
 	ath10k_core_destroy(ar);
 }
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index ee2173d61257..d7696ddc03c4 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -191,6 +191,35 @@  struct ath10k_pci {
 	 * device bootup is executed and re-programmed later.
 	 */
 	u16 link_ctl;
+
+	/* Protects ps_awake and ps_wake_refcount */
+	spinlock_t ps_lock;
+
+	/* The device has a special powersave-oriented register. When device is
+	 * considered asleep it drains less power and driver is forbidden from
+	 * accessing most MMIO registers. If host were to access them without
+	 * waking up the device might scribble over host memory or return
+	 * 0xdeadbeef readouts.
+	 */
+	unsigned long ps_wake_refcount;
+
+	/* Waking up takes some time (up to 2ms in some cases) so it can be bad
+	 * for latency. To mitigate this the device isn't immediately allowed
+	 * to sleep after all references are undone - instead there's a grace
+	 * period after which the powersave register is updated unless some
+	 * activity to/from device happened in the meantime.
+	 *
+	 * Also see comments on ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC.
+	 */
+	struct timer_list ps_timer;
+
+	/* MMIO registers are used to communicate with the device. With
+	 * intensive traffic accessing powersave register would be a bit
+	 * wasteful overhead and would needlessly stall CPU. It is far more
+	 * efficient to rely on a variable in RAM and update it only upon
+	 * powersave register state changes.
+	 */
+	bool ps_awake;
 };
 
 static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
@@ -215,61 +244,25 @@  static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
  * for this device; but that's not guaranteed.
  */
 #define TARG_CPU_SPACE_TO_CE_SPACE(ar, pci_addr, addr)			\
-	(((ioread32((pci_addr)+(SOC_CORE_BASE_ADDRESS|			\
+	(((ath10k_pci_read32(ar, (SOC_CORE_BASE_ADDRESS |		\
 	  CORE_CTRL_ADDRESS)) & 0x7ff) << 21) |				\
 	 0x100000 | ((addr) & 0xfffff))
 
 /* Wait up to this many Ms for a Diagnostic Access CE operation to complete */
 #define DIAG_ACCESS_CE_TIMEOUT_MS 10
 
-/* Target exposes its registers for direct access. However before host can
- * access them it needs to make sure the target is awake (ath10k_pci_wake,
- * ath10k_pci_wake_wait, ath10k_pci_is_awake). Once target is awake it won't go
- * to sleep unless host tells it to (ath10k_pci_sleep).
- *
- * If host tries to access target registers without waking it up it can
- * scribble over host memory.
- *
- * If target is asleep waking it up may take up to even 2ms.
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value);
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val);
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val);
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset);
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr);
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr);
+
+/* QCA6174 is known to have Tx/Rx issues when SOC_WAKE register is poked too
+ * frequently. To avoid this put SoC to sleep after a very conservative grace
+ * period. Adjust with great care.
  */
-
-static inline void ath10k_pci_write32(struct ath10k *ar, u32 offset,
-				      u32 value)
-{
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	iowrite32(value, ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
-{
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	return ioread32(ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
-{
-	return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
-{
-	ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
-}
-
-static inline u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
-{
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	return ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
-{
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	iowrite32(val, ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
+#define ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC 60
 
 #endif /* _PCI_H_ */