diff mbox

[v5,2/7] scsi: libsas: shut down the PHY if events reached the threshold

Message ID 20171208094210.24887-3-yanaijie@huawei.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Jason Yan Dec. 8, 2017, 9:42 a.m. UTC
If the PHY bursts too many events, we will allocate a lot of events for the
worker. This may lead to memory exhaustion.

Dan Williams suggested shutting down the PHY if the number of events reaches
the threshold, because in this case the PHY may have gone into some
erroneous state. Users can re-enable the PHY via sysfs if they want.

We cannot use a fixed memory pool because if we run out of events, the
shutdown event and loss-of-signal event will be lost too. The events still
need to be allocated and processed in this case.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Johannes Thumshirn <jthumshirn@suse.de>
CC: Ewan Milne <emilne@redhat.com>
CC: Christoph Hellwig <hch@lst.de>
CC: Tomas Henzl <thenzl@redhat.com>
---
 drivers/scsi/libsas/sas_init.c | 33 ++++++++++++++++++++++++++++++++-
 drivers/scsi/libsas/sas_phy.c  | 27 ++++++++++++++++++++++++++-
 include/scsi/libsas.h          |  6 ++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

Comments

Hannes Reinecke Dec. 15, 2017, 12:18 p.m. UTC | #1
On 12/08/2017 10:42 AM, Jason Yan wrote:
> If the PHY burst too many events, we will alloc a lot of events for the
> worker. This may leads to memory exhaustion.
> 
> Dan Williams suggested to shut down the PHY if the events reached the
> threshold, because in this case the PHY may have gone into some
> erroneous state. Users can re-enable the PHY by sysfs if they want.
> 
> We cannot use the fixed memory pool because if we run out of events, the
> shut down event and loss of signal event will lost too. The events still
> need to be allocated and processed in this case.
> 
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Jason Yan <yanaijie@huawei.com>
> CC: John Garry <john.garry@huawei.com>
> CC: Johannes Thumshirn <jthumshirn@suse.de>
> CC: Ewan Milne <emilne@redhat.com>
> CC: Christoph Hellwig <hch@lst.de>
> CC: Tomas Henzl <thenzl@redhat.com>
> ---
>  drivers/scsi/libsas/sas_init.c | 33 ++++++++++++++++++++++++++++++++-
>  drivers/scsi/libsas/sas_phy.c  | 27 ++++++++++++++++++++++++++-
>  include/scsi/libsas.h          |  6 ++++++
>  3 files changed, 64 insertions(+), 2 deletions(-)
> 
Well, this still looks a bit error prone; what if the system runs out of
memory before the pool is exhausted?
(Also a threshold of 1024 events is a bit arbitrary; one might want to
adjust that).

Couldn't you allocate two static events always (for shutdown and signal
loss), and then use a fixed pool?

Cheers,

Hannes
Hannes Reinecke Jan. 8, 2018, 7:38 a.m. UTC | #2
On 12/15/2017 01:18 PM, Hannes Reinecke wrote:
> On 12/08/2017 10:42 AM, Jason Yan wrote:
>> If the PHY burst too many events, we will alloc a lot of events for the
>> worker. This may leads to memory exhaustion.
>>
>> Dan Williams suggested to shut down the PHY if the events reached the
>> threshold, because in this case the PHY may have gone into some
>> erroneous state. Users can re-enable the PHY by sysfs if they want.
>>
>> We cannot use the fixed memory pool because if we run out of events, the
>> shut down event and loss of signal event will lost too. The events still
>> need to be allocated and processed in this case.
>>
>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>> Signed-off-by: Jason Yan <yanaijie@huawei.com>
>> CC: John Garry <john.garry@huawei.com>
>> CC: Johannes Thumshirn <jthumshirn@suse.de>
>> CC: Ewan Milne <emilne@redhat.com>
>> CC: Christoph Hellwig <hch@lst.de>
>> CC: Tomas Henzl <thenzl@redhat.com>
>> ---
>>  drivers/scsi/libsas/sas_init.c | 33 ++++++++++++++++++++++++++++++++-
>>  drivers/scsi/libsas/sas_phy.c  | 27 ++++++++++++++++++++++++++-
>>  include/scsi/libsas.h          |  6 ++++++
>>  3 files changed, 64 insertions(+), 2 deletions(-)
>>
> Well, this still looks a bit error prone; what if the system runs out of
> memory before the pool is exhausted?
> (Also a threshold of 1024 events is a bit arbitrary; one might want to
> adjust that).
> 
> Couldn't you allocate two static events always (for shutdown and signal
> loss), and then use a fixed pool?
> 
Has actually been resolved by the next patch.

Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
diff mbox

Patch

diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index e04f6d6..22bfc02 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -123,6 +123,8 @@  int sas_register_ha(struct sas_ha_struct *sas_ha)
 	INIT_LIST_HEAD(&sas_ha->defer_q);
 	INIT_LIST_HEAD(&sas_ha->eh_dev_q);
 
+	sas_ha->event_thres = SAS_PHY_SHUTDOWN_THRES;
+
 	error = sas_register_phys(sas_ha);
 	if (error) {
 		printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
@@ -557,14 +559,43 @@  EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
 
 struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
 {
+	struct asd_sas_event *event;
 	gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	event = kmem_cache_zalloc(sas_event_cache, flags);
+	if (!event)
+		return NULL;
 
-	return kmem_cache_zalloc(sas_event_cache, flags);
+	atomic_inc(&phy->event_nr);
+
+	if (atomic_read(&phy->event_nr) > phy->ha->event_thres) {
+		if (i->dft->lldd_control_phy) {
+			if (cmpxchg(&phy->in_shutdown, 0, 1) == 0) {
+				sas_printk("The phy%02d bursting events, shut it down.\n",
+					phy->id);
+				sas_notify_phy_event(phy, PHYE_SHUTDOWN);
+			}
+		} else {
+			/* Do not support PHY control, stop allocating events */
+			WARN_ONCE(1, "PHY control not supported.\n");
+			kmem_cache_free(sas_event_cache, event);
+			atomic_dec(&phy->event_nr);
+			event = NULL;
+		}
+	}
+
+	return event;
 }
 
 void sas_free_event(struct asd_sas_event *event)
 {
+	struct asd_sas_phy *phy = event->phy;
+
 	kmem_cache_free(sas_event_cache, event);
+	atomic_dec(&phy->event_nr);
 }
 
 /* ---------- SAS Class register/unregister ---------- */
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 59f8292..bf3e1b9 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -35,6 +35,7 @@  static void sas_phye_loss_of_signal(struct work_struct *work)
 	struct asd_sas_event *ev = to_asd_sas_event(work);
 	struct asd_sas_phy *phy = ev->phy;
 
+	phy->in_shutdown = 0;
 	phy->error = 0;
 	sas_deform_port(phy, 1);
 }
@@ -44,6 +45,7 @@  static void sas_phye_oob_done(struct work_struct *work)
 	struct asd_sas_event *ev = to_asd_sas_event(work);
 	struct asd_sas_phy *phy = ev->phy;
 
+	phy->in_shutdown = 0;
 	phy->error = 0;
 }
 
@@ -105,6 +107,28 @@  static void sas_phye_resume_timeout(struct work_struct *work)
 }
 
 
+static void sas_phye_shutdown(struct work_struct *work)
+{
+	struct asd_sas_event *ev = to_asd_sas_event(work);
+	struct asd_sas_phy *phy = ev->phy;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	if (phy->enabled) {
+		int ret;
+
+		phy->error = 0;
+		phy->enabled = 0;
+		ret = i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
+		if (ret)
+			sas_printk("lldd disable phy%02d returned %d\n",
+				phy->id, ret);
+	} else
+		sas_printk("phy%02d is not enabled, cannot shutdown\n",
+			phy->id);
+}
+
 /* ---------- Phy class registration ---------- */
 
 int sas_register_phys(struct sas_ha_struct *sas_ha)
@@ -116,6 +140,7 @@  int sas_register_phys(struct sas_ha_struct *sas_ha)
 		struct asd_sas_phy *phy = sas_ha->sas_phy[i];
 
 		phy->error = 0;
+		atomic_set(&phy->event_nr, 0);
 		INIT_LIST_HEAD(&phy->port_phy_el);
 
 		phy->port = NULL;
@@ -151,5 +176,5 @@  const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
 	[PHYE_OOB_ERROR] = sas_phye_oob_error,
 	[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
 	[PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
-
+	[PHYE_SHUTDOWN] = sas_phye_shutdown,
 };
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 61c84d5..26683e2 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -75,6 +75,7 @@  enum phy_event {
 	PHYE_OOB_ERROR,
 	PHYE_SPINUP_HOLD,             /* hot plug SATA, no COMWAKE sent */
 	PHYE_RESUME_TIMEOUT,
+	PHYE_SHUTDOWN,
 	PHY_NUM_EVENTS,
 };
 
@@ -311,12 +312,15 @@  static inline void INIT_SAS_EVENT(struct asd_sas_event *ev,
 	ev->event = event;
 }
 
+#define SAS_PHY_SHUTDOWN_THRES   1024
 
 /* The phy pretty much is controlled by the LLDD.
  * The class only reads those fields.
  */
 struct asd_sas_phy {
 /* private: */
+	atomic_t event_nr;
+	int in_shutdown;
 	int error;
 	int suspended;
 
@@ -404,6 +408,8 @@  struct sas_ha_struct {
 
 	struct list_head eh_done_q;  /* complete via scsi_eh_flush_done_q */
 	struct list_head eh_ata_q; /* scmds to promote from sas to ata eh */
+
+	int event_thres;
 };
 
 #define SHOST_TO_SAS_HA(_shost) (*(struct sas_ha_struct **)(_shost)->hostdata)