diff mbox

acpi, nfit: skip ARS on machine-check-recovery capable platforms

Message ID 148651103020.2605.2544850985469410207.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dan Williams Feb. 7, 2017, 11:43 p.m. UTC
If the platform supports machine-check-recovery then there is little
reason to kick off opportunistic scrubs to collect a media error list.
That initial scrub is only useful when it might prevent a kernel panic
from consuming poison (a media error from memory).

Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c |    6 ++++--
 drivers/acpi/nfit/mce.c  |    7 +++++++
 drivers/acpi/nfit/nfit.h |    5 +++++
 3 files changed, 16 insertions(+), 2 deletions(-)

Comments

kernel test robot Feb. 8, 2017, 12:11 p.m. UTC | #1
Hi Dan,

[auto build test ERROR on pm/linux-next]
[also build test ERROR on v4.10-rc7 next-20170207]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/acpi-nfit-skip-ARS-on-machine-check-recovery-capable-platforms/20170208-081649
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
config: i386-randconfig-x0-02081903 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/current.h:4:0,
                    from include/linux/mutex.h:13,
                    from include/linux/notifier.h:13,
                    from drivers/acpi/nfit/mce.c:15:
   drivers/acpi/nfit/mce.c: In function 'is_ars_required':
>> drivers/acpi/nfit/mce.c:97:37: error: 'mcsafe_key' undeclared (first use in this function)
            if (static_branch_unlikely(&mcsafe_key))
                                        ^
   include/linux/compiler.h:168:42: note: in definition of macro 'unlikely'
    # define unlikely(x) __builtin_expect(!!(x), 0)
                                             ^
   include/linux/jump_label.h:387:44: note: in expansion of macro 'static_key_enabled'
    #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key))
                                               ^~~~~~~~~~~~~~~~~~
>> drivers/acpi/nfit/mce.c:97:13: note: in expansion of macro 'static_branch_unlikely'
            if (static_branch_unlikely(&mcsafe_key))
                ^~~~~~~~~~~~~~~~~~~~~~
   drivers/acpi/nfit/mce.c:97:37: note: each undeclared identifier is reported only once for each function it appears in
            if (static_branch_unlikely(&mcsafe_key))
                                        ^
   include/linux/compiler.h:168:42: note: in definition of macro 'unlikely'
    # define unlikely(x) __builtin_expect(!!(x), 0)
                                             ^
   include/linux/jump_label.h:387:44: note: in expansion of macro 'static_key_enabled'
    #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key))
                                               ^~~~~~~~~~~~~~~~~~
>> drivers/acpi/nfit/mce.c:97:13: note: in expansion of macro 'static_branch_unlikely'
            if (static_branch_unlikely(&mcsafe_key))
                ^~~~~~~~~~~~~~~~~~~~~~

vim +/mcsafe_key +97 drivers/acpi/nfit/mce.c

     9	 *
    10	 * This program is distributed in the hope that it will be useful, but
    11	 * WITHOUT ANY WARRANTY; without even the implied warranty of
    12	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13	 * General Public License for more details.
    14	 */
  > 15	#include <linux/notifier.h>
    16	#include <linux/acpi.h>
    17	#include <linux/nd.h>
    18	#include <asm/mce.h>
    19	#include "nfit.h"
    20	
    21	static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
    22				void *data)
    23	{
    24		struct mce *mce = (struct mce *)data;
    25		struct acpi_nfit_desc *acpi_desc;
    26		struct nfit_spa *nfit_spa;
    27	
    28		/* We only care about memory errors */
    29		if (!(mce->status & MCACOD))
    30			return NOTIFY_DONE;
    31	
    32		/*
    33		 * mce->addr contains the physical addr accessed that caused the
    34		 * machine check. We need to walk through the list of NFITs, and see
    35		 * if any of them matches that address, and only then start a scrub.
    36		 */
    37		mutex_lock(&acpi_desc_lock);
    38		list_for_each_entry(acpi_desc, &acpi_descs, list) {
    39			struct device *dev = acpi_desc->dev;
    40			int found_match = 0;
    41	
    42			mutex_lock(&acpi_desc->init_mutex);
    43			list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
    44				struct acpi_nfit_system_address *spa = nfit_spa->spa;
    45	
    46				if (nfit_spa_type(spa) != NFIT_SPA_PM)
    47					continue;
    48				/* find the spa that covers the mce addr */
    49				if (spa->address > mce->addr)
    50					continue;
    51				if ((spa->address + spa->length - 1) < mce->addr)
    52					continue;
    53				found_match = 1;
    54				dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
    55					__func__, spa->range_index, spa->address,
    56					spa->length);
    57				/*
    58				 * We can break at the first match because we're going
    59				 * to rescan all the SPA ranges. There shouldn't be any
    60				 * aliasing anyway.
    61				 */
    62				break;
    63			}
    64			mutex_unlock(&acpi_desc->init_mutex);
    65	
    66			if (!found_match)
    67				continue;
    68	
    69			/* If this fails due to an -ENOMEM, there is little we can do */
    70			nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
    71					ALIGN(mce->addr, L1_CACHE_BYTES),
    72					L1_CACHE_BYTES);
    73			nvdimm_region_notify(nfit_spa->nd_region,
    74					NVDIMM_REVALIDATE_POISON);
    75	
    76			if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
    77				/*
    78				 * We can ignore an -EBUSY here because if an ARS is
    79				 * already in progress, just let that be the last
    80				 * authoritative one
    81				 */
    82				acpi_nfit_ars_rescan(acpi_desc);
    83			}
    84			break;
    85		}
    86	
    87		mutex_unlock(&acpi_desc_lock);
    88		return NOTIFY_DONE;
    89	}
    90	
    91	static struct notifier_block nfit_mce_dec = {
    92		.notifier_call	= nfit_handle_mce,
    93	};
    94	
    95	bool is_ars_required(void)
    96	{
  > 97	        if (static_branch_unlikely(&mcsafe_key))
    98	                return false;
    99		return true;
   100	}

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Jeff Moyer Feb. 8, 2017, 3:10 p.m. UTC | #2
Dan Williams <dan.j.williams@intel.com> writes:

> If the platform supports machine-check-recovery then there is little
> reason to kick off opportunistic scrubs to collect a media error list.
> That initial scrub is only useful when it might prevent a kernel panic
> from consuming poison (a media error from memory).

How expensive is the scrub?  Even on platforms that support recoverable
machine checks, it's possible that you get one that is not recoverable.
You haven't sold me on this change.  ;-)

Cheers,
Jeff


> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/acpi/nfit/core.c |    6 ++++--
>  drivers/acpi/nfit/mce.c  |    7 +++++++
>  drivers/acpi/nfit/nfit.h |    5 +++++
>  3 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index 7361d00818e2..bbefd9516939 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -2500,10 +2500,12 @@ static void acpi_nfit_scrub(struct work_struct *work)
>  	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
>  		/*
>  		 * Flag all the ranges that still need scrubbing, but
> -		 * register them now to make data available.
> +		 * register them now to make data available. If the
> +		 * platform supports machine-check recovery then we skip
> +		 * these opportunistic scans.
>  		 */
>  		if (!nfit_spa->nd_region) {
> -			nfit_spa->ars_required = 1;
> +			nfit_spa->ars_required = is_ars_required();
>  			acpi_nfit_register_region(acpi_desc, nfit_spa);
>  		}
>  	}
> diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
> index e5ce81c38eed..1e6f1e7100f9 100644
> --- a/drivers/acpi/nfit/mce.c
> +++ b/drivers/acpi/nfit/mce.c
> @@ -92,6 +92,13 @@ static struct notifier_block nfit_mce_dec = {
>  	.notifier_call	= nfit_handle_mce,
>  };
>  
> +bool is_ars_required(void)
> +{
> +        if (static_branch_unlikely(&mcsafe_key))
> +                return false;
> +	return true;
> +}
> +
>  void nfit_mce_register(void)
>  {
>  	mce_register_decode_chain(&nfit_mce_dec);
> diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
> index fc29c2e9832e..925f2a3d896e 100644
> --- a/drivers/acpi/nfit/nfit.h
> +++ b/drivers/acpi/nfit/nfit.h
> @@ -211,6 +211,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
>  #ifdef CONFIG_X86_MCE
>  void nfit_mce_register(void);
>  void nfit_mce_unregister(void);
> +bool is_ars_required(void);
>  #else
>  static inline void nfit_mce_register(void)
>  {
> @@ -218,6 +219,10 @@ static inline void nfit_mce_register(void)
>  static inline void nfit_mce_unregister(void)
>  {
>  }
> +static inline bool is_ars_required(void)
> +{
> +	return true;
> +}
>  #endif
>  
>  int nfit_spa_type(struct acpi_nfit_system_address *spa);
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
Dan Williams Feb. 8, 2017, 5:42 p.m. UTC | #3
On Wed, Feb 8, 2017 at 7:10 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
> Dan Williams <dan.j.williams@intel.com> writes:
>
>> If the platform supports machine-check-recovery then there is little
>> reason to kick off opportunistic scrubs to collect a media error list.
>> That initial scrub is only useful when it might prevent a kernel panic
>> from consuming poison (a media error from memory).
>
> How expensive is the scrub?

The ACPI spec is not clear, but it could range from benign to
expensive and degrading system performance for 10's of minutes after
boot

> Even on platforms that support recoverable
> machine checks, it's possible that you get one that is not recoverable.
> You haven't sold me on this change.  ;-)
>

Adding Tony so he can either confirm, or point and laugh at my
assumptions. In general you're right that there are machine check
events that are not recoverable, but I'm thinking of problems like bus
lockups and other disasters out of the direct cpu-to-memory data path.
The question is whether should we avoid the cpu consuming media errors
at all costs regardless of machine-check recovery. Tony might there be
system-fatal gaps in memcpy_mcsafe() or userspace poison consumption
handling that you would recommend aggressively trying to avoid media
errors?

> Cheers,
> Jeff
>
>
>> Cc: Vishal Verma <vishal.l.verma@intel.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>  drivers/acpi/nfit/core.c |    6 ++++--
>>  drivers/acpi/nfit/mce.c  |    7 +++++++
>>  drivers/acpi/nfit/nfit.h |    5 +++++
>>  3 files changed, 16 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
>> index 7361d00818e2..bbefd9516939 100644
>> --- a/drivers/acpi/nfit/core.c
>> +++ b/drivers/acpi/nfit/core.c
>> @@ -2500,10 +2500,12 @@ static void acpi_nfit_scrub(struct work_struct *work)
>>       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
>>               /*
>>                * Flag all the ranges that still need scrubbing, but
>> -              * register them now to make data available.
>> +              * register them now to make data available. If the
>> +              * platform supports machine-check recovery then we skip
>> +              * these opportunistic scans.
>>                */
>>               if (!nfit_spa->nd_region) {
>> -                     nfit_spa->ars_required = 1;
>> +                     nfit_spa->ars_required = is_ars_required();
>>                       acpi_nfit_register_region(acpi_desc, nfit_spa);
>>               }
>>       }
>> diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
>> index e5ce81c38eed..1e6f1e7100f9 100644
>> --- a/drivers/acpi/nfit/mce.c
>> +++ b/drivers/acpi/nfit/mce.c
>> @@ -92,6 +92,13 @@ static struct notifier_block nfit_mce_dec = {
>>       .notifier_call  = nfit_handle_mce,
>>  };
>>
>> +bool is_ars_required(void)
>> +{
>> +        if (static_branch_unlikely(&mcsafe_key))
>> +                return false;
>> +     return true;
>> +}
>> +
>>  void nfit_mce_register(void)
>>  {
>>       mce_register_decode_chain(&nfit_mce_dec);
>> diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
>> index fc29c2e9832e..925f2a3d896e 100644
>> --- a/drivers/acpi/nfit/nfit.h
>> +++ b/drivers/acpi/nfit/nfit.h
>> @@ -211,6 +211,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
>>  #ifdef CONFIG_X86_MCE
>>  void nfit_mce_register(void);
>>  void nfit_mce_unregister(void);
>> +bool is_ars_required(void);
>>  #else
>>  static inline void nfit_mce_register(void)
>>  {
>> @@ -218,6 +219,10 @@ static inline void nfit_mce_register(void)
>>  static inline void nfit_mce_unregister(void)
>>  {
>>  }
>> +static inline bool is_ars_required(void)
>> +{
>> +     return true;
>> +}
>>  #endif
>>
>>  int nfit_spa_type(struct acpi_nfit_system_address *spa);
>>
>> _______________________________________________
>> Linux-nvdimm mailing list
>> Linux-nvdimm@lists.01.org
>> https://lists.01.org/mailman/listinfo/linux-nvdimm
Dan Williams Feb. 8, 2017, 11:01 p.m. UTC | #4
On Wed, Feb 8, 2017 at 9:42 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Wed, Feb 8, 2017 at 7:10 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
>> Dan Williams <dan.j.williams@intel.com> writes:
>>
>>> If the platform supports machine-check-recovery then there is little
>>> reason to kick off opportunistic scrubs to collect a media error list.
>>> That initial scrub is only useful when it might prevent a kernel panic
>>> from consuming poison (a media error from memory).
>>
>> How expensive is the scrub?
>
> The ACPI spec is not clear, but it could range from benign to
> expensive and degrading system performance for 10's of minutes after
> boot
>
>> Even on platforms that support recoverable
>> machine checks, it's possible that you get one that is not recoverable.
>> You haven't sold me on this change.  ;-)
>>
>
> Adding Tony so he can either confirm, or point and laugh at my
> assumptions. In general you're right that there are machine check
> events that are not recoverable, but I'm thinking of problems like bus
> lockups and other disasters out of the direct cpu-to-memory data path.
> The question is whether should we avoid the cpu consuming media errors
> at all costs regardless of machine-check recovery. Tony might there be
> system-fatal gaps in memcpy_mcsafe() or userspace poison consumption
> handling that you would recommend aggressively trying to avoid media
> errors?
>

I was able to chat with Ashok and he warned that not all instructions
that consume poison can generate a recovery point. So, thanks for
prompting the double-check, we should definitely try to collect the
badblocks list regardless of the machine check recovery capability of
the system.
Jeff Moyer Feb. 8, 2017, 11:08 p.m. UTC | #5
Dan Williams <dan.j.williams@intel.com> writes:

> I was able to chat with Ashok and he warned that not all instructions
> that consume poison can generate a recovery point. So, thanks for
> prompting the double-check, we should definitely try to collect the
> badblocks list regardless of the machine check recovery capability of
> the system.

OK.  Thanks for looking into it!

-Jeff
Luck, Tony Feb. 9, 2017, 5:20 p.m. UTC | #6
> Adding Tony so he can either confirm, or point and laugh at my
> assumptions. In general you're right that there are machine check
> events that are not recoverable, but I'm thinking of problems like bus
> lockups and other disasters out of the direct cpu-to-memory data path.
> The question is whether should we avoid the cpu consuming media errors
> at all costs regardless of machine-check recovery. Tony might there be
> system-fatal gaps in memcpy_mcsafe() or userspace poison consumption
> handling that you would recommend aggressively trying to avoid media
> errors?

TL;DR - I think it is worth it ... but I worry more about errors than most
people.

In current generation systems the two most common sources of machine
checks are memory, and I/O.  They dwarf all the others like cache and
bus lockups.  So it is worth trying to avoid memory issues.

Whether you can recover from a machine check triggered from a CPU
read of memory depends on which instructions you use, and the alignment
of the access. That's why memcpy_mcsafe() will start with a few byte reads
if needed to align the source address while other copy routines prefer to
align the destination ... memory writes that straddle cache lines are more
expensive than reads that do that ... but the point of the routine is to be
safe, so we drop a tiny amount of performance in the unaligned case to
make sure we will be able to recover.

We can't control how userspace will access memory ... so if we can find
the errors before they stumble into them it is a win.

-Tony
diff mbox

Patch

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 7361d00818e2..bbefd9516939 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2500,10 +2500,12 @@  static void acpi_nfit_scrub(struct work_struct *work)
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 		/*
 		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
+		 * register them now to make data available. If the
+		 * platform supports machine-check recovery then we skip
+		 * these opportunistic scans.
 		 */
 		if (!nfit_spa->nd_region) {
-			nfit_spa->ars_required = 1;
+			nfit_spa->ars_required = is_ars_required();
 			acpi_nfit_register_region(acpi_desc, nfit_spa);
 		}
 	}
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index e5ce81c38eed..1e6f1e7100f9 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -92,6 +92,13 @@  static struct notifier_block nfit_mce_dec = {
 	.notifier_call	= nfit_handle_mce,
 };
 
+bool is_ars_required(void)
+{
+        if (static_branch_unlikely(&mcsafe_key))
+                return false;
+	return true;
+}
+
 void nfit_mce_register(void)
 {
 	mce_register_decode_chain(&nfit_mce_dec);
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index fc29c2e9832e..925f2a3d896e 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -211,6 +211,7 @@  int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
 void nfit_mce_unregister(void);
+bool is_ars_required(void);
 #else
 static inline void nfit_mce_register(void)
 {
@@ -218,6 +219,10 @@  static inline void nfit_mce_register(void)
 static inline void nfit_mce_unregister(void)
 {
 }
+static inline bool is_ars_required(void)
+{
+	return true;
+}
 #endif
 
 int nfit_spa_type(struct acpi_nfit_system_address *spa);