[v1,2/3] mm: memory-failure: Bump memory failure stats to pglist_data

Message ID: 20230116193902.1315236-3-jiaqiyan@google.com
State: New
Series: Introduce per NUMA node memory error statistics

Commit Message

Jiaqi Yan Jan. 16, 2023, 7:39 p.m. UTC
Right before memory_failure() finishes its handling, accumulate the
poisoned page's resolution counters into pglist_data's memory_failure_stats,
so that the corresponding sysfs entries are updated.

Tested:
1) Start an application to allocate memory buffer chunks
2) Convert random memory buffer addresses to physical addresses
3) Inject memory errors using EINJ at chosen physical addresses
4) Access poisoned memory buffer and recover from SIGBUS
5) Check counter values under
   /sys/devices/system/node/node*/memory_failure/pages_*

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
 mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
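
For context: struct memory_failure_stats itself and the sysfs entries under
/sys/devices/system/node/node*/memory_failure/ are introduced elsewhere in
this series and are not shown in this message.  Judging only from the field
names incremented in the hunk below, a rough sketch of the per-node structure
(reached as NODE_DATA(nid)->mf_stats) presumably looks like:

/*
 * Sketch only -- inferred from the fields this patch increments; the real
 * definition lives in another patch of the series.
 */
struct memory_failure_stats {
        /* Total number of raw pages poisoned on this node. */
        unsigned long pages_poisoned;
        /* Breakdown of pages_poisoned by memory_failure() result. */
        unsigned long pages_ignored;
        unsigned long pages_failed;
        unsigned long pages_delayed;
        unsigned long pages_recovered;
};

Each counter presumably maps onto one of the per-node pages_* files that
step 5 of the test procedure above reads back.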

Comments

Andrew Morton Jan. 16, 2023, 8:16 p.m. UTC | #1
On Mon, 16 Jan 2023 19:39:01 +0000 Jiaqi Yan <jiaqiyan@google.com> wrote:

> Right before memory_failure() finishes its handling, accumulate the
> poisoned page's resolution counters into pglist_data's memory_failure_stats,
> so that the corresponding sysfs entries are updated.
> 
> Tested:
> 1) Start an application to allocate memory buffer chunks
> 2) Convert random memory buffer addresses to physical addresses
> 3) Inject memory errors using EINJ at chosen physical addresses
> 4) Access poisoned memory buffer and recover from SIGBUS
> 5) Check counter values under
>    /sys/devices/system/node/node*/memory_failure/pages_*
> 
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
>  #undef slab
>  #undef reserved
>  
> +static void update_per_node_mf_stats(unsigned long pfn,
> +				     enum mf_result result)
> +{
> +	int nid = MAX_NUMNODES;
> +	struct memory_failure_stats *mf_stats = NULL;
> +
> +	nid = pfn_to_nid(pfn);
> +	if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> +		WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> +		return;
> +	}
> +
> +	mf_stats = &NODE_DATA(nid)->mf_stats;
> +	switch (result) {
> +	case MF_IGNORED:
> +		++mf_stats->pages_ignored;

What is the locking here, to prevent concurrent increments?

> +		break;
> +	case MF_FAILED:
> +		++mf_stats->pages_failed;
> +		break;
> +	case MF_DELAYED:
> +		++mf_stats->pages_delayed;
> +		break;
> +	case MF_RECOVERED:
> +		++mf_stats->pages_recovered;
> +		break;
> +	default:
> +		WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result);
> +		break;
> +	}
> +	++mf_stats->pages_poisoned;
> +}
HORIGUCHI NAOYA(堀口 直也) Jan. 17, 2023, 9:03 a.m. UTC | #2
On Mon, Jan 16, 2023 at 12:16:13PM -0800, Andrew Morton wrote:
> On Mon, 16 Jan 2023 19:39:01 +0000 Jiaqi Yan <jiaqiyan@google.com> wrote:
...
> > @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
> >  #undef slab
> >  #undef reserved
> >  
> > +static void update_per_node_mf_stats(unsigned long pfn,
> > +				     enum mf_result result)
> > +{
> > +	int nid = MAX_NUMNODES;
> > +	struct memory_failure_stats *mf_stats = NULL;
> > +
> > +	nid = pfn_to_nid(pfn);
> > +	if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> > +		WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> > +		return;
> > +	}
> > +
> > +	mf_stats = &NODE_DATA(nid)->mf_stats;
> > +	switch (result) {
> > +	case MF_IGNORED:
> > +		++mf_stats->pages_ignored;
> 
> What is the locking here, to prevent concurrent increments?

mf_mutex should prevent the race.

Thanks,
Naoya Horiguchi
Jiaqi Yan Jan. 18, 2023, 11:05 p.m. UTC | #3
On Tue, Jan 17, 2023 at 1:03 AM HORIGUCHI NAOYA(堀口 直也)
<naoya.horiguchi@nec.com> wrote:
>
> On Mon, Jan 16, 2023 at 12:16:13PM -0800, Andrew Morton wrote:
> > On Mon, 16 Jan 2023 19:39:01 +0000 Jiaqi Yan <jiaqiyan@google.com> wrote:
> ...
> > > @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
> > >  #undef slab
> > >  #undef reserved
> > >
> > > +static void update_per_node_mf_stats(unsigned long pfn,
> > > +                                enum mf_result result)
> > > +{
> > > +   int nid = MAX_NUMNODES;
> > > +   struct memory_failure_stats *mf_stats = NULL;
> > > +
> > > +   nid = pfn_to_nid(pfn);
> > > +   if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> > > +           WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> > > +           return;
> > > +   }
> > > +
> > > +   mf_stats = &NODE_DATA(nid)->mf_stats;
> > > +   switch (result) {
> > > +   case MF_IGNORED:
> > > +           ++mf_stats->pages_ignored;
> >
> > What is the locking here, to prevent concurrent increments?
>
> mf_mutex should prevent the race.

Since we are only incrementing these counters, I wonder if it makes
sense to convert them into atomic_long_t (encapsulated inside
memory_failure_stats) and do atomic_long_add, instead of introducing a
struct mutex?

>
> Thanks,
> Naoya Horiguchi
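
Purely as an illustration of the alternative floated above -- not what the
posted patch does, and the follow-ups below settle on keeping plain
unsigned long since mf_mutex already serializes the updates -- an atomic
conversion, assuming the same field names, would look roughly like:

/* Hypothetical alternative sketch, not adopted by this series. */
struct memory_failure_stats {
        atomic_long_t pages_poisoned;
        atomic_long_t pages_ignored;
        atomic_long_t pages_failed;
        atomic_long_t pages_delayed;
        atomic_long_t pages_recovered;
};

static void update_per_node_mf_stats(unsigned long pfn,
                                     enum mf_result result)
{
        /* nid validation as in the posted patch, elided here. */
        struct memory_failure_stats *mf_stats =
                &NODE_DATA(pfn_to_nid(pfn))->mf_stats;

        switch (result) {
        case MF_IGNORED:
                atomic_long_inc(&mf_stats->pages_ignored);
                break;
        case MF_FAILED:
                atomic_long_inc(&mf_stats->pages_failed);
                break;
        case MF_DELAYED:
                atomic_long_inc(&mf_stats->pages_delayed);
                break;
        case MF_RECOVERED:
                atomic_long_inc(&mf_stats->pages_recovered);
                break;
        default:
                break;
        }
        atomic_long_inc(&mf_stats->pages_poisoned);
}

The sysfs show routines would then read the values with atomic_long_read().
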
HORIGUCHI NAOYA(堀口 直也) Jan. 19, 2023, 6:40 a.m. UTC | #4
On Wed, Jan 18, 2023 at 03:05:21PM -0800, Jiaqi Yan wrote:
> On Tue, Jan 17, 2023 at 1:03 AM HORIGUCHI NAOYA(堀口 直也)
> <naoya.horiguchi@nec.com> wrote:
> >
> > On Mon, Jan 16, 2023 at 12:16:13PM -0800, Andrew Morton wrote:
> > > On Mon, 16 Jan 2023 19:39:01 +0000 Jiaqi Yan <jiaqiyan@google.com> wrote:
> > ...
> > > > @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
> > > >  #undef slab
> > > >  #undef reserved
> > > >
> > > > +static void update_per_node_mf_stats(unsigned long pfn,
> > > > +                                enum mf_result result)
> > > > +{
> > > > +   int nid = MAX_NUMNODES;
> > > > +   struct memory_failure_stats *mf_stats = NULL;
> > > > +
> > > > +   nid = pfn_to_nid(pfn);
> > > > +   if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> > > > +           WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> > > > +           return;
> > > > +   }
> > > > +
> > > > +   mf_stats = &NODE_DATA(nid)->mf_stats;
> > > > +   switch (result) {
> > > > +   case MF_IGNORED:
> > > > +           ++mf_stats->pages_ignored;
> > >
> > > What is the locking here, to prevent concurrent increments?
> >
> > mf_mutex should prevent the race.
> 
> Since we are only incrementing these counters, I wonder if it makes
> sense to convert them into atomic_long_t (encapsulated inside
> memory_failure_stats) and do atomic_long_add, instead of introducing a
> struct mutex?

My understanding is that all callers of action_result() (which is the only
caller of update_per_node_mf_stats()) run with mf_mutex held, taken at the
beginning of memory_failure(), so concurrent increments should not happen
on the new counters.  But updating the counters with an atomic type/API is
fine by me.

Thanks,
Naoya Horiguchi
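
To make the locking argument concrete, here is a heavily simplified sketch
(error paths and most of the handling elided; call chain assumed from
current mm/memory-failure.c) of why plain increments do not race:

static DEFINE_MUTEX(mf_mutex);  /* file-scope lock in mm/memory-failure.c */

int memory_failure(unsigned long pfn, int flags)
{
        int res = 0;

        mutex_lock(&mf_mutex);

        /*
         * ... page lookup, PageHWPoison marking, unmapping, and then
         * identify_page_state() -> page_action() -> action_result(),
         * which with this patch also calls update_per_node_mf_stats().
         * All of it runs under mf_mutex, so non-atomic increments of the
         * per-node counters cannot be executed concurrently.
         */

        mutex_unlock(&mf_mutex);
        return res;
}
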
Jiaqi Yan Jan. 19, 2023, 6:05 p.m. UTC | #5
Oh, sorry, I misunderstood: I thought you were suggesting introducing a
new mutex for these counters.
With the existing mf_mutex, I don't think atomics are really necessary;
I will just leave them as unsigned long.
Thanks for the clarification.

On Wed, Jan 18, 2023 at 10:40 PM HORIGUCHI NAOYA(堀口 直也)
<naoya.horiguchi@nec.com> wrote:
>
> On Wed, Jan 18, 2023 at 03:05:21PM -0800, Jiaqi Yan wrote:
> > On Tue, Jan 17, 2023 at 1:03 AM HORIGUCHI NAOYA(堀口 直也)
> > <naoya.horiguchi@nec.com> wrote:
> > >
> > > On Mon, Jan 16, 2023 at 12:16:13PM -0800, Andrew Morton wrote:
> > > > On Mon, 16 Jan 2023 19:39:01 +0000 Jiaqi Yan <jiaqiyan@google.com> wrote:
> > > ...
> > > > > @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
> > > > >  #undef slab
> > > > >  #undef reserved
> > > > >
> > > > > +static void update_per_node_mf_stats(unsigned long pfn,
> > > > > +                                enum mf_result result)
> > > > > +{
> > > > > +   int nid = MAX_NUMNODES;
> > > > > +   struct memory_failure_stats *mf_stats = NULL;
> > > > > +
> > > > > +   nid = pfn_to_nid(pfn);
> > > > > +   if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> > > > > +           WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> > > > > +           return;
> > > > > +   }
> > > > > +
> > > > > +   mf_stats = &NODE_DATA(nid)->mf_stats;
> > > > > +   switch (result) {
> > > > > +   case MF_IGNORED:
> > > > > +           ++mf_stats->pages_ignored;
> > > >
> > > > What is the locking here, to prevent concurrent increments?
> > >
> > > mf_mutex should prevent the race.
> >
> > Since we are only incrementing these counters, I wonder if it makes
> > sense to convert them into atomic_long_t (encapsulated inside
> > memory_failure_stats) and do atomic_long_add, instead of introducing a
> > struct mutex?
>
> My understanding is that all callers of action_result() (which is the only
> caller of update_per_node_mf_stats()) run with mf_mutex held, taken at the
> beginning of memory_failure(), so concurrent increments should not happen
> on the new counters.  But updating the counters with an atomic type/API is
> fine by me.
>
> Thanks,
> Naoya Horiguchi

Patch

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index cb782fa552d5..c90417cfcda4 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
 #undef slab
 #undef reserved
 
+static void update_per_node_mf_stats(unsigned long pfn,
+				     enum mf_result result)
+{
+	int nid = MAX_NUMNODES;
+	struct memory_failure_stats *mf_stats = NULL;
+
+	nid = pfn_to_nid(pfn);
+	if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
+		WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
+		return;
+	}
+
+	mf_stats = &NODE_DATA(nid)->mf_stats;
+	switch (result) {
+	case MF_IGNORED:
+		++mf_stats->pages_ignored;
+		break;
+	case MF_FAILED:
+		++mf_stats->pages_failed;
+		break;
+	case MF_DELAYED:
+		++mf_stats->pages_delayed;
+		break;
+	case MF_RECOVERED:
+		++mf_stats->pages_recovered;
+		break;
+	default:
+		WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result);
+		break;
+	}
+	++mf_stats->pages_poisoned;
+}
+
 /*
  * "Dirty/Clean" indication is not 100% accurate due to the possibility of
  * setting PG_dirty outside page lock. See also comment above set_page_dirty().
@@ -1237,6 +1270,9 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type,
 	trace_memory_failure_event(pfn, type, result);
 
 	num_poisoned_pages_inc(pfn);
+
+	update_per_node_mf_stats(pfn, result);
+
 	pr_err("%#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);