@@ -33,6 +33,9 @@
},
{
"$ref": "#/$events/shutdown"
+ },
+ {
+ "$ref": "#/$events/media_error"
}
],
@@ -63,6 +66,31 @@
"i_generation": {
"description": "Inode generation number",
"type": "integer"
+ },
+ "storage_devs": {
+ "description": "Storage devices in a filesystem",
+ "_comment": [
+ "One of:",
+ "",
+ " * datadev: filesystem device",
+ " * logdev: external log device",
+ " * rtdev: realtime volume"
+ ],
+ "enum": [
+ "datadev",
+ "logdev",
+ "rtdev"
+ ]
+ },
+ "xfs_daddr_t": {
+ "description": "Storage device address, in units of 512-byte blocks",
+ "type": "integer",
+ "minimum": 0
+ },
+ "bbcount": {
+ "description": "Storage space length, in units of 512-byte blocks",
+ "type": "integer",
+ "minimum": 1
}
},
@@ -448,6 +476,43 @@
"domain",
"reasons"
]
+ },
+ "media_error": {
+ "title": "Media Error",
+ "description": [
+ "A storage device reported a media error.",
+ "The domain element tells us which storage",
+ "device reported the media failure. The",
+ "daddr and bbcount elements tell us where",
+ "inside that device the failure was observed."
+ ],
+ "type": "object",
+
+ "properties": {
+ "type": {
+ "const": "media"
+ },
+ "time_ns": {
+ "$ref": "#/$defs/time_ns"
+ },
+ "domain": {
+ "$ref": "#/$defs/storage_devs"
+ },
+ "daddr": {
+ "$ref": "#/$defs/xfs_daddr_t"
+ },
+ "bbcount": {
+ "$ref": "#/$defs/bbcount"
+ }
+ },
+
+ "required": [
+ "type",
+ "time_ns",
+ "domain",
+ "daddr",
+ "bbcount"
+ ]
}
}
}
@@ -21,6 +21,7 @@
#include "xfs_health.h"
#include "xfs_healthmon.h"
#include "xfs_fsops.h"
+#include "xfs_notify_failure.h"
#include <linux/anon_inodes.h>
#include <linux/eventpoll.h>
@@ -70,6 +71,7 @@ struct xfs_healthmon {
/* live update hooks */
struct xfs_shutdown_hook shook;
struct xfs_health_hook hhook;
+ struct xfs_media_error_hook mhook;
/* filesystem mount, or NULL if we've unmounted */
struct xfs_mount *mp;
@@ -423,6 +425,59 @@ xfs_healthmon_shutdown_hook(
return NOTIFY_DONE;
}
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_FS_DAX)
+/*
+ * Notifier callback for media errors.  Translate the failed device into a
+ * health monitor domain and queue a media error event that records the
+ * affected range (daddr and bbcount) for that device.
+ *
+ * @nb is the notifier block embedded in the health monitor's media error
+ * hook, and @data points to the struct xfs_media_error_params describing the
+ * failure.  @action is not used.
+ *
+ * The event is dropped without being queued if live updates cannot be
+ * started, if the failed device is not one that we know how to report, or if
+ * no event could be allocated.  Always returns NOTIFY_DONE.
+ */
+STATIC int
+xfs_healthmon_media_error_hook(
+	struct notifier_block		*nb,
+	unsigned long			action,
+	void				*data)
+{
+	struct xfs_healthmon		*hm;
+	struct xfs_healthmon_event	*event;
+	struct xfs_media_error_params	*p = data;
+	enum xfs_healthmon_domain	domain;
+	int				error;
+
+	hm = container_of(nb, struct xfs_healthmon, mhook.error_hook.nb);
+
+	mutex_lock(&hm->lock);
+
+	trace_xfs_healthmon_media_error_hook(p, hm->events,
+			hm->lost_prev_event);
+
+	error = xfs_healthmon_start_live_update(hm);
+	if (error)
+		goto out_unlock;
+
+	switch (p->fdev) {
+	case XFS_FAILED_DATADEV:
+		domain = XFS_HEALTHMON_DATADEV;
+		break;
+	case XFS_FAILED_LOGDEV:
+		domain = XFS_HEALTHMON_LOGDEV;
+		break;
+	case XFS_FAILED_RTDEV:
+		domain = XFS_HEALTHMON_RTDEV;
+		break;
+	default:
+		/*
+		 * We don't know which device this is, so there is no storage
+		 * device domain to report it under.  Drop the event instead
+		 * of queueing a media error with an unrelated domain.
+		 */
+		goto out_unlock;
+	}
+
+	event = xfs_healthmon_alloc(hm, XFS_HEALTHMON_MEDIA_ERROR, domain);
+	if (!event)
+		goto out_unlock;
+
+	event->daddr = p->daddr;
+	event->bbcount = p->bbcount;
+
+	/* The event is still ours to free if it could not be queued. */
+	error = xfs_healthmon_push(hm, event);
+	if (error)
+		kfree(event);
+
+out_unlock:
+	mutex_unlock(&hm->lock);
+	return NOTIFY_DONE;
+}
+#endif
+
/* Render the health update type as a string. */
STATIC const char *
xfs_healthmon_typestring(
@@ -435,6 +490,7 @@ xfs_healthmon_typestring(
[XFS_HEALTHMON_SICK] = "sick",
[XFS_HEALTHMON_CORRUPT] = "corrupt",
[XFS_HEALTHMON_HEALTHY] = "healthy",
+ [XFS_HEALTHMON_MEDIA_ERROR] = "media",
};
if (event->type >= ARRAY_SIZE(type_strings))
@@ -454,6 +510,9 @@ xfs_healthmon_domstring(
[XFS_HEALTHMON_AG] = "perag",
[XFS_HEALTHMON_INODE] = "inode",
[XFS_HEALTHMON_RTGROUP] = "rtgroup",
+ [XFS_HEALTHMON_DATADEV] = "datadev",
+ [XFS_HEALTHMON_LOGDEV] = "logdev",
+ [XFS_HEALTHMON_RTDEV] = "rtdev",
};
if (event->domain >= ARRAY_SIZE(dom_strings))
@@ -665,6 +724,23 @@ xfs_healthmon_format_shutdown(
event->flags);
}
+/*
+ * Emit the media error specific fields of an event into @outbuf: first the
+ * device address (daddr) of the failure, then its length (bbcount).
+ *
+ * Formatting stops at the first seq_buf_printf call that returns a negative
+ * value, and that value is handed back to the caller.  Otherwise the result
+ * of the final seq_buf_printf call is returned.
+ */
+static int
+xfs_healthmon_format_media_error(
+	struct seq_buf			*outbuf,
+	const struct xfs_healthmon_event *event)
+{
+	int				error;
+
+	error = seq_buf_printf(outbuf, " \"daddr\": %llu,\n", event->daddr);
+	if (error >= 0)
+		error = seq_buf_printf(outbuf, " \"bbcount\": %llu,\n",
+				event->bbcount);
+
+	return error;
+}
+
static inline void
xfs_healthmon_reset_outbuf(
struct xfs_healthmon *hm)
@@ -730,6 +806,11 @@ xfs_healthmon_format(
case XFS_HEALTHMON_INODE:
ret = xfs_healthmon_format_inode(outbuf, event);
break;
+ case XFS_HEALTHMON_DATADEV:
+ case XFS_HEALTHMON_LOGDEV:
+ case XFS_HEALTHMON_RTDEV:
+ ret = xfs_healthmon_format_media_error(outbuf, event);
+ break;
}
if (ret < 0)
goto overrun;
@@ -990,6 +1071,7 @@ xfs_healthmon_detach_hooks(
* through the health monitoring subsystem from xfs_fs_put_super, so
* it is now time to detach the hooks.
*/
+ xfs_media_error_hook_del(hm->mp, &hm->mhook);
xfs_shutdown_hook_del(hm->mp, &hm->shook);
xfs_health_hook_del(hm->mp, &hm->hhook);
return;
@@ -1011,6 +1093,7 @@ xfs_healthmon_release(
wake_up_all(&hm->wait);
iterate_supers_type(hm->fstyp, xfs_healthmon_detach_hooks, hm);
+ xfs_media_error_hook_disable();
xfs_shutdown_hook_disable();
xfs_health_hook_disable();
@@ -1092,6 +1175,7 @@ xfs_ioc_health_monitor(
/* Enable hooks to receive events, generally. */
xfs_health_hook_enable();
xfs_shutdown_hook_enable();
+ xfs_media_error_hook_enable();
/* Attach specific event hooks to this monitor. */
xfs_health_hook_setup(&hm->hhook, xfs_healthmon_metadata_hook);
@@ -1104,11 +1188,16 @@ xfs_ioc_health_monitor(
if (ret)
goto out_healthhook;
+ xfs_media_error_hook_setup(&hm->mhook, xfs_healthmon_media_error_hook);
+ ret = xfs_media_error_hook_add(mp, &hm->mhook);
+ if (ret)
+ goto out_shutdownhook;
+
/* Set up VFS file and file descriptor. */
name = kasprintf(GFP_KERNEL, "XFS (%s): healthmon", mp->m_super->s_id);
if (!name) {
ret = -ENOMEM;
- goto out_shutdownhook;
+ goto out_mediahook;
}
fd = anon_inode_getfd(name, &xfs_healthmon_fops, hm,
@@ -1116,18 +1205,21 @@ xfs_ioc_health_monitor(
kvfree(name);
if (fd < 0) {
ret = fd;
- goto out_shutdownhook;
+ goto out_mediahook;
}
trace_xfs_healthmon_create(mp, hmo.flags, hmo.format);
return fd;
+out_mediahook:
+ xfs_media_error_hook_del(mp, &hm->mhook);
out_shutdownhook:
xfs_shutdown_hook_del(mp, &hm->shook);
out_healthhook:
xfs_health_hook_del(mp, &hm->hhook);
out_hooks:
+ xfs_media_error_hook_disable();
xfs_health_hook_disable();
xfs_shutdown_hook_disable();
mutex_destroy(&hm->lock);
@@ -17,6 +17,9 @@ enum xfs_healthmon_type {
XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */
XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */
XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */
+
+ /* media errors */
+ XFS_HEALTHMON_MEDIA_ERROR,
};
enum xfs_healthmon_domain {
@@ -27,6 +30,11 @@ enum xfs_healthmon_domain {
XFS_HEALTHMON_AG, /* allocation group metadata */
XFS_HEALTHMON_INODE, /* inode metadata */
XFS_HEALTHMON_RTGROUP, /* realtime group metadata */
+
+ /* media errors */
+ XFS_HEALTHMON_DATADEV,
+ XFS_HEALTHMON_RTDEV,
+ XFS_HEALTHMON_LOGDEV,
};
struct xfs_healthmon_event {
@@ -60,6 +68,11 @@ struct xfs_healthmon_event {
uint32_t gen;
xfs_ino_t ino;
};
+ /* media errors */
+ struct {
+ xfs_daddr_t daddr;
+ uint64_t bbcount;
+ };
};
};
@@ -54,6 +54,7 @@
#include "xfs_fsrefs.h"
#include "xfs_health.h"
#include "xfs_healthmon.h"
+#include "xfs_notify_failure.h"
/*
* We include this last to have the helpers above available for the trace
@@ -108,6 +108,7 @@ struct xfs_fsrefs_irec;
struct xfs_rtgroup;
struct xfs_healthmon_event;
struct xfs_health_update_params;
+struct xfs_media_error_params;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -6345,6 +6346,56 @@ TRACE_EVENT(xfs_healthmon_metadata_hook,
__entry->events,
__entry->lost_prev)
);
+
+#if defined(CONFIG_XFS_LIVE_HOOKS) && defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_FS_DAX)
+/*
+ * Trace a media error arriving at the health monitor hook.  Records the
+ * filesystem device, the device that reported the error, the affected range
+ * (daddr and bbcount), the pre_remove flag, and the monitor's event counter
+ * and lost_prev state as passed in by the caller.
+ */
+TRACE_EVENT(xfs_healthmon_media_error_hook,
+	TP_PROTO(const struct xfs_media_error_params *p,
+		 unsigned int events, bool lost_prev),
+	TP_ARGS(p, events, lost_prev),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(dev_t, error_dev)
+		__field(uint64_t, daddr)
+		__field(uint64_t, bbcount)
+		__field(int, pre_remove)
+		__field(unsigned int, events)
+		__field(bool, lost_prev)
+	),
+	TP_fast_assign(
+		struct xfs_mount	*mp = p->mp;
+		struct xfs_buftarg	*btp = NULL;
+
+		switch (p->fdev) {
+		case XFS_FAILED_DATADEV:
+			btp = mp->m_ddev_targp;
+			break;
+		case XFS_FAILED_LOGDEV:
+			btp = mp->m_logdev_targp;
+			break;
+		case XFS_FAILED_RTDEV:
+			btp = mp->m_rtdev_targp;
+			break;
+		}
+
+		__entry->dev = mp->m_super->s_dev;
+		/*
+		 * Always assign error_dev so that the printed value is never
+		 * left uninitialized when there is no buftarg for the failed
+		 * device; report 0:0 in that case.
+		 */
+		__entry->error_dev = btp ? btp->bt_dev : 0;
+		__entry->daddr = p->daddr;
+		__entry->bbcount = p->bbcount;
+		__entry->pre_remove = p->pre_remove;
+		__entry->events = events;
+		__entry->lost_prev = lost_prev;
+	),
+	TP_printk("dev %d:%d error_dev %d:%d daddr 0x%llx bbcount 0x%llx pre_remove? %d events %u lost_prev? %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  MAJOR(__entry->error_dev), MINOR(__entry->error_dev),
+		  __entry->daddr,
+		  __entry->bbcount,
+		  __entry->pre_remove,
+		  __entry->events,
+		  __entry->lost_prev)
+);
+#endif
#endif /* CONFIG_XFS_HEALTH_MONITOR */
#endif /* _TRACE_XFS_H */