@@ -8,7 +8,9 @@ opened by userspace. This can be used in conjunction with::
* Key/keyring notifications
- * General device event notifications
+ * General device event notifications, including::
+
+ * Block layer event notifications
The notifications buffers can be enabled by:
@@ -177,6 +177,15 @@ config BLK_SED_OPAL
Enabling this option enables users to setup/unlock/lock
Locking ranges for SED devices using the Opal protocol.
+config BLK_NOTIFICATIONS
+ bool "Block layer event notifications"
+ depends on DEVICE_NOTIFICATIONS
+ help
+ This option provides support for getting block layer event
+ notifications. This makes use of the /dev/watch_queue misc device to
+ handle the notification buffer and provides the device_notify() system
+ call to enable/disable watches.
+
menu "Partition Types"
source "block/partitions/Kconfig"
@@ -184,6 +184,22 @@ static const struct {
[BLK_STS_IOERR] = { -EIO, "I/O" },
};
+#ifdef CONFIG_BLK_NOTIFICATIONS
+static const
+enum block_notification_type blk_notifications[ARRAY_SIZE(blk_errors)] = {
+ [BLK_STS_TIMEOUT] = NOTIFY_BLOCK_ERROR_TIMEOUT,
+ [BLK_STS_NOSPC] = NOTIFY_BLOCK_ERROR_NO_SPACE,
+ [BLK_STS_TRANSPORT] = NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT,
+ [BLK_STS_TARGET] = NOTIFY_BLOCK_ERROR_CRITICAL_TARGET,
+ [BLK_STS_NEXUS] = NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS,
+ [BLK_STS_MEDIUM] = NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM,
+ [BLK_STS_PROTECTION] = NOTIFY_BLOCK_ERROR_PROTECTION,
+ [BLK_STS_RESOURCE] = NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE,
+ [BLK_STS_DEV_RESOURCE] = NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE,
+ [BLK_STS_IOERR] = NOTIFY_BLOCK_ERROR_IO,
+};
+#endif
+
blk_status_t errno_to_blk_status(int errno)
{
int i;
@@ -224,6 +240,19 @@ static void print_req_error(struct request *req, blk_status_t status,
req->cmd_flags & ~REQ_OP_MASK,
req->nr_phys_segments,
IOPRIO_PRIO_CLASS(req->ioprio));
+
+#ifdef CONFIG_BLK_NOTIFICATIONS
+ if (blk_notifications[idx]) {
+ struct block_notification n = {
+ .watch.type = WATCH_TYPE_BLOCK_NOTIFY,
+ .watch.subtype = blk_notifications[idx],
+ .watch.info = watch_sizeof(n),
+ .dev = req->rq_disk ? disk_devt(req->rq_disk) : 0,
+ .sector = blk_rq_pos(req),
+ };
+ post_block_notification(&n);
+ }
+#endif
}
static void req_bio_endio(struct request *rq, struct bio *bio,
@@ -27,6 +27,7 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
+#include <linux/watch_queue.h>
struct module;
struct scsi_ioctl_command;
@@ -1773,6 +1774,20 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
}
#endif /* CONFIG_BLK_DEV_ZONED */
+#ifdef CONFIG_BLK_NOTIFICATIONS
+static inline void post_block_notification(struct block_notification *n)
+{
+ u64 id = 0; /* Might want to allow dev# here. */
+
+ post_device_notification(&n->watch, id);
+}
+#else
+static inline void post_block_notification(struct block_notification *n)
+{
+}
+#endif
+
+
#else /* CONFIG_BLOCK */
struct block_device;
@@ -11,7 +11,8 @@
enum watch_notification_type {
WATCH_TYPE_META = 0, /* Special record */
WATCH_TYPE_KEY_NOTIFY = 1, /* Key change event notification */
- WATCH_TYPE___NR = 2
+ WATCH_TYPE_BLOCK_NOTIFY = 2, /* Block layer event notification */
+ WATCH_TYPE___NR = 3
};
enum watch_meta_notification_subtype {
@@ -98,4 +99,31 @@ struct key_notification {
__u32 aux; /* Per-type auxiliary data */
};
+/*
+ * Type of block layer notification.
+ */
+enum block_notification_type {
+ NOTIFY_BLOCK_ERROR_TIMEOUT = 1, /* Timeout error */
+ NOTIFY_BLOCK_ERROR_NO_SPACE = 2, /* Critical space allocation error */
+ NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT = 3, /* Recoverable transport error */
+ NOTIFY_BLOCK_ERROR_CRITICAL_TARGET = 4, /* Critical target error */
+ NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS = 5, /* Critical nexus error */
+ NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM = 6, /* Critical medium error */
+ NOTIFY_BLOCK_ERROR_PROTECTION = 7, /* Protection error */
+ NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE = 8, /* Kernel resource error */
+ NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE = 9, /* Device resource error */
+ NOTIFY_BLOCK_ERROR_IO = 10, /* Other I/O error */
+};
+
+/*
+ * Block layer notification record.
+ * - watch.type = WATCH_TYPE_BLOCK_NOTIFY
+ * - watch.subtype = enum block_notification_type
+ */
+struct block_notification {
+ struct watch_notification watch; /* WATCH_TYPE_BLOCK_NOTIFY */
+ __u64 dev; /* Device number */
+ __u64 sector; /* Affected sector */
+};
+
#endif /* _UAPI_LINUX_WATCH_QUEUE_H */
@@ -59,6 +59,32 @@ static void saw_key_change(struct watch_notification *n, size_t len)
k->key_id, n->subtype, key_subtypes[n->subtype], k->aux);
}
+static const char *block_subtypes[256] = {
+ [NOTIFY_BLOCK_ERROR_TIMEOUT] = "timeout",
+ [NOTIFY_BLOCK_ERROR_NO_SPACE] = "critical space allocation",
+ [NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT] = "recoverable transport",
+ [NOTIFY_BLOCK_ERROR_CRITICAL_TARGET] = "critical target",
+ [NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS] = "critical nexus",
+ [NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM] = "critical medium",
+ [NOTIFY_BLOCK_ERROR_PROTECTION] = "protection",
+ [NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE] = "kernel resource",
+ [NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE] = "device resource",
+ [NOTIFY_BLOCK_ERROR_IO] = "I/O",
+};
+
+static void saw_block_change(struct watch_notification *n, size_t len)
+{
+ struct block_notification *b = (struct block_notification *)n;
+
+ if (len < sizeof(struct block_notification))
+ return;
+
+ printf("BLOCK %08llx e=%u[%s] s=%llx\n",
+ (unsigned long long)b->dev,
+ n->subtype, block_subtypes[n->subtype],
+ (unsigned long long)b->sector);
+}
+
/*
* Consume and display events.
*/
@@ -132,6 +158,9 @@ static void consumer(int fd)
case WATCH_TYPE_KEY_NOTIFY:
saw_key_change(&n.n, len);
break;
+ case WATCH_TYPE_BLOCK_NOTIFY:
+ saw_block_change(&n.n, len);
+ break;
default:
printf("other type\n");
break;
@@ -143,12 +172,16 @@ static void consumer(int fd)
}
static struct watch_notification_filter filter = {
- .nr_filters = 1,
+ .nr_filters = 2,
.filters = {
[0] = {
.type = WATCH_TYPE_KEY_NOTIFY,
.subtype_filter[0] = UINT_MAX,
},
+ [1] = {
+ .type = WATCH_TYPE_BLOCK_NOTIFY,
+ .subtype_filter[0] = UINT_MAX,
+ },
},
};
@@ -182,6 +215,11 @@ int main(int argc, char **argv)
exit(1);
}
+ if (syscall(__NR_watch_devices, fd, 0x04, 0) == -1) {
+ perror("watch_devices");
+ exit(1);
+ }
+
consumer(fd);
exit(0);
}
Add a block layer notification mechanism whereby notifications about block-layer events such as I/O errors, can be reported to a monitoring process asynchronously. Firstly, an event queue needs to be created: pipe2(fds, O_TMPFILE); ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256); then a notification can be set up to report block notifications via that queue: struct watch_notification_filter filter = { .nr_filters = 1, .filters = { [0] = { .type = WATCH_TYPE_BLOCK_NOTIFY, .subtype_filter[0] = UINT_MAX; }, }, }; ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter); watch_devices(fds[1], 12); After that, records will be placed into the queue when, for example, errors occur on a block device. Records are of the following format: struct block_notification { struct watch_notification watch; __u64 dev; __u64 sector; } *n; Where: n->watch.type will be WATCH_TYPE_BLOCK_NOTIFY n->watch.subtype will be the type of notification, such as NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM. n->watch.info & WATCH_INFO_LENGTH will indicate the length of the record. n->watch.info & WATCH_INFO_ID will be the second argument to watch_devices(), shifted. n->dev will be the device numbers munged together. n->sector will indicate the affected sector (if appropriate for the event). Note that it is permissible for event records to be of variable length - or, at least, the length may be dependent on the subtype. Signed-off-by: David Howells <dhowells@redhat.com> --- Documentation/watch_queue.rst | 4 +++- block/Kconfig | 9 +++++++++ block/blk-core.c | 29 ++++++++++++++++++++++++++++ include/linux/blkdev.h | 15 ++++++++++++++ include/uapi/linux/watch_queue.h | 30 ++++++++++++++++++++++++++++- samples/watch_queue/watch_test.c | 40 +++++++++++++++++++++++++++++++++++++- 6 files changed, 124 insertions(+), 3 deletions(-)