From patchwork Wed Jun 1 13:20:29 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexey Gladkov X-Patchwork-Id: 12866902 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 31AFAC433FE for ; Wed, 1 Jun 2022 13:20:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353193AbiFANU6 (ORCPT ); Wed, 1 Jun 2022 09:20:58 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41128 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353169AbiFANUy (ORCPT ); Wed, 1 Jun 2022 09:20:54 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [205.139.111.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id F329C175A3 for ; Wed, 1 Jun 2022 06:20:52 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-606-W5A1QcP2OjSmyMrvEcSVfQ-1; Wed, 01 Jun 2022 09:20:48 -0400 X-MC-Unique: W5A1QcP2OjSmyMrvEcSVfQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id DD470811E81; Wed, 1 Jun 2022 13:20:47 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id C17D0414A7E7; Wed, 1 Jun 2022 13:20:45 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 1/4] sysctl: API extension for handling sysctl Date: Wed, 1 Jun 2022 15:20:29 +0200 Message-Id: <5ec6759ab3b617f9c12449a9606b6f0b5a7582d0.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This adds optional functions for handling open, read, and write operations that can be customized for each sysctl file. It also introduces a ctl_context that persists from the opening to the closing of a file in /proc/sys. The context allows us to store dynamic information at the time the file is opened. This eliminates the need to duplicate ctl_table in order to dynamically change .data, .extra1 or .extra2. This API extends the existing one and does not require any changes to already existing sysctl handlers. 
Signed-off-by: Alexey Gladkov --- fs/proc/proc_sysctl.c | 71 +++++++++++++++++++++++++++++++++++------- include/linux/sysctl.h | 20 ++++++++++-- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7d9cfc730bd4..d3d43e738f01 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -560,6 +560,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = file_inode(iocb->ki_filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_fops *fops = table->ctl_fops; size_t count = iov_iter_count(iter); char *kbuf; ssize_t error; @@ -577,7 +578,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, /* if that can happen at all, it should be -EINVAL, not -EISDIR */ error = -EINVAL; - if (!table->proc_handler) + if (!table->proc_handler && !fops) goto out; /* don't even try if the size is too large */ @@ -600,8 +601,20 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, if (error) goto out_free_buf; - /* careful: calling conventions are nasty here */ - error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); + if (fops) { + struct ctl_context *ctx = iocb->ki_filp->private_data; + + if (write && fops->write) + error = fops->write(ctx, iocb->ki_filp, kbuf, &count, &iocb->ki_pos); + else if (!write && fops->read) + error = fops->read(ctx, iocb->ki_filp, kbuf, &count, &iocb->ki_pos); + else + error = -EINVAL; + } else { + /* careful: calling conventions are nasty here */ + error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); + } + if (error) goto out_free_buf; @@ -634,17 +647,50 @@ static int proc_sys_open(struct inode *inode, struct file *filp) { struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_context *ctx; + int ret = 0; /* sysctl 
was unregistered */ if (IS_ERR(head)) return PTR_ERR(head); - if (table->poll) - filp->private_data = proc_sys_poll_event(table->poll); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->table = table; + filp->private_data = ctx; + + if (table->ctl_fops && table->ctl_fops->open) + ret = table->ctl_fops->open(ctx, inode, filp); + + if (!ret && table->poll) + ctx->poll_event = proc_sys_poll_event(table->poll); sysctl_head_finish(head); - return 0; + return ret; +} + +static int proc_sys_release(struct inode *inode, struct file *filp) +{ + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_context *ctx = filp->private_data; + int ret = 0; + + if (IS_ERR(head)) + return PTR_ERR(head); + + if (table->ctl_fops && table->ctl_fops->release) + ret = table->ctl_fops->release(ctx, inode, filp); + + sysctl_head_finish(head); + + kfree(ctx); + filp->private_data = NULL; + + return ret; } static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) @@ -653,23 +699,23 @@ static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; __poll_t ret = DEFAULT_POLLMASK; - unsigned long event; + struct ctl_context *ctx; /* sysctl was unregistered */ if (IS_ERR(head)) return EPOLLERR | EPOLLHUP; - if (!table->proc_handler) + if (!table->proc_handler && !table->ctl_fops) goto out; if (!table->poll) goto out; - event = (unsigned long)filp->private_data; + ctx = filp->private_data; poll_wait(filp, &table->poll->wait, wait); - if (event != atomic_read(&table->poll->event)) { - filp->private_data = proc_sys_poll_event(table->poll); + if (ctx->poll_event != atomic_read(&table->poll->event)) { + ctx->poll_event = proc_sys_poll_event(table->poll); ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } @@ -866,6 +912,7 @@ static int proc_sys_getattr(struct user_namespace 
*mnt_userns, static const struct file_operations proc_sys_file_operations = { .open = proc_sys_open, + .release = proc_sys_release, .poll = proc_sys_poll, .read_iter = proc_sys_read, .write_iter = proc_sys_write, @@ -1153,7 +1200,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) else err |= sysctl_check_table_array(path, table); } - if (!table->proc_handler) + if (!table->proc_handler && !table->ctl_fops) err |= sysctl_err(path, table, "No proc_handler"); if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6353d6db69b2..ca5657c9fcb2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -116,9 +116,9 @@ struct ctl_table_poll { wait_queue_head_t wait; }; -static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) +static inline unsigned long proc_sys_poll_event(struct ctl_table_poll *poll) { - return (void *)(unsigned long)atomic_read(&poll->event); + return (unsigned long)atomic_read(&poll->event); } #define __CTL_TABLE_POLL_INITIALIZER(name) { \ @@ -128,6 +128,21 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) #define DEFINE_CTL_TABLE_POLL(name) \ struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) +struct ctl_context { + struct ctl_table *table; + unsigned long poll_event; + void *ctl_data; +}; + +struct inode; + +struct ctl_fops { + int (*open) (struct ctl_context *, struct inode *, struct file *); + int (*release) (struct ctl_context *, struct inode *, struct file *); + ssize_t (*read) (struct ctl_context *, struct file *, char *, size_t *, loff_t *); + ssize_t (*write) (struct ctl_context *, struct file *, char *, size_t *, loff_t *); +}; + /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ @@ -139,6 +154,7 @@ struct ctl_table { struct ctl_table_poll *poll; void *extra1; void *extra2; + struct ctl_fops *ctl_fops; } 
__randomize_layout; struct ctl_node { From patchwork Wed Jun 1 13:20:30 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexey Gladkov X-Patchwork-Id: 12866905 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D6037C433FE for ; Wed, 1 Jun 2022 13:21:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353223AbiFANVF (ORCPT ); Wed, 1 Jun 2022 09:21:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41240 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353169AbiFANU7 (ORCPT ); Wed, 1 Jun 2022 09:20:59 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [207.211.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 776C44D61F for ; Wed, 1 Jun 2022 06:20:57 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-519-4Z-CMtpGOVKaHbq7mveZtw-1; Wed, 01 Jun 2022 09:20:51 -0400 X-MC-Unique: 4Z-CMtpGOVKaHbq7mveZtw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 4838C1C0F68B; Wed, 1 Jun 2022 13:20:50 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 2E85B414A7E7; Wed, 1 Jun 2022 13:20:48 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:30 +0200 Message-Id: <857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Dynamic memory allocation is needed to modify .data and specify the per namespace parameter. The new sysctl API allows us to get rid of the need for such modification. Signed-off-by: Alexey Gladkov --- include/linux/ipc_namespace.h | 18 --- ipc/ipc_sysctl.c | 236 +++++++++++++++++----------------- ipc/namespace.c | 4 - 3 files changed, 121 insertions(+), 137 deletions(-) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e3e8c8662b49..51c2c247c447 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -191,22 +191,4 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) } #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ - -#ifdef CONFIG_SYSVIPC_SYSCTL - -bool setup_ipc_sysctls(struct ipc_namespace *ns); -void retire_ipc_sysctls(struct ipc_namespace *ns); - -#else /* CONFIG_SYSVIPC_SYSCTL */ - -static inline void retire_ipc_sysctls(struct ipc_namespace *ns) -{ -} - -static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) -{ - return true; -} - -#endif /* CONFIG_SYSVIPC_SYSCTL */ #endif diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index ef313ecfb53a..833b670c38f3 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -68,26 +68,94 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, return ret; } +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table); + +static int ipc_sys_open(struct ctl_context *ctx, struct inode *inode, struct file 
*file) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + + // For now, we only allow changes in init_user_ns. + if (ns->user_ns != &init_user_ns) + return -EPERM; + +#ifdef CONFIG_CHECKPOINT_RESTORE + int index = (ctx->table - ipc_sysctls); + + switch (index) { + case IPC_SYSCTL_SEM_NEXT_ID: + case IPC_SYSCTL_MSG_NEXT_ID: + case IPC_SYSCTL_SHM_NEXT_ID: + if (!checkpoint_restore_ns_capable(ns->user_ns)) + return -EPERM; + break; + } +#endif + ctx->ctl_data = ns; + return 0; +} + +static ssize_t ipc_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} + +static ssize_t ipc_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 1, buffer, lenp, ppos); +} + +static struct ctl_fops ipc_sys_fops = { + .open = ipc_sys_open, + .read = ipc_sys_read, + .write = ipc_sys_write, +}; + int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; +enum { + IPC_SYSCTL_SHMMAX, + IPC_SYSCTL_SHMALL, + IPC_SYSCTL_SHMMNI, + IPC_SYSCTL_SHM_RMID_FORCED, + IPC_SYSCTL_MSGMAX, + IPC_SYSCTL_MSGMNI, + IPC_SYSCTL_AUTO_MSGMNI, + IPC_SYSCTL_MSGMNB, + IPC_SYSCTL_SEM, +#ifdef CONFIG_CHECKPOINT_RESTORE + IPC_SYSCTL_SEM_NEXT_ID, + IPC_SYSCTL_MSG_NEXT_ID, + IPC_SYSCTL_SHM_NEXT_ID, +#endif + IPC_SYSCTL_COUNTS +}; + static struct ctl_table ipc_sysctls[] = { - { + [IPC_SYSCTL_SHMMAX] = { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHMALL] = { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen 
= sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHMMNI] = { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof(init_ipc_ns.shm_ctlmni), @@ -95,8 +163,9 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHM_RMID_FORCED] = { .procname = "shm_rmid_forced", .data = &init_ipc_ns.shm_rmid_forced, .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), @@ -104,8 +173,9 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_ipc_dointvec_minmax_orphans, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMAX] = { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof(init_ipc_ns.msg_ctlmax), @@ -113,8 +183,9 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMNI] = { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof(init_ipc_ns.msg_ctlmni), @@ -122,8 +193,9 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_AUTO_MSGMNI] = { .procname = "auto_msgmni", .data = NULL, .maxlen = sizeof(int), @@ -131,8 +203,9 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_ipc_auto_msgmni, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMNB] = { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), @@ -140,152 +213,85 @@ static struct ctl_table ipc_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, + .ctl_fops = 
&ipc_sys_fops, }, - { + [IPC_SYSCTL_SEM] = { .procname = "sem", .data = &init_ipc_ns.sem_ctls, .maxlen = 4*sizeof(int), .mode = 0644, .proc_handler = proc_ipc_sem_dointvec, + .ctl_fops = &ipc_sys_fops, }, #ifdef CONFIG_CHECKPOINT_RESTORE - { + [IPC_SYSCTL_SEM_NEXT_ID] = { .procname = "sem_next_id", .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0444, + .mode = 0666, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSG_NEXT_ID] = { .procname = "msg_next_id", .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0444, + .mode = 0666, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, + .ctl_fops = &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHM_NEXT_ID] = { .procname = "shm_next_id", .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0444, + .mode = 0666, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, + .ctl_fops = &ipc_sys_fops, }, #endif - {} + [IPC_SYSCTL_COUNTS] = {} }; -static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table) { - return &current->nsproxy->ipc_ns->ipc_set; -} - -static int set_is_seen(struct ctl_table_set *set) -{ - return &current->nsproxy->ipc_ns->ipc_set == set; -} - -static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) -{ - int mode = table->mode; - + struct ipc_namespace *ns = ctx->ctl_data; + + switch (ctx->table - ipc_sysctls) { + case IPC_SYSCTL_SHMMAX: return &ns->shm_ctlmax; + case IPC_SYSCTL_SHMALL: return &ns->shm_ctlall; + case IPC_SYSCTL_SHMMNI: return &ns->shm_ctlmni; + case IPC_SYSCTL_SHM_RMID_FORCED: return &ns->shm_rmid_forced; + case IPC_SYSCTL_MSGMAX: return &ns->msg_ctlmax; + 
case IPC_SYSCTL_MSGMNI: return &ns->msg_ctlmni; + case IPC_SYSCTL_MSGMNB: return &ns->msg_ctlmnb; + case IPC_SYSCTL_SEM: return &ns->sem_ctls; #ifdef CONFIG_CHECKPOINT_RESTORE - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - - if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || - (table->data == &ns->ids[IPC_MSG_IDS].next_id) || - (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) - mode = 0666; + case IPC_SYSCTL_SEM_NEXT_ID: return &ns->ids[IPC_SEM_IDS].next_id; + case IPC_SYSCTL_MSG_NEXT_ID: return &ns->ids[IPC_MSG_IDS].next_id; + case IPC_SYSCTL_SHM_NEXT_ID: return &ns->ids[IPC_SHM_IDS].next_id; #endif - return mode; -} - -static struct ctl_table_root set_root = { - .lookup = set_lookup, - .permissions = ipc_permissions, -}; - -bool setup_ipc_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); - - tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); - if (tbl) { - int i; - - for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { - if (tbl[i].data == &init_ipc_ns.shm_ctlmax) - tbl[i].data = &ns->shm_ctlmax; - - else if (tbl[i].data == &init_ipc_ns.shm_ctlall) - tbl[i].data = &ns->shm_ctlall; - - else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) - tbl[i].data = &ns->shm_ctlmni; - - else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) - tbl[i].data = &ns->shm_rmid_forced; - - else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) - tbl[i].data = &ns->msg_ctlmax; - - else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) - tbl[i].data = &ns->msg_ctlmni; - - else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) - tbl[i].data = &ns->msg_ctlmnb; - - else if (tbl[i].data == &init_ipc_ns.sem_ctls) - tbl[i].data = &ns->sem_ctls; -#ifdef CONFIG_CHECKPOINT_RESTORE - else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) - tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; - - else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) - tbl[i].data = 
&ns->ids[IPC_MSG_IDS].next_id; - - else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) - tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; -#endif - else - tbl[i].data = NULL; - } - - ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); - } - if (!ns->ipc_sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->ipc_set); - return false; } - - return true; + return NULL; } -void retire_ipc_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - tbl = ns->ipc_sysctls->ctl_table_arg; - unregister_sysctl_table(ns->ipc_sysctls); - retire_sysctl_set(&ns->ipc_set); - kfree(tbl); -} +static struct ctl_table ipc_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = ipc_sysctls, + }, + {} +}; static int __init ipc_sysctl_init(void) { - if (!setup_ipc_sysctls(&init_ipc_ns)) { - pr_warn("ipc sysctl registration failed\n"); - return -ENOMEM; - } + register_sysctl_table(ipc_root_table); return 0; } diff --git a/ipc/namespace.c b/ipc/namespace.c index 754f3237194a..f760243ca685 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -63,9 +63,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (!setup_mq_sysctls(ns)) goto fail_put; - if (!setup_ipc_sysctls(ns)) - goto fail_put; - sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -133,7 +130,6 @@ static void free_ipc_ns(struct ipc_namespace *ns) shm_exit_ns(ns); retire_mq_sysctls(ns); - retire_ipc_sysctls(ns); dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); From patchwork Wed Jun 1 13:20:31 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexey Gladkov X-Patchwork-Id: 12866903 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6B799C433EF for ; Wed, 1 Jun 2022 13:21:06 +0000 (UTC) Received: 
(majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353221AbiFANVF (ORCPT ); Wed, 1 Jun 2022 09:21:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41358 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353196AbiFANVD (ORCPT ); Wed, 1 Jun 2022 09:21:03 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [205.139.111.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 0C81D4D634 for ; Wed, 1 Jun 2022 06:20:57 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-264-O_Pd3AsIMGigCWcA94cQtA-1; Wed, 01 Jun 2022 09:20:53 -0400 X-MC-Unique: O_Pd3AsIMGigCWcA94cQtA-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id B024B802804; Wed, 1 Jun 2022 13:20:52 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 94762414A7E7; Wed, 1 Jun 2022 13:20:50 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 3/4] sysctl: userns: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:31 +0200 Message-Id: <81190e5e4879d53be2e1416bcad0b663421339d6.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Dynamic memory allocation is needed to modify .data and specify the per namespace parameter. 
The new sysctl API allows us to get rid of the need for such modification. Signed-off-by: Alexey Gladkov --- include/linux/user_namespace.h | 6 -- kernel/ucount.c | 116 +++++++++++++-------------------- kernel/user_namespace.c | 10 +-- 3 files changed, 46 insertions(+), 86 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..7b134516e5cb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -95,10 +95,6 @@ struct user_namespace { struct key *persistent_keyring_register; #endif struct work_struct work; -#ifdef CONFIG_SYSCTL - struct ctl_table_set set; - struct ctl_table_header *sysctls; -#endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; @@ -116,8 +112,6 @@ struct ucounts { extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; -bool setup_userns_sysctls(struct user_namespace *ns); -void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); diff --git a/kernel/ucount.c b/kernel/ucount.c index ee8e57fd6f90..4a5072671847 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -7,6 +7,7 @@ #include #include #include +#include struct ucounts init_ucounts = { .ns = &init_user_ns, @@ -26,38 +27,20 @@ static DEFINE_SPINLOCK(ucounts_lock); #ifdef CONFIG_SYSCTL -static struct ctl_table_set * -set_lookup(struct ctl_table_root *root) -{ - return &current_user_ns()->set; -} - -static int set_is_seen(struct ctl_table_set *set) -{ - return &current_user_ns()->set == set; -} - -static int set_permissions(struct ctl_table_header *head, - struct ctl_table *table) -{ - struct user_namespace *user_ns = - container_of(head->set, struct user_namespace, set); - int mode; - - /* Allow users with CAP_SYS_RESOURCE unrestrained 
access */ - if (ns_capable(user_ns, CAP_SYS_RESOURCE)) - mode = (table->mode & S_IRWXU) >> 6; - else - /* Allow all others at most read-only access */ - mode = table->mode & S_IROTH; - return (mode << 6) | (mode << 3) | mode; -} - -static struct ctl_table_root set_root = { - .lookup = set_lookup, - .permissions = set_permissions, +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, + struct file *file); +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); + +static struct ctl_fops user_sys_fops = { + .open = user_sys_open, + .read = user_sys_read, + .write = user_sys_write, }; +static long ue_dummy = 0; static long ue_zero = 0; static long ue_int_max = INT_MAX; @@ -66,9 +49,11 @@ static long ue_int_max = INT_MAX; .procname = name, \ .maxlen = sizeof(long), \ .mode = 0644, \ + .data = &ue_dummy, \ .proc_handler = proc_doulongvec_minmax, \ .extra1 = &ue_zero, \ .extra2 = &ue_int_max, \ + .ctl_fops = &user_sys_fops, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), @@ -89,44 +74,43 @@ static struct ctl_table user_table[] = { #endif { } }; -#endif /* CONFIG_SYSCTL */ -bool setup_userns_sysctls(struct user_namespace *ns) +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; - - BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1); - setup_sysctl_set(&ns->set, &set_root, set_is_seen); - tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); - if (tbl) { - int i; - for (i = 0; i < UCOUNT_COUNTS; i++) { - tbl[i].data = &ns->ucount_max[i]; - } - ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl); - } - if (!ns->sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->set); - return false; - } -#endif - return true; + /* Allow users with 
CAP_SYS_RESOURCE unrestrained access */ + if ((file->f_mode & FMODE_WRITE) && + !ns_capable(file->f_cred->user_ns, CAP_SYS_RESOURCE)) + return -EPERM; + return 0; } -void retire_userns_sysctls(struct user_namespace *ns) +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; + struct ctl_table table = *ctx->table; + table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table]; + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} - tbl = ns->sysctls->ctl_table_arg; - unregister_sysctl_table(ns->sysctls); - retire_sysctl_set(&ns->set); - kfree(tbl); -#endif +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table]; + return table.proc_handler(&table, 1, buffer, lenp, ppos); } +static struct ctl_table user_root_table[] = { + { + .procname = "user", + .mode = 0555, + .child = user_table, + }, + {} +}; + +#endif /* CONFIG_SYSCTL */ + static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) { struct ucounts *ucounts; @@ -357,17 +341,7 @@ bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigne static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL - static struct ctl_table_header *user_header; - static struct ctl_table empty[1]; - /* - * It is necessary to register the user directory in the - * default set so that registrations in the child sets work - * properly. 
- */ - user_header = register_sysctl("user", empty); - kmemleak_ignore(user_header); - BUG_ON(!user_header); - BUG_ON(!setup_userns_sysctls(&init_user_ns)); + register_sysctl_table(user_root_table); #endif hlist_add_ucounts(&init_ucounts); inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 981bb2d10d83..c0e707bc9a31 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -149,17 +149,10 @@ int create_user_ns(struct cred *new) INIT_LIST_HEAD(&ns->keyring_name_list); init_rwsem(&ns->keyring_sem); #endif - ret = -ENOMEM; - if (!setup_userns_sysctls(ns)) - goto fail_keyring; set_cred_user_ns(new, ns); return 0; -fail_keyring: -#ifdef CONFIG_PERSISTENT_KEYRINGS - key_put(ns->persistent_keyring_register); -#endif - ns_free_inum(&ns->ns); + fail_free: kmem_cache_free(user_ns_cachep, ns); fail_dec: @@ -208,7 +201,6 @@ static void free_user_ns(struct work_struct *work) kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } - retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); From patchwork Wed Jun 1 13:20:32 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexey Gladkov X-Patchwork-Id: 12866904 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 70351C43217 for ; Wed, 1 Jun 2022 13:21:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1349767AbiFANVG (ORCPT ); Wed, 1 Jun 2022 09:21:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41360 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353199AbiFANVE (ORCPT ); Wed, 1 Jun 2022 09:21:04 -0400 Received: from 
us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [207.211.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 3B3644D9D4 for ; Wed, 1 Jun 2022 06:20:58 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-621-vLc6cXEvNsOpYsXD4FreTw-1; Wed, 01 Jun 2022 09:20:55 -0400 X-MC-Unique: vLc6cXEvNsOpYsXD4FreTw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 1C6001C0F68A; Wed, 1 Jun 2022 13:20:55 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 01B4B414A7E9; Wed, 1 Jun 2022 13:20:52 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 4/4] sysctl: mqueue: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:32 +0200 Message-Id: In-Reply-To: References: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Dynamic memory allocation is needed to modify .data and specify the per-namespace parameter. The new sysctl API makes it possible to get rid of the need for such modification.
Signed-off-by: Alexey Gladkov --- include/linux/ipc_namespace.h | 17 ----- ipc/mq_sysctl.c | 138 +++++++++++++++++++--------------- ipc/mqueue.c | 5 -- ipc/namespace.c | 6 -- 4 files changed, 79 insertions(+), 87 deletions(-) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 51c2c247c447..d20753093a2c 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -174,21 +174,4 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) } #endif -#ifdef CONFIG_POSIX_MQUEUE_SYSCTL - -void retire_mq_sysctls(struct ipc_namespace *ns); -bool setup_mq_sysctls(struct ipc_namespace *ns); - -#else /* CONFIG_POSIX_MQUEUE_SYSCTL */ - -static inline void retire_mq_sysctls(struct ipc_namespace *ns) -{ -} - -static inline bool setup_mq_sysctls(struct ipc_namespace *ns) -{ - return true; -} - -#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ #endif diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index fbf6a8b93a26..08ff7dfb721c 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -13,6 +13,45 @@ #include #include +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table); + +static int mq_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file) +{ + ctx->ctl_data = current->nsproxy->ipc_ns; + return 0; +} + +static ssize_t mq_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} + +static ssize_t mq_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 1, buffer, lenp, ppos); +} + +static struct ctl_fops mq_sys_fops = { + .open = mq_sys_open, + .read = mq_sys_read, + .write = mq_sys_write, +}; + +enum { + MQ_SYSCTL_QUEUES_MAX, + 
MQ_SYSCTL_MSG_MAX, + MQ_SYSCTL_MSGSIZE_MAX, + MQ_SYSCTL_MSG_DEFAULT, + MQ_SYSCTL_MSGSIZE_DEFAULT, + MQ_SYSCTL_COUNTS +}; + static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; @@ -20,14 +59,15 @@ static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; static struct ctl_table mq_sysctls[] = { - { + [MQ_SYSCTL_QUEUES_MAX] = { .procname = "queues_max", .data = &init_ipc_ns.mq_queues_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, + .ctl_fops = &mq_sys_fops, }, - { + [MQ_SYSCTL_MSG_MAX] = { .procname = "msg_max", .data = &init_ipc_ns.mq_msg_max, .maxlen = sizeof(int), @@ -35,8 +75,9 @@ static struct ctl_table mq_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &msg_max_limit_min, .extra2 = &msg_max_limit_max, + .ctl_fops = &mq_sys_fops, }, - { + [MQ_SYSCTL_MSGSIZE_MAX] = { .procname = "msgsize_max", .data = &init_ipc_ns.mq_msgsize_max, .maxlen = sizeof(int), @@ -44,8 +85,9 @@ static struct ctl_table mq_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, + .ctl_fops = &mq_sys_fops, }, - { + [MQ_SYSCTL_MSG_DEFAULT] = { .procname = "msg_default", .data = &init_ipc_ns.mq_msg_default, .maxlen = sizeof(int), @@ -53,8 +95,9 @@ static struct ctl_table mq_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &msg_max_limit_min, .extra2 = &msg_max_limit_max, + .ctl_fops = &mq_sys_fops, }, - { + [MQ_SYSCTL_MSGSIZE_DEFAULT] = { .procname = "msgsize_default", .data = &init_ipc_ns.mq_msgsize_default, .maxlen = sizeof(int), @@ -62,70 +105,47 @@ static struct ctl_table mq_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, + .ctl_fops = &mq_sys_fops, }, {} }; -static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table) { - return 
&current->nsproxy->ipc_ns->mq_set; + struct ipc_namespace *ns = ctx->ctl_data; + + switch (ctx->table - mq_sysctls) { + case MQ_SYSCTL_QUEUES_MAX: return &ns->mq_queues_max; + case MQ_SYSCTL_MSG_MAX: return &ns->mq_msg_max; + case MQ_SYSCTL_MSGSIZE_MAX: return &ns->mq_msgsize_max; + case MQ_SYSCTL_MSG_DEFAULT: return &ns->mq_msg_default; + case MQ_SYSCTL_MSGSIZE_DEFAULT: return &ns->mq_msgsize_default; + } + return NULL; } -static int set_is_seen(struct ctl_table_set *set) -{ - return &current->nsproxy->ipc_ns->mq_set == set; -} +static struct ctl_table mq_sysctl_dir[] = { + { + .procname = "mqueue", + .mode = 0555, + .child = mq_sysctls, + }, + {} +}; -static struct ctl_table_root set_root = { - .lookup = set_lookup, +static struct ctl_table mq_sysctl_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = mq_sysctl_dir, + }, + {} }; -bool setup_mq_sysctls(struct ipc_namespace *ns) +static int __init mq_sysctl_init(void) { - struct ctl_table *tbl; - - setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); - - tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); - if (tbl) { - int i; - - for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { - if (tbl[i].data == &init_ipc_ns.mq_queues_max) - tbl[i].data = &ns->mq_queues_max; - - else if (tbl[i].data == &init_ipc_ns.mq_msg_max) - tbl[i].data = &ns->mq_msg_max; - - else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) - tbl[i].data = &ns->mq_msgsize_max; - - else if (tbl[i].data == &init_ipc_ns.mq_msg_default) - tbl[i].data = &ns->mq_msg_default; - - else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) - tbl[i].data = &ns->mq_msgsize_default; - else - tbl[i].data = NULL; - } - - ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); - } - if (!ns->mq_sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->mq_set); - return false; - } - - return true; + register_sysctl_table(mq_sysctl_root); + return 0; } -void retire_mq_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - tbl = 
ns->mq_sysctls->ctl_table_arg; - unregister_sysctl_table(ns->mq_sysctls); - retire_sysctl_set(&ns->mq_set); - kfree(tbl); -} +device_initcall(mq_sysctl_init); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c0f24cc9f619..ffb79a24d70b 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1711,11 +1711,6 @@ static int __init init_mqueue_fs(void) if (mqueue_inode_cachep == NULL) return -ENOMEM; - if (!setup_mq_sysctls(&init_ipc_ns)) { - pr_warn("sysctl registration failed\n"); - return -ENOMEM; - } - error = register_filesystem(&mqueue_fs_type); if (error) goto out_sysctl; diff --git a/ipc/namespace.c b/ipc/namespace.c index f760243ca685..ae83f0f2651b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -59,10 +59,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (err) goto fail_put; - err = -ENOMEM; - if (!setup_mq_sysctls(ns)) - goto fail_put; - sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -129,8 +125,6 @@ static void free_ipc_ns(struct ipc_namespace *ns) msg_exit_ns(ns); shm_exit_ns(ns); - retire_mq_sysctls(ns); - dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns);