@@ -343,6 +343,7 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
+335 common io_setup2 __x64_sys_io_setup2
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -94,6 +94,8 @@ struct kioctx {
unsigned long user_id;
+ unsigned int flags;
+
struct __percpu kioctx_cpu *cpu;
/*
@@ -680,21 +682,24 @@ static void aio_nr_sub(unsigned nr)
spin_unlock(&aio_nr_lock);
}
-/* ioctx_alloc
- * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
- */
-static struct kioctx *ioctx_alloc(unsigned nr_events)
+static struct kioctx *io_setup_flags(unsigned long ctxid,
+ unsigned int nr_events, unsigned int flags)
{
struct mm_struct *mm = current->mm;
struct kioctx *ctx;
int err = -ENOMEM;
-
/*
* Store the original nr_events -- what userspace passed to io_setup(),
* for counting against the global limit -- before it changes.
*/
unsigned int max_reqs = nr_events;
+ if (unlikely(ctxid || nr_events == 0)) {
+ pr_debug("EINVAL: ctx %lu nr_events %u\n",
+ ctxid, nr_events);
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* We keep track of the number of available ringbuffer slots, to prevent
* overflow (reqs_available), and we also use percpu counters for this.
@@ -720,6 +725,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (!ctx)
return ERR_PTR(-ENOMEM);
+ ctx->flags = flags;
ctx->max_reqs = max_reqs;
spin_lock_init(&ctx->ctx_lock);
@@ -1275,6 +1281,33 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
return ret;
}
+SYSCALL_DEFINE4(io_setup2, u32, nr_events, u32, flags, struct iocb __user *,
+ iocbs, aio_context_t __user *, ctxp)
+{
+ struct kioctx *ioctx;
+ unsigned long ctx;
+ long ret;
+
+ if (flags)
+ return -EINVAL;
+
+ ret = get_user(ctx, ctxp);
+ if (unlikely(ret))
+ goto out;
+
+ ioctx = io_setup_flags(ctx, nr_events, flags);
+ ret = PTR_ERR(ioctx);
+ if (IS_ERR(ioctx))
+ goto out;
+
+ ret = put_user(ioctx->user_id, ctxp);
+ if (ret)
+ kill_ioctx(current->mm, ioctx, NULL);
+ percpu_ref_put(&ioctx->users);
+out:
+ return ret;
+}
+
/* sys_io_setup:
* Create an aio_context capable of receiving at least nr_events.
* ctxp must not point to an aio_context that already exists, and
@@ -1290,7 +1323,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
*/
SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
{
- struct kioctx *ioctx = NULL;
+ struct kioctx *ioctx;
unsigned long ctx;
long ret;
@@ -1298,14 +1331,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (unlikely(ret))
goto out;
- ret = -EINVAL;
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: ctx %lu nr_events %u\n",
- ctx, nr_events);
- goto out;
- }
-
- ioctx = ioctx_alloc(nr_events);
+ ioctx = io_setup_flags(ctx, nr_events, 0);
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
@@ -1313,7 +1339,6 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
-
out:
return ret;
}
@@ -1321,7 +1346,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
{
- struct kioctx *ioctx = NULL;
+ struct kioctx *ioctx;
unsigned long ctx;
long ret;
@@ -1329,23 +1354,14 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
if (unlikely(ret))
goto out;
- ret = -EINVAL;
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: ctx %lu nr_events %u\n",
- ctx, nr_events);
- goto out;
- }
-
- ioctx = ioctx_alloc(nr_events);
+ ioctx = io_setup_flags(ctx, nr_events, 0);
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
- /* truncating is ok because it's a user address */
- ret = put_user((u32)ioctx->user_id, ctx32p);
+ ret = put_user(ioctx->user_id, ctx32p);
if (ret)
kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
-
out:
return ret;
}
@@ -287,6 +287,8 @@ static inline void addr_limit_user_check(void)
*/
#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
+asmlinkage long sys_io_setup2(unsigned, unsigned, struct iocb __user *,
+ aio_context_t __user *);
asmlinkage long sys_io_destroy(aio_context_t ctx);
asmlinkage long sys_io_submit(aio_context_t, long,
struct iocb __user * __user *);
@@ -738,9 +738,11 @@ __SYSCALL(__NR_statx, sys_statx)
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_io_setup2 294
+__SYSCALL(__NR_io_setup2, sys_io_setup2)
#undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
/*
* 32 bit systems traditionally used different
@@ -37,6 +37,7 @@ asmlinkage long sys_ni_syscall(void)
*/
COND_SYSCALL(io_setup);
+COND_SYSCALL(io_setup2);
COND_SYSCALL_COMPAT(io_setup);
COND_SYSCALL(io_destroy);
COND_SYSCALL(io_submit);
This is just like io_setup(), except it adds a flags argument to let the caller control/define some of the io_context behavior. In addition, we pass in an iocb array for future use. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- arch/x86/entry/syscalls/syscall_64.tbl | 1 + fs/aio.c | 70 ++++++++++++++++---------- include/linux/syscalls.h | 2 + include/uapi/asm-generic/unistd.h | 4 +- kernel/sys_ni.c | 1 + 5 files changed, 50 insertions(+), 28 deletions(-)