@@ -47,10 +47,10 @@ Currently, these files are in /proc/sys/fs:
aio-nr & aio-max-nr:
aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts. If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
+io_setup/io_setup2 system call for all currently active aio contexts.
+If aio-nr reaches aio-max-nr then io_setup/io_setup2 will fail with
+EAGAIN.  Note that raising aio-max-nr does not result in the
+pre-allocation or re-sizing of any kernel data structures.
==============================================================
@@ -343,6 +343,7 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
+335 common io_setup2 __x64_sys_io_setup2
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -101,6 +101,8 @@ struct kioctx {
unsigned long user_id;
+ unsigned int flags;
+
struct __percpu kioctx_cpu *cpu;
/*
@@ -687,10 +689,8 @@ static void aio_nr_sub(unsigned nr)
spin_unlock(&aio_nr_lock);
}
-/* ioctx_alloc
- * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
- */
-static struct kioctx *ioctx_alloc(unsigned nr_events)
+static struct kioctx *io_setup_flags(unsigned long ctxid,
+ unsigned int nr_events, unsigned int flags)
{
struct mm_struct *mm = current->mm;
struct kioctx *ctx;
@@ -702,6 +702,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
*/
unsigned int max_reqs = nr_events;
+ if (unlikely(ctxid || nr_events == 0)) {
+ pr_debug("EINVAL: ctx %lu nr_events %u\n",
+ ctxid, nr_events);
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* We keep track of the number of available ringbuffer slots, to prevent
* overflow (reqs_available), and we also use percpu counters for this.
@@ -727,6 +733,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (!ctx)
return ERR_PTR(-ENOMEM);
+ ctx->flags = flags;
ctx->max_reqs = max_reqs;
spin_lock_init(&ctx->ctx_lock);
@@ -1283,6 +1290,41 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
return ret;
}
+/* sys_io_setup2:
+ *	Like sys_io_setup(), except that it takes a set of flags
+ *	(IOCTX_FLAG_*) and two pointers to user structures.  No flags
+ *	are defined yet, so flags must currently be zero, and:
+ *	*user1 - reserved for future use, must be NULL
+ *
+ *	*user2 - reserved for future use, must be NULL
+ */
+SYSCALL_DEFINE5(io_setup2, u32, nr_events, u32, flags, void __user *, user1,
+		void __user *, user2, aio_context_t __user *, ctxp)
+{
+	struct kioctx *ioctx;
+	unsigned long ctx;
+	long ret;
+
+	if (flags || user1 || user2)
+		return -EINVAL;
+
+	ret = get_user(ctx, ctxp);
+	if (unlikely(ret))
+		goto out;
+
+	ioctx = io_setup_flags(ctx, nr_events, flags);
+	ret = PTR_ERR(ioctx);
+	if (IS_ERR(ioctx))
+		goto out;
+
+	ret = put_user(ioctx->user_id, ctxp);
+	if (ret)
+		kill_ioctx(current->mm, ioctx, NULL);
+	percpu_ref_put(&ioctx->users);
+out:
+	return ret;
+}
+
/* sys_io_setup:
* Create an aio_context capable of receiving at least nr_events.
* ctxp must not point to an aio_context that already exists, and
@@ -1298,7 +1340,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
*/
SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
{
- struct kioctx *ioctx = NULL;
+ struct kioctx *ioctx;
unsigned long ctx;
long ret;
@@ -1306,14 +1348,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (unlikely(ret))
goto out;
- ret = -EINVAL;
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: ctx %lu nr_events %u\n",
- ctx, nr_events);
- goto out;
- }
-
- ioctx = ioctx_alloc(nr_events);
+ ioctx = io_setup_flags(ctx, nr_events, 0);
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
@@ -1329,7 +1364,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
{
- struct kioctx *ioctx = NULL;
+ struct kioctx *ioctx;
unsigned long ctx;
long ret;
@@ -1337,23 +1372,14 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
if (unlikely(ret))
goto out;
- ret = -EINVAL;
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: ctx %lu nr_events %u\n",
- ctx, nr_events);
- goto out;
- }
-
- ioctx = ioctx_alloc(nr_events);
+ ioctx = io_setup_flags(ctx, nr_events, 0);
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
- /* truncating is ok because it's a user address */
- ret = put_user((u32)ioctx->user_id, ctx32p);
+ ret = put_user(ioctx->user_id, ctx32p);
if (ret)
kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
-
out:
return ret;
}
@@ -287,6 +287,8 @@ static inline void addr_limit_user_check(void)
*/
#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
+asmlinkage long sys_io_setup2(unsigned, unsigned, void __user *, void __user *,
+ aio_context_t __user *);
asmlinkage long sys_io_destroy(aio_context_t ctx);
asmlinkage long sys_io_submit(aio_context_t, long,
struct iocb __user * __user *);
@@ -738,9 +738,11 @@ __SYSCALL(__NR_statx, sys_statx)
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_io_setup2 294
+__SYSCALL(__NR_io_setup2, sys_io_setup2)
#undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
/*
* 32 bit systems traditionally used different
@@ -37,6 +37,7 @@ asmlinkage long sys_ni_syscall(void)
*/
COND_SYSCALL(io_setup);
+COND_SYSCALL(io_setup2);
COND_SYSCALL_COMPAT(io_setup);
COND_SYSCALL(io_destroy);
COND_SYSCALL(io_submit);