diff mbox series

[13/26] aio: add io_setup2() system call

Message ID 20181207222016.29387-14-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series [01/26] fs: add an iopoll method to struct file_operations | expand

Commit Message

Jens Axboe Dec. 7, 2018, 10:20 p.m. UTC
This is just like io_setup(), except that it adds a flags argument to let
the caller control/define some of the io_context behavior.

Outside of the flags, we add an iocb array and two user pointers for
future use.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 fs/aio.c                               | 69 ++++++++++++++++----------
 include/linux/syscalls.h               |  3 ++
 include/uapi/asm-generic/unistd.h      |  4 +-
 kernel/sys_ni.c                        |  1 +
 5 files changed, 52 insertions(+), 26 deletions(-)

Comments

Benny Halevy Dec. 9, 2018, 10:12 a.m. UTC | #1
On Fri, 2018-12-07 at 15:20 -0700, Jens Axboe wrote:
> This is just like io_setup(), except add a flags argument to let the
> caller control/define some of the io_context behavior.
> 
> Outside of the flags, we add an iocb array and two user pointers for
> future use.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  arch/x86/entry/syscalls/syscall_64.tbl |  1 +
>  fs/aio.c                               | 69 ++++++++++++++++----------
>  include/linux/syscalls.h               |  3 ++
>  include/uapi/asm-generic/unistd.h      |  4 +-
>  kernel/sys_ni.c                        |  1 +
>  5 files changed, 52 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index f0b1709a5ffb..67c357225fb0 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -343,6 +343,7 @@
>  332	common	statx			__x64_sys_statx
>  333	common	io_pgetevents		__x64_sys_io_pgetevents
>  334	common	rseq			__x64_sys_rseq
> +335	common	io_setup2		__x64_sys_io_setup2
>  
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/fs/aio.c b/fs/aio.c
> index 173f1f79dc8f..26631d6872d2 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -100,6 +100,8 @@ struct kioctx {
>  
>  	unsigned long		user_id;
>  
> +	unsigned int		flags;
> +
>  	struct __percpu kioctx_cpu *cpu;
>  
>  	/*
> @@ -686,10 +688,8 @@ static void aio_nr_sub(unsigned nr)
>  	spin_unlock(&aio_nr_lock);
>  }
>  
> -/* ioctx_alloc
> - *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
> - */
> -static struct kioctx *ioctx_alloc(unsigned nr_events)
> +static struct kioctx *io_setup_flags(unsigned long ctxid,
> +				     unsigned int nr_events, unsigned int flags)
>  {
>  	struct mm_struct *mm = current->mm;
>  	struct kioctx *ctx;
> @@ -701,6 +701,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
>  	 */
>  	unsigned int max_reqs = nr_events;
>  
> +	if (unlikely(ctxid || nr_events == 0)) {
> +		pr_debug("EINVAL: ctx %lu nr_events %u\n",
> +		         ctxid, nr_events);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
>  	/*
>  	 * We keep track of the number of available ringbuffer slots, to prevent
>  	 * overflow (reqs_available), and we also use percpu counters for this.
> @@ -726,6 +732,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
>  	if (!ctx)
>  		return ERR_PTR(-ENOMEM);
>  
> +	ctx->flags = flags;
>  	ctx->max_reqs = max_reqs;
>  
>  	spin_lock_init(&ctx->ctx_lock);
> @@ -1281,6 +1288,34 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>  	return ret;
>  }
>  

How about adding a comment similar to io_setup's below?

And would you like to also mention io_setup2 in
Documentation/sysctl/fs.txt?

> +SYSCALL_DEFINE6(io_setup2, u32, nr_events, u32, flags, struct iocb __user *,
> +		iocbs, void __user *, user1, void __user *, user2,
> +		aio_context_t __user *, ctxp)
> +{
> +	struct kioctx *ioctx;
> +	unsigned long ctx;
> +	long ret;
> +
> +	if (flags || user1 || user2)
> +		return -EINVAL;
> +
> +	ret = get_user(ctx, ctxp);
> +	if (unlikely(ret))
> +		goto out;
> +
> +	ioctx = io_setup_flags(ctx, nr_events, flags);
> +	ret = PTR_ERR(ioctx);
> +	if (IS_ERR(ioctx))
> +		goto out;
> +
> +	ret = put_user(ioctx->user_id, ctxp);
> +	if (ret)
> +		kill_ioctx(current->mm, ioctx, NULL);
> +	percpu_ref_put(&ioctx->users);
> +out:
> +	return ret;
> +}
> +
>  /* sys_io_setup:
>   *	Create an aio_context capable of receiving at least nr_events.
>   *	ctxp must not point to an aio_context that already exists, and
> @@ -1296,7 +1331,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>   */
>  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
>  {
> -	struct kioctx *ioctx = NULL;
> +	struct kioctx *ioctx;
>  	unsigned long ctx;
>  	long ret;
>  
> @@ -1304,14 +1339,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
>  	if (unlikely(ret))
>  		goto out;
>  
> -	ret = -EINVAL;
> -	if (unlikely(ctx || nr_events == 0)) {
> -		pr_debug("EINVAL: ctx %lu nr_events %u\n",
> -		         ctx, nr_events);
> -		goto out;
> -	}
> -
> -	ioctx = ioctx_alloc(nr_events);
> +	ioctx = io_setup_flags(ctx, nr_events, 0);
>  	ret = PTR_ERR(ioctx);
>  	if (!IS_ERR(ioctx)) {
>  		ret = put_user(ioctx->user_id, ctxp);
> @@ -1327,7 +1355,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
>  #ifdef CONFIG_COMPAT
>  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
>  {
> -	struct kioctx *ioctx = NULL;
> +	struct kioctx *ioctx;
>  	unsigned long ctx;
>  	long ret;
>  
> @@ -1335,23 +1363,14 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
>  	if (unlikely(ret))
>  		goto out;
>  
> -	ret = -EINVAL;
> -	if (unlikely(ctx || nr_events == 0)) {
> -		pr_debug("EINVAL: ctx %lu nr_events %u\n",
> -		         ctx, nr_events);
> -		goto out;
> -	}
> -
> -	ioctx = ioctx_alloc(nr_events);
> +	ioctx = io_setup_flags(ctx, nr_events, 0);
>  	ret = PTR_ERR(ioctx);
>  	if (!IS_ERR(ioctx)) {
> -		/* truncating is ok because it's a user address */
> -		ret = put_user((u32)ioctx->user_id, ctx32p);
> +		ret = put_user(ioctx->user_id, ctx32p);
>  		if (ret)
>  			kill_ioctx(current->mm, ioctx, NULL);
>  		percpu_ref_put(&ioctx->users);
>  	}
> -
>  out:
>  	return ret;
>  }
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 2ac3d13a915b..a20a663d583f 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -287,6 +287,9 @@ static inline void addr_limit_user_check(void)
>   */
>  #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>  asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
> +asmlinkage long sys_io_setup2(unsigned, unsigned, struct iocb __user *,
> +				void __user *, void __user *,
> +				aio_context_t __user *);
>  asmlinkage long sys_io_destroy(aio_context_t ctx);
>  asmlinkage long sys_io_submit(aio_context_t, long,
>  			struct iocb __user * __user *);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 538546edbfbd..b4527ed373b0 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -738,9 +738,11 @@ __SYSCALL(__NR_statx,     sys_statx)
>  __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
>  #define __NR_rseq 293
>  __SYSCALL(__NR_rseq, sys_rseq)
> +#define __NR_io_setup2 294
> +__SYSCALL(__NR_io_setup2, sys_io_setup2)
>  
>  #undef __NR_syscalls
> -#define __NR_syscalls 294
> +#define __NR_syscalls 295
>  
>  /*
>   * 32 bit systems traditionally used different
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index df556175be50..17c8b4393669 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -37,6 +37,7 @@ asmlinkage long sys_ni_syscall(void)
>   */
>  
>  COND_SYSCALL(io_setup);
> +COND_SYSCALL(io_setup2);
>  COND_SYSCALL_COMPAT(io_setup);
>  COND_SYSCALL(io_destroy);
>  COND_SYSCALL(io_submit);
Jens Axboe Dec. 9, 2018, 2:37 p.m. UTC | #2
On 12/9/18 3:12 AM, Benny Halevy wrote:
>> @@ -1281,6 +1288,34 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>>  	return ret;
>>  }
>>  
> 
> How about adding a comment similar to io_setup's below?
> 
> And would you like to mention also io_setup2 in
> Documentation/sysctl/fs.txt?

Sure, I'll add a comment for this system call too.
diff mbox series

Patch

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..67c357225fb0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,7 @@ 
 332	common	statx			__x64_sys_statx
 333	common	io_pgetevents		__x64_sys_io_pgetevents
 334	common	rseq			__x64_sys_rseq
+335	common	io_setup2		__x64_sys_io_setup2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/aio.c b/fs/aio.c
index 173f1f79dc8f..26631d6872d2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -100,6 +100,8 @@  struct kioctx {
 
 	unsigned long		user_id;
 
+	unsigned int		flags;
+
 	struct __percpu kioctx_cpu *cpu;
 
 	/*
@@ -686,10 +688,8 @@  static void aio_nr_sub(unsigned nr)
 	spin_unlock(&aio_nr_lock);
 }
 
-/* ioctx_alloc
- *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
- */
-static struct kioctx *ioctx_alloc(unsigned nr_events)
+static struct kioctx *io_setup_flags(unsigned long ctxid,
+				     unsigned int nr_events, unsigned int flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx;
@@ -701,6 +701,12 @@  static struct kioctx *ioctx_alloc(unsigned nr_events)
 	 */
 	unsigned int max_reqs = nr_events;
 
+	if (unlikely(ctxid || nr_events == 0)) {
+		pr_debug("EINVAL: ctx %lu nr_events %u\n",
+		         ctxid, nr_events);
+		return ERR_PTR(-EINVAL);
+	}
+
 	/*
 	 * We keep track of the number of available ringbuffer slots, to prevent
 	 * overflow (reqs_available), and we also use percpu counters for this.
@@ -726,6 +732,7 @@  static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
+	ctx->flags = flags;
 	ctx->max_reqs = max_reqs;
 
 	spin_lock_init(&ctx->ctx_lock);
@@ -1281,6 +1288,34 @@  static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	return ret;
 }
 
+SYSCALL_DEFINE6(io_setup2, u32, nr_events, u32, flags, struct iocb __user *,
+		iocbs, void __user *, user1, void __user *, user2,
+		aio_context_t __user *, ctxp)
+{
+	struct kioctx *ioctx;
+	unsigned long ctx;
+	long ret;
+
+	if (flags || user1 || user2)
+		return -EINVAL;
+
+	ret = get_user(ctx, ctxp);
+	if (unlikely(ret))
+		goto out;
+
+	ioctx = io_setup_flags(ctx, nr_events, flags);
+	ret = PTR_ERR(ioctx);
+	if (IS_ERR(ioctx))
+		goto out;
+
+	ret = put_user(ioctx->user_id, ctxp);
+	if (ret)
+		kill_ioctx(current->mm, ioctx, NULL);
+	percpu_ref_put(&ioctx->users);
+out:
+	return ret;
+}
+
 /* sys_io_setup:
  *	Create an aio_context capable of receiving at least nr_events.
  *	ctxp must not point to an aio_context that already exists, and
@@ -1296,7 +1331,7 @@  static long read_events(struct kioctx *ctx, long min_nr, long nr,
  */
 SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 {
-	struct kioctx *ioctx = NULL;
+	struct kioctx *ioctx;
 	unsigned long ctx;
 	long ret;
 
@@ -1304,14 +1339,7 @@  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (unlikely(ret))
 		goto out;
 
-	ret = -EINVAL;
-	if (unlikely(ctx || nr_events == 0)) {
-		pr_debug("EINVAL: ctx %lu nr_events %u\n",
-		         ctx, nr_events);
-		goto out;
-	}
-
-	ioctx = ioctx_alloc(nr_events);
+	ioctx = io_setup_flags(ctx, nr_events, 0);
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
@@ -1327,7 +1355,7 @@  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
 {
-	struct kioctx *ioctx = NULL;
+	struct kioctx *ioctx;
 	unsigned long ctx;
 	long ret;
 
@@ -1335,23 +1363,14 @@  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
 	if (unlikely(ret))
 		goto out;
 
-	ret = -EINVAL;
-	if (unlikely(ctx || nr_events == 0)) {
-		pr_debug("EINVAL: ctx %lu nr_events %u\n",
-		         ctx, nr_events);
-		goto out;
-	}
-
-	ioctx = ioctx_alloc(nr_events);
+	ioctx = io_setup_flags(ctx, nr_events, 0);
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
-		/* truncating is ok because it's a user address */
-		ret = put_user((u32)ioctx->user_id, ctx32p);
+		ret = put_user(ioctx->user_id, ctx32p);
 		if (ret)
 			kill_ioctx(current->mm, ioctx, NULL);
 		percpu_ref_put(&ioctx->users);
 	}
-
 out:
 	return ret;
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ac3d13a915b..a20a663d583f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -287,6 +287,9 @@  static inline void addr_limit_user_check(void)
  */
 #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
+asmlinkage long sys_io_setup2(unsigned, unsigned, struct iocb __user *,
+				void __user *, void __user *,
+				aio_context_t __user *);
 asmlinkage long sys_io_destroy(aio_context_t ctx);
 asmlinkage long sys_io_submit(aio_context_t, long,
 			struct iocb __user * __user *);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 538546edbfbd..b4527ed373b0 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -738,9 +738,11 @@  __SYSCALL(__NR_statx,     sys_statx)
 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_rseq 293
 __SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_io_setup2 294
+__SYSCALL(__NR_io_setup2, sys_io_setup2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index df556175be50..17c8b4393669 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -37,6 +37,7 @@  asmlinkage long sys_ni_syscall(void)
  */
 
 COND_SYSCALL(io_setup);
+COND_SYSCALL(io_setup2);
 COND_SYSCALL_COMPAT(io_setup);
 COND_SYSCALL(io_destroy);
 COND_SYSCALL(io_submit);