diff mbox series

[14/22] bsd-user/freebsd/os-syscall.c: lock_iovec

Message ID 20220201111455.52511-15-imp@bsdimp.com (mailing list archive)
State New, archived
Headers show
Series bsd-user: Start upstreaming the system calls. | expand

Commit Message

Warner Losh Feb. 1, 2022, 11:14 a.m. UTC
lock_iovec will lock an I/O vec and the memory to which it referrs and
create a iovec in the host space that referrs to it, with full error
unwinding.

Signed-off-by: Warner Losh <imp@bsdimp.com>
---
 bsd-user/freebsd/os-syscall.c | 92 +++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

Comments

Kyle Evans Feb. 1, 2022, 4:46 p.m. UTC | #1
On Tue, Feb 1, 2022 at 5:15 AM Warner Losh <imp@bsdimp.com> wrote:
>
> lock_iovec will lock an I/O vec and the memory to which it referrs and
> create a iovec in the host space that referrs to it, with full error
> unwinding.
>

s/referrs/refers/ twice

> Signed-off-by: Warner Losh <imp@bsdimp.com>
> ---
>  bsd-user/freebsd/os-syscall.c | 92 +++++++++++++++++++++++++++++++++++
>  1 file changed, 92 insertions(+)
>

Two typos, otherwise seems to LGTM:

Reviewed-by: Kyle Evans <kevans@FreeBSD.org>

> diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
> index 060134a9ecd..c21759ae7ce 100644
> --- a/bsd-user/freebsd/os-syscall.c
> +++ b/bsd-user/freebsd/os-syscall.c
> @@ -75,6 +75,98 @@ bool is_error(abi_long ret)
>      return (abi_ulong)ret >= (abi_ulong)(-4096);
>  }
>
> +struct iovec *lock_iovec(int type, abi_ulong target_addr,
> +        int count, int copy)
> +{
> +    struct target_iovec *target_vec;
> +    struct iovec *vec;
> +    abi_ulong total_len, max_len;
> +    int i;
> +    int err = 0;
> +    bool bad_address = false;
> +
> +    if (count == 0) {
> +        errno = 0;
> +        return NULL;
> +    }
> +    if (count < 0 || count > IOV_MAX) {
> +        errno = EINVAL;
> +        return NULL;
> +    }
> +
> +    vec = calloc(count, sizeof(struct iovec));
> +    if (vec == NULL) {
> +        errno = ENOMEM;
> +        return NULL;
> +    }
> +
> +    target_vec = lock_user(VERIFY_READ, target_addr,
> +                           count * sizeof(struct target_iovec), 1);
> +    if (target_vec == NULL) {
> +        err = EFAULT;
> +        goto fail2;
> +    }
> +
> +    /*
> +     * ??? If host page size > target page size, this will result in a value
> +     * larger than what we can actually support.
> +     */
> +    max_len = 0x7fffffff & TARGET_PAGE_MASK;
> +    total_len = 0;
> +
> +    for (i = 0; i < count; i++) {
> +        abi_ulong base = tswapal(target_vec[i].iov_base);
> +        abi_long len = tswapal(target_vec[i].iov_len);
> +
> +        if (len < 0) {
> +            err = EINVAL;
> +            goto fail;
> +        } else if (len == 0) {
> +            /* Zero length pointer is ignored.  */
> +            vec[i].iov_base = 0;
> +        } else {
> +            vec[i].iov_base = lock_user(type, base, len, copy);
> +            /*
> +             * If the first buffer pointer is bad, this is a fault.  But
> +             * subsequent bad buffers will result in a partial write; this is
> +             * realized by filling the vector with null pointers and zero
> +             * lengths.
> +             */
> +            if (!vec[i].iov_base) {
> +                if (i == 0) {
> +                    err = EFAULT;
> +                    goto fail;
> +                } else {
> +                    bad_address = true;
> +                }
> +            }
> +            if (bad_address) {
> +                len = 0;
> +            }
> +            if (len > max_len - total_len) {
> +                len = max_len - total_len;
> +            }
> +        }
> +        vec[i].iov_len = len;
> +        total_len += len;
> +    }
> +
> +    unlock_user(target_vec, target_addr, 0);
> +    return vec;
> +
> + fail:
> +    while (--i >= 0) {
> +        if (tswapal(target_vec[i].iov_len) > 0) {
> +            unlock_user(vec[i].iov_base, tswapal(target_vec[i].iov_base), 0);
> +        }
> +    }
> +    unlock_user(target_vec, target_addr, 0);
> + fail2:
> +    free(vec);
> +    errno = err;
> +    return NULL;
> +}
> +
>  /*
>   * do_syscall() should always have a single exit point at the end so that
>   * actions, such as logging of syscall results, can be performed.  All errnos
> --
> 2.33.1
>
Richard Henderson Feb. 1, 2022, 9:16 p.m. UTC | #2
On 2/1/22 22:14, Warner Losh wrote:
> lock_iovec will lock an I/O vec and the memory to which it referrs and
> create a iovec in the host space that referrs to it, with full error
> unwinding.
> 
> Signed-off-by: Warner Losh <imp@bsdimp.com>
> ---
>   bsd-user/freebsd/os-syscall.c | 92 +++++++++++++++++++++++++++++++++++
>   1 file changed, 92 insertions(+)
> 
> diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
> index 060134a9ecd..c21759ae7ce 100644
> --- a/bsd-user/freebsd/os-syscall.c
> +++ b/bsd-user/freebsd/os-syscall.c
> @@ -75,6 +75,98 @@ bool is_error(abi_long ret)
>       return (abi_ulong)ret >= (abi_ulong)(-4096);
>   }
>   
> +struct iovec *lock_iovec(int type, abi_ulong target_addr,
> +        int count, int copy)
> +{
> +    struct target_iovec *target_vec;
> +    struct iovec *vec;
> +    abi_ulong total_len, max_len;
> +    int i;
> +    int err = 0;
> +    bool bad_address = false;
> +
> +    if (count == 0) {
> +        errno = 0;
> +        return NULL;
> +    }
> +    if (count < 0 || count > IOV_MAX) {
> +        errno = EINVAL;
> +        return NULL;
> +    }
> +
> +    vec = calloc(count, sizeof(struct iovec));

g_try_new0.

You may want to use g_autofree to simplify error handling, which then requires you use

     return g_steal_pointer(&vec);

on the success path.

> +    if (vec == NULL) {
> +        errno = ENOMEM;
> +        return NULL;
> +    }
> +
> +    target_vec = lock_user(VERIFY_READ, target_addr,
> +                           count * sizeof(struct target_iovec), 1);
> +    if (target_vec == NULL) {
> +        err = EFAULT;
> +        goto fail2;
> +    }
> +
> +    /*
> +     * ??? If host page size > target page size, this will result in a value
> +     * larger than what we can actually support.
> +     */
> +    max_len = 0x7fffffff & TARGET_PAGE_MASK;
> +    total_len = 0;
> +
> +    for (i = 0; i < count; i++) {
> +        abi_ulong base = tswapal(target_vec[i].iov_base);
> +        abi_long len = tswapal(target_vec[i].iov_len);
> +
> +        if (len < 0) {
> +            err = EINVAL;
> +            goto fail;
> +        } else if (len == 0) {
> +            /* Zero length pointer is ignored.  */
> +            vec[i].iov_base = 0;
> +        } else {
> +            vec[i].iov_base = lock_user(type, base, len, copy);
> +            /*
> +             * If the first buffer pointer is bad, this is a fault.  But
> +             * subsequent bad buffers will result in a partial write; this is
> +             * realized by filling the vector with null pointers and zero
> +             * lengths.
> +             */
> +            if (!vec[i].iov_base) {
> +                if (i == 0) {
> +                    err = EFAULT;
> +                    goto fail;
> +                } else {
> +                    bad_address = true;
> +                }
> +            }
> +            if (bad_address) {
> +                len = 0;
> +            }

Surely this bad_address check should happen earlier, before we attempt the lock above?
E.g.

     else if (len == 0 || bad_address)


r~
diff mbox series

Patch

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 060134a9ecd..c21759ae7ce 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -75,6 +75,98 @@  bool is_error(abi_long ret)
     return (abi_ulong)ret >= (abi_ulong)(-4096);
 }
 
+struct iovec *lock_iovec(int type, abi_ulong target_addr,
+        int count, int copy)
+{
+    struct target_iovec *target_vec;
+    struct iovec *vec;
+    abi_ulong total_len, max_len;
+    int i;
+    int err = 0;
+    bool bad_address = false;
+
+    if (count == 0) {
+        errno = 0;
+        return NULL;
+    }
+    if (count < 0 || count > IOV_MAX) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    vec = calloc(count, sizeof(struct iovec));
+    if (vec == NULL) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec == NULL) {
+        err = EFAULT;
+        goto fail2;
+    }
+
+    /*
+     * ??? If host page size > target page size, this will result in a value
+     * larger than what we can actually support.
+     */
+    max_len = 0x7fffffff & TARGET_PAGE_MASK;
+    total_len = 0;
+
+    for (i = 0; i < count; i++) {
+        abi_ulong base = tswapal(target_vec[i].iov_base);
+        abi_long len = tswapal(target_vec[i].iov_len);
+
+        if (len < 0) {
+            err = EINVAL;
+            goto fail;
+        } else if (len == 0) {
+            /* Zero length pointer is ignored.  */
+            vec[i].iov_base = 0;
+        } else {
+            vec[i].iov_base = lock_user(type, base, len, copy);
+            /*
+             * If the first buffer pointer is bad, this is a fault.  But
+             * subsequent bad buffers will result in a partial write; this is
+             * realized by filling the vector with null pointers and zero
+             * lengths.
+             */
+            if (!vec[i].iov_base) {
+                if (i == 0) {
+                    err = EFAULT;
+                    goto fail;
+                } else {
+                    bad_address = true;
+                }
+            }
+            if (bad_address) {
+                len = 0;
+            }
+            if (len > max_len - total_len) {
+                len = max_len - total_len;
+            }
+        }
+        vec[i].iov_len = len;
+        total_len += len;
+    }
+
+    unlock_user(target_vec, target_addr, 0);
+    return vec;
+
+ fail:
+    while (--i >= 0) {
+        if (tswapal(target_vec[i].iov_len) > 0) {
+            unlock_user(vec[i].iov_base, tswapal(target_vec[i].iov_base), 0);
+        }
+    }
+    unlock_user(target_vec, target_addr, 0);
+ fail2:
+    free(vec);
+    errno = err;
+    return NULL;
+}
+
 /*
  * do_syscall() should always have a single exit point at the end so that
  * actions, such as logging of syscall results, can be performed.  All errnos