diff mbox series

[bpf-next,v6,1/3] mm: add copy_remote_vm_str

Message ID 20250128224352.3808460-1-linux@jordanrome.com (mailing list archive)
State New
Headers show
Series [bpf-next,v6,1/3] mm: add copy_remote_vm_str | expand

Commit Message

Jordan Rome Jan. 28, 2025, 10:43 p.m. UTC
Similar to `access_process_vm` but specific to strings.
Also chunks reads by page and utilizes `strscpy`
for handling null termination.

Signed-off-by: Jordan Rome <linux@jordanrome.com>
---
 include/linux/mm.h |   3 ++
 mm/memory.c        | 119 +++++++++++++++++++++++++++++++++++++++++++++
 mm/nommu.c         |  74 ++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+)

--
2.43.5

Comments

Andrii Nakryiko Jan. 30, 2025, 12:19 a.m. UTC | #1
On Tue, Jan 28, 2025 at 2:44 PM Jordan Rome <linux@jordanrome.com> wrote:
>
> Similar to `access_process_vm` but specific to strings.
> Also chunks reads by page and utilizes `strscpy`
> for handling null termination.
>
> Signed-off-by: Jordan Rome <linux@jordanrome.com>
> ---
>  include/linux/mm.h |   3 ++
>  mm/memory.c        | 119 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/nommu.c         |  74 ++++++++++++++++++++++++++++
>  3 files changed, 196 insertions(+)
>

The logic looks good, but I have a bunch of stylistic nits below. It
would be nice for someone from mm side to take a look as well.

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index f02925447e59..f3a05b3eb2f2 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2485,6 +2485,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
>  extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
>                 void *buf, int len, unsigned int gup_flags);
>
> +extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> +               void *buf, int len, unsigned int gup_flags);
> +
>  long get_user_pages_remote(struct mm_struct *mm,
>                            unsigned long start, unsigned long nr_pages,
>                            unsigned int gup_flags, struct page **pages,
> diff --git a/mm/memory.c b/mm/memory.c
> index 398c031be9ba..7f6e74a99984 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -6714,6 +6714,125 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
>  }
>  EXPORT_SYMBOL_GPL(access_process_vm);
>
> +/*
> + * Copy a string from another process's address space as given in mm.
> + * If there is any error return -EFAULT.
> + */
> +static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
> +                             void *buf, int len, unsigned int gup_flags)
> +{
> +       void *old_buf = buf;
> +       int err = 0;

empty line between variables and statements

> +       ((char *)buf)[0] = '\0';

nit: this would be probably a bit more "canonical": *(char *)buf = '\0';

> +
> +       if (mmap_read_lock_killable(mm))
> +               return -EFAULT;
> +
> +       /* Untag the address before looking up the VMA */
> +       addr = untagged_addr_remote(mm, addr);
> +
> +       /* Avoid triggering the temporary warning in __get_user_pages */
> +       if (!vma_lookup(mm, addr)) {
> +               err = -EFAULT;
> +               goto out;
> +       }
> +
> +       while (len) {
> +               int bytes, offset, retval;
> +               void *maddr;
> +               struct page *page;
> +               struct vm_area_struct *vma = NULL;
> +
> +               page = get_user_page_vma_remote(mm, addr, gup_flags, &vma);
> +
> +               if (IS_ERR(page)) {
> +                       /*
> +                        * Treat as a total failure for now until we decide how
> +                        * to handle the CONFIG_HAVE_IOREMAP_PROT case and
> +                        * stack expansion.
> +                        */
> +                       ((char *)buf)[0] = '\0';
> +                       err = -EFAULT;
> +                       goto out;
> +               }
> +
> +               bytes = len;
> +               offset = addr & (PAGE_SIZE - 1);
> +               if (bytes > PAGE_SIZE - offset)
> +                       bytes = PAGE_SIZE - offset;
> +
> +               maddr = kmap_local_page(page);
> +               retval = strscpy(buf, maddr + offset, bytes);
> +
> +               if (retval < 0) {
> +                       buf += (bytes - 1);

nit: unnecessary ()

another nit: you could have had `addr += bytes - 1;` here, to keep
addr and buf adjustment code close


> +                       /*
> +                        * Because strscpy always NUL terminates we need to
> +                        * copy the last byte in the page if we are going to
> +                        * load more pages
> +                        */
> +                       if (bytes != len) {
> +                               addr += (bytes - 1);
> +                               copy_from_user_page(vma, page, addr, buf,
> +                                               maddr + (PAGE_SIZE - 1), 1);
> +
> +                               buf += 1;
> +                               addr += 1;
> +                       }
> +                       len -= bytes;
> +               }
> +
> +               unmap_and_put_page(page, maddr);
> +
> +               if (retval >= 0) {
> +                       /* Found the end of the string */
> +                       buf += retval;
> +                       goto out;
> +               }

it's not incorrect, but it would be nice not to have to re-check
retval twice. Why not this structure:

ret = strscpy(...)
if (retval >= 0) {
    unmap_and_put_page(page, maddr);
    buf += retval;
    break;
}

/* this is -E2BIG case */

buf += bytes - 1;
addr += bytes - 1;

if (bytes != len) { copy, buf += 1, addr += 1 }

unmap_and_put_page(page, maddr);



Note that you don't need goto, break is fine. And yes, I don't think
duplicating unmap_and_put_page() is a problem.


> +       }
> +
> +out:
> +       mmap_read_unlock(mm);
> +       if (err)
> +               return err;
> +
> +       return buf - old_buf;
> +}
> +
> +/**
> + * copy_remote_vm_str - copy a string from another process's address space.
> + * @tsk:       the task of the target address space
> + * @addr:      start address to read from
> + * @buf:       destination buffer
> + * @len:       number of bytes to copy
> + * @gup_flags: flags modifying lookup behaviour
> + *
> + * The caller must hold a reference on @mm.
> + *
> + * Return: number of bytes copied from @addr (source) to @buf (destination);
> + * not including the trailing NUL. Always guaranteed to leave NUL-terminated
> + * buffer. On any error, return -EFAULT.
> + */
> +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> +               void *buf, int len, unsigned int gup_flags)
> +{
> +       struct mm_struct *mm;
> +       int ret;
> +
> +       mm = get_task_mm(tsk);
> +       if (!mm) {
> +               ((char *)buf)[0] = '\0';
> +               return -EFAULT;
> +       }
> +
> +       ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
> +
> +       mmput(mm);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(copy_remote_vm_str);
> +
>  /*
>   * Print the name of a VMA.
>   */
> diff --git a/mm/nommu.c b/mm/nommu.c
> index 9cb6e99215e2..4d83d0813eb8 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -1701,6 +1701,80 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
>  }
>  EXPORT_SYMBOL_GPL(access_process_vm);
>
> +/*
> + * Copy a string from another process's address space as given in mm.
> + * If there is any error return -EFAULT.
> + */
> +static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
> +                             void *buf, int len)
> +{
> +       uint64_t tmp;

s/uint64_t/unsigned long/

also tmp -> addr_end ?

> +       struct vm_area_struct *vma;
> +

nit: no empty line here, why?

> +       int ret = -EFAULT;
> +
> +       ((char *)buf)[0] = '\0';
> +
> +       if (mmap_read_lock_killable(mm))
> +               return ret;
> +
> +       /* the access must start within one of the target process's mappings */
> +       vma = find_vma(mm, addr);
> +       if (!vma)
> +               goto out;
> +
> +       if (check_add_overflow(addr, len, &tmp))
> +               goto out;
> +       /* don't overrun this mapping */
> +       if (tmp >= vma->vm_end)

nit: strictly speaking only `tmp > vma->vm_end` needs special handling

> +               len = vma->vm_end - addr;
> +
> +       /* only read mappings where it is permitted */
> +       if (vma->vm_flags & VM_MAYREAD) {
> +               ret = strscpy(buf, (char *)addr, len);
> +               if (ret < 0)
> +                       ret = len - 1;
> +       }
> +
> +out:
> +       mmap_read_unlock(mm);
> +       return ret;
> +}
> +
> +/**
> + * copy_remote_vm_str - copy a string from another process's address space.
> + * @tsk:       the task of the target address space
> + * @addr:      start address to read from
> + * @buf:       destination buffer
> + * @len:       number of bytes to copy
> + * @gup_flags: flags modifying lookup behaviour (unused)
> + *
> + * The caller must hold a reference on @mm.
> + *
> + * Return: number of bytes copied from @addr (source) to @buf (destination);
> + * not including the trailing NUL. Always guaranteed to leave NUL-terminated
> + * buffer. On any error, return -EFAULT.
> + */
> +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> +               void *buf, int len, unsigned int gup_flags)
> +{
> +       struct mm_struct *mm;
> +       int ret;
> +
> +       mm = get_task_mm(tsk);
> +       if (!mm) {
> +               ((char *)buf)[0] = '\0';
> +               return -EFAULT;
> +       }
> +
> +       ret = __copy_remote_vm_str(mm, addr, buf, len);
> +
> +       mmput(mm);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(copy_remote_vm_str);
> +
>  /**
>   * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
>   * @inode: The inode to check
> --
> 2.43.5
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f02925447e59..f3a05b3eb2f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2485,6 +2485,9 @@  extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);

+extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags);
+
 long get_user_pages_remote(struct mm_struct *mm,
 			   unsigned long start, unsigned long nr_pages,
 			   unsigned int gup_flags, struct page **pages,
diff --git a/mm/memory.c b/mm/memory.c
index 398c031be9ba..7f6e74a99984 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6714,6 +6714,125 @@  int access_process_vm(struct task_struct *tsk, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(access_process_vm);

+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+			      void *buf, int len, unsigned int gup_flags)
+{
+	void *old_buf = buf;
+	int err = 0;
+	((char *)buf)[0] = '\0';
+
+	if (mmap_read_lock_killable(mm))
+		return -EFAULT;
+
+	/* Untag the address before looking up the VMA */
+	addr = untagged_addr_remote(mm, addr);
+
+	/* Avoid triggering the temporary warning in __get_user_pages */
+	if (!vma_lookup(mm, addr)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	while (len) {
+		int bytes, offset, retval;
+		void *maddr;
+		struct page *page;
+		struct vm_area_struct *vma = NULL;
+
+		page = get_user_page_vma_remote(mm, addr, gup_flags, &vma);
+
+		if (IS_ERR(page)) {
+			/*
+			 * Treat as a total failure for now until we decide how
+			 * to handle the CONFIG_HAVE_IOREMAP_PROT case and
+			 * stack expansion.
+			 */
+			((char *)buf)[0] = '\0';
+			err = -EFAULT;
+			goto out;
+		}
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE - 1);
+		if (bytes > PAGE_SIZE - offset)
+			bytes = PAGE_SIZE - offset;
+
+		maddr = kmap_local_page(page);
+		retval = strscpy(buf, maddr + offset, bytes);
+
+		if (retval < 0) {
+			buf += (bytes - 1);
+			/*
+			 * Because strscpy always NUL terminates we need to
+			 * copy the last byte in the page if we are going to
+			 * load more pages
+			 */
+			if (bytes != len) {
+				addr += (bytes - 1);
+				copy_from_user_page(vma, page, addr, buf,
+						maddr + (PAGE_SIZE - 1), 1);
+
+				buf += 1;
+				addr += 1;
+			}
+			len -= bytes;
+		}
+
+		unmap_and_put_page(page, maddr);
+
+		if (retval >= 0) {
+			/* Found the end of the string */
+			buf += retval;
+			goto out;
+		}
+	}
+
+out:
+	mmap_read_unlock(mm);
+	if (err)
+		return err;
+
+	return buf - old_buf;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk:	the task of the target address space
+ * @addr:	start address to read from
+ * @buf:	destination buffer
+ * @len:	number of bytes to copy
+ * @gup_flags:	flags modifying lookup behaviour
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination);
+ * not including the trailing NUL. Always guaranteed to leave NUL-terminated
+ * buffer. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm) {
+		((char *)buf)[0] = '\0';
+		return -EFAULT;
+	}
+
+	ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
+
+	mmput(mm);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
 /*
  * Print the name of a VMA.
  */
diff --git a/mm/nommu.c b/mm/nommu.c
index 9cb6e99215e2..4d83d0813eb8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1701,6 +1701,80 @@  int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 }
 EXPORT_SYMBOL_GPL(access_process_vm);

+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+			      void *buf, int len)
+{
+	uint64_t tmp;
+	struct vm_area_struct *vma;
+
+	int ret = -EFAULT;
+
+	((char *)buf)[0] = '\0';
+
+	if (mmap_read_lock_killable(mm))
+		return ret;
+
+	/* the access must start within one of the target process's mappings */
+	vma = find_vma(mm, addr);
+	if (!vma)
+		goto out;
+
+	if (check_add_overflow(addr, len, &tmp))
+		goto out;
+	/* don't overrun this mapping */
+	if (tmp >= vma->vm_end)
+		len = vma->vm_end - addr;
+
+	/* only read mappings where it is permitted */
+	if (vma->vm_flags & VM_MAYREAD) {
+		ret = strscpy(buf, (char *)addr, len);
+		if (ret < 0)
+			ret = len - 1;
+	}
+
+out:
+	mmap_read_unlock(mm);
+	return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk:	the task of the target address space
+ * @addr:	start address to read from
+ * @buf:	destination buffer
+ * @len:	number of bytes to copy
+ * @gup_flags:	flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination);
+ * not including the trailing NUL. Always guaranteed to leave NUL-terminated
+ * buffer. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm) {
+		((char *)buf)[0] = '\0';
+		return -EFAULT;
+	}
+
+	ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+	mmput(mm);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
 /**
  * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
  * @inode: The inode to check