diff mbox series

[bpf-next,v3,1/3] mm: add copy_remote_vm_str

Message ID 20250124181602.1872142-1-linux@jordanrome.com (mailing list archive)
State New
Headers show
Series [bpf-next,v3,1/3] mm: add copy_remote_vm_str | expand

Commit Message

Jordan Rome Jan. 24, 2025, 6:16 p.m. UTC
Similar to `access_process_vm` but specific to strings.
Also chunks reads by page and utilizes `strscpy`
for handling null termination.

Signed-off-by: Jordan Rome <linux@jordanrome.com>
---
 include/linux/mm.h |   3 ++
 mm/memory.c        | 119 +++++++++++++++++++++++++++++++++++++++++++++
 mm/nommu.c         |  68 ++++++++++++++++++++++++++
 3 files changed, 190 insertions(+)

--
2.43.5

Comments

Andrii Nakryiko Jan. 25, 2025, 12:08 a.m. UTC | #1
On Fri, Jan 24, 2025 at 10:22 AM Jordan Rome <linux@jordanrome.com> wrote:
>
> Similar to `access_process_vm` but specific to strings.
> Also chunks reads by page and utilizes `strscpy`
> for handling null termination.
>
> Signed-off-by: Jordan Rome <linux@jordanrome.com>
> ---
>  include/linux/mm.h |   3 ++
>  mm/memory.c        | 119 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/nommu.c         |  68 ++++++++++++++++++++++++++
>  3 files changed, 190 insertions(+)
>

[...]

> +               maddr = kmap_local_page(page);
> +               retval = strscpy(buf, maddr + offset, bytes);
> +               unmap_and_put_page(page, maddr);
> +
> +               if (retval > -1 && retval < bytes) {
> +                       /* found the end of the string */
> +                       buf += retval;
> +                       goto out;
> +               }
> +
> +               if (retval == -E2BIG) {

nit: strscpy() can't return any other error, so I'd structure result
handling as:

if (retval < 0) {
  /* that annoying last byte copy */
  retval = bytes;
}
if (retval < bytes) {
    /* "we are done" handling */
}

/* common len, buf, addr adjustment logic stays here */


but also here's the question. If we get E2BIG, while bytes is exactly
how many bytes we have left in the buffer, the last byte should be
zero, no? So this should be cleanly handled, right? Or do we have a
test for that and it works already?

> +                       retval = bytes;
> +                       /*
> +                        * Because strscpy always null terminates we need to
> +                        * copy the last byte in the page if we are going to
> +                        * load more pages
> +                        */
> +                       if (bytes < len) {
> +                               end = bytes - 1;
> +                               copy_from_user_page(vma,
> +                                               page,
> +                                               addr + end,
> +                                               buf + end,

you don't need the `end` variable, just use `bytes - 1` twice?

> +                                               maddr + (PAGE_SIZE - 1),
> +                                               1);
> +                       }
> +               }
> +
> +               len -= retval;
> +               buf += retval;
> +               addr += retval;
> +       }
> +
> +out:
> +       mmap_read_unlock(mm);
> +       if (err)
> +               return err;
> +
> +       return buf - old_buf;
> +}
> +
> +/**
> + * copy_remote_vm_str - copy a string from another process's address space.
> + * @tsk:       the task of the target address space
> + * @addr:      start address to read from
> + * @buf:       destination buffer
> + * @len:       number of bytes to transfer
> + * @gup_flags: flags modifying lookup behaviour
> + *
> + * The caller must hold a reference on @mm.
> + *
> + * Return: number of bytes copied from @addr (source) to @buf (destination).
> + * If the source string is shorter than @len then return the length of the
> + * source string. If the source string is longer than @len, return @len.
> + * On any error, return -EFAULT.

strncpy_from_user_nofault() doc says:

  On success, returns the length of the string INCLUDING the trailing NUL

Is this the case with copy_remote_vm_str() as well? I.e., if the
source string is 5 bytes + NUL, dst buf is 10. Will we get 5 or 6
returned? We should be very careful with all this +/- 1 business in
corner cases, too easy to mess this up.

> + */
> +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> +               void *buf, int len, unsigned int gup_flags)
> +{
> +       struct mm_struct *mm;
> +       int ret;
> +
> +       mm = get_task_mm(tsk);
> +       if (!mm)
> +               return -EFAULT;
> +
> +       ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
> +
> +       mmput(mm);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(copy_remote_vm_str);
> +

[...]
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f02925447e59..f3a05b3eb2f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2485,6 +2485,9 @@  extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);

+extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags);
+
 long get_user_pages_remote(struct mm_struct *mm,
 			   unsigned long start, unsigned long nr_pages,
 			   unsigned int gup_flags, struct page **pages,
diff --git a/mm/memory.c b/mm/memory.c
index 398c031be9ba..905e3f10fad0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6714,6 +6714,125 @@  int access_process_vm(struct task_struct *tsk, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(access_process_vm);

+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+			      void *buf, int len, unsigned int gup_flags)
+{
+	void *old_buf = buf;
+	int err = 0;
+
+	if (mmap_read_lock_killable(mm))
+		return -EFAULT;
+
+	/* Untag the address before looking up the VMA */
+	addr = untagged_addr_remote(mm, addr);
+
+	/* Avoid triggering the temporary warning in __get_user_pages */
+	if (!vma_lookup(mm, addr)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	while (len) {
+		int bytes, offset, retval, end;
+		void *maddr;
+		struct page *page;
+		struct vm_area_struct *vma = NULL;
+
+		page = get_user_page_vma_remote(mm, addr, gup_flags, &vma);
+
+		if (IS_ERR(page)) {
+			/*
+			 * Treat as a total failure for now until we decide how
+			 * to handle the CONFIG_HAVE_IOREMAP_PROT case and
+			 * stack expansion.
+			 */
+			err = -EFAULT;
+			goto out;
+		}
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE - 1);
+		if (bytes > PAGE_SIZE - offset)
+			bytes = PAGE_SIZE - offset;
+
+		maddr = kmap_local_page(page);
+		retval = strscpy(buf, maddr + offset, bytes);
+		unmap_and_put_page(page, maddr);
+
+		if (retval > -1 && retval < bytes) {
+			/* found the end of the string */
+			buf += retval;
+			goto out;
+		}
+
+		if (retval == -E2BIG) {
+			retval = bytes;
+			/*
+			 * Because strscpy always null terminates we need to
+			 * copy the last byte in the page if we are going to
+			 * load more pages
+			 */
+			if (bytes < len) {
+				end = bytes - 1;
+				copy_from_user_page(vma,
+						page,
+						addr + end,
+						buf + end,
+						maddr + (PAGE_SIZE - 1),
+						1);
+			}
+		}
+
+		len -= retval;
+		buf += retval;
+		addr += retval;
+	}
+
+out:
+	mmap_read_unlock(mm);
+	if (err)
+		return err;
+
+	return buf - old_buf;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk:	the task of the target address space
+ * @addr:	start address to read from
+ * @buf:	destination buffer
+ * @len:	number of bytes to transfer
+ * @gup_flags:	flags modifying lookup behaviour
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination).
+ * If the source string is shorter than @len then return the length of the
+ * source string. If the source string is longer than @len, return @len.
+ * On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EFAULT;
+
+	ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
+
+	mmput(mm);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
 /*
  * Print the name of a VMA.
  */
diff --git a/mm/nommu.c b/mm/nommu.c
index 9cb6e99215e2..23281751b1eb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1701,6 +1701,74 @@  int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 }
 EXPORT_SYMBOL_GPL(access_process_vm);

+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+			      void *buf, int len)
+{
+	int ret = 0;
+
+	if (mmap_read_lock_killable(mm))
+		return -EFAULT;
+
+	/* the access must start within one of the target process's mappings */
+	vma = find_vma(mm, addr);
+	if (vma) {
+		/* don't overrun this mapping */
+		if (addr + len >= vma->vm_end)
+			len = vma->vm_end - addr;
+
+		/* only read mappings where it is permitted */
+		if (vma->vm_flags & VM_MAYREAD) {
+			ret = strscpy(buf, addr, len);
+			if (ret == -E2BIG)
+				ret = len;
+		} else {
+			ret = -EFAULT;
+		}
+	} else {
+		ret = -EFAULT;
+	}
+
+	mmap_read_unlock(mm);
+	return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk:	the task of the target address space
+ * @addr:	start address to read from
+ * @buf:	destination buffer
+ * @len:	number of bytes to transfer
+ * @gup_flags:	flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination).
+ * If the source string is shorter than @len then return the length of the
+ * source string. If the source string is longer than @len, return @len.
+ * On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EFAULT;
+
+	ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+	mmput(mm);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
 /**
  * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
  * @inode: The inode to check