diff mbox

[v3,5/8] proc/kcore: hold lock during read

Message ID d7cfbc1e8a76616f3b699eaff9df0a2730380534.1531953780.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Omar Sandoval July 18, 2018, 10:58 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

Now that we're using an rwsem, we can hold it during the entirety of
read_kcore() and have a common return path. This is preparation for the
next change.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/proc/kcore.c | 70 ++++++++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 30 deletions(-)

Comments

Tetsuo Handa July 24, 2018, 3:11 p.m. UTC | #1
On 2018/07/19 7:58, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> Now that we're using an rwsem, we can hold it during the entirety of
> read_kcore() and have a common return path. This is preparation for the
> next change.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  fs/proc/kcore.c | 70 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 40 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 95aa988c5b5d..e317ac890871 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -440,19 +440,18 @@ static ssize_t
>  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
>  {
>  	char *buf = file->private_data;
> -	ssize_t acc = 0;
>  	size_t size, tsz;
>  	size_t elf_buflen;
>  	int nphdr;
>  	unsigned long start;
> +	size_t orig_buflen = buflen;
> +	int ret = 0;
>  
>  	down_read(&kclist_lock);

(...snipped...)

> +out:
> +	up_write(&kclist_lock);

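Oops. This needs to be up_read(): kclist_lock is taken with down_read() at the top of read_kcore(), and releasing it with up_write() triggers the warning below.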

> +	if (ret)
> +		return ret;
> +	return orig_buflen - buflen;
>  }
>  
[   43.508922] ------------[ cut here ]------------
[   43.509931] DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
[   43.509940] WARNING: CPU: 0 PID: 7933 at kernel/locking/rwsem.c:133 up_write+0x75/0x80
[   43.512792] Modules linked in: pcspkr sg vmw_vmci i2c_piix4 sd_mod ata_generic pata_acpi vmwgfx drm_kms_helper syscopyarea ahci sysfillrect libahci sysimgblt fb_sys_fops mptspi ata_piix ttm scsi_transport_spi mptscsih drm e1000 mptbase libata i2c_core serio_raw ipv6 crc_ccitt
[   43.517692] CPU: 0 PID: 7933 Comm: kexec Not tainted 4.18.0-rc6-next-20180724+ #715
[   43.519237] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
[   43.521498] RIP: 0010:up_write+0x75/0x80
[   43.522391] Code: 00 5b c3 e8 0d 03 3a 00 85 c0 74 d9 83 3d 6a e4 10 02 00 75 d0 48 c7 c6 c8 2f e0 81 48 c7 c7 a3 e3 de 81 31 c0 e8 fb 8d fa ff <0f> 0b eb b7 0f 1f 80 00 00 00 00 8b 05 42 3e 06 02 53 48 89 fb 85
[   43.526253] RSP: 0018:ffffc90007ec7bc8 EFLAGS: 00010282
[   43.527303] RAX: 0000000000000000 RBX: ffffffff8207b5e0 RCX: 0000000000000006
[   43.528874] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88013ba15b70
[   43.530496] RBP: ffffc90007ec7e28 R08: 0000000000000000 R09: 0000000000000001
[   43.532007] R10: 0000000000000000 R11: 292928746e657272 R12: ffffffff8207b660
[   43.533448] R13: 0000000000000000 R14: 0000000000000000 R15: 000000000000e000
[   43.534969] FS:  00007fdd6e0c0740(0000) GS:ffff88013ba00000(0000) knlGS:0000000000000000
[   43.536623] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   43.537943] CR2: 00000000017dc000 CR3: 00000001376b5006 CR4: 00000000001606f0
[   43.539608] Call Trace:
[   43.540154]  read_kcore+0x81/0x630
[   43.540932]  proc_reg_read+0x34/0x60
[   43.541722]  __vfs_read+0x2e/0x160
[   43.542450]  vfs_read+0x84/0x130
[   43.543166]  ksys_read+0x50/0xc0
[   43.543856]  do_syscall_64+0x4f/0x1f0
[   43.544684]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[   43.545909] RIP: 0033:0x7fdd6d798c70
[   43.546688] Code: 0b 31 c0 48 83 c4 08 e9 be fe ff ff 48 8d 3d 07 b9 09 00 e8 52 8a 02 00 66 90 83 3d 2d c3 2d 00 00 75 10 b8 00 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 4e cc 01 00 48 89 04 24
[   43.550607] RSP: 002b:00007fffe5207428 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[   43.552192] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fdd6d798c70
[   43.553711] RDX: 0000000000010000 RSI: 00000000017cd840 RDI: 0000000000000004
[   43.555240] RBP: 0000000000010000 R08: 0000000000000001 R09: 0000000000010000
[   43.556779] R10: 0000000000000079 R11: 0000000000000246 R12: 0000000000000004
[   43.558284] R13: 00000000017cd840 R14: 00007fffe52074d8 R15: 00007fffe52076c0
[   43.559809] irq event stamp: 25471
[   43.560597] hardirqs last  enabled at (25471): [<ffffffff81800966>] restore_regs_and_return_to_kernel+0x0/0x2a
[   43.562821] hardirqs last disabled at (25470): [<ffffffff81800fa6>] error_exit+0x6/0x20
[   43.564515] softirqs last  enabled at (24962): [<ffffffff81a001db>] __do_softirq+0x1db/0x48e
[   43.566308] softirqs last disabled at (24955): [<ffffffff8107197d>] irq_exit+0xcd/0xe0
[   43.567982] ---[ end trace 0140237dd1b1be70 ]---
Omar Sandoval July 25, 2018, 11:34 p.m. UTC | #2
On Wed, Jul 25, 2018 at 12:11:26AM +0900, Tetsuo Handa wrote:
> On 2018/07/19 7:58, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > Now that we're using an rwsem, we can hold it during the entirety of
> > read_kcore() and have a common return path. This is preparation for the
> > next change.
> > 
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> > ---
> >  fs/proc/kcore.c | 70 ++++++++++++++++++++++++++++---------------------
> >  1 file changed, 40 insertions(+), 30 deletions(-)
> > 
> > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> > index 95aa988c5b5d..e317ac890871 100644
> > --- a/fs/proc/kcore.c
> > +++ b/fs/proc/kcore.c
> > @@ -440,19 +440,18 @@ static ssize_t
> >  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
> >  {
> >  	char *buf = file->private_data;
> > -	ssize_t acc = 0;
> >  	size_t size, tsz;
> >  	size_t elf_buflen;
> >  	int nphdr;
> >  	unsigned long start;
> > +	size_t orig_buflen = buflen;
> > +	int ret = 0;
> >  
> >  	down_read(&kclist_lock);
> 
> (...snipped...)
> 
> > +out:
> > +	up_write(&kclist_lock);
> 
> Oops. This needs to be up_read().

Yeah, thanks, I'll fix this and rerun my tests with lockdep.
diff mbox

Patch

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 95aa988c5b5d..e317ac890871 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -440,19 +440,18 @@  static ssize_t
 read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 {
 	char *buf = file->private_data;
-	ssize_t acc = 0;
 	size_t size, tsz;
 	size_t elf_buflen;
 	int nphdr;
 	unsigned long start;
+	size_t orig_buflen = buflen;
+	int ret = 0;
 
 	down_read(&kclist_lock);
 	size = get_kcore_size(&nphdr, &elf_buflen);
 
-	if (buflen == 0 || *fpos >= size) {
-		up_read(&kclist_lock);
-		return 0;
-	}
+	if (buflen == 0 || *fpos >= size)
+		goto out;
 
 	/* trim buflen to not go beyond EOF */
 	if (buflen > size - *fpos)
@@ -465,28 +464,26 @@  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		tsz = elf_buflen - *fpos;
 		if (buflen < tsz)
 			tsz = buflen;
-		elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
+		elf_buf = kzalloc(elf_buflen, GFP_KERNEL);
 		if (!elf_buf) {
-			up_read(&kclist_lock);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 		elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
-		up_read(&kclist_lock);
 		if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
 			kfree(elf_buf);
-			return -EFAULT;
+			ret = -EFAULT;
+			goto out;
 		}
 		kfree(elf_buf);
 		buflen -= tsz;
 		*fpos += tsz;
 		buffer += tsz;
-		acc += tsz;
 
 		/* leave now if filled buffer already */
 		if (buflen == 0)
-			return acc;
-	} else
-		up_read(&kclist_lock);
+			goto out;
+	}
 
 	/*
 	 * Check to see if our file offset matches with any of
@@ -499,25 +496,29 @@  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	while (buflen) {
 		struct kcore_list *m;
 
-		down_read(&kclist_lock);
 		list_for_each_entry(m, &kclist_head, list) {
 			if (start >= m->addr && start < (m->addr+m->size))
 				break;
 		}
-		up_read(&kclist_lock);
 
 		if (&m->list == &kclist_head) {
-			if (clear_user(buffer, tsz))
-				return -EFAULT;
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
-			if (copy_to_user(buffer, buf, tsz))
-				return -EFAULT;
+			if (copy_to_user(buffer, buf, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_USER) {
 			/* User page is handled prior to normal kernel page: */
-			if (copy_to_user(buffer, (char *)start, tsz))
-				return -EFAULT;
+			if (copy_to_user(buffer, (char *)start, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else {
 			if (kern_addr_valid(start)) {
 				/*
@@ -525,26 +526,35 @@  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				 * hardened user copy kernel text checks.
 				 */
 				if (probe_kernel_read(buf, (void *) start, tsz)) {
-					if (clear_user(buffer, tsz))
-						return -EFAULT;
+					if (clear_user(buffer, tsz)) {
+						ret = -EFAULT;
+						goto out;
+					}
 				} else {
-					if (copy_to_user(buffer, buf, tsz))
-						return -EFAULT;
+					if (copy_to_user(buffer, buf, tsz)) {
+						ret = -EFAULT;
+						goto out;
+					}
 				}
 			} else {
-				if (clear_user(buffer, tsz))
-					return -EFAULT;
+				if (clear_user(buffer, tsz)) {
+					ret = -EFAULT;
+					goto out;
+				}
 			}
 		}
 		buflen -= tsz;
 		*fpos += tsz;
 		buffer += tsz;
-		acc += tsz;
 		start += tsz;
 		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
 	}
 
-	return acc;
+out:
+	up_read(&kclist_lock);
+	if (ret)
+		return ret;
+	return orig_buflen - buflen;
 }