diff mbox series

[V40,12/29] x86: Lock down IO port access when the kernel is locked down

Message ID 20190820001805.241928-13-matthewgarrett@google.com (mailing list archive)
State New, archived
Headers show
Series Add kernel lockdown functionality | expand

Commit Message

Matthew Garrett Aug. 20, 2019, 12:17 a.m. UTC
From: Matthew Garrett <mjg59@srcf.ucam.org>

IO port access would permit users to gain access to PCI configuration
registers, which in turn (on a lot of hardware) give access to MMIO
register space. This would potentially permit root to trigger arbitrary
DMA, so lock it down by default.

This also implicitly locks down the KDADDIO, KDDELIO, KDENABIO and
KDDISABIO console ioctls.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: x86@kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/kernel/ioport.c     | 7 +++++--
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

Comments

Kai-Heng Feng Jan. 5, 2022, 6:25 a.m. UTC | #1
Hi Matthew,

On Tue, Aug 20, 2019 at 8:20 AM Matthew Garrett
<matthewgarrett@google.com> wrote:
>
> From: Matthew Garrett <mjg59@srcf.ucam.org>
>
> IO port access would permit users to gain access to PCI configuration
> registers, which in turn (on a lot of hardware) give access to MMIO
> register space. This would potentially permit root to trigger arbitrary
> DMA, so lock it down by default.
>
> This also implicitly locks down the KDADDIO, KDDELIO, KDENABIO and
> KDDISABIO console ioctls.
>
> Signed-off-by: Matthew Garrett <mjg59@google.com>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> cc: x86@kernel.org
> Signed-off-by: James Morris <jmorris@namei.org>

This patch breaks ioperm() usage from userspace programs that have the CAP_SYS_RAWIO capability.

I wonder if it's possible to revert this commit?

Kai-Heng

> ---
>  arch/x86/kernel/ioport.c     | 7 +++++--
>  include/linux/security.h     | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
> index 0fe1c8782208..61a89d3c0382 100644
> --- a/arch/x86/kernel/ioport.c
> +++ b/arch/x86/kernel/ioport.c
> @@ -11,6 +11,7 @@
>  #include <linux/errno.h>
>  #include <linux/types.h>
>  #include <linux/ioport.h>
> +#include <linux/security.h>
>  #include <linux/smp.h>
>  #include <linux/stddef.h>
>  #include <linux/slab.h>
> @@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
>
>         if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
>                 return -EINVAL;
> -       if (turn_on && !capable(CAP_SYS_RAWIO))
> +       if (turn_on && (!capable(CAP_SYS_RAWIO) ||
> +                       security_locked_down(LOCKDOWN_IOPORT)))
>                 return -EPERM;
>
>         /*
> @@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
>                 return -EINVAL;
>         /* Trying to gain more privileges? */
>         if (level > old) {
> -               if (!capable(CAP_SYS_RAWIO))
> +               if (!capable(CAP_SYS_RAWIO) ||
> +                   security_locked_down(LOCKDOWN_IOPORT))
>                         return -EPERM;
>         }
>         regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 2b763f0ee352..cd93fa5d3c6d 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -108,6 +108,7 @@ enum lockdown_reason {
>         LOCKDOWN_KEXEC,
>         LOCKDOWN_HIBERNATION,
>         LOCKDOWN_PCI_ACCESS,
> +       LOCKDOWN_IOPORT,
>         LOCKDOWN_INTEGRITY_MAX,
>         LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 410e90eda848..8b7d65dbb086 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -23,6 +23,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>         [LOCKDOWN_KEXEC] = "kexec of unsigned images",
>         [LOCKDOWN_HIBERNATION] = "hibernation",
>         [LOCKDOWN_PCI_ACCESS] = "direct PCI access",
> +       [LOCKDOWN_IOPORT] = "raw io port access",
>         [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>         [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> --
> 2.23.0.rc1.153.gdeed80330f-goog
>
Matthew Garrett Jan. 5, 2022, 6:48 a.m. UTC | #2
On Wed, Jan 05, 2022 at 02:25:41PM +0800, Kai-Heng Feng wrote:

> This patch breaks ioperm() usage from userspace programs with CAP_SYS_RAWIO cap.
> 
> I wonder if it's possible to revert this commit?

When lockdown is enabled, or under all circumstances? It's expected to 
be blocked when lockdown is enabled - allowing userland to use port IO 
would potentially allow PCI devices to be reconfigured in ways that 
alter security-relevant kernel behaviour, which is what lockdown aims 
to prevent. What's being broken by this?
Kai-Heng Feng Jan. 5, 2022, 6:57 a.m. UTC | #3
On Wed, Jan 5, 2022 at 2:48 PM Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>
> On Wed, Jan 05, 2022 at 02:25:41PM +0800, Kai-Heng Feng wrote:
>
> > This patch breaks ioperm() usage from userspace programs with CAP_SYS_RAWIO cap.
> >
> > I wonder if it's possible to revert this commit?
>
> When lockdown is enabled, or under all circumstances? It's expected to
> be blocked when lockdown is enabled - allowing userland to use port IO
> would potentially allow reconfiguration of PCI devices in ways that
> could alter kernel behaviour in ways relevant to security, which is what
> lockdown aims to prevent. What's being broken by this?

Only when lockdown is enabled.

The affected system from the customer has SecureBoot enabled (and
hence lockdown), and the kernel upgrade surprisingly broke ioperm()
usage.
The userspace program is proprietary so I can't share it here.

Basically this patch makes ioperm() a noop on SecureBoot enabled x86 systems.
If reverting is not an option, what else can we do to circumvent the regression?

Kai-Heng
Matthew Garrett Jan. 5, 2022, 7:20 a.m. UTC | #4
On Wed, Jan 05, 2022 at 02:57:57PM +0800, Kai-Heng Feng wrote:

> The affected system from the customer has SecureBoot enabled (and
> hence lockdown), and the kernel upgrade surprisingly broke ioperm()
> usage.

Which kernel was being used that was signed but didn't implement 
lockdown? That sounds, uh, bad.

> The userspace program is proprietary so I can't share it here.

Ok. Are you able to describe anything about what it does so we can 
figure out a better solution?

> Basically this patch makes ioperm() a noop on SecureBoot enabled x86 systems.
> If reverting is not an option, what else can we do to circumvent the regression?

There are two main choices:

1) Disable secure boot on the system in question - if there's a need to 
run userland that can do arbitrary port IO then secure boot isn't 
providing any meaningful security benefit in any case.

2) Implement a kernel driver that abstracts the hardware access away 
from userland, and ensures that all the accesses are performed in a safe 
way.

Doing port IO from userland is almost always a terrible idea - it 
usually involves indexed accesses (you write an address to one port and 
then write or read data from another). If two processes try to do this 
simultaneously (either because of SMP or because one process gets 
preempted after writing the address but before accessing the data 
register), you can end up with accesses to the wrong register as a 
result. You really want this sort of thing to be mediated by the 
kernel, both from a safety perspective and to ensure appropriate 
synchronisation.
Kai-Heng Feng Jan. 5, 2022, 10:05 a.m. UTC | #5
On Wed, Jan 5, 2022 at 3:20 PM Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>
> On Wed, Jan 05, 2022 at 02:57:57PM +0800, Kai-Heng Feng wrote:
>
> > The affected system from the customer has SecureBoot enabled (and
> > hence lockdown), and the kernel upgrade surprisingly broke ioperm()
> > usage.
>
> Which kernel was being used that was signed but didn't implement
> lockdown? That sounds, uh, bad.

It was upgraded from an older distro release. Older kernels don't have lockdown.

>
> > The userspace program is proprietary so I can't share it here.
>
> Ok. Are you able to describe anything about what it does so we can
> figure out a better solution?
>
> > Basically this patch makes ioperm() a noop on SecureBoot enabled x86 systems.
> > If reverting is not an option, what else can we do to circumvent the regression?
>
> There's two main choices:
>
> 1) Disable secure boot on the system in question - if there's a need to
> run userland that can do arbitrary port IO then secure boot isn't
> providing any meaningful security benefit in any case.

How so?
Other security features are still incredibly valuable; we don't want
to toss them out just because someone has to use ioperm().

>
> 2) Implement a kernel driver that abstracts the hardware access away
> from userland, and ensures that all the accesses are performed in a safe
> way.
>
> Doing port IO from userland is almost always a terrible idea - it
> usually involves indexed accesses (you write an address to one port and
> then write or read data from another), and if two processes are trying
> to do this simultaneously (either because SMP or because one process
> gets preempted after writing the address but before accessing the data
> register), and in that case you can end up with accesses to the wrong
> register as a result. You really want this sort of thing to be mediated
> by the kernel, both from a safety perspective and to ensure appropriate
> synchronisation.

Agree, let me start a discussion with them.

Kai-Heng
Matthew Garrett Jan. 5, 2022, 10:14 a.m. UTC | #6
On Wed, Jan 05, 2022 at 06:05:26PM +0800, Kai-Heng Feng wrote:
> On Wed, Jan 5, 2022 at 3:20 PM Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> >
> > On Wed, Jan 05, 2022 at 02:57:57PM +0800, Kai-Heng Feng wrote:
> >
> > > The affected system from the customer has SecureBoot enabled (and
> > > hence lockdown), and the kernel upgrade surprisingly broke ioperm()
> > > usage.
> >
> > Which kernel was being used that was signed but didn't implement
> > lockdown? That sounds, uh, bad.
> 
> It was upgraded from older distro release. Older kernels don't have lockdown.

But have a signed bootloader? Which releases?

> > There's two main choices:
> >
> > 1) Disable secure boot on the system in question - if there's a need to
> > run userland that can do arbitrary port IO then secure boot isn't
> > providing any meaningful security benefit in any case.
> 
> How so?
> Other security features are still incredible valuable, we don't want
> to toss them out just because someone has to use ioperm().

Because having the ability to do port io allows you to tamper with the 
running kernel and disable all the other security boundaries, making 
them pointless. Many PCI devices have a port IO side channel into MMIO 
BARs for use in early boot, so if an attacker can fill that BAR as they 
wish and then modify the BAR to map it into the kernel address space 
(and fix up the bridges appropriately), or if the port IO interface can 
be used to trigger DMA, the outcomes are pretty bad. The point of 
lockdown is to disable every plausible interface that would let 
userland (even uid 0) insert modified code into ring 0 - port IO is 
definitely one of those interfaces. An 
attacker could just take a kernel that allows ioperm(), add an initramfs 
containing their payload, boot, hotpatch the kernel to disable lockdown, 
and then kexec into their backdoored payload.

> >
> > 2) Implement a kernel driver that abstracts the hardware access away
> > from userland, and ensures that all the accesses are performed in a safe
> > way.
> >
> > Doing port IO from userland is almost always a terrible idea - it
> > usually involves indexed accesses (you write an address to one port and
> > then write or read data from another), and if two processes are trying
> > to do this simultaneously (either because SMP or because one process
> > gets preempted after writing the address but before accessing the data
> > register), and in that case you can end up with accesses to the wrong
> > register as a result. You really want this sort of thing to be mediated
> > by the kernel, both from a safety perspective and to ensure appropriate
> > synchronisation.
> 
> Agree, let me start a discussion with them.

Sounds good.
diff mbox series

Patch

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 0fe1c8782208..61a89d3c0382 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -11,6 +11,7 @@ 
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
+#include <linux/security.h>
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
@@ -31,7 +32,8 @@  long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
+	if (turn_on && (!capable(CAP_SYS_RAWIO) ||
+			security_locked_down(LOCKDOWN_IOPORT)))
 		return -EPERM;
 
 	/*
@@ -126,7 +128,8 @@  SYSCALL_DEFINE1(iopl, unsigned int, level)
 		return -EINVAL;
 	/* Trying to gain more privileges? */
 	if (level > old) {
-		if (!capable(CAP_SYS_RAWIO))
+		if (!capable(CAP_SYS_RAWIO) ||
+		    security_locked_down(LOCKDOWN_IOPORT))
 			return -EPERM;
 	}
 	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
diff --git a/include/linux/security.h b/include/linux/security.h
index 2b763f0ee352..cd93fa5d3c6d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -108,6 +108,7 @@  enum lockdown_reason {
 	LOCKDOWN_KEXEC,
 	LOCKDOWN_HIBERNATION,
 	LOCKDOWN_PCI_ACCESS,
+	LOCKDOWN_IOPORT,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 410e90eda848..8b7d65dbb086 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -23,6 +23,7 @@  static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
 	[LOCKDOWN_HIBERNATION] = "hibernation",
 	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
+	[LOCKDOWN_IOPORT] = "raw io port access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };