Message ID | 20240609104355.442002-3-jcalmels@3xx0.net (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | Introduce user namespace capabilities | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
bpf/vmtest-bpf-next-PR | fail | PR summary |
bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for ShellCheck |
bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for Unittests |
bpf/vmtest-bpf-next-VM_Test-0 | success | Logs for Lint |
bpf/vmtest-bpf-next-VM_Test-3 | success | Logs for Validate matrix.py |
bpf/vmtest-bpf-next-VM_Test-5 | success | Logs for aarch64-gcc / build-release |
bpf/vmtest-bpf-next-VM_Test-9 | success | Logs for s390x-gcc / build-release |
bpf/vmtest-bpf-next-VM_Test-7 | success | Logs for aarch64-gcc / veristat |
bpf/vmtest-bpf-next-VM_Test-6 | success | Logs for aarch64-gcc / test |
bpf/vmtest-bpf-next-VM_Test-4 | fail | Logs for aarch64-gcc / build / build for aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-24 | success | Logs for x86_64-llvm-18 / veristat |
bpf/vmtest-bpf-next-VM_Test-12 | success | Logs for set-matrix |
bpf/vmtest-bpf-next-VM_Test-8 | fail | Logs for s390x-gcc / build / build for s390x with gcc |
bpf/vmtest-bpf-next-VM_Test-10 | success | Logs for s390x-gcc / test |
bpf/vmtest-bpf-next-VM_Test-14 | success | Logs for x86_64-gcc / build-release |
bpf/vmtest-bpf-next-VM_Test-15 | success | Logs for x86_64-gcc / test |
bpf/vmtest-bpf-next-VM_Test-19 | success | Logs for x86_64-llvm-17 / test |
bpf/vmtest-bpf-next-VM_Test-21 | fail | Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18 |
bpf/vmtest-bpf-next-VM_Test-20 | success | Logs for x86_64-llvm-17 / veristat |
bpf/vmtest-bpf-next-VM_Test-16 | success | Logs for x86_64-gcc / veristat |
bpf/vmtest-bpf-next-VM_Test-22 | fail | Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2 |
bpf/vmtest-bpf-next-VM_Test-13 | fail | Logs for x86_64-gcc / build / build for x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-17 | fail | Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17 |
bpf/vmtest-bpf-next-VM_Test-23 | success | Logs for x86_64-llvm-18 / test |
bpf/vmtest-bpf-next-VM_Test-18 | fail | Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2 |
bpf/vmtest-bpf-next-VM_Test-11 | success | Logs for s390x-gcc / veristat |
On Sun, Jun 09, 2024 at 03:43:35AM -0700, Jonathan Calmels wrote: > This patch adds a new capability security bit designed to constrain a > task’s userns capability set to its bounding set. The reason for this is > twofold: > > - This serves as a quick and easy way to lock down a set of capabilities > for a task, thus ensuring that any namespace it creates will never be > more privileged than itself is. > - This helps userspace transition to more secure defaults by not requiring > specific logic for the userns capability set, or libcap support. > > Example: > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > -c 'unshare -r grep Cap /proc/self/status' > CapInh: 0000000000000000 > CapPrm: 000001fffffdffff > CapEff: 000001fffffdffff > CapBnd: 000001fffffdffff > CapAmb: 0000000000000000 > CapUNs: 000001fffffdffff But you are not (that I can see, in this or the previous patch) keeping SECURE_USERNS_STRICT_CAPS in securebits on the next level unshare. Though I think it's ok, because by then both cap_userns and cap_bset are reduced and cap_userns can't be expanded. 
(Sorry, just thinking aloud here) > Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net> > --- > include/linux/securebits.h | 1 + > include/uapi/linux/securebits.h | 11 ++++++++++- > kernel/user_namespace.c | 5 +++++ > 3 files changed, 16 insertions(+), 1 deletion(-) > > diff --git a/include/linux/securebits.h b/include/linux/securebits.h > index 656528673983..5f9d85cd69c3 100644 > --- a/include/linux/securebits.h > +++ b/include/linux/securebits.h > @@ -5,4 +5,5 @@ > #include <uapi/linux/securebits.h> > > #define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) > +#define iscredsecure(cred, X) (issecure_mask(X) & cred->securebits) > #endif /* !_LINUX_SECUREBITS_H */ > diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h > index d6d98877ff1a..2da3f4be4531 100644 > --- a/include/uapi/linux/securebits.h > +++ b/include/uapi/linux/securebits.h > @@ -52,10 +52,19 @@ > #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ > (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) > > +/* When set, user namespace capabilities are restricted to their parent's bounding set. 
*/ > +#define SECURE_USERNS_STRICT_CAPS 8 > +#define SECURE_USERNS_STRICT_CAPS_LOCKED 9 /* make bit-8 immutable */ > + > +#define SECBIT_USERNS_STRICT_CAPS (issecure_mask(SECURE_USERNS_STRICT_CAPS)) > +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \ > + (issecure_mask(SECURE_USERNS_STRICT_CAPS_LOCKED)) > + > #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ > issecure_mask(SECURE_NO_SETUID_FIXUP) | \ > issecure_mask(SECURE_KEEP_CAPS) | \ > - issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) > + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ > + issecure_mask(SECURE_USERNS_STRICT_CAPS)) > #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) > > #endif /* _UAPI_LINUX_SECUREBITS_H */ > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 7e624607330b..53848e2b68cd 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -10,6 +10,7 @@ > #include <linux/cred.h> > #include <linux/securebits.h> > #include <linux/security.h> > +#include <linux/capability.h> > #include <linux/keyctl.h> > #include <linux/key-type.h> > #include <keys/user-type.h> > @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts *ucounts) > > static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) > { > + /* Limit userns capabilities to our parent's bounding set. */ In the case of userns_install(), it will be the target user namespace creator's bounding set, right? Not "our parent's"? > + if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS)) > + cred->cap_userns = cap_intersect(cred->cap_userns, cred->cap_bset); > + > /* Start with the capabilities defined in the userns set. */ > cred->cap_bset = cred->cap_userns; > cred->cap_permitted = cred->cap_userns; > -- > 2.45.2
On Sun, Jun 09, 2024 at 09:33:01PM GMT, Serge E. Hallyn wrote: > On Sun, Jun 09, 2024 at 03:43:35AM -0700, Jonathan Calmels wrote: > > This patch adds a new capability security bit designed to constrain a > > task’s userns capability set to its bounding set. The reason for this is > > twofold: > > > > - This serves as a quick and easy way to lock down a set of capabilities > > for a task, thus ensuring that any namespace it creates will never be > > more privileged than itself is. > > - This helps userspace transition to more secure defaults by not requiring > > specific logic for the userns capability set, or libcap support. > > > > Example: > > > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > > -c 'unshare -r grep Cap /proc/self/status' > > CapInh: 0000000000000000 > > CapPrm: 000001fffffdffff > > CapEff: 000001fffffdffff > > CapBnd: 000001fffffdffff > > CapAmb: 0000000000000000 > > CapUNs: 000001fffffdffff > > But you are not (that I can see, in this or the previous patch) > keeping SECURE_USERNS_STRICT_CAPS in securebits on the next > level unshare. Though I think it's ok, because by then both > cap_userns and cap_bset are reduced and cap_userns can't be > expanded. (Sorry, just thinking aloud here) Right, this is safe to reset, but maybe we should keep it if the secbit is locked? This is kind of a special case compared to the other bits. > > + /* Limit userns capabilities to our parent's bounding set. */ > > In the case of userns_install(), it will be the target user namespace > creator's bounding set, right? Not "our parent's"? Good point, I should reword this comment.
On Mon, Jun 10, 2024 at 02:46:06AM -0700, Jonathan Calmels wrote: > On Sun, Jun 09, 2024 at 09:33:01PM GMT, Serge E. Hallyn wrote: > > On Sun, Jun 09, 2024 at 03:43:35AM -0700, Jonathan Calmels wrote: > > > This patch adds a new capability security bit designed to constrain a > > > task’s userns capability set to its bounding set. The reason for this is > > > twofold: > > > > > > - This serves as a quick and easy way to lock down a set of capabilities > > > for a task, thus ensuring that any namespace it creates will never be > > > more privileged than itself is. > > > - This helps userspace transition to more secure defaults by not requiring > > > specific logic for the userns capability set, or libcap support. > > > > > > Example: > > > > > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > > > -c 'unshare -r grep Cap /proc/self/status' > > > CapInh: 0000000000000000 > > > CapPrm: 000001fffffdffff > > > CapEff: 000001fffffdffff > > > CapBnd: 000001fffffdffff > > > CapAmb: 0000000000000000 > > > CapUNs: 000001fffffdffff > > > > But you are not (that I can see, in this or the previous patch) > > keeping SECURE_USERNS_STRICT_CAPS in securebits on the next > > level unshare. Though I think it's ok, because by then both > > cap_userns and cap_bset are reduced and cap_userns can't be > > expanded. (Sorry, just thinking aloud here) > > Right this is safe to reset, but maybe we do keep it if the secbit is > locked? This is kind of a special case compared to the other bits. I don't think it would be worth the extra complication in the secbits code, and it's semantically very different from the cap_userns. > > > + /* Limit userns capabilities to our parent's bounding set. */ > > > > In the case of userns_install(), it will be the target user namespace > > creator's bounding set, right? Not "our parent's"? > > Good point, I should reword this comment.
On Sun, 2024-06-09 at 03:43 -0700, Jonathan Calmels wrote: > This patch adds a new capability security bit designed to constrain a nit: if you think of it "This patch adds" could be just "add", right? :-) Also name the exact thing/symbol/whatever here. This is not a HBO series. > task’s userns capability set to its bounding set. The reason for this > is > twofold: > > - This serves as a quick and easy way to lock down a set of > capabilities > for a task, thus ensuring that any namespace it creates will never > be > more privileged than itself is. > - This helps userspace transition to more secure defaults by not > requiring > specific logic for the userns capability set, or libcap support. > > Example: > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > -c 'unshare -r grep Cap /proc/self/status' > CapInh: 0000000000000000 > CapPrm: 000001fffffdffff > CapEff: 000001fffffdffff > CapBnd: 000001fffffdffff > CapAmb: 0000000000000000 > CapUNs: 000001fffffdffff > > Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net> > --- > include/linux/securebits.h | 1 + > include/uapi/linux/securebits.h | 11 ++++++++++- > kernel/user_namespace.c | 5 +++++ > 3 files changed, 16 insertions(+), 1 deletion(-) > > diff --git a/include/linux/securebits.h b/include/linux/securebits.h > index 656528673983..5f9d85cd69c3 100644 > --- a/include/linux/securebits.h > +++ b/include/linux/securebits.h > @@ -5,4 +5,5 @@ > #include <uapi/linux/securebits.h> > > #define issecure(X) (issecure_mask(X) & > current_cred_xxx(securebits)) > +#define iscredsecure(cred, X) (issecure_mask(X) & cred- > >securebits) > #endif /* !_LINUX_SECUREBITS_H */ > diff --git a/include/uapi/linux/securebits.h > b/include/uapi/linux/securebits.h > index d6d98877ff1a..2da3f4be4531 100644 > --- a/include/uapi/linux/securebits.h > +++ b/include/uapi/linux/securebits.h > @@ -52,10 +52,19 @@ > #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ > (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_L > OCKED)) > > +/* When set, user 
namespace capabilities are restricted to their > parent's bounding set. */ > +#define SECURE_USERNS_STRICT_CAPS 8 > +#define SECURE_USERNS_STRICT_CAPS_LOCKED 9 /* make > bit-8 immutable */ > + > +#define SECBIT_USERNS_STRICT_CAPS > (issecure_mask(SECURE_USERNS_STRICT_CAPS)) > +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \ > + (issecure_mask(SECURE_USERNS_STRICT_CAPS_LOC > KED)) > + > #define > SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ > > issecure_mask(SECURE_NO_SETUID_FIXUP) | \ > issecure_mask(SECURE_KEEP_CAPS) | \ > - > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) > + > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ > + Spurious new lines in the diff. Please, as a first priority, aim for an absolutely minimal diff, or at least do not grow the diff proactively like this. If, after that, we really think that we need some "extras" in the patch set, then we can decide on that. These only take energy away from reviewers. > issecure_mask(SECURE_USERNS_STRICT_CAPS)) > #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) > > #endif /* _UAPI_LINUX_SECUREBITS_H */ > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 7e624607330b..53848e2b68cd 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -10,6 +10,7 @@ > #include <linux/cred.h> > #include <linux/securebits.h> > #include <linux/security.h> > +#include <linux/capability.h> > #include <linux/keyctl.h> > #include <linux/key-type.h> > #include <keys/user-type.h> > @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts > *ucounts) > > static void set_cred_user_ns(struct cred *cred, struct > user_namespace *user_ns) > { > + /* Limit userns capabilities to our parent's bounding set. > */ > + if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS)) > + cred->cap_userns = cap_intersect(cred->cap_userns, > cred->cap_bset); > + > /* Start with the capabilities defined in the userns set. 
*/ > cred->cap_bset = cred->cap_userns; > cred->cap_permitted = cred->cap_userns; I'm going on a 4-week holiday starting next week, so I'm focusing on nits, but since this has to do with access control: 1. Please go surgical with the diffs, because this type of patch also requires a surgical review. Right now, reviewing this is like riding on a bumpy road in a car whose suspension is broken ;-) I hope you grasp my argument here. I only want to look at the problem and its solution, not random stuff. BR, Jarkko
On Fri, 2024-06-28 at 17:43 +0300, Jarkko Sakkinen wrote: > On Sun, 2024-06-09 at 03:43 -0700, Jonathan Calmels wrote: > > This patch adds a new capability security bit designed to constrain > > a > > > nit: if you think of it "This patch adds" could be just "add", right? > :-) > > Also name the exact thing/symbol/whatever here. This is not a HBO > series. > > > task’s userns capability set to its bounding set. The reason for > > this > > is > > twofold: > > > > - This serves as a quick and easy way to lock down a set of > > capabilities > > for a task, thus ensuring that any namespace it creates will > > never > > be > > more privileged than itself is. > > - This helps userspace transition to more secure defaults by not > > requiring > > specific logic for the userns capability set, or libcap support. > > > > Example: > > > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > > -c 'unshare -r grep Cap /proc/self/status' > > CapInh: 0000000000000000 > > CapPrm: 000001fffffdffff > > CapEff: 000001fffffdffff > > CapBnd: 000001fffffdffff > > CapAmb: 0000000000000000 > > CapUNs: 000001fffffdffff > > > > Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net> > > --- > > include/linux/securebits.h | 1 + > > include/uapi/linux/securebits.h | 11 ++++++++++- > > kernel/user_namespace.c | 5 +++++ > > 3 files changed, 16 insertions(+), 1 deletion(-) > > > > diff --git a/include/linux/securebits.h > > b/include/linux/securebits.h > > index 656528673983..5f9d85cd69c3 100644 > > --- a/include/linux/securebits.h > > +++ b/include/linux/securebits.h > > @@ -5,4 +5,5 @@ > > #include <uapi/linux/securebits.h> > > > > #define issecure(X) (issecure_mask(X) & > > current_cred_xxx(securebits)) > > +#define iscredsecure(cred, X) (issecure_mask(X) & cred- > > > securebits) > > #endif /* !_LINUX_SECUREBITS_H */ > > diff --git a/include/uapi/linux/securebits.h > > b/include/uapi/linux/securebits.h > > index d6d98877ff1a..2da3f4be4531 100644 > > --- a/include/uapi/linux/securebits.h > > 
+++ b/include/uapi/linux/securebits.h > > @@ -52,10 +52,19 @@ > > #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ > > (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE > > _L > > OCKED)) > > > > +/* When set, user namespace capabilities are restricted to their > > parent's bounding set. */ > > +#define SECURE_USERNS_STRICT_CAPS 8 > > +#define SECURE_USERNS_STRICT_CAPS_LOCKED 9 /* make > > > > > bit-8 immutable */ > > + > > +#define SECBIT_USERNS_STRICT_CAPS > > (issecure_mask(SECURE_USERNS_STRICT_CAPS)) > > +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \ > > + (issecure_mask(SECURE_USERNS_STRICT_CAPS_L > > OC > > KED)) > > + > > #define > > SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ > > > > issecure_mask(SECURE_NO_SETUID_FIXUP) | \ > > issecure_mask(SECURE_KEEP_CAPS) | > > \ > > - > > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) > > + > > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ > > + > > spurious new lines in the diff > > please as first priority aim for absolute minimal diff or at least > do grow diff proactively like this. > > If we really think after that, that we need some "extras" to the > patch set, then we decide that. These only take energy away from > reviewers. 
> > > > issecure_mask(SECURE_USERNS_STRICT_CAPS)) > > #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) > > > > #endif /* _UAPI_LINUX_SECUREBITS_H */ > > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > > index 7e624607330b..53848e2b68cd 100644 > > --- a/kernel/user_namespace.c > > +++ b/kernel/user_namespace.c > > @@ -10,6 +10,7 @@ > > #include <linux/cred.h> > > #include <linux/securebits.h> > > #include <linux/security.h> > > +#include <linux/capability.h> > > #include <linux/keyctl.h> > > #include <linux/key-type.h> > > #include <keys/user-type.h> > > @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts > > *ucounts) > > > > static void set_cred_user_ns(struct cred *cred, struct > > user_namespace *user_ns) > > { > > + /* Limit userns capabilities to our parent's bounding set. > > */ > > + if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS)) > > + cred->cap_userns = cap_intersect(cred->cap_userns, > > cred->cap_bset); > > + > > /* Start with the capabilities defined in the userns set. > > */ > > cred->cap_bset = cred->cap_userns; > > cred->cap_permitted = cred->cap_userns; > > Going for 4 week holiday starting for next week so focus in on nits > but since this is something to do access control: > > 1. Please go surgical with the diff's because this type of patches > also require a surgical review. Now reviewing this like riding on > a bumpy road with a car of which suspension mechanics is broken > ;-) > > Hope you grab my argument here. I only want to look at the problem > and solution for that not random stuff.. I am skipping the other patches because I am eager to get on holiday, but my instinct tells me that at least some of this feedback applies to all of the patches. So put your solution in sight, not cleanups. BR, Jarkko
diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 656528673983..5f9d85cd69c3 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -5,4 +5,5 @@ #include <uapi/linux/securebits.h> #define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define iscredsecure(cred, X) (issecure_mask(X) & cred->securebits) #endif /* !_LINUX_SECUREBITS_H */ diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h index d6d98877ff1a..2da3f4be4531 100644 --- a/include/uapi/linux/securebits.h +++ b/include/uapi/linux/securebits.h @@ -52,10 +52,19 @@ #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) +/* When set, user namespace capabilities are restricted to their parent's bounding set. */ +#define SECURE_USERNS_STRICT_CAPS 8 +#define SECURE_USERNS_STRICT_CAPS_LOCKED 9 /* make bit-8 immutable */ + +#define SECBIT_USERNS_STRICT_CAPS (issecure_mask(SECURE_USERNS_STRICT_CAPS)) +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \ + (issecure_mask(SECURE_USERNS_STRICT_CAPS_LOCKED)) + #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ issecure_mask(SECURE_KEEP_CAPS) | \ - issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ + issecure_mask(SECURE_USERNS_STRICT_CAPS)) #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) #endif /* _UAPI_LINUX_SECUREBITS_H */ diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7e624607330b..53848e2b68cd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -10,6 +10,7 @@ #include <linux/cred.h> #include <linux/securebits.h> #include <linux/security.h> +#include <linux/capability.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <keys/user-type.h> @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts *ucounts) static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) { + /* Limit 
userns capabilities to our parent's bounding set. */ + if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS)) + cred->cap_userns = cap_intersect(cred->cap_userns, cred->cap_bset); + /* Start with the capabilities defined in the userns set. */ cred->cap_bset = cred->cap_userns; cred->cap_permitted = cred->cap_userns;
This patch adds a new capability security bit designed to constrain a task’s userns capability set to its bounding set. The reason for this is twofold: - This serves as a quick and easy way to lock down a set of capabilities for a task, thus ensuring that any namespace it creates will never be more privileged than itself is. - This helps userspace transition to more secure defaults by not requiring specific logic for the userns capability set, or libcap support. Example: # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ -c 'unshare -r grep Cap /proc/self/status' CapInh: 0000000000000000 CapPrm: 000001fffffdffff CapEff: 000001fffffdffff CapBnd: 000001fffffdffff CapAmb: 0000000000000000 CapUNs: 000001fffffdffff Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net> --- include/linux/securebits.h | 1 + include/uapi/linux/securebits.h | 11 ++++++++++- kernel/user_namespace.c | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-)