Message ID | 20250402085257.728844-1-jarkko@kernel.org |
---|---|
State | New |
Series | [RFC,v3] KEYS: Add a list for unreferenced keys |
Jarkko Sakkinen <jarkko@kernel.org> wrote:

> Add an isolated list for unreferenced keys, thereby splitting key
> deletion into a separate phase after the reaping cycle. This makes the
> whole process safer, as these two distinct tasks don't interfere with
> each other. As an effect, KEY_FLAG_FINAL_PUT is no longer needed.
>
> Since `security/keys/proc.c` holds key_serial_lock for a variable
> amount of time, `rb_erase()` cannot be done within `key_put()` using
> the IRQ saving/restoring versions of the spin lock functions. For the
> same reason, the key needs to co-exist both in the tree and in the
> list for a period of time.
>
> Therefore, split the union in `struct key` into separate `serial_node`
> and `graveyard_link` fields, and introduce a separate lock for the
> graveyard, which is locked and unlocked using the IRQ saving/restoring
> versions of the locking routines.

Splitting the union seems reasonable. However, you need to be very
careful extracting keys from the serial tree outside of the gc loop:

 (1) The serial tree walking loop assumes that it can keep a pointer to
     the key it is currently examining without holding any locks. I
     think this is probably not a problem for your patch as you're doing
     the removal in the same work item.

 (2) We have to deal with put keys belonging to a key type that's
     undergoing removal. That might not be a problem as we can just get
     rid of them directly - but we have to deal with the RCU
     implications and may need to call synchronize_rcu() again after
     calling key_gc_graveyard().

 (3) We still have to deal with a new key_put() happening immediately
     after you've done the rb_erase() calls. I think it's probably worth
     still skipping the evaluation of the key state if the refcount is 0
     (if we're getting rid of the FINAL_PUT flag). We might be holding
     up key insertion, after all.

This brings me on to another thought: Should key_serial_lock be a
seqlock? And should the gc use RCU + read_seqlock() and insertion
write_seqlock()?

And further, should we have an 'active keys' list to search instead of
searching the RCU tree? Your patch here might then move the put key from
the active list to the graveyard list - though it's better if it
doesn't, as the garbage collector can then walk the active list without
the need for a lock except to remove a key.

Attached is an incomplete move in that direction.

David
---
keys: Have key_put() add dead key to a graveyard list [INCOMPLETE]

diff --git a/include/linux/key.h b/include/linux/key.h
index 074dca3222b9..2a095a32dc42 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -195,10 +195,9 @@ enum key_state {
 struct key {
        refcount_t              usage;          /* number of references */
        key_serial_t            serial;         /* key serial number */
-       union {
-               struct list_head graveyard_link;
-               struct rb_node  serial_node;
-       };
+       struct rb_node          serial_node;
+       struct list_head        active_link;    /* Next key in key_active_keys */
+       struct key              *next_dead;     /* Next key in key_graveyard */
 #ifdef CONFIG_KEY_NOTIFICATIONS
        struct watch_list       *watchers;      /* Entities watching this key for changes */
 #endif
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 7d687b0962b1..2152d2f50ca7 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -20,6 +20,8 @@ unsigned key_gc_delay = 5 * 60;
  */
 static void key_garbage_collector(struct work_struct *work);
 DECLARE_WORK(key_gc_work, key_garbage_collector);
+extern struct key *key_graveyard;
+DEFINE_SPINLOCK(key_graveyard_lock);
 
 /*
  * Reaper for links from keyrings to dead keys.
@@ -132,14 +134,40 @@ void key_gc_keytype(struct key_type *ktype)
 /*
  * Garbage collect a list of unreferenced, detached keys
  */
-static noinline void key_gc_unused_keys(struct list_head *keys)
+static noinline void key_gc_unused_keys(void)
 {
-       while (!list_empty(keys)) {
-               struct key *key =
-                       list_entry(keys->next, struct key, graveyard_link);
+       struct key *graveyard, *key;
+
+       spin_lock_irq(&key_graveyard_lock);
+       graveyard = key_graveyard;
+       key_graveyard = NULL;
+       spin_unlock_irq(&key_graveyard_lock);
+
+       /* Make the keys inaccessible and remove them from the gc check list. */
+       spin_lock(&key_serial_lock);
+       for (key = graveyard; key; key = key->next_dead) {
+               rb_erase(&key->serial_node, &key_serial_tree);
+               list_del(&key->active_link);
+               if (need_resched()) {
+                       spin_unlock(&key_serial_lock);
+                       cond_resched();
+                       spin_lock(&key_serial_lock);
+               }
+       }
+       spin_unlock(&key_serial_lock);
+
+       /* Make sure that all pending keyring payload destructions are
+        * fulfilled and that people aren't now looking at dead or dying keys
+        * that they don't have a reference upon or a link to.
+        */
+       kdebug("gc sync");
+       synchronize_rcu();
+
+       while (graveyard) {
+               struct key *key = graveyard;
                short state = key->state;
 
-               list_del(&key->graveyard_link);
+               graveyard = key->next_dead;
 
                kdebug("- %u", key->serial);
                key_check(key);
@@ -177,7 +205,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
  */
 static void key_garbage_collector(struct work_struct *work)
 {
-       static LIST_HEAD(graveyard);
        static u8 gc_state;             /* Internal persistent state */
 #define KEY_GC_REAP_AGAIN      0x01    /* - Need another cycle */
 #define KEY_GC_REAPING_LINKS   0x02    /* - We need to reap links */
@@ -186,8 +213,7 @@ static void key_garbage_collector(struct work_struct *work)
 #define KEY_GC_REAPING_DEAD_3  0x40    /* - We need to reap dead keys */
 #define KEY_GC_FOUND_DEAD_KEY  0x80    /* - We found at least one dead key */
 
-       struct rb_node *cursor;
-       struct key *key;
+       struct list_head *cursor, *next;
        time64_t new_timer, limit, expiry;
 
        kenter("[%lx,%x]", key_gc_flags, gc_state);
@@ -206,36 +232,71 @@ static void key_garbage_collector(struct work_struct *work)
 
        new_timer = TIME64_MAX;
 
-       /* As only this function is permitted to remove things from the key
-        * serial tree, if cursor is non-NULL then it will always point to a
-        * valid node in the tree - even if lock got dropped.
+       /* If we have a key type that's being removed, go through and make all
+        * its keys unavailable.  At this point, only keyctl_read() and
+        * /proc/keys should be looking at keys of this type.
         */
-       spin_lock(&key_serial_lock);
-       cursor = rb_first(&key_serial_tree);
+       if (gc_state & KEY_GC_REAPING_DEAD_1) {
+               for (cursor = smp_load_acquire(&key_active_list.next);
+                    /* key_active_list.next before key->active_link.next */
+                    cursor != &key_active_list;
+                    cursor = next) {
+                       struct key *key = list_entry(cursor, struct key, active_link);
+
+                       next = cursor->next;
+                       if (key->type != key_gc_dead_keytype)
+                               continue;
+
+                       spin_lock(&key_serial_lock);
+                       rb_erase(&key->serial_node, &key_serial_tree);
+                       list_del_init(&key->active_link);
+                       spin_unlock(&key_serial_lock);
+
+                       kdebug("destroy key %d", key->serial);
+                       down_write(&key->sem);
+                       key->type = &key_type_dead;
+                       if (key_gc_dead_keytype->destroy)
+                               key_gc_dead_keytype->destroy(key);
+                       memset(&key->payload, KEY_DESTROY, sizeof(key->payload));
+                       up_write(&key->sem);
+               }
+       }
 
-continue_scanning:
-       while (cursor) {
-               key = rb_entry(cursor, struct key, serial_node);
-               cursor = rb_next(cursor);
+       /* As only this function is permitted to remove things from the active
+        * key list, if cursor is non-NULL then it will always point to a valid
+        * node in the tree.  No locking is required to scan the tree.
+        */
+       for (cursor = smp_load_acquire(&key_active_list.next); /* active.next before key->next */
+            cursor != &key_active_list;
+            cursor = cursor->next) {
+               struct key *key = list_entry(cursor, struct key, active_link);
+               const struct key_type *type = key->type;
+
+               cond_resched();
 
                if (refcount_read(&key->usage) == 0)
-                       goto found_unreferenced_key;
+                       continue; /* Presumably it's in the graveyard. */
 
                if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) {
-                       if (key->type == key_gc_dead_keytype) {
+                       if (type == key_gc_dead_keytype) {
                                gc_state |= KEY_GC_FOUND_DEAD_KEY;
                                set_bit(KEY_FLAG_DEAD, &key->flags);
                                key->perm = 0;
-                               goto skip_dead_key;
-                       } else if (key->type == &key_type_keyring &&
-                                  key->restrict_link) {
-                               goto found_restricted_keyring;
+                               continue;
+                       }
+                       if (type == &key_type_keyring &&
+                           key->restrict_link) {
+                               /* We found a restricted keyring and need to
+                                * update the restriction if it is associated
+                                * with the dead key type.
+                                */
+                               keyring_restriction_gc(key, key_gc_dead_keytype);
+                               continue;
                        }
                }
 
                expiry = key->expiry;
                if (expiry != TIME64_MAX) {
-                       if (!(key->type->flags & KEY_TYPE_INSTANT_REAP))
+                       if (!(type->flags & KEY_TYPE_INSTANT_REAP))
                                expiry += key_gc_delay;
                        if (expiry > limit && expiry < new_timer) {
                                kdebug("will expire %x in %lld",
@@ -245,32 +306,35 @@ static void key_garbage_collector(struct work_struct *work)
                if (unlikely(gc_state & KEY_GC_REAPING_DEAD_2))
-                       if (key->type == key_gc_dead_keytype)
+                       if (type == key_gc_dead_keytype)
                                gc_state |= KEY_GC_FOUND_DEAD_KEY;
 
-               if ((gc_state & KEY_GC_REAPING_LINKS) ||
-                   unlikely(gc_state & KEY_GC_REAPING_DEAD_2)) {
-                       if (key->type == &key_type_keyring)
-                               goto found_keyring;
+               /* If we have a keyring and we need to check the payload for
+                * links to dead or expired keys.  We don't flag another reap
+                * immediately as we have to wait for the old payload to be
+                * destroyed by RCU before we can reap the keys to which it
+                * refers.
+                */
+               if (((gc_state & KEY_GC_REAPING_LINKS) ||
+                    unlikely(gc_state & KEY_GC_REAPING_DEAD_2)) &&
+                   type == &key_type_keyring) {
+                       keyring_gc(key, limit);
+                       continue;
                }
 
-               if (unlikely(gc_state & KEY_GC_REAPING_DEAD_3))
-                       if (key->type == key_gc_dead_keytype)
-                               goto destroy_dead_key;
-
-       skip_dead_key:
-               if (spin_is_contended(&key_serial_lock) || need_resched())
-                       goto contended;
-       }
-
-contended:
-       spin_unlock(&key_serial_lock);
-
-maybe_resched:
-       if (cursor) {
-               cond_resched();
-               spin_lock(&key_serial_lock);
-               goto continue_scanning;
+               /* PASS 3: If we have a dead key that's still referenced, reset
+                * its type and destroy its payload with its semaphore held.
+                */
+               if (unlikely(gc_state & KEY_GC_REAPING_DEAD_3) &&
+                   key->type == key_gc_dead_keytype) {
+                       kdebug("destroy key %d", key->serial);
+                       down_write(&key->sem);
+                       key->type = &key_type_dead;
+                       if (key_gc_dead_keytype->destroy)
+                               key_gc_dead_keytype->destroy(key);
+                       memset(&key->payload, KEY_DESTROY, sizeof(key->payload));
+                       up_write(&key->sem);
+               }
        }
 
        /* We've completed the pass.  Set the timer if we need to and queue a
@@ -284,20 +348,9 @@ static void key_garbage_collector(struct work_struct *work)
                key_schedule_gc(new_timer);
        }
 
-       if (unlikely(gc_state & KEY_GC_REAPING_DEAD_2) ||
-           !list_empty(&graveyard)) {
-               /* Make sure that all pending keyring payload destructions are
-                * fulfilled and that people aren't now looking at dead or
-                * dying keys that they don't have a reference upon or a link
-                * to.
-                */
-               kdebug("gc sync");
-               synchronize_rcu();
-       }
-
-       if (!list_empty(&graveyard)) {
+       if (key_graveyard) {
                kdebug("gc keys");
-               key_gc_unused_keys(&graveyard);
+               key_gc_unused_keys();
        }
 
        if (unlikely(gc_state & (KEY_GC_REAPING_DEAD_1 |
@@ -325,48 +378,4 @@ static void key_garbage_collector(struct work_struct *work)
                schedule_work(&key_gc_work);
        kleave(" [end %x]", gc_state);
        return;
-
-       /* We found an unreferenced key - once we've removed it from the tree,
-        * we can safely drop the lock.
-        */
-found_unreferenced_key:
-       kdebug("unrefd key %d", key->serial);
-       rb_erase(&key->serial_node, &key_serial_tree);
-       spin_unlock(&key_serial_lock);
-
-       list_add_tail(&key->graveyard_link, &graveyard);
-       gc_state |= KEY_GC_REAP_AGAIN;
-       goto maybe_resched;
-
-       /* We found a restricted keyring and need to update the restriction if
-        * it is associated with the dead key type.
-        */
-found_restricted_keyring:
-       spin_unlock(&key_serial_lock);
-       keyring_restriction_gc(key, key_gc_dead_keytype);
-       goto maybe_resched;
-
-       /* We found a keyring and we need to check the payload for links to
-        * dead or expired keys.  We don't flag another reap immediately as we
-        * have to wait for the old payload to be destroyed by RCU before we
-        * can reap the keys to which it refers.
-        */
-found_keyring:
-       spin_unlock(&key_serial_lock);
-       keyring_gc(key, limit);
-       goto maybe_resched;
-
-       /* We found a dead key that is still referenced.  Reset its type and
-        * destroy its payload with its semaphore held.
-        */
-destroy_dead_key:
-       spin_unlock(&key_serial_lock);
-       kdebug("destroy key %d", key->serial);
-       down_write(&key->sem);
-       key->type = &key_type_dead;
-       if (key_gc_dead_keytype->destroy)
-               key_gc_dead_keytype->destroy(key);
-       memset(&key->payload, KEY_DESTROY, sizeof(key->payload));
-       up_write(&key->sem);
-       goto maybe_resched;
 }
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 2cffa6dc8255..2e0179315a01 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -87,6 +87,9 @@ extern struct rb_root key_serial_tree;
 extern spinlock_t key_serial_lock;
 extern struct mutex key_construction_mutex;
 extern wait_queue_head_t request_key_conswq;
+extern struct list_head key_active_list;
+extern struct key *key_graveyard;
+extern spinlock_t key_graveyard_lock;
 
 extern void key_set_index_key(struct keyring_index_key *index_key);
 extern struct key_type *key_type_lookup(const char *type);
diff --git a/security/keys/key.c b/security/keys/key.c
index 3d7d185019d3..8990b1956780 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -18,6 +18,7 @@
 struct kmem_cache *key_jar;
 struct rb_root key_serial_tree; /* tree of keys indexed by serial */
+LIST_HEAD(key_active_list);     /* List of active keys, entries deleted by gc only */
 DEFINE_SPINLOCK(key_serial_lock);
 
 struct rb_root key_user_tree; /* tree of quota records indexed by UID */
@@ -165,6 +166,15 @@ static inline void key_alloc_serial(struct key *key)
        rb_link_node(&key->serial_node, parent, p);
        rb_insert_color(&key->serial_node, &key_serial_tree);
 
+       /* Add the key to the front of the live list so that the garbage
+        * collector can see it (as list_add() with a barrier).
+        */
+       key->active_link.prev = &key_active_list;
+       key->active_link.next = key_active_list.next;
+       key->active_link.next->prev = &key->active_link;
+       /* Write key->active_link.next before key_active_list.next. */
+       smp_store_release(&key_active_list.next, &key->active_link);
+
        spin_unlock(&key_serial_lock);
        return;
@@ -651,14 +661,22 @@ void key_put(struct key *key)
        if (refcount_dec_and_test(&key->usage)) {
                unsigned long flags;
 
-               /* deal with the user's key tracking and quota */
+               local_irq_save(flags);
+
                if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) {
-                       spin_lock_irqsave(&key->user->lock, flags);
+                       spin_lock(&key->user->lock);
                        key->user->qnkeys--;
                        key->user->qnbytes -= key->quotalen;
-                       spin_unlock_irqrestore(&key->user->lock, flags);
+                       spin_unlock(&key->user->lock);
                }
+
+               spin_lock(&key_graveyard_lock);
+               key->next_dead = key_graveyard;
+               key_graveyard = key;
+               spin_unlock(&key_graveyard_lock);
+               schedule_work(&key_gc_work);
+               local_irq_restore(flags);
        }
 }
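As an aside for readers of the archive: a minimal sketch of the seqlock
direction David asks about above might look as follows. The lock name
key_serial_seqlock and both helpers are hypothetical (neither patch in
this thread implements them), and a real conversion would additionally
need the tree walk to be RCU-safe so that a reader racing a rebalance
simply retries:

        #include <linux/key.h>
        #include <linux/rbtree.h>
        #include <linux/seqlock.h>

        /* Hypothetical replacement for key_serial_lock. */
        static DEFINE_SEQLOCK(key_serial_seqlock);

        /* Insertion takes the write side, bumping the sequence count. */
        static void key_publish_serial(struct key *key)
        {
                write_seqlock(&key_serial_seqlock);
                /* ... rb_link_node() + rb_insert_color() into key_serial_tree ... */
                write_sequnlock(&key_serial_seqlock);
        }

        /* Lookup (or the gc scan) runs locklessly and retries if a
         * writer changed the tree underneath it.
         */
        static struct key *key_find_serial(key_serial_t serial)
        {
                struct key *found;
                unsigned int seq;

                do {
                        seq = read_seqbegin(&key_serial_seqlock);
                        found = NULL;
                        /* ... walk key_serial_tree under RCU, setting found ... */
                } while (read_seqretry(&key_serial_seqlock, seq));

                return found;
        }

Insertions would then never be blocked by readers; readers pay by
looping whenever the sequence count moves, which is why the walk itself
must tolerate a concurrently changing tree.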
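The subtlest part of the attached sketch is the barrier pairing on
key_active_list: the publisher must finish writing a new element's links
before the head pointer makes it reachable, and a walker must read the
head with acquire semantics so that it observes those writes. Stripped
of the keys specifics, the pattern reduces to this (hypothetical
onode/olist names, not code from the patch):

        #include <linux/compiler.h>
        #include <asm/barrier.h>

        struct onode {
                struct onode *next;
                int data;
        };

        static struct onode *olist_head;        /* singly linked, grows at the head */

        /* Publisher: fully initialise the node, then make it reachable with
         * a release store so a concurrent walker cannot see a half-built
         * node.  Publishers are assumed to be serialised by a lock, just as
         * key insertion is; only the walkers are lockless.
         */
        void olist_push(struct onode *node, int data)
        {
                node->data = data;
                node->next = READ_ONCE(olist_head);
                smp_store_release(&olist_head, node);
        }

        /* Walker: the acquire load of the head pairs with the release store
         * above, so everything written before the publish is visible here.
         */
        int olist_sum(void)
        {
                struct onode *n;
                int sum = 0;

                for (n = smp_load_acquire(&olist_head); n; n = READ_ONCE(n->next))
                        sum += n->data;
                return sum;
        }

Removal is deliberately absent from the sketch: unlinking under a
concurrent lockless walk is the hard part, which is why only the gc may
delete entries and why synchronize_rcu() runs before the keys are
actually freed.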
On Wed, Apr 02, 2025 at 02:54:43PM +0100, David Howells wrote:
> Jarkko Sakkinen <jarkko@kernel.org> wrote:
>
> > Add an isolated list for unreferenced keys, thereby splitting key
> > deletion into a separate phase after the reaping cycle. This makes the
> > whole process safer, as these two distinct tasks don't interfere with
> > each other. As an effect, KEY_FLAG_FINAL_PUT is no longer needed.
> >
> > Since `security/keys/proc.c` holds key_serial_lock for a variable
> > amount of time, `rb_erase()` cannot be done within `key_put()` using
> > the IRQ saving/restoring versions of the spin lock functions. For the
> > same reason, the key needs to co-exist both in the tree and in the
> > list for a period of time.
> >
> > Therefore, split the union in `struct key` into separate `serial_node`
> > and `graveyard_link` fields, and introduce a separate lock for the
> > graveyard, which is locked and unlocked using the IRQ saving/restoring
> > versions of the locking routines.

For this response I'll just discuss around the bullets brought up. I
need more time with the seqlock solution.

One thing I did pick from your patch is that I'll revert the name of the
helper function back to key_gc_unused_keys(), because I don't think the
rename is sensible (it only adds unneeded delta).

> Splitting the union seems reasonable.
>
> However, you need to be very careful extracting keys from the serial
> tree outside of the gc loop:
>
>  (1) The serial tree walking loop assumes that it can keep a pointer to
>      the key it is currently examining without holding any locks. I
>      think this is probably not a problem for your patch as you're
>      doing the removal in the same work item.
>
>  (2) We have to deal with put keys belonging to a key type that's
>      undergoing removal. That might not be a problem as we can just get
>      rid of them directly - but we have to deal with the RCU
>      implications and may need to call synchronize_rcu() again after
>      calling key_gc_graveyard().
>
>  (3) We still have to deal with a new key_put() happening immediately
>      after you've done the rb_erase() calls. I think it's probably
>      worth still skipping the evaluation of the key state if the
>      refcount is 0 (if we're getting rid of the FINAL_PUT flag). We
>      might be holding up key insertion, after all.

In this v3, true, the processing is broken, as keys with usage at zero
also go through the "gc state machine" (or whatever you want to call the
key type teardown process).

The way my solution could be improved to address these concerns, for the
most part, would be to do this at the beginning of each iteration:

        if (!refcount_inc_not_zero(&key->usage))
                goto skip_dead_key;

And after processing, call key_put() (essentially an open-coded
kref_get_unless_zero). This would guarantee that the traversal keeps its
hands off keys whose usage has been reduced to zero, wouldn't it?

With this added to the patch, I don't really see how it would break the
causality, i.e. the order in which things are executed in the garbage
collection flow.

BR, Jarkko
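Spliced into the existing gc loop, the guard Jarkko describes might look
roughly like this. A sketch only: skip_dead_key and contended are the
labels already present in gc.c, and whether the temporary reference may
be dropped right here, under key_serial_lock, is exactly the kind of
detail still being discussed:

        while (cursor) {
                key = rb_entry(cursor, struct key, serial_node);
                cursor = rb_next(cursor);

                /* Open-coded kref_get_unless_zero(): if the refcount has
                 * already hit zero, the key is on its way to the graveyard
                 * and the gc state machine must leave it alone.
                 */
                if (!refcount_inc_not_zero(&key->usage))
                        goto skip_dead_key;

                /* ... existing dead-type, expiry and keyring processing ... */

                key_put(key);   /* drop the temporary reference */

        skip_dead_key:
                if (spin_is_contended(&key_serial_lock) || need_resched())
                        goto contended;
        }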
diff --git a/include/linux/key.h b/include/linux/key.h
index ba05de8579ec..c50659184bdf 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -195,10 +195,8 @@ enum key_state {
 struct key {
        refcount_t              usage;          /* number of references */
        key_serial_t            serial;         /* key serial number */
-       union {
-               struct list_head graveyard_link;
-               struct rb_node  serial_node;
-       };
+       struct list_head        graveyard_link; /* key->usage == 0 */
+       struct rb_node          serial_node;
 #ifdef CONFIG_KEY_NOTIFICATIONS
        struct watch_list       *watchers;      /* Entities watching this key for changes */
 #endif
@@ -236,7 +234,6 @@ struct key {
 #define KEY_FLAG_ROOT_CAN_INVAL        7       /* set if key can be invalidated by root without permission */
 #define KEY_FLAG_KEEP          8       /* set if key should not be removed */
 #define KEY_FLAG_UID_KEYRING   9       /* set if key is a user or user session keyring */
-#define KEY_FLAG_FINAL_PUT     10      /* set if final put has happened on key */
 
 /* the key type and key description string
  * - the desc is used to match a key against search criteria
diff --git a/security/keys/gc.c b/security/keys/gc.c
index f27223ea4578..7b1356c1095d 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -130,9 +130,9 @@ void key_gc_keytype(struct key_type *ktype)
 }
 
 /*
- * Garbage collect a list of unreferenced, detached keys
+ * Takes ownership of the given list, and deinitializes and destroys the keys.
  */
-static noinline void key_gc_unused_keys(struct list_head *keys)
+static noinline void key_gc_graveyard(struct list_head *keys)
 {
        while (!list_empty(keys)) {
                struct key *key =
@@ -206,6 +206,17 @@ static void key_garbage_collector(struct work_struct *work)
 
        new_timer = TIME64_MAX;
 
+       spin_lock_irqsave(&key_graveyard_lock, flags);
+       list_splice_init(&key_graveyard, &graveyard);
+       spin_unlock_irqrestore(&key_graveyard_lock, flags);
+
+       list_for_each_entry(key, &graveyard, graveyard_link) {
+               spin_lock(&key_serial_lock);
+               kdebug("unrefd key %d", key->serial);
+               rb_erase(&key->serial_node, &key_serial_tree);
+               spin_unlock(&key_serial_lock);
+       }
+
        /* As only this function is permitted to remove things from the key
         * serial tree, if cursor is non-NULL then it will always point to a
         * valid node in the tree - even if lock got dropped.
@@ -218,11 +229,6 @@ static void key_garbage_collector(struct work_struct *work)
                key = rb_entry(cursor, struct key, serial_node);
                cursor = rb_next(cursor);
 
-               if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) {
-                       smp_mb(); /* Clobber key->user after FINAL_PUT seen. */
-                       goto found_unreferenced_key;
-               }
-
                if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) {
                        if (key->type == key_gc_dead_keytype) {
                                gc_state |= KEY_GC_FOUND_DEAD_KEY;
@@ -299,7 +305,7 @@ static void key_garbage_collector(struct work_struct *work)
 
        if (!list_empty(&graveyard)) {
                kdebug("gc keys");
-               key_gc_unused_keys(&graveyard);
+               key_gc_graveyard(&graveyard);
        }
 
        if (unlikely(gc_state & (KEY_GC_REAPING_DEAD_1 |
@@ -328,18 +334,6 @@ static void key_garbage_collector(struct work_struct *work)
        kleave(" [end %x]", gc_state);
        return;
 
-       /* We found an unreferenced key - once we've removed it from the tree,
-        * we can safely drop the lock.
-        */
-found_unreferenced_key:
-       kdebug("unrefd key %d", key->serial);
-       rb_erase(&key->serial_node, &key_serial_tree);
-       spin_unlock(&key_serial_lock);
-
-       list_add_tail(&key->graveyard_link, &graveyard);
-       gc_state |= KEY_GC_REAP_AGAIN;
-       goto maybe_resched;
-
        /* We found a restricted keyring and need to update the restriction if
         * it is associated with the dead key type.
         */
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 2cffa6dc8255..08f356d04d78 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -66,6 +66,8 @@ struct key_user {
 extern struct rb_root key_user_tree;
 extern spinlock_t key_user_lock;
 extern struct key_user root_key_user;
+extern struct list_head key_graveyard;
+extern spinlock_t key_graveyard_lock;
 
 extern struct key_user *key_user_lookup(kuid_t uid);
 extern void key_user_put(struct key_user *user);
diff --git a/security/keys/key.c b/security/keys/key.c
index 7198cd2ac3a3..7511f2017b6b 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -22,6 +22,8 @@ DEFINE_SPINLOCK(key_serial_lock);
 
 struct rb_root key_user_tree; /* tree of quota records indexed by UID */
 DEFINE_SPINLOCK(key_user_lock);
+LIST_HEAD(key_graveyard);
+DEFINE_SPINLOCK(key_graveyard_lock);
 
 unsigned int key_quota_root_maxkeys = 1000000; /* root's key count quota */
 unsigned int key_quota_root_maxbytes = 25000000; /* root's key space quota */
@@ -658,8 +660,9 @@ void key_put(struct key *key)
                        key->user->qnbytes -= key->quotalen;
                        spin_unlock_irqrestore(&key->user->lock, flags);
                }
-               smp_mb(); /* key->user before FINAL_PUT set. */
-               set_bit(KEY_FLAG_FINAL_PUT, &key->flags);
+               spin_lock_irqsave(&key_graveyard_lock, flags);
+               list_add_tail(&key->graveyard_link, &key_graveyard);
+               spin_unlock_irqrestore(&key_graveyard_lock, flags);
                schedule_work(&key_gc_work);
        }
 }
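For reference, the idiom the v3 patch builds on, where put() parks a
zero-ref object on a spinlock-protected graveyard and kicks a worker
that splices the whole list off in O(1) and destroys entries with no
lock held, reduces to the following generic sketch (hypothetical obj
names, not the patch itself):

        #include <linux/list.h>
        #include <linux/refcount.h>
        #include <linux/slab.h>
        #include <linux/spinlock.h>
        #include <linux/workqueue.h>

        struct obj {
                refcount_t usage;
                struct list_head graveyard_link;
        };

        static LIST_HEAD(obj_graveyard);
        static DEFINE_SPINLOCK(obj_graveyard_lock);
        static void obj_reaper(struct work_struct *work);
        static DECLARE_WORK(obj_gc_work, obj_reaper);

        /* Final put: park the object and let the worker destroy it later.
         * irqsave, as in the patch, so that put() is safe in any context.
         */
        void obj_put(struct obj *obj)
        {
                unsigned long flags;

                if (refcount_dec_and_test(&obj->usage)) {
                        spin_lock_irqsave(&obj_graveyard_lock, flags);
                        list_add_tail(&obj->graveyard_link, &obj_graveyard);
                        spin_unlock_irqrestore(&obj_graveyard_lock, flags);
                        schedule_work(&obj_gc_work);
                }
        }

        static void obj_reaper(struct work_struct *work)
        {
                LIST_HEAD(graveyard);
                struct obj *obj, *tmp;

                spin_lock_irq(&obj_graveyard_lock);
                list_splice_init(&obj_graveyard, &graveyard); /* take ownership */
                spin_unlock_irq(&obj_graveyard_lock);

                /* No locks held here: nothing else can reach these entries. */
                list_for_each_entry_safe(obj, tmp, &graveyard, graveyard_link) {
                        list_del(&obj->graveyard_link);
                        kfree(obj);
                }
        }

The splice is what separates the two phases: once list_splice_init()
returns, the worker owns every entry outright, so destruction cannot
race another put() on the same object.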