author    Josh Boyer <jwboyer@redhat.com>  2013-09-03 14:17:59 -0400
committer Josh Boyer <jwboyer@redhat.com>  2013-09-03 14:19:33 -0400
commit    9cb73b95df5118a987a376f187693b0c62f3d69d (patch)
tree      dbfebfbe7b69654f7b49be2e1eba1789faff91f9
parent    49ba08f01f1bd05991fa29e617fedd778e03a1a5 (diff)
Add keyring patches to support krb5 (rhbz 1003043)
-rw-r--r--  config-generic             |    2
-rw-r--r--  kernel.spec                |   11
-rw-r--r--  keys-expand-keyring.patch  | 6834
-rw-r--r--  keys-krb-support.patch     |  747
4 files changed, 7594 insertions(+), 0 deletions(-)
diff --git a/config-generic b/config-generic
index 57d7d9af3..0acca3193 100644
--- a/config-generic
+++ b/config-generic
@@ -4203,6 +4203,8 @@ CONFIG_ZLIB_DEFLATE=m
CONFIG_INITRAMFS_SOURCE=""
CONFIG_KEYS=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=m
CONFIG_TRUSTED_KEYS=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEYS_DEBUG_PROC_KEYS=y
diff --git a/kernel.spec b/kernel.spec
index 9beaa3f3c..564c6e574 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -646,6 +646,10 @@ Patch800: crash-driver.patch
# crypto/
+# keys
+Patch900: keys-expand-keyring.patch
+Patch901: keys-krb-support.patch
+
# secure boot
Patch1000: secure-modules.patch
Patch1001: modsign-uefi.patch
@@ -1379,6 +1383,10 @@ ApplyPatch crash-driver.patch
# crypto/
+# keys
+ApplyPatch keys-expand-keyring.patch
+ApplyPatch keys-krb-support.patch
+
# secure boot
ApplyPatch secure-modules.patch
ApplyPatch modsign-uefi.patch
@@ -2258,6 +2266,9 @@ fi
# ||----w |
# || ||
%changelog
+* Tue Sep 03 2013 Josh Boyer <jwboyer@fedoraproject.org>
+- Add keyring patches to support krb5 (rhbz 1003043)
+
* Tue Sep 03 2013 Kyle McMartin <kyle@redhat.com>
- [arm64] disable VGA_CONSOLE and PARPORT_PC
- [arm64] install dtb as on %{arm}
diff --git a/keys-expand-keyring.patch b/keys-expand-keyring.patch
new file mode 100644
index 000000000..75618243b
--- /dev/null
+++ b/keys-expand-keyring.patch
@@ -0,0 +1,6834 @@
+From 96dcf8e91389e509021448ffd798cc68471fcf0f Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:50 +0100
+Subject: [PATCH 01/10] KEYS: Skip key state checks when checking for
+ possession
+
+Skip key state checks (invalidation, revocation and expiration) when checking
+for possession. Without this, invalidated, revoked and expired keys are not
+given a possession attribute - which means the possessor is not granted any
+possession permits and cannot do anything with them unless they also have a
+user, group or other permit.
+
+This causes failures in the keyutils test suite's revocation and expiration
+tests now that commit 96b5c8fea6c0861621051290d705ec2e971963f1 reduced the
+initial permissions granted to a key.
+
+The failures are due to accesses to revoked and expired keys being given
+EACCES instead of EKEYREVOKED or EKEYEXPIRED.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ security/keys/internal.h | 1 +
+ security/keys/process_keys.c | 8 +++++---
+ security/keys/request_key.c | 6 ++++--
+ security/keys/request_key_auth.c | 2 +-
+ 4 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index d4f1468..df971fe 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -124,6 +124,7 @@ extern key_ref_t search_my_process_keyrings(struct key_type *type,
+ extern key_ref_t search_process_keyrings(struct key_type *type,
+ const void *description,
+ key_match_func_t match,
++ bool no_state_check,
+ const struct cred *cred);
+
+ extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index 42defae..a3410d6 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -440,6 +440,7 @@ found:
+ key_ref_t search_process_keyrings(struct key_type *type,
+ const void *description,
+ key_match_func_t match,
++ bool no_state_check,
+ const struct cred *cred)
+ {
+ struct request_key_auth *rka;
+@@ -448,7 +449,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ might_sleep();
+
+ key_ref = search_my_process_keyrings(type, description, match,
+- false, cred);
++ no_state_check, cred);
+ if (!IS_ERR(key_ref))
+ goto found;
+ err = key_ref;
+@@ -468,7 +469,8 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ rka = cred->request_key_auth->payload.data;
+
+ key_ref = search_process_keyrings(type, description,
+- match, rka->cred);
++ match, no_state_check,
++ rka->cred);
+
+ up_read(&cred->request_key_auth->sem);
+
+@@ -675,7 +677,7 @@ try_again:
+ /* check to see if we possess the key */
+ skey_ref = search_process_keyrings(key->type, key,
+ lookup_user_key_possessed,
+- cred);
++ true, cred);
+
+ if (!IS_ERR(skey_ref)) {
+ key_put(key);
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index c411f9b..172115b 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -390,7 +390,8 @@ static int construct_alloc_key(struct key_type *type,
+ * waited for locks */
+ mutex_lock(&key_construction_mutex);
+
+- key_ref = search_process_keyrings(type, description, type->match, cred);
++ key_ref = search_process_keyrings(type, description, type->match,
++ false, cred);
+ if (!IS_ERR(key_ref))
+ goto key_already_present;
+
+@@ -539,7 +540,8 @@ struct key *request_key_and_link(struct key_type *type,
+ dest_keyring, flags);
+
+ /* search all the process keyrings for a key */
+- key_ref = search_process_keyrings(type, description, type->match, cred);
++ key_ref = search_process_keyrings(type, description, type->match,
++ false, cred);
+
+ if (!IS_ERR(key_ref)) {
+ key = key_ref_to_ptr(key_ref);
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 85730d5..92077de 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -247,7 +247,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ &key_type_request_key_auth,
+ (void *) (unsigned long) target_id,
+ key_get_instantiation_authkey_match,
+- cred);
++ false, cred);
+
+ if (IS_ERR(authkey_ref)) {
+ authkey = ERR_CAST(authkey_ref);
+--
+1.8.3.1
+
+
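In userspace terms, the fix means a revoked or expired key that the caller
possesses now fails with the specific error rather than the generic EACCES.
A minimal sketch of the behaviour the keyutils test suite checks, assuming
libkeyutils is available (build with -lkeyutils):

#include <errno.h>
#include <keyutils.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Create a user key in the session keyring, then revoke it. */
	key_serial_t key = add_key("user", "krb5-demo", "secret", 6,
				   KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}
	keyctl_revoke(key);

	/* With the possession fix, the read is refused with EKEYREVOKED;
	 * before it, the missing possession permit produced EACCES. */
	char buf[32];
	if (keyctl_read(key, buf, sizeof(buf)) < 0)
		printf("keyctl_read: %s\n", strerror(errno));
	return 0;
}
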
+From 9b1294158dd1fbca78541b5d55c057e46b1a9ca2 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:51 +0100
+Subject: [PATCH 02/10] KEYS: Use bool in make_key_ref() and is_key_possessed()
+
+Make make_key_ref() take a bool possession parameter and make
+is_key_possessed() return a bool.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ Documentation/security/keys.txt | 7 +++----
+ include/linux/key.h | 4 ++--
+ security/keys/keyring.c | 5 +++--
+ 3 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
+index 7b4145d..9ede670 100644
+--- a/Documentation/security/keys.txt
++++ b/Documentation/security/keys.txt
+@@ -865,15 +865,14 @@ encountered:
+ calling processes has a searchable link to the key from one of its
+ keyrings. There are three functions for dealing with these:
+
+- key_ref_t make_key_ref(const struct key *key,
+- unsigned long possession);
++ key_ref_t make_key_ref(const struct key *key, bool possession);
+
+ struct key *key_ref_to_ptr(const key_ref_t key_ref);
+
+- unsigned long is_key_possessed(const key_ref_t key_ref);
++ bool is_key_possessed(const key_ref_t key_ref);
+
+ The first function constructs a key reference from a key pointer and
+- possession information (which must be 0 or 1 and not any other value).
++ possession information (which must be true or false).
+
+ The second function retrieves the key pointer from a reference and the
+ third retrieves the possession flag.
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 4dfde11..51bce29 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -99,7 +99,7 @@ struct keyring_name;
+ typedef struct __key_reference_with_attributes *key_ref_t;
+
+ static inline key_ref_t make_key_ref(const struct key *key,
+- unsigned long possession)
++ bool possession)
+ {
+ return (key_ref_t) ((unsigned long) key | possession);
+ }
+@@ -109,7 +109,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
+ return (struct key *) ((unsigned long) key_ref & ~1UL);
+ }
+
+-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
++static inline bool is_key_possessed(const key_ref_t key_ref)
+ {
+ return (unsigned long) key_ref & 1UL;
+ }
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index 6ece7f2..f784063 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -329,9 +329,10 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+
+ struct keyring_list *keylist;
+ struct timespec now;
+- unsigned long possessed, kflags;
++ unsigned long kflags;
+ struct key *keyring, *key;
+ key_ref_t key_ref;
++ bool possessed;
+ long err;
+ int sp, nkeys, kix;
+
+@@ -542,8 +543,8 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ key_perm_t perm)
+ {
+ struct keyring_list *klist;
+- unsigned long possessed;
+ struct key *keyring, *key;
++ bool possessed;
+ int nkeys, loop;
+
+ keyring = key_ref_to_ptr(keyring_ref);
+--
+1.8.3.1
+
+
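make_key_ref() works because struct key is at least two-byte aligned, leaving
bit 0 of every key pointer free to carry the possession flag; the bool
signature now documents that only 0 or 1 may be ORed in. A standalone sketch
of the same tagged-pointer encoding (illustrative stand-ins, not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct key { int serial; };	/* stand-in for the kernel's struct key */
typedef uintptr_t key_ref_t;	/* pointer with possession in bit 0 */

static key_ref_t make_key_ref(const struct key *key, bool possession)
{
	return (uintptr_t)key | possession;
}

static struct key *key_ref_to_ptr(key_ref_t ref)
{
	return (struct key *)(ref & ~(uintptr_t)1);
}

static bool is_key_possessed(key_ref_t ref)
{
	return ref & 1;
}

int main(void)
{
	struct key k = { .serial = 42 };
	key_ref_t ref = make_key_ref(&k, true);

	/* Pointer and flag travel together in a single word. */
	printf("serial=%d possessed=%d\n",
	       key_ref_to_ptr(ref)->serial, is_key_possessed(ref));
	return 0;
}
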
+From 4a7e7536b9b728f1d912d0e4c047c885c95e13a1 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:51 +0100
+Subject: [PATCH 03/10] KEYS: key_is_dead() should take a const key pointer
+ argument
+
+key_is_dead() should take a const key pointer argument as it doesn't modify
+what it points to.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ security/keys/internal.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index df971fe..490aef5 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -203,7 +203,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
+ /*
+ * Determine whether a key is dead.
+ */
+-static inline bool key_is_dead(struct key *key, time_t limit)
++static inline bool key_is_dead(const struct key *key, time_t limit)
+ {
+ return
+ key->flags & ((1 << KEY_FLAG_DEAD) |
+--
+1.8.3.1
+
+
+From 9007a0a7f8c135f0085e46db277de0cf7b944403 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 04/10] KEYS: Consolidate the concept of an 'index key' for key
+ access
+
+Consolidate the concept of an 'index key' for accessing keys. The index key
+is the search term needed to find a key directly - basically the key type and
+the key description. We can add to that the description length.
+
+This will be useful when turning a keyring into an associative array rather
+than just a pointer block.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ include/linux/key.h | 21 +++++++++----
+ security/keys/internal.h | 8 ++---
+ security/keys/key.c | 72 +++++++++++++++++++++++----------------------
+ security/keys/keyring.c | 37 +++++++++++------------
+ security/keys/request_key.c | 12 +++++---
+ 5 files changed, 83 insertions(+), 67 deletions(-)
+
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 51bce29..d573e82 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -82,6 +82,12 @@ struct key_owner;
+ struct keyring_list;
+ struct keyring_name;
+
++struct keyring_index_key {
++ struct key_type *type;
++ const char *description;
++ size_t desc_len;
++};
++
+ /*****************************************************************************/
+ /*
+ * key reference with possession attribute handling
+@@ -129,7 +135,6 @@ struct key {
+ struct list_head graveyard_link;
+ struct rb_node serial_node;
+ };
+- struct key_type *type; /* type of key */
+ struct rw_semaphore sem; /* change vs change sem */
+ struct key_user *user; /* owner of this key */
+ void *security; /* security data for this key */
+@@ -163,12 +168,18 @@ struct key {
+ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */
+ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */
+
+- /* the description string
+- * - this is used to match a key against search criteria
+- * - this should be a printable string
++ /* the key type and key description string
++ * - the desc is used to match a key against search criteria
++ * - it should be a printable string
+ * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
+ */
+- char *description;
++ union {
++ struct keyring_index_key index_key;
++ struct {
++ struct key_type *type; /* type of key */
++ char *description;
++ };
++ };
+
+ /* type specific data
+ * - this is used by the keyring type to index the name
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 490aef5..77441dd 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -89,19 +89,17 @@ extern struct key_type *key_type_lookup(const char *type);
+ extern void key_type_put(struct key_type *ktype);
+
+ extern int __key_link_begin(struct key *keyring,
+- const struct key_type *type,
+- const char *description,
++ const struct keyring_index_key *index_key,
+ unsigned long *_prealloc);
+ extern int __key_link_check_live_key(struct key *keyring, struct key *key);
+ extern void __key_link(struct key *keyring, struct key *key,
+ unsigned long *_prealloc);
+ extern void __key_link_end(struct key *keyring,
+- struct key_type *type,
++ const struct keyring_index_key *index_key,
+ unsigned long prealloc);
+
+ extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct key_type *type,
+- const char *description,
++ const struct keyring_index_key *index_key,
+ key_perm_t perm);
+
+ extern struct key *keyring_search_instkey(struct key *keyring,
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 8fb7c7b..7e6bc39 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ }
+ }
+
+- desclen = strlen(desc) + 1;
+- quotalen = desclen + type->def_datalen;
++ desclen = strlen(desc);
++ quotalen = desclen + 1 + type->def_datalen;
+
+ /* get hold of the key tracking for this user */
+ user = key_user_lookup(uid);
+@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ goto no_memory_2;
+
+ if (desc) {
+- key->description = kmemdup(desc, desclen, GFP_KERNEL);
++ key->index_key.desc_len = desclen;
++ key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
+ if (!key->description)
+ goto no_memory_3;
+ }
+@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ atomic_set(&key->usage, 1);
+ init_rwsem(&key->sem);
+ lockdep_set_class(&key->sem, &type->lock_class);
+- key->type = type;
++ key->index_key.type = type;
+ key->user = user;
+ key->quotalen = quotalen;
+ key->datalen = type->def_datalen;
+@@ -489,8 +490,7 @@ int key_instantiate_and_link(struct key *key,
+ }
+
+ if (keyring) {
+- ret = __key_link_begin(keyring, key->type, key->description,
+- &prealloc);
++ ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+ if (ret < 0)
+ goto error_free_preparse;
+ }
+@@ -499,7 +499,7 @@ int key_instantiate_and_link(struct key *key,
+ &prealloc);
+
+ if (keyring)
+- __key_link_end(keyring, key->type, prealloc);
++ __key_link_end(keyring, &key->index_key, prealloc);
+
+ error_free_preparse:
+ if (key->type->preparse)
+@@ -548,8 +548,7 @@ int key_reject_and_link(struct key *key,
+ ret = -EBUSY;
+
+ if (keyring)
+- link_ret = __key_link_begin(keyring, key->type,
+- key->description, &prealloc);
++ link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+
+ mutex_lock(&key_construction_mutex);
+
+@@ -581,7 +580,7 @@ int key_reject_and_link(struct key *key,
+ mutex_unlock(&key_construction_mutex);
+
+ if (keyring)
+- __key_link_end(keyring, key->type, prealloc);
++ __key_link_end(keyring, &key->index_key, prealloc);
+
+ /* wake up anyone waiting for a key to be constructed */
+ if (awaken)
+@@ -780,25 +779,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ key_perm_t perm,
+ unsigned long flags)
+ {
+- unsigned long prealloc;
++ struct keyring_index_key index_key = {
++ .description = description,
++ };
+ struct key_preparsed_payload prep;
+ const struct cred *cred = current_cred();
+- struct key_type *ktype;
++ unsigned long prealloc;
+ struct key *keyring, *key = NULL;
+ key_ref_t key_ref;
+ int ret;
+
+ /* look up the key type to see if it's one of the registered kernel
+ * types */
+- ktype = key_type_lookup(type);
+- if (IS_ERR(ktype)) {
++ index_key.type = key_type_lookup(type);
++ if (IS_ERR(index_key.type)) {
+ key_ref = ERR_PTR(-ENODEV);
+ goto error;
+ }
+
+ key_ref = ERR_PTR(-EINVAL);
+- if (!ktype->match || !ktype->instantiate ||
+- (!description && !ktype->preparse))
++ if (!index_key.type->match || !index_key.type->instantiate ||
++ (!index_key.description && !index_key.type->preparse))
+ goto error_put_type;
+
+ keyring = key_ref_to_ptr(keyring_ref);
+@@ -812,21 +813,22 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ memset(&prep, 0, sizeof(prep));
+ prep.data = payload;
+ prep.datalen = plen;
+- prep.quotalen = ktype->def_datalen;
+- if (ktype->preparse) {
+- ret = ktype->preparse(&prep);
++ prep.quotalen = index_key.type->def_datalen;
++ if (index_key.type->preparse) {
++ ret = index_key.type->preparse(&prep);
+ if (ret < 0) {
+ key_ref = ERR_PTR(ret);
+ goto error_put_type;
+ }
+- if (!description)
+- description = prep.description;
++ if (!index_key.description)
++ index_key.description = prep.description;
+ key_ref = ERR_PTR(-EINVAL);
+- if (!description)
++ if (!index_key.description)
+ goto error_free_prep;
+ }
++ index_key.desc_len = strlen(index_key.description);
+
+- ret = __key_link_begin(keyring, ktype, description, &prealloc);
++ ret = __key_link_begin(keyring, &index_key, &prealloc);
+ if (ret < 0) {
+ key_ref = ERR_PTR(ret);
+ goto error_free_prep;
+@@ -844,9 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ * key of the same type and description in the destination keyring and
+ * update that instead if possible
+ */
+- if (ktype->update) {
+- key_ref = __keyring_search_one(keyring_ref, ktype, description,
+- 0);
++ if (index_key.type->update) {
++ key_ref = __keyring_search_one(keyring_ref, &index_key, 0);
+ if (!IS_ERR(key_ref))
+ goto found_matching_key;
+ }
+@@ -856,16 +857,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ perm |= KEY_USR_VIEW;
+
+- if (ktype->read)
++ if (index_key.type->read)
+ perm |= KEY_POS_READ;
+
+- if (ktype == &key_type_keyring || ktype->update)
++ if (index_key.type == &key_type_keyring ||
++ index_key.type->update)
+ perm |= KEY_POS_WRITE;
+ }
+
+ /* allocate a new key */
+- key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
+- perm, flags);
++ key = key_alloc(index_key.type, index_key.description,
++ cred->fsuid, cred->fsgid, cred, perm, flags);
+ if (IS_ERR(key)) {
+ key_ref = ERR_CAST(key);
+ goto error_link_end;
+@@ -882,12 +884,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
+
+ error_link_end:
+- __key_link_end(keyring, ktype, prealloc);
++ __key_link_end(keyring, &index_key, prealloc);
+ error_free_prep:
+- if (ktype->preparse)
+- ktype->free_preparse(&prep);
++ if (index_key.type->preparse)
++ index_key.type->free_preparse(&prep);
+ error_put_type:
+- key_type_put(ktype);
++ key_type_put(index_key.type);
+ error:
+ return key_ref;
+
+@@ -895,7 +897,7 @@ error:
+ /* we found a matching key, so we're going to try to update it
+ * - we can drop the locks first as we have the key pinned
+ */
+- __key_link_end(keyring, ktype, prealloc);
++ __key_link_end(keyring, &index_key, prealloc);
+
+ key_ref = __key_update(key_ref, &prep);
+ goto error_free_prep;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index f784063..c7f59f9 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -538,8 +538,7 @@ EXPORT_SYMBOL(keyring_search);
+ * to the returned key reference.
+ */
+ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct key_type *ktype,
+- const char *description,
++ const struct keyring_index_key *index_key,
+ key_perm_t perm)
+ {
+ struct keyring_list *klist;
+@@ -558,9 +557,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ smp_rmb();
+ for (loop = 0; loop < nkeys ; loop++) {
+ key = rcu_dereference(klist->keys[loop]);
+- if (key->type == ktype &&
++ if (key->type == index_key->type &&
+ (!key->type->match ||
+- key->type->match(key, description)) &&
++ key->type->match(key, index_key->description)) &&
+ key_permission(make_key_ref(key, possessed),
+ perm) == 0 &&
+ !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+@@ -747,8 +746,8 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
+ /*
+ * Preallocate memory so that a key can be linked into to a keyring.
+ */
+-int __key_link_begin(struct key *keyring, const struct key_type *type,
+- const char *description, unsigned long *_prealloc)
++int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
++ unsigned long *_prealloc)
+ __acquires(&keyring->sem)
+ __acquires(&keyring_serialise_link_sem)
+ {
+@@ -759,7 +758,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+ size_t size;
+ int loop, lru, ret;
+
+- kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
++ kenter("%d,%s,%s,",
++ key_serial(keyring), index_key->type->name, index_key->description);
+
+ if (keyring->type != &key_type_keyring)
+ return -ENOTDIR;
+@@ -772,7 +772,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+
+ /* serialise link/link calls to prevent parallel calls causing a cycle
+ * when linking two keyring in opposite orders */
+- if (type == &key_type_keyring)
++ if (index_key->type == &key_type_keyring)
+ down_write(&keyring_serialise_link_sem);
+
+ klist = rcu_dereference_locked_keyring(keyring);
+@@ -784,8 +784,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+ for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+ struct key *key = rcu_deref_link_locked(klist, loop,
+ keyring);
+- if (key->type == type &&
+- strcmp(key->description, description) == 0) {
++ if (key->type == index_key->type &&
++ strcmp(key->description, index_key->description) == 0) {
+ /* Found a match - we'll replace the link with
+ * one to the new key. We record the slot
+ * position.
+@@ -865,7 +865,7 @@ error_quota:
+ key_payload_reserve(keyring,
+ keyring->datalen - KEYQUOTA_LINK_BYTES);
+ error_sem:
+- if (type == &key_type_keyring)
++ if (index_key->type == &key_type_keyring)
+ up_write(&keyring_serialise_link_sem);
+ error_krsem:
+ up_write(&keyring->sem);
+@@ -957,16 +957,17 @@ void __key_link(struct key *keyring, struct key *key,
+ *
+ * Must be called with __key_link_begin() having been called.
+ */
+-void __key_link_end(struct key *keyring, struct key_type *type,
++void __key_link_end(struct key *keyring,
++ const struct keyring_index_key *index_key,
+ unsigned long prealloc)
+ __releases(&keyring->sem)
+ __releases(&keyring_serialise_link_sem)
+ {
+- BUG_ON(type == NULL);
+- BUG_ON(type->name == NULL);
+- kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
++ BUG_ON(index_key->type == NULL);
++ BUG_ON(index_key->type->name == NULL);
++ kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
+
+- if (type == &key_type_keyring)
++ if (index_key->type == &key_type_keyring)
+ up_write(&keyring_serialise_link_sem);
+
+ if (prealloc) {
+@@ -1007,12 +1008,12 @@ int key_link(struct key *keyring, struct key *key)
+ key_check(keyring);
+ key_check(key);
+
+- ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
++ ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+ if (ret == 0) {
+ ret = __key_link_check_live_key(keyring, key);
+ if (ret == 0)
+ __key_link(keyring, key, &prealloc);
+- __key_link_end(keyring, key->type, prealloc);
++ __key_link_end(keyring, &key->index_key, prealloc);
+ }
+
+ return ret;
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index 172115b..586cb79 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -352,6 +352,11 @@ static int construct_alloc_key(struct key_type *type,
+ struct key_user *user,
+ struct key **_key)
+ {
++ const struct keyring_index_key index_key = {
++ .type = type,
++ .description = description,
++ .desc_len = strlen(description),
++ };
+ const struct cred *cred = current_cred();
+ unsigned long prealloc;
+ struct key *key;
+@@ -379,8 +384,7 @@ static int construct_alloc_key(struct key_type *type,
+ set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+
+ if (dest_keyring) {
+- ret = __key_link_begin(dest_keyring, type, description,
+- &prealloc);
++ ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
+ if (ret < 0)
+ goto link_prealloc_failed;
+ }
+@@ -400,7 +404,7 @@ static int construct_alloc_key(struct key_type *type,
+
+ mutex_unlock(&key_construction_mutex);
+ if (dest_keyring)
+- __key_link_end(dest_keyring, type, prealloc);
++ __key_link_end(dest_keyring, &index_key, prealloc);
+ mutex_unlock(&user->cons_lock);
+ *_key = key;
+ kleave(" = 0 [%d]", key_serial(key));
+@@ -416,7 +420,7 @@ key_already_present:
+ ret = __key_link_check_live_key(dest_keyring, key);
+ if (ret == 0)
+ __key_link(dest_keyring, key, &prealloc);
+- __key_link_end(dest_keyring, type, prealloc);
++ __key_link_end(dest_keyring, &index_key, prealloc);
+ if (ret < 0)
+ goto link_check_failed;
+ }
+--
+1.8.3.1
+
+
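The union in struct key above is what keeps this change source-compatible:
struct keyring_index_key and the anonymous struct share their leading members,
so key->type and key->index_key.type alias the same storage and existing call
sites compile unchanged. A cut-down illustration of the overlay (hypothetical
values; needs C11 anonymous members):

#include <stdio.h>
#include <string.h>

struct key_type { const char *name; };

struct keyring_index_key {
	struct key_type *type;
	const char *description;
	size_t desc_len;
};

struct key {
	union {
		struct keyring_index_key index_key;
		struct {			/* legacy view */
			struct key_type *type;
			const char *description;
		};
	};
};

int main(void)
{
	static struct key_type user_type = { .name = "user" };
	struct key k;

	/* Fill in the fields through the index_key view... */
	k.index_key.type = &user_type;
	k.index_key.description = "afs@REDHAT.COM";
	k.index_key.desc_len = strlen(k.index_key.description);

	/* ...and read them back through the old field names. */
	printf("%s key \"%s\" (%zu bytes)\n",
	       k.type->name, k.description, k.index_key.desc_len);
	return 0;
}
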
+From eca8dad5cd291d2baf2d20372fcb0af9e75e25ea Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 05/10] KEYS: Introduce a search context structure
+
+Search functions pass around a bunch of arguments, each of which gets copied
+with each call. Introduce a search context structure to hold these.
+
+Whilst we're at it, create a search flag that indicates whether the search
+should be directly to the description or whether it should iterate through all
+keys looking for a non-description match.
+
+This will be useful when keyrings use a generic data struct with generic
+routines to manage their content as the search terms can just be passed
+through to the iterator callback function.
+
+Also, for future use, the data to be supplied to the match function is
+separated from the description pointer in the search context. This makes it
+clear which is being supplied.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ include/linux/key-type.h | 5 ++
+ security/keys/internal.h | 40 +++++++------
+ security/keys/keyring.c | 70 +++++++++++------------
+ security/keys/proc.c | 17 ++++--
+ security/keys/process_keys.c | 117 +++++++++++++++++++--------------------
+ security/keys/request_key.c | 56 +++++++++----------
+ security/keys/request_key_auth.c | 14 +++--
+ security/keys/user_defined.c | 18 +++---
+ 8 files changed, 179 insertions(+), 158 deletions(-)
+
+diff --git a/include/linux/key-type.h b/include/linux/key-type.h
+index 518a53a..f58737b 100644
+--- a/include/linux/key-type.h
++++ b/include/linux/key-type.h
+@@ -63,6 +63,11 @@ struct key_type {
+ */
+ size_t def_datalen;
+
++ /* Default key search algorithm. */
++ unsigned def_lookup_type;
++#define KEYRING_SEARCH_LOOKUP_DIRECT 0x0000 /* Direct lookup by description. */
++#define KEYRING_SEARCH_LOOKUP_ITERATE 0x0001 /* Iterative search. */
++
+ /* vet a description */
+ int (*vet_description)(const char *description);
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 77441dd..f4bf938 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -107,23 +107,31 @@ extern struct key *keyring_search_instkey(struct key *keyring,
+
+ typedef int (*key_match_func_t)(const struct key *, const void *);
+
++struct keyring_search_context {
++ struct keyring_index_key index_key;
++ const struct cred *cred;
++ key_match_func_t match;
++ const void *match_data;
++ unsigned flags;
++#define KEYRING_SEARCH_LOOKUP_TYPE 0x0001 /* [as type->def_lookup_type] */
++#define KEYRING_SEARCH_NO_STATE_CHECK 0x0002 /* Skip state checks */
++#define KEYRING_SEARCH_DO_STATE_CHECK 0x0004 /* Override NO_STATE_CHECK */
++#define KEYRING_SEARCH_NO_UPDATE_TIME 0x0008 /* Don't update times */
++#define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */
++#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */
++
++ /* Internal stuff */
++ int skipped_ret;
++ bool possessed;
++ key_ref_t result;
++ struct timespec now;
++};
++
+ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+- const struct cred *cred,
+- struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check);
+-
+-extern key_ref_t search_my_process_keyrings(struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check,
+- const struct cred *cred);
+-extern key_ref_t search_process_keyrings(struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check,
+- const struct cred *cred);
++ struct keyring_search_context *ctx);
++
++extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
++extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
+
+ extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index c7f59f9..b42f2d4 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -280,11 +280,7 @@ EXPORT_SYMBOL(keyring_alloc);
+ /**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+- * @cred: The credentials to use for permissions checks.
+- * @type: The type of key to search for.
+- * @description: Parameter for @match.
+- * @match: Function to rule on whether or not a key is the one required.
+- * @no_state_check: Don't check if a matching key is bad
++ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+@@ -314,11 +310,7 @@ EXPORT_SYMBOL(keyring_alloc);
+ * @keyring_ref is propagated to the returned key reference.
+ */
+ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+- const struct cred *cred,
+- struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check)
++ struct keyring_search_context *ctx)
+ {
+ struct {
+ /* Need a separate keylist pointer for RCU purposes */
+@@ -328,20 +320,18 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ } stack[KEYRING_SEARCH_MAX_DEPTH];
+
+ struct keyring_list *keylist;
+- struct timespec now;
+ unsigned long kflags;
+ struct key *keyring, *key;
+ key_ref_t key_ref;
+- bool possessed;
+ long err;
+ int sp, nkeys, kix;
+
+ keyring = key_ref_to_ptr(keyring_ref);
+- possessed = is_key_possessed(keyring_ref);
++ ctx->possessed = is_key_possessed(keyring_ref);
+ key_check(keyring);
+
+ /* top keyring must have search permission to begin the search */
+- err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
++ err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+ if (err < 0) {
+ key_ref = ERR_PTR(err);
+ goto error;
+@@ -353,7 +343,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+
+ rcu_read_lock();
+
+- now = current_kernel_time();
++ ctx->now = current_kernel_time();
+ err = -EAGAIN;
+ sp = 0;
+
+@@ -361,16 +351,17 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ * are looking for */
+ key_ref = ERR_PTR(-EAGAIN);
+ kflags = keyring->flags;
+- if (keyring->type == type && match(keyring, description)) {
++ if (keyring->type == ctx->index_key.type &&
++ ctx->match(keyring, ctx->match_data)) {
+ key = keyring;
+- if (no_state_check)
++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+ goto found;
+
+ /* check it isn't negative and hasn't expired or been
+ * revoked */
+ if (kflags & (1 << KEY_FLAG_REVOKED))
+ goto error_2;
+- if (key->expiry && now.tv_sec >= key->expiry)
++ if (key->expiry && ctx->now.tv_sec >= key->expiry)
+ goto error_2;
+ key_ref = ERR_PTR(key->type_data.reject_error);
+ if (kflags & (1 << KEY_FLAG_NEGATIVE))
+@@ -384,7 +375,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_NEGATIVE)) ||
+- (keyring->expiry && now.tv_sec >= keyring->expiry))
++ (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
+ goto error_2;
+
+ /* start processing a new keyring */
+@@ -406,29 +397,29 @@ descend:
+ kflags = key->flags;
+
+ /* ignore keys not of this type */
+- if (key->type != type)
++ if (key->type != ctx->index_key.type)
+ continue;
+
+ /* skip invalidated, revoked and expired keys */
+- if (!no_state_check) {
++ if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
+ if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED)))
+ continue;
+
+- if (key->expiry && now.tv_sec >= key->expiry)
++ if (key->expiry && ctx->now.tv_sec >= key->expiry)
+ continue;
+ }
+
+ /* keys that don't match */
+- if (!match(key, description))
++ if (!ctx->match(key, ctx->match_data))
+ continue;
+
+ /* key must have search permissions */
+- if (key_task_permission(make_key_ref(key, possessed),
+- cred, KEY_SEARCH) < 0)
++ if (key_task_permission(make_key_ref(key, ctx->possessed),
++ ctx->cred, KEY_SEARCH) < 0)
+ continue;
+
+- if (no_state_check)
++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+ goto found;
+
+ /* we set a different error code if we pass a negative key */
+@@ -456,8 +447,8 @@ ascend:
+ if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+ continue;
+
+- if (key_task_permission(make_key_ref(key, possessed),
+- cred, KEY_SEARCH) < 0)
++ if (key_task_permission(make_key_ref(key, ctx->possessed),
++ ctx->cred, KEY_SEARCH) < 0)
+ continue;
+
+ /* stack the current position */
+@@ -489,12 +480,12 @@ not_this_keyring:
+ /* we found a viable match */
+ found:
+ atomic_inc(&key->usage);
+- key->last_used_at = now.tv_sec;
+- keyring->last_used_at = now.tv_sec;
++ key->last_used_at = ctx->now.tv_sec;
++ keyring->last_used_at = ctx->now.tv_sec;
+ while (sp > 0)
+- stack[--sp].keyring->last_used_at = now.tv_sec;
++ stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+ key_check(key);
+- key_ref = make_key_ref(key, possessed);
++ key_ref = make_key_ref(key, ctx->possessed);
+ error_2:
+ rcu_read_unlock();
+ error:
+@@ -514,11 +505,20 @@ key_ref_t keyring_search(key_ref_t keyring,
+ struct key_type *type,
+ const char *description)
+ {
+- if (!type->match)
++ struct keyring_search_context ctx = {
++ .index_key.type = type,
++ .index_key.description = description,
++ .cred = current_cred(),
++ .match = type->match,
++ .match_data = description,
++ .flags = (type->def_lookup_type |
++ KEYRING_SEARCH_DO_STATE_CHECK),
++ };
++
++ if (!ctx.match)
+ return ERR_PTR(-ENOKEY);
+
+- return keyring_search_aux(keyring, current->cred,
+- type, description, type->match, false);
++ return keyring_search_aux(keyring, &ctx);
+ }
+ EXPORT_SYMBOL(keyring_search);
+
+diff --git a/security/keys/proc.c b/security/keys/proc.c
+index 217b685..88e9a46 100644
+--- a/security/keys/proc.c
++++ b/security/keys/proc.c
+@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
+
+ static int proc_keys_show(struct seq_file *m, void *v)
+ {
+- const struct cred *cred = current_cred();
+ struct rb_node *_p = v;
+ struct key *key = rb_entry(_p, struct key, serial_node);
+ struct timespec now;
+@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
+ char xbuf[12];
+ int rc;
+
++ struct keyring_search_context ctx = {
++ .index_key.type = key->type,
++ .index_key.description = key->description,
++ .cred = current_cred(),
++ .match = lookup_user_key_possessed,
++ .match_data = key,
++ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
++ KEYRING_SEARCH_LOOKUP_DIRECT),
++ };
++
+ key_ref = make_key_ref(key, 0);
+
+ /* determine if the key is possessed by this process (a test we can
+ * skip if the key does not indicate the possessor can view it
+ */
+ if (key->perm & KEY_POS_VIEW) {
+- skey_ref = search_my_process_keyrings(key->type, key,
+- lookup_user_key_possessed,
+- true, cred);
++ skey_ref = search_my_process_keyrings(&ctx);
+ if (!IS_ERR(skey_ref)) {
+ key_ref_put(skey_ref);
+ key_ref = make_key_ref(key, 1);
+@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
+ * - the caller holds a spinlock, and thus the RCU read lock, making our
+ * access to __current_cred() safe
+ */
+- rc = key_task_permission(key_ref, cred, KEY_VIEW);
++ rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
+ if (rc < 0)
+ return 0;
+
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index a3410d6..e68a3e0 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
+ * In the case of a successful return, the possession attribute is set on the
+ * returned key reference.
+ */
+-key_ref_t search_my_process_keyrings(struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check,
+- const struct cred *cred)
++key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
+ {
+ key_ref_t key_ref, ret, err;
+
+@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ err = ERR_PTR(-EAGAIN);
+
+ /* search the thread keyring first */
+- if (cred->thread_keyring) {
++ if (ctx->cred->thread_keyring) {
+ key_ref = keyring_search_aux(
+- make_key_ref(cred->thread_keyring, 1),
+- cred, type, description, match, no_state_check);
++ make_key_ref(ctx->cred->thread_keyring, 1), ctx);
+ if (!IS_ERR(key_ref))
+ goto found;
+
+@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ }
+
+ /* search the process keyring second */
+- if (cred->process_keyring) {
++ if (ctx->cred->process_keyring) {
+ key_ref = keyring_search_aux(
+- make_key_ref(cred->process_keyring, 1),
+- cred, type, description, match, no_state_check);
++ make_key_ref(ctx->cred->process_keyring, 1), ctx);
+ if (!IS_ERR(key_ref))
+ goto found;
+
+@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ }
+
+ /* search the session keyring */
+- if (cred->session_keyring) {
++ if (ctx->cred->session_keyring) {
+ rcu_read_lock();
+ key_ref = keyring_search_aux(
+- make_key_ref(rcu_dereference(cred->session_keyring), 1),
+- cred, type, description, match, no_state_check);
++ make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
++ ctx);
+ rcu_read_unlock();
+
+ if (!IS_ERR(key_ref))
+@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ }
+ }
+ /* or search the user-session keyring */
+- else if (cred->user->session_keyring) {
++ else if (ctx->cred->user->session_keyring) {
+ key_ref = keyring_search_aux(
+- make_key_ref(cred->user->session_keyring, 1),
+- cred, type, description, match, no_state_check);
++ make_key_ref(ctx->cred->user->session_keyring, 1),
++ ctx);
+ if (!IS_ERR(key_ref))
+ goto found;
+
+@@ -437,19 +431,14 @@ found:
+ *
+ * Return same as search_my_process_keyrings().
+ */
+-key_ref_t search_process_keyrings(struct key_type *type,
+- const void *description,
+- key_match_func_t match,
+- bool no_state_check,
+- const struct cred *cred)
++key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
+ {
+ struct request_key_auth *rka;
+ key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
+
+ might_sleep();
+
+- key_ref = search_my_process_keyrings(type, description, match,
+- no_state_check, cred);
++ key_ref = search_my_process_keyrings(ctx);
+ if (!IS_ERR(key_ref))
+ goto found;
+ err = key_ref;
+@@ -458,19 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ * search the keyrings of the process mentioned there
+ * - we don't permit access to request_key auth keys via this method
+ */
+- if (cred->request_key_auth &&
+- cred == current_cred() &&
+- type != &key_type_request_key_auth
++ if (ctx->cred->request_key_auth &&
++ ctx->cred == current_cred() &&
++ ctx->index_key.type != &key_type_request_key_auth
+ ) {
++ const struct cred *cred = ctx->cred;
++
+ /* defend against the auth key being revoked */
+ down_read(&cred->request_key_auth->sem);
+
+- if (key_validate(cred->request_key_auth) == 0) {
+- rka = cred->request_key_auth->payload.data;
++ if (key_validate(ctx->cred->request_key_auth) == 0) {
++ rka = ctx->cred->request_key_auth->payload.data;
+
+- key_ref = search_process_keyrings(type, description,
+- match, no_state_check,
+- rka->cred);
++ ctx->cred = rka->cred;
++ key_ref = search_process_keyrings(ctx);
++ ctx->cred = cred;
+
+ up_read(&cred->request_key_auth->sem);
+
+@@ -524,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
+ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
+ key_perm_t perm)
+ {
++ struct keyring_search_context ctx = {
++ .match = lookup_user_key_possessed,
++ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
++ KEYRING_SEARCH_LOOKUP_DIRECT),
++ };
+ struct request_key_auth *rka;
+- const struct cred *cred;
+ struct key *key;
+ key_ref_t key_ref, skey_ref;
+ int ret;
+
+ try_again:
+- cred = get_current_cred();
++ ctx.cred = get_current_cred();
+ key_ref = ERR_PTR(-ENOKEY);
+
+ switch (id) {
+ case KEY_SPEC_THREAD_KEYRING:
+- if (!cred->thread_keyring) {
++ if (!ctx.cred->thread_keyring) {
+ if (!(lflags & KEY_LOOKUP_CREATE))
+ goto error;
+
+@@ -548,13 +543,13 @@ try_again:
+ goto reget_creds;
+ }
+
+- key = cred->thread_keyring;
++ key = ctx.cred->thread_keyring;
+ atomic_inc(&key->usage);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+ case KEY_SPEC_PROCESS_KEYRING:
+- if (!cred->process_keyring) {
++ if (!ctx.cred->process_keyring) {
+ if (!(lflags & KEY_LOOKUP_CREATE))
+ goto error;
+
+@@ -566,13 +561,13 @@ try_again:
+ goto reget_creds;
+ }
+
+- key = cred->process_keyring;
++ key = ctx.cred->process_keyring;
+ atomic_inc(&key->usage);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+ case KEY_SPEC_SESSION_KEYRING:
+- if (!cred->session_keyring) {
++ if (!ctx.cred->session_keyring) {
+ /* always install a session keyring upon access if one
+ * doesn't exist yet */
+ ret = install_user_keyrings();
+@@ -582,13 +577,13 @@ try_again:
+ ret = join_session_keyring(NULL);
+ else
+ ret = install_session_keyring(
+- cred->user->session_keyring);
++ ctx.cred->user->session_keyring);
+
+ if (ret < 0)
+ goto error;
+ goto reget_creds;
+- } else if (cred->session_keyring ==
+- cred->user->session_keyring &&
++ } else if (ctx.cred->session_keyring ==
++ ctx.cred->user->session_keyring &&
+ lflags & KEY_LOOKUP_CREATE) {
+ ret = join_session_keyring(NULL);
+ if (ret < 0)
+@@ -597,32 +592,32 @@ try_again:
+ }
+
+ rcu_read_lock();
+- key = rcu_dereference(cred->session_keyring);
++ key = rcu_dereference(ctx.cred->session_keyring);
+ atomic_inc(&key->usage);
+ rcu_read_unlock();
+ key_ref = make_key_ref(key, 1);
+ break;
+
+ case KEY_SPEC_USER_KEYRING:
+- if (!cred->user->uid_keyring) {
++ if (!ctx.cred->user->uid_keyring) {
+ ret = install_user_keyrings();
+ if (ret < 0)
+ goto error;
+ }
+
+- key = cred->user->uid_keyring;
++ key = ctx.cred->user->uid_keyring;
+ atomic_inc(&key->usage);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+ case KEY_SPEC_USER_SESSION_KEYRING:
+- if (!cred->user->session_keyring) {
++ if (!ctx.cred->user->session_keyring) {
+ ret = install_user_keyrings();
+ if (ret < 0)
+ goto error;
+ }
+
+- key = cred->user->session_keyring;
++ key = ctx.cred->user->session_keyring;
+ atomic_inc(&key->usage);
+ key_ref = make_key_ref(key, 1);
+ break;
+@@ -633,7 +628,7 @@ try_again:
+ goto error;
+
+ case KEY_SPEC_REQKEY_AUTH_KEY:
+- key = cred->request_key_auth;
++ key = ctx.cred->request_key_auth;
+ if (!key)
+ goto error;
+
+@@ -642,20 +637,20 @@ try_again:
+ break;
+
+ case KEY_SPEC_REQUESTOR_KEYRING:
+- if (!cred->request_key_auth)
++ if (!ctx.cred->request_key_auth)
+ goto error;
+
+- down_read(&cred->request_key_auth->sem);
++ down_read(&ctx.cred->request_key_auth->sem);
+ if (test_bit(KEY_FLAG_REVOKED,
+- &cred->request_key_auth->flags)) {
++ &ctx.cred->request_key_auth->flags)) {
+ key_ref = ERR_PTR(-EKEYREVOKED);
+ key = NULL;
+ } else {
+- rka = cred->request_key_auth->payload.data;
++ rka = ctx.cred->request_key_auth->payload.data;
+ key = rka->dest_keyring;
+ atomic_inc(&key->usage);
+ }
+- up_read(&cred->request_key_auth->sem);
++ up_read(&ctx.cred->request_key_auth->sem);
+ if (!key)
+ goto error;
+ key_ref = make_key_ref(key, 1);
+@@ -675,9 +670,13 @@ try_again:
+ key_ref = make_key_ref(key, 0);
+
+ /* check to see if we possess the key */
+- skey_ref = search_process_keyrings(key->type, key,
+- lookup_user_key_possessed,
+- true, cred);
++ ctx.index_key.type = key->type;
++ ctx.index_key.description = key->description;
++ ctx.index_key.desc_len = strlen(key->description);
++ ctx.match_data = key;
++ kdebug("check possessed");
++ skey_ref = search_process_keyrings(&ctx);
++ kdebug("possessed=%p", skey_ref);
+
+ if (!IS_ERR(skey_ref)) {
+ key_put(key);
+@@ -717,14 +716,14 @@ try_again:
+ goto invalid_key;
+
+ /* check the permissions */
+- ret = key_task_permission(key_ref, cred, perm);
++ ret = key_task_permission(key_ref, ctx.cred, perm);
+ if (ret < 0)
+ goto invalid_key;
+
+ key->last_used_at = current_kernel_time().tv_sec;
+
+ error:
+- put_cred(cred);
++ put_cred(ctx.cred);
+ return key_ref;
+
+ invalid_key:
+@@ -735,7 +734,7 @@ invalid_key:
+ /* if we attempted to install a keyring, then it may have caused new
+ * creds to be installed */
+ reget_creds:
+- put_cred(cred);
++ put_cred(ctx.cred);
+ goto try_again;
+ }
+
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index 586cb79..ab75df4 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -345,38 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
+ * May return a key that's already under construction instead if there was a
+ * race between two thread calling request_key().
+ */
+-static int construct_alloc_key(struct key_type *type,
+- const char *description,
++static int construct_alloc_key(struct keyring_search_context *ctx,
+ struct key *dest_keyring,
+ unsigned long flags,
+ struct key_user *user,
+ struct key **_key)
+ {
+- const struct keyring_index_key index_key = {
+- .type = type,
+- .description = description,
+- .desc_len = strlen(description),
+- };
+- const struct cred *cred = current_cred();
+ unsigned long prealloc;
+ struct key *key;
+ key_perm_t perm;
+ key_ref_t key_ref;
+ int ret;
+
+- kenter("%s,%s,,,", type->name, description);
++ kenter("%s,%s,,,",
++ ctx->index_key.type->name, ctx->index_key.description);
+
+ *_key = NULL;
+ mutex_lock(&user->cons_lock);
+
+ perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ perm |= KEY_USR_VIEW;
+- if (type->read)
++ if (ctx->index_key.type->read)
+ perm |= KEY_POS_READ;
+- if (type == &key_type_keyring || type->update)
++ if (ctx->index_key.type == &key_type_keyring ||
++ ctx->index_key.type->update)
+ perm |= KEY_POS_WRITE;
+
+- key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
++ key = key_alloc(ctx->index_key.type, ctx->index_key.description,
++ ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
+ perm, flags);
+ if (IS_ERR(key))
+ goto alloc_failed;
+@@ -384,7 +380,7 @@ static int construct_alloc_key(struct key_type *type,
+ set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+
+ if (dest_keyring) {
+- ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
++ ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
+ if (ret < 0)
+ goto link_prealloc_failed;
+ }
+@@ -394,8 +390,7 @@ static int construct_alloc_key(struct key_type *type,
+ * waited for locks */
+ mutex_lock(&key_construction_mutex);
+
+- key_ref = search_process_keyrings(type, description, type->match,
+- false, cred);
++ key_ref = search_process_keyrings(ctx);
+ if (!IS_ERR(key_ref))
+ goto key_already_present;
+
+@@ -404,7 +399,7 @@ static int construct_alloc_key(struct key_type *type,
+
+ mutex_unlock(&key_construction_mutex);
+ if (dest_keyring)
+- __key_link_end(dest_keyring, &index_key, prealloc);
++ __key_link_end(dest_keyring, &ctx->index_key, prealloc);
+ mutex_unlock(&user->cons_lock);
+ *_key = key;
+ kleave(" = 0 [%d]", key_serial(key));
+@@ -420,7 +415,7 @@ key_already_present:
+ ret = __key_link_check_live_key(dest_keyring, key);
+ if (ret == 0)
+ __key_link(dest_keyring, key, &prealloc);
+- __key_link_end(dest_keyring, &index_key, prealloc);
++ __key_link_end(dest_keyring, &ctx->index_key, prealloc);
+ if (ret < 0)
+ goto link_check_failed;
+ }
+@@ -449,8 +444,7 @@ alloc_failed:
+ /*
+ * Commence key construction.
+ */
+-static struct key *construct_key_and_link(struct key_type *type,
+- const char *description,
++static struct key *construct_key_and_link(struct keyring_search_context *ctx,
+ const char *callout_info,
+ size_t callout_len,
+ void *aux,
+@@ -469,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
+
+ construct_get_dest_keyring(&dest_keyring);
+
+- ret = construct_alloc_key(type, description, dest_keyring, flags, user,
+- &key);
++ ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
+ key_user_put(user);
+
+ if (ret == 0) {
+@@ -534,18 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
+ struct key *dest_keyring,
+ unsigned long flags)
+ {
+- const struct cred *cred = current_cred();
++ struct keyring_search_context ctx = {
++ .index_key.type = type,
++ .index_key.description = description,
++ .cred = current_cred(),
++ .match = type->match,
++ .match_data = description,
++ .flags = KEYRING_SEARCH_LOOKUP_DIRECT,
++ };
+ struct key *key;
+ key_ref_t key_ref;
+ int ret;
+
+ kenter("%s,%s,%p,%zu,%p,%p,%lx",
+- type->name, description, callout_info, callout_len, aux,
+- dest_keyring, flags);
++ ctx.index_key.type->name, ctx.index_key.description,
++ callout_info, callout_len, aux, dest_keyring, flags);
+
+ /* search all the process keyrings for a key */
+- key_ref = search_process_keyrings(type, description, type->match,
+- false, cred);
++ key_ref = search_process_keyrings(&ctx);
+
+ if (!IS_ERR(key_ref)) {
+ key = key_ref_to_ptr(key_ref);
+@@ -568,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
+ if (!callout_info)
+ goto error;
+
+- key = construct_key_and_link(type, description, callout_info,
+- callout_len, aux, dest_keyring,
+- flags);
++ key = construct_key_and_link(&ctx, callout_info, callout_len,
++ aux, dest_keyring, flags);
+ }
+
+ error:
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 92077de..8d09852 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -239,15 +239,17 @@ static int key_get_instantiation_authkey_match(const struct key *key,
+ */
+ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ {
+- const struct cred *cred = current_cred();
++ struct keyring_search_context ctx = {
++ .index_key.type = &key_type_request_key_auth,
++ .cred = current_cred(),
++ .match = key_get_instantiation_authkey_match,
++ .match_data = (void *)(unsigned long)target_id,
++ .flags = KEYRING_SEARCH_LOOKUP_DIRECT,
++ };
+ struct key *authkey;
+ key_ref_t authkey_ref;
+
+- authkey_ref = search_process_keyrings(
+- &key_type_request_key_auth,
+- (void *) (unsigned long) target_id,
+- key_get_instantiation_authkey_match,
+- false, cred);
++ authkey_ref = search_process_keyrings(&ctx);
+
+ if (IS_ERR(authkey_ref)) {
+ authkey = ERR_CAST(authkey_ref);
+diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
+index 55dc889..faa2cae 100644
+--- a/security/keys/user_defined.c
++++ b/security/keys/user_defined.c
+@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
+ * arbitrary blob of data as the payload
+ */
+ struct key_type key_type_user = {
+- .name = "user",
+- .instantiate = user_instantiate,
+- .update = user_update,
+- .match = user_match,
+- .revoke = user_revoke,
+- .destroy = user_destroy,
+- .describe = user_describe,
+- .read = user_read,
++ .name = "user",
++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
++ .instantiate = user_instantiate,
++ .update = user_update,
++ .match = user_match,
++ .revoke = user_revoke,
++ .destroy = user_destroy,
++ .describe = user_describe,
++ .read = user_read,
+ };
+
+ EXPORT_SYMBOL_GPL(key_type_user);
+@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
+ */
+ struct key_type key_type_logon = {
+ .name = "logon",
++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+ .instantiate = user_instantiate,
+ .update = user_update,
+ .match = user_match,
+--
+1.8.3.1
+
+
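This is the classic "parameter object" refactoring: the five arguments that
every nested search call used to copy collapse into one struct passed by
pointer, and future options become flag bits rather than yet more parameters.
A minimal userspace sketch of the shape (names loosely mirror the patch; the
match callback is a hypothetical stand-in):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct search_ctx {
	const char *type;
	const char *description;
	bool (*match)(const struct search_ctx *ctx, const char *candidate);
	const void *match_data;		/* kept distinct from description */
	unsigned flags;
#define SEARCH_NO_STATE_CHECK	0x0002
};

static bool match_by_name(const struct search_ctx *ctx, const char *candidate)
{
	return strcmp(ctx->match_data, candidate) == 0;
}

static bool search(const struct search_ctx *ctx, const char *candidate)
{
	/* One pointer travels down the recursion instead of five args. */
	if (!(ctx->flags & SEARCH_NO_STATE_CHECK)) {
		/* ...state checks (revoked/expired) would run here... */
	}
	return ctx->match(ctx, candidate);
}

int main(void)
{
	struct search_ctx ctx = {
		.type		= "user",
		.description	= "krb5-demo",
		.match		= match_by_name,
		.match_data	= "krb5-demo",
		.flags		= 0,
	};
	printf("match=%d\n", search(&ctx, "krb5-demo"));
	return 0;
}
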
+From 4dffed72b92a305bcdbb73b719570d8f4ec53f46 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 06/10] KEYS: Search for auth-key by name rather than target
+ key ID
+
+Search for auth-key by name rather than by target key ID as, in a future
+patch, we'll be searching directly by index key in preference to iteration
+over all keys.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ security/keys/request_key_auth.c | 21 +++++++--------------
+ 1 file changed, 7 insertions(+), 14 deletions(-)
+
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 8d09852..7495a93 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -18,6 +18,7 @@
+ #include <linux/slab.h>
+ #include <asm/uaccess.h>
+ #include "internal.h"
++#include <keys/user-type.h>
+
+ static int request_key_auth_instantiate(struct key *,
+ struct key_preparsed_payload *);
+@@ -222,33 +223,25 @@ error_alloc:
+ }
+
+ /*
+- * See if an authorisation key is associated with a particular key.
+- */
+-static int key_get_instantiation_authkey_match(const struct key *key,
+- const void *_id)
+-{
+- struct request_key_auth *rka = key->payload.data;
+- key_serial_t id = (key_serial_t)(unsigned long) _id;
+-
+- return rka->target_key->serial == id;
+-}
+-
+-/*
+ * Search the current process's keyrings for the authorisation key for
+ * instantiation of a key.
+ */
+ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ {
++ char description[16];
+ struct keyring_search_context ctx = {
+ .index_key.type = &key_type_request_key_auth,
++ .index_key.description = description,
+ .cred = current_cred(),
+- .match = key_get_instantiation_authkey_match,
+- .match_data = (void *)(unsigned long)target_id,
++ .match = user_match,
++ .match_data = description,
+ .flags = KEYRING_SEARCH_LOOKUP_DIRECT,
+ };
+ struct key *authkey;
+ key_ref_t authkey_ref;
+
++ sprintf(description, "%x", target_id);
++
+ authkey_ref = search_process_keyrings(&ctx);
+
+ if (IS_ERR(authkey_ref)) {
+--
+1.8.3.1
+
+
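The auth key's description is now simply the target key's serial number
printed in hex, so the stock user_match() string comparison (and, later,
direct index lookup) can find it without iterating over keys and unpacking
payloads. The 16-byte buffer above is comfortably enough, as this sketch of
the encoding shows:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int32_t target_id = 0x7fffffff;	/* widest possible key serial */
	char description[16];

	/* Same formatting as key_get_instantiation_authkey() above:
	 * a 32-bit serial is at most 8 hex digits plus the NUL. */
	snprintf(description, sizeof(description), "%x", target_id);
	printf("auth key description: \"%s\"\n", description);
	return 0;
}
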
+From 5f3c76b0923620ddd5294270ac478819f06f21d1 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 07/10] KEYS: Define a __key_get() wrapper to use rather than
+ atomic_inc()
+
+Define a __key_get() wrapper to use rather than atomic_inc() on the key usage
+count as this makes it easier to hook in refcount error debugging.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ Documentation/security/keys.txt | 13 ++++++++-----
+ include/linux/key.h | 10 +++++++---
+ security/keys/key.c | 2 +-
+ security/keys/keyring.c | 6 +++---
+ security/keys/process_keys.c | 16 ++++++++--------
+ 5 files changed, 27 insertions(+), 20 deletions(-)
+
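The split gives a single choke point for refcount debugging: __key_get()
assumes the key pointer is valid, while key_get() keeps the old NULL-tolerant
contract by delegating. A userspace sketch of the pattern (illustrative; the
kernel version uses atomic_inc(), as the hunks below show):

#include <stdio.h>

struct key { int usage; };

/* Fast path: caller guarantees key != NULL; the one place a
 * refcount-debugging hook would be added. */
static struct key *__key_get(struct key *key)
{
	key->usage++;		/* atomic_inc(&key->usage) in the kernel */
	return key;
}

/* NULL-tolerant wrapper preserving the old key_get() behaviour. */
static struct key *key_get(struct key *key)
{
	return key ? __key_get(key) : key;
}

int main(void)
{
	struct key k = { .usage = 1 };
	key_get(&k);
	key_get(NULL);		/* safe: never dereferenced */
	printf("usage=%d\n", k.usage);
	return 0;
}
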
+diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
+index 9ede670..a4c33f1 100644
+--- a/Documentation/security/keys.txt
++++ b/Documentation/security/keys.txt
+@@ -960,14 +960,17 @@ payload contents" for more information.
+ the argument will not be parsed.
+
+
+-(*) Extra references can be made to a key by calling the following function:
++(*) Extra references can be made to a key by calling one of the following
++ functions:
+
++ struct key *__key_get(struct key *key);
+ struct key *key_get(struct key *key);
+
+- These need to be disposed of by calling key_put() when they've been
+- finished with. The key pointer passed in will be returned. If the pointer
+- is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
+- no increment will take place.
++ Keys so referenced will need to be disposed of by calling key_put() when
++ they've been finished with. The key pointer passed in will be returned.
++
++ In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
++ then the key will not be dereferenced and no increment will take place.
+
+
+ (*) A key's serial number can be obtained by calling:
+diff --git a/include/linux/key.h b/include/linux/key.h
+index d573e82..ef596c7 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -219,13 +219,17 @@ extern void key_revoke(struct key *key);
+ extern void key_invalidate(struct key *key);
+ extern void key_put(struct key *key);
+
+-static inline struct key *key_get(struct key *key)
++static inline struct key *__key_get(struct key *key)
+ {
+- if (key)
+- atomic_inc(&key->usage);
++ atomic_inc(&key->usage);
+ return key;
+ }
+
++static inline struct key *key_get(struct key *key)
++{
++ return key ? __key_get(key) : key;
++}
++
+ static inline void key_ref_put(key_ref_t key_ref)
+ {
+ key_put(key_ref_to_ptr(key_ref));
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 7e6bc39..1e23cc2 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -644,7 +644,7 @@ found:
+ /* this races with key_put(), but that doesn't matter since key_put()
+ * doesn't actually change the key
+ */
+- atomic_inc(&key->usage);
++ __key_get(key);
+
+ error:
+ spin_unlock(&key_serial_lock);
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index b42f2d4..87eff32 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -479,7 +479,7 @@ not_this_keyring:
+
+ /* we found a viable match */
+ found:
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key->last_used_at = ctx->now.tv_sec;
+ keyring->last_used_at = ctx->now.tv_sec;
+ while (sp > 0)
+@@ -573,7 +573,7 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ return ERR_PTR(-ENOKEY);
+
+ found:
+- atomic_inc(&key->usage);
++ __key_get(key);
+ keyring->last_used_at = key->last_used_at =
+ current_kernel_time().tv_sec;
+ rcu_read_unlock();
+@@ -909,7 +909,7 @@ void __key_link(struct key *keyring, struct key *key,
+
+ klist = rcu_dereference_locked_keyring(keyring);
+
+- atomic_inc(&key->usage);
++ __key_get(key);
+ keyring->last_used_at = key->last_used_at =
+ current_kernel_time().tv_sec;
+
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index e68a3e0..68548ea 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
+ if (IS_ERR(keyring))
+ return PTR_ERR(keyring);
+ } else {
+- atomic_inc(&keyring->usage);
++ __key_get(keyring);
+ }
+
+ /* install the keyring */
+@@ -544,7 +544,7 @@ try_again:
+ }
+
+ key = ctx.cred->thread_keyring;
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+@@ -562,7 +562,7 @@ try_again:
+ }
+
+ key = ctx.cred->process_keyring;
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+@@ -593,7 +593,7 @@ try_again:
+
+ rcu_read_lock();
+ key = rcu_dereference(ctx.cred->session_keyring);
+- atomic_inc(&key->usage);
++ __key_get(key);
+ rcu_read_unlock();
+ key_ref = make_key_ref(key, 1);
+ break;
+@@ -606,7 +606,7 @@ try_again:
+ }
+
+ key = ctx.cred->user->uid_keyring;
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+@@ -618,7 +618,7 @@ try_again:
+ }
+
+ key = ctx.cred->user->session_keyring;
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+@@ -632,7 +632,7 @@ try_again:
+ if (!key)
+ goto error;
+
+- atomic_inc(&key->usage);
++ __key_get(key);
+ key_ref = make_key_ref(key, 1);
+ break;
+
+@@ -648,7 +648,7 @@ try_again:
+ } else {
+ rka = ctx.cred->request_key_auth->payload.data;
+ key = rka->dest_keyring;
+- atomic_inc(&key->usage);
++ __key_get(key);
+ }
+ up_read(&ctx.cred->request_key_auth->sem);
+ if (!key)
+--
+1.8.3.1
+
+
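+The split above yields two reference-taking helpers with different contracts:
+__key_get() assumes the pointer is known to be valid and non-NULL (the caller
+already holds a reference or a lock that keeps the key live), while key_get()
+remains the NULL-tolerant variant. A short sketch of the intended pairing
+(example_take_refs() is illustrative, not part of the patch):
+
+	static void example_take_refs(struct key *known, struct key *maybe)
+	{
+		struct key *k;
+
+		__key_get(known);	/* caller guarantees known != NULL */
+		key_put(known);		/* every get is balanced by a put */
+
+		k = key_get(maybe);	/* NULL-safe; returns its argument */
+		if (k)
+			key_put(k);
+	}
+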
+From 99b0f3185570bb92a61952673b9933d9c1999508 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 08/10] KEYS: Drop the permissions argument from
+ __keyring_search_one()
+
+Drop the permissions argument from __keyring_search_one() as the only caller
+passes 0 here - which causes all checks to be skipped.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ security/keys/internal.h | 3 +--
+ security/keys/key.c | 2 +-
+ security/keys/keyring.c | 9 +++------
+ 3 files changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index f4bf938..73950bf 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -99,8 +99,7 @@ extern void __key_link_end(struct key *keyring,
+ unsigned long prealloc);
+
+ extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct keyring_index_key *index_key,
+- key_perm_t perm);
++ const struct keyring_index_key *index_key);
+
+ extern struct key *keyring_search_instkey(struct key *keyring,
+ key_serial_t target_id);
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 1e23cc2..7d716b8 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -847,7 +847,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ * update that instead if possible
+ */
+ if (index_key.type->update) {
+- key_ref = __keyring_search_one(keyring_ref, &index_key, 0);
++ key_ref = __keyring_search_one(keyring_ref, &index_key);
+ if (!IS_ERR(key_ref))
+ goto found_matching_key;
+ }
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index 87eff32..eeef1a0 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -531,15 +531,14 @@ EXPORT_SYMBOL(keyring_search);
+ * RCU is used to make it unnecessary to lock the keyring key list here.
+ *
+ * Returns a pointer to the found key with usage count incremented if
+- * successful and returns -ENOKEY if not found. Revoked keys and keys not
+- * providing the requested permission are skipped over.
++ * successful and returns -ENOKEY if not found. Revoked and invalidated keys
++ * are skipped over.
+ *
+ * If successful, the possession indicator is propagated from the keyring ref
+ * to the returned key reference.
+ */
+ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct keyring_index_key *index_key,
+- key_perm_t perm)
++ const struct keyring_index_key *index_key)
+ {
+ struct keyring_list *klist;
+ struct key *keyring, *key;
+@@ -560,8 +559,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ if (key->type == index_key->type &&
+ (!key->type->match ||
+ key->type->match(key, index_key->description)) &&
+- key_permission(make_key_ref(key, possessed),
+- perm) == 0 &&
+ !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED)))
+ )
+--
+1.8.3.1
+
+
+From cb720b39e41e62d55bf1e5f8243d78643d31154d Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 09/10] Add a generic associative array implementation.
+
+Add a generic associative array implementation that can be used as the
+container for keyrings, thereby massively increasing the capacity available
+whilst also speeding up searching in keyrings that contain a lot of keys.
+
+This may also be useful in FS-Cache for tracking cookies.
+
+Documentation is added into Documentation/associative_array.txt
+
+Some of the properties of the implementation are:
+
+ (1) Objects are opaque pointers. The implementation does not care where they
+ point (if anywhere) or what they point to (if anything).
+
+ [!] NOTE: Pointers to objects _must_ be zero in the two least significant
+ bits.
+
+ (2) Objects do not need to contain linkage blocks for use by the array. This
+ permits an object to be located in multiple arrays simultaneously.
+ Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects are labelled as being one of two types (the type is a bool value).
+ This information is stored in the array, but has no consequence to the
+ array itself or its algorithms.
+
+ (4) Objects require index keys to locate them within the array.
+
+ (5) Index keys must be unique. Inserting an object with the same key as one
+ already in the array will replace the old object.
+
+ (6) Index keys can be of any length and can be of different lengths.
+
+ (7) Index keys should encode the length early on, before any variation due to
+ length is seen.
+
+ (8) Index keys can include a hash to scatter objects throughout the array.
+
+ (9) The array can be iterated over. The objects will not necessarily come out
+ in key order.
+
+(10) The array can be iterated whilst it is being modified, provided the RCU
+ readlock is being held by the iterator. Note, however, under these
+ circumstances, some objects may be seen more than once. If this is a
+ problem, the iterator should lock against modification. Objects will not
+ be missed, however, unless deleted.
+
+(11) Objects in the array can be looked up by means of their index key.
+
+(12) Objects can be looked up whilst the array is being modified, provided the
+ RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key. To improve memory efficiency,
+shortcuts can be emplaced to skip over what would otherwise be a series of
+single-occupancy nodes. Further, nodes pack leaf object pointers into spare
+space in the node rather than making an extra branch until as such time an
+object needs to be added to a full node.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ Documentation/assoc_array.txt | 574 +++++++++++++
+ include/linux/assoc_array.h | 92 ++
+ include/linux/assoc_array_priv.h | 182 ++++
+ lib/Kconfig | 14 +
+ lib/Makefile | 1 +
+ lib/assoc_array.c | 1745 ++++++++++++++++++++++++++++++++++++++
+ 6 files changed, 2608 insertions(+)
+ create mode 100644 Documentation/assoc_array.txt
+ create mode 100644 include/linux/assoc_array.h
+ create mode 100644 include/linux/assoc_array_priv.h
+ create mode 100644 lib/assoc_array.c
+
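+Properties (7) and (8) above suggest a concrete index-key layout. One
+hypothetical encoding that satisfies both - a fixed-size hash first for
+scattering, the length immediately after it, and only then the variable key
+material - might look like this (illustrative; no such structure is defined
+by the patch):
+
+	struct example_index_key {
+		unsigned long	hash;	/* scatters objects across the tree */
+		unsigned short	len;	/* seen before any length-dependent data */
+		unsigned char	data[];	/* the raw key material */
+	};
+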
+diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
+new file mode 100644
+index 0000000..f4faec0
+--- /dev/null
++++ b/Documentation/assoc_array.txt
+@@ -0,0 +1,574 @@
++ ========================================
++ GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
++ ========================================
++
++Contents:
++
++ - Overview.
++
++ - The public API.
++ - Edit script.
++ - Operations table.
++ - Manipulation functions.
++ - Access functions.
++ - Index key form.
++
++ - Internal workings.
++ - Basic internal tree layout.
++ - Shortcuts.
++ - Splitting and collapsing nodes.
++ - Non-recursive iteration.
++ - Simultaneous alteration and iteration.
++
++
++========
++OVERVIEW
++========
++
++This associative array implementation is an object container with the following
++properties:
++
++ (1) Objects are opaque pointers. The implementation does not care where they
++ point (if anywhere) or what they point to (if anything).
++
++ [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
++
++ (2) Objects do not need to contain linkage blocks for use by the array. This
++ permits an object to be located in multiple arrays simultaneously.
++ Rather, the array is made up of metadata blocks that point to objects.
++
++ (3) Objects require index keys to locate them within the array.
++
++ (4) Index keys must be unique. Inserting an object with the same key as one
++ already in the array will replace the old object.
++
++ (5) Index keys can be of any length and can be of different lengths.
++
++ (6) Index keys should encode the length early on, before any variation due to
++ length is seen.
++
++ (7) Index keys can include a hash to scatter objects throughout the array.
++
++ (8) The array can be iterated over. The objects will not necessarily come out
++ in key order.
++
++ (9) The array can be iterated over whilst it is being modified, provided the
++ RCU readlock is being held by the iterator. Note, however, under these
++ circumstances, some objects may be seen more than once. If this is a
++ problem, the iterator should lock against modification. Objects will not
++ be missed, however, unless deleted.
++
++(10) Objects in the array can be looked up by means of their index key.
++
++(11) Objects can be looked up whilst the array is being modified, provided the
++ RCU readlock is being held by the thread doing the look up.
++
++The implementation uses a tree of 16-pointer nodes internally that are indexed
++on each level by nibbles from the index key in the same manner as in a radix
++tree. To improve memory efficiency, shortcuts can be emplaced to skip over
++what would otherwise be a series of single-occupancy nodes. Further, nodes
++pack leaf object pointers into spare space in the node rather than making an
++extra branch until such time as an object needs to be added to a full node.
++
++
++==============
++THE PUBLIC API
++==============
++
++The public API can be found in <linux/assoc_array.h>. The associative array is
++rooted on the following structure:
++
++ struct assoc_array {
++ ...
++ };
++
++The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
++
++
++EDIT SCRIPT
++-----------
++
++The insertion and deletion functions produce an 'edit script' that can later be
++applied to effect the changes without risking ENOMEM. This retains the
++preallocated metadata blocks that will be installed in the internal tree and
++keeps track of the metadata blocks that will be removed from the tree when the
++script is applied.
++
++This is also used to keep track of dead blocks and dead objects after the
++script has been applied so that they can be freed later. The freeing is done
++after an RCU grace period has passed - thus allowing access functions to
++proceed under the RCU read lock.
++
++The script appears outside of the API as a pointer of the type:
++
++ struct assoc_array_edit;
++
++There are two functions for dealing with the script:
++
++ (1) Apply an edit script.
++
++ void assoc_array_apply_edit(struct assoc_array_edit *edit);
++
++ This will perform the edit functions, interpolating various write barriers
++ to permit accesses under the RCU read lock to continue. The edit script
++ will then be passed to call_rcu() to free it and any dead stuff it points
++ to.
++
++ (2) Cancel an edit script.
++
++ void assoc_array_cancel_edit(struct assoc_array_edit *edit);
++
++ This frees the edit script and all preallocated memory immediately. If
++ this was for insertion, the new object is _not_ released by this function,
++ but must rather be released by the caller.
++
++These functions are guaranteed not to fail.
++
++
++OPERATIONS TABLE
++----------------
++
++Various functions take a table of operations:
++
++ struct assoc_array_ops {
++ ...
++ };
++
++This points to a number of methods, all of which need to be provided:
++
++ (1) Get a chunk of index key from caller data:
++
++ unsigned long (*get_key_chunk)(const void *index_key, int level);
++
++ This should return a chunk of caller-supplied index key starting at the
++ *bit* position given by the level argument. The level argument will be a
++ multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
++ ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible.
++
++
++ (2) Get a chunk of an object's index key.
++
++ unsigned long (*get_object_key_chunk)(const void *object, int level);
++
++ As the previous function, but gets its data from an object in the array
++ rather than from a caller-supplied index key.
++
++
++ (3) See if this is the object we're looking for.
++
++ bool (*compare_object)(const void *object, const void *index_key);
++
++ Compare the object against an index key and return true if it matches and
++ false if it doesn't.
++
++
++ (4) Diff the index keys of two objects.
++
++ int (*diff_objects)(const void *a, const void *b);
++
++ Return the bit position at which the index keys of two objects differ or
++ -1 if they are the same.
++
++
++ (5) Free an object.
++
++ void (*free_object)(void *object);
++
++ Free the specified object. Note that this may be called an RCU grace
++ period after assoc_array_apply_edit() was called, so synchronize_rcu() may
++ be necessary on module unloading.
++
++
++MANIPULATION FUNCTIONS
++----------------------
++
++There are a number of functions for manipulating an associative array:
++
++ (1) Initialise an associative array.
++
++ void assoc_array_init(struct assoc_array *array);
++
++ This initialises the base structure for an associative array. It can't
++ fail.
++
++
++ (2) Insert/replace an object in an associative array.
++
++ struct assoc_array_edit *
++ assoc_array_insert(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key,
++ void *object);
++
++ This inserts the given object into the array. Note that the least
++ significant bit of the pointer must be zero as it's used to type-mark
++ pointers internally.
++
++ If an object already exists for that key then it will be replaced with the
++ new object and the old one will be freed automatically.
++
++ The index_key argument should hold index key information and is
++ passed to the methods in the ops table when they are called.
++
++ This function makes no alteration to the array itself, but rather returns
++ an edit script that must be applied. -ENOMEM is returned in the case of
++ an out-of-memory error.
++
++ The caller should lock exclusively against other modifiers of the array.
++
++
++ (3) Delete an object from an associative array.
++
++ struct assoc_array_edit *
++ assoc_array_delete(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key);
++
++ This deletes an object that matches the specified data from the array.
++
++ The index_key argument should hold index key information and is
++ passed to the methods in the ops table when they are called.
++
++ This function makes no alteration to the array itself, but rather returns
++ an edit script that must be applied. -ENOMEM is returned in the case of
++ an out-of-memory error. NULL will be returned if the specified object is
++ not found within the array.
++
++ The caller should lock exclusively against other modifiers of the array.
++
++
++ (4) Delete all objects from an associative array.
++
++ struct assoc_array_edit *
++ assoc_array_clear(struct assoc_array *array,
++ const struct assoc_array_ops *ops);
++
++ This deletes all the objects from an associative array and leaves it
++ completely empty.
++
++ This function makes no alteration to the array itself, but rather returns
++ an edit script that must be applied. -ENOMEM is returned in the case of
++ an out-of-memory error.
++
++ The caller should lock exclusively against other modifiers of the array.
++
++
++ (5) Destroy an associative array, deleting all objects.
++
++ void assoc_array_destroy(struct assoc_array *array,
++ const struct assoc_array_ops *ops);
++
++ This destroys the contents of the associative array and leaves it
++ completely empty. It is not permitted for another thread to be traversing
++ the array under the RCU read lock at the same time as this function is
++ destroying it as no RCU deferral is performed on memory release -
++ something that would require memory to be allocated.
++
++ The caller should lock exclusively against other modifiers and accessors
++ of the array.
++
++
++ (6) Garbage collect an associative array.
++
++ int assoc_array_gc(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ bool (*iterator)(void *object, void *iterator_data),
++ void *iterator_data);
++
++ This iterates over the objects in an associative array and passes each one
++ to iterator(). If iterator() returns true, the object is kept. If it
++ returns false, the object will be freed. If the iterator() function
++ returns true, it must perform any appropriate refcount incrementing on the
++ object before returning.
++
++ The internal tree will be packed down if possible as part of the iteration
++ to reduce the number of nodes in it.
++
++ The iterator_data is passed directly to iterator() and is otherwise
++ ignored by the function.
++
++ The function will return 0 if successful and -ENOMEM if there wasn't
++ enough memory.
++
++ It is possible for other threads to iterate over or search the array under
++ the RCU read lock whilst this function is in progress. The caller should
++ lock exclusively against other modifiers of the array.
++
++
++ACCESS FUNCTIONS
++----------------
++
++There are two functions for accessing an associative array:
++
++ (1) Iterate over all the objects in an associative array.
++
++ int assoc_array_iterate(const struct assoc_array *array,
++ int (*iterator)(const void *object,
++ void *iterator_data),
++ void *iterator_data);
++
++ This passes each object in the array to the iterator callback function.
++ iterator_data is private data for that function.
++
++ This may be used on an array at the same time as the array is being
++ modified, provided the RCU read lock is held. Under such circumstances,
++ it is possible for the iteration function to see some objects twice. If
++ this is a problem, then modification should be locked against. The
++ iteration algorithm should not, however, miss any objects.
++
++ The function will return 0 if no objects were in the array or else it will
++ return the result of the last iterator function called. Iteration stops
++ immediately if any call to the iteration function results in a non-zero
++ return.
++
++
++ (2) Find an object in an associative array.
++
++ void *assoc_array_find(const struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key);
++
++ This walks through the array's internal tree directly to the object
++ specified by the index key.
++
++ This may be used on an array at the same time as the array is being
++ modified, provided the RCU read lock is held.
++
++ The function will return the object if found or will return NULL if the
++ object was not found.
++
++
++INDEX KEY FORM
++--------------
++
++The index key can be of any form, but since the algorithms aren't told how long
++the key is, it is strongly recommended that the index key includes its length
++very early on before any variation due to the length would have an effect on
++comparisons.
++
++This will cause leaves with different length keys to scatter away from each
++other - and those with the same length keys to cluster together.
++
++It is also recommended that the index key begin with a hash of the rest of the
++key to maximise scattering throughout keyspace.
++
++The better the scattering, the wider and lower the internal tree will be.
++
++Poor scattering isn't too much of a problem as there are shortcuts and nodes
++can contain mixtures of leaves and metadata pointers.
++
++The index key is read in chunks of a machine word. Each chunk is subdivided into
++one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
++on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is
++unlikely that more than one word of any particular index key will have to be
++used.
++
++
++=================
++INTERNAL WORKINGS
++=================
++
++The associative array data structure has an internal tree. This tree is
++constructed of two types of metadata blocks: nodes and shortcuts.
++
++A node is an array of slots. Each slot can contain one of four things:
++
++ (*) A NULL pointer, indicating that the slot is empty.
++
++ (*) A pointer to an object (a leaf).
++
++ (*) A pointer to a node at the next level.
++
++ (*) A pointer to a shortcut.
++
++
++BASIC INTERNAL TREE LAYOUT
++--------------------------
++
++Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index
++key space is strictly subdivided by the nodes in the tree and nodes occur on
++fixed levels. For example:
++
++ Level: 0 1 2 3
++ =============== =============== =============== ===============
++ NODE D
++ NODE B NODE C +------>+---+
++ +------>+---+ +------>+---+ | | 0 |
++ NODE A | | 0 | | | 0 | | +---+
++ +---+ | +---+ | +---+ | : :
++ | 0 | | : : | : : | +---+
++ +---+ | +---+ | +---+ | | f |
++ | 1 |---+ | 3 |---+ | 7 |---+ +---+
++ +---+ +---+ +---+
++ : : : : | 8 |---+
++ +---+ +---+ +---+ | NODE E
++ | e |---+ | f | : : +------>+---+
++ +---+ | +---+ +---+ | 0 |
++ | f | | | f | +---+
++ +---+ | +---+ : :
++ | NODE F +---+
++ +------>+---+ | f |
++ | 0 | NODE G +---+
++ +---+ +------>+---+
++ : : | | 0 |
++ +---+ | +---+
++ | 6 |---+ : :
++ +---+ +---+
++ : : | f |
++ +---+ +---+
++ | f |
++ +---+
++
++In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
++Assuming no other metadata nodes in the tree, the key space is divided thus:
++
++ KEY PREFIX NODE
++ ========== ====
++ 137* D
++ 138* E
++ 13[0-69-f]* C
++ 1[0-24-f]* B
++ e6* G
++ e[0-57-f]* F
++ [02-df]* A
++
++So, for instance, keys with the following example index keys will be found in
++the appropriate nodes:
++
++ INDEX KEY PREFIX NODE
++ =============== ======= ====
++ 13694892892489 13 C
++ 13795289025897 137 D
++ 13889dde88793 138 E
++ 138bbb89003093 138 E
++ 1394879524789 13 C
++ 1458952489 1 B
++ 9431809de993ba - A
++ b4542910809cd - A
++ e5284310def98 e F
++ e68428974237 e6 G
++ e7fffcbd443 e F
++ f3842239082 - A
++
++To save memory, if a node can hold all the leaves in its portion of keyspace,
++then the node will have all those leaves in it and will not have any metadata
++pointers - even if some of those leaves would like to be in the same slot.
++
++A node can contain a heterogeneous mix of leaves and metadata pointers.
++Metadata pointers must be in the slots that match their subdivisions of key
++space. The leaves can be in any slot not occupied by a metadata pointer. It
++is guaranteed that none of the leaves in a node will match a slot occupied by a
++metadata pointer. If the metadata pointer is there, any leaf whose key matches
++the metadata key prefix must be in the subtree that the metadata pointer points
++to.
++
++In the above example list of index keys, node A will contain:
++
++ SLOT CONTENT INDEX KEY (PREFIX)
++ ==== =============== ==================
++ 1 PTR TO NODE B 1*
++ any LEAF 9431809de993ba
++ any LEAF b4542910809cd
++ e PTR TO NODE F e*
++ any LEAF f3842239082
++
++and node B:
++
++ 3 PTR TO NODE C 13*
++ any LEAF 1458952489
++
++
++SHORTCUTS
++---------
++
++Shortcuts are metadata records that jump over a piece of keyspace. A shortcut
++is a replacement for a series of single-occupancy nodes ascending through the
++levels. Shortcuts exist to save memory and to speed up traversal.
++
++It is possible for the root of the tree to be a shortcut - say, for example,
++the tree contains at least 17 leaves all with key prefix '1111'. The insertion
++algorithm will insert a shortcut to skip over the '1111' keyspace in a single
++bound and get to the fourth level where these actually become different.
++
++
++SPLITTING AND COLLAPSING NODES
++------------------------------
++
++Each node has a maximum capacity of 16 leaves and metadata pointers. If the
++insertion algorithm finds that it is trying to insert a 17th object into a
++node, that node will be split such that at least two leaves that have a common
++key segment at that level end up in a separate node rooted on that slot for
++that common key segment.
++
++If the leaves in a full node and the leaf that is being inserted are
++sufficiently similar, then a shortcut will be inserted into the tree.
++
++When the number of objects in the subtree rooted at a node falls to 16 or
++fewer, then the subtree will be collapsed down to a single node - and this will
++ripple towards the root if possible.
++
++
++NON-RECURSIVE ITERATION
++-----------------------
++
++Each node and shortcut contains a back pointer to its parent and the number
++of the slot in that parent that points to it. Non-recursive iteration uses
++these to proceed rootwards through the tree, resuming at slot N + 1 in the
++parent node, so that progress is made without the need for a stack.
++
++The backpointers, however, make simultaneous alteration and iteration tricky.
++
++
++SIMULTANEOUS ALTERATION AND ITERATION
++-------------------------------------
++
++There are a number of cases to consider:
++
++ (1) Simple insert/replace. This involves simply replacing a NULL or old
++ matching leaf pointer with the pointer to the new leaf after a barrier.
++ The metadata blocks don't change otherwise. An old leaf won't be freed
++ until after the RCU grace period.
++
++ (2) Simple delete. This involves just clearing an old matching leaf. The
++ metadata blocks don't change otherwise. The old leaf won't be freed until
++ after the RCU grace period.
++
++ (3) Insertion replacing part of a subtree that we haven't yet entered. This
++ may involve replacement of part of that subtree - but that won't affect
++ the iteration as we won't have reached the pointer to it yet and the
++ ancestry blocks are not replaced (the layout of those does not change).
++
++ (4) Insertion replacing nodes that we're actively processing. This isn't a
++ problem as we've passed the anchoring pointer and won't switch onto the
++ new layout until we follow the back pointers - at which point we've
++ already examined the leaves in the replaced node (we iterate over all the
++ leaves in a node before following any of its metadata pointers).
++
++ We might, however, re-see some leaves that have been split out into a new
++ branch that's in a slot further along than we were at.
++
++ (5) Insertion replacing nodes that we're processing a dependent branch of.
++ This won't affect us until we follow the back pointers. Similar to (4).
++
++ (6) Deletion collapsing a branch under us. This doesn't affect us because the
++ back pointers will get us back to the parent of the new node before we
++ could see the new node. The entire collapsed subtree is thrown away
++ unchanged - and will still be rooted on the same slot, so we shouldn't
++ process it a second time as we'll go back to slot + 1.
++
++Note:
++
++ (*) Under some circumstances, we need to simultaneously change the parent
++ pointer and the parent slot pointer on a node (say, for example, we
++ inserted another node before it and moved it up a level). We cannot do
++ this without locking against a read - so we have to replace that node too.
++
++ However, when we're changing a shortcut into a node this isn't a problem
++ as shortcuts only have one slot and so the parent slot number isn't used
++ when traversing backwards over one. This means that it's okay to change
++ the slot number first - provided suitable barriers are used to make sure
++ the parent slot number is read after the back pointer.
++
++Obsolete blocks and leaves are freed up after an RCU grace period has passed,
++so as long as anyone doing walking or iteration holds the RCU read lock, the
++old superstructure should not go away on them.
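+
+Tying the manipulation functions described above together, a typical insertion
+under a caller-held write lock looks roughly like the sketch below. Here
+my_array, my_ops, my_index_key and my_object stand in for caller-supplied
+values (an example ops table is sketched after the header that follows), and
+the -ENOMEM case is assumed to follow the usual ERR_PTR() convention:
+
+	struct assoc_array my_array;
+	struct assoc_array_edit *edit;
+
+	assoc_array_init(&my_array);
+
+	/* Preallocate all the metadata the change will need... */
+	edit = assoc_array_insert(&my_array, &my_ops, my_index_key, my_object);
+	if (IS_ERR(edit))
+		return PTR_ERR(edit);		/* typically -ENOMEM */
+
+	/* ...then commit. This step cannot fail and interpolates the barriers
+	 * needed to keep concurrent RCU readers safe; the caller must still
+	 * exclude other modifiers of the array. */
+	assoc_array_apply_edit(edit);
+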
+diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
+new file mode 100644
+index 0000000..9a193b8
+--- /dev/null
++++ b/include/linux/assoc_array.h
+@@ -0,0 +1,92 @@
++/* Generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#ifndef _LINUX_ASSOC_ARRAY_H
++#define _LINUX_ASSOC_ARRAY_H
++
++#ifdef CONFIG_ASSOCIATIVE_ARRAY
++
++#include <linux/types.h>
++
++#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
++
++/*
++ * Generic associative array.
++ */
++struct assoc_array {
++ struct assoc_array_ptr *root; /* The node at the root of the tree */
++ unsigned long nr_leaves_on_tree;
++};
++
++/*
++ * Operations on objects and index keys for use by array manipulation routines.
++ */
++struct assoc_array_ops {
++ /* Method to get a chunk of an index key from caller-supplied data */
++ unsigned long (*get_key_chunk)(const void *index_key, int level);
++
++ /* Method to get a piece of an object's index key */
++ unsigned long (*get_object_key_chunk)(const void *object, int level);
++
++ /* Is this the object we're looking for? */
++ bool (*compare_object)(const void *object, const void *index_key);
++
++ /* How different are two objects, to a bit position in their keys? (or
++ * -1 if they're the same)
++ */
++ int (*diff_objects)(const void *a, const void *b);
++
++ /* Method to free an object. */
++ void (*free_object)(void *object);
++};
++
++/*
++ * Access and manipulation functions.
++ */
++struct assoc_array_edit;
++
++static inline void assoc_array_init(struct assoc_array *array)
++{
++ array->root = NULL;
++ array->nr_leaves_on_tree = 0;
++}
++
++extern int assoc_array_iterate(const struct assoc_array *array,
++ int (*iterator)(const void *object,
++ void *iterator_data),
++ void *iterator_data);
++extern void *assoc_array_find(const struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key);
++extern void assoc_array_destroy(struct assoc_array *array,
++ const struct assoc_array_ops *ops);
++extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key,
++ void *object);
++extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
++ void *object);
++extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key);
++extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
++ const struct assoc_array_ops *ops);
++extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
++extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
++extern int assoc_array_gc(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ bool (*iterator)(void *object, void *iterator_data),
++ void *iterator_data);
++
++#endif /* CONFIG_ASSOCIATIVE_ARRAY */
++#endif /* _LINUX_ASSOC_ARRAY_H */
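+
+As promised above, a sketch of a minimal operations table for objects keyed by
+a single unsigned long. struct my_obj and the my_*() helpers are illustrative,
+not part of the patch; a real user would typically have multi-word keys:
+
+	struct my_obj {
+		unsigned long	key;
+	};
+
+	/* Single-word keys: every chunk request is satisfied from word 0. */
+	static unsigned long my_get_key_chunk(const void *index_key, int level)
+	{
+		return *(const unsigned long *)index_key;
+	}
+
+	static unsigned long my_get_object_key_chunk(const void *object, int level)
+	{
+		return ((const struct my_obj *)object)->key;
+	}
+
+	static bool my_compare_object(const void *object, const void *index_key)
+	{
+		return ((const struct my_obj *)object)->key ==
+			*(const unsigned long *)index_key;
+	}
+
+	/* Bit position of the first difference, or -1 if the keys are equal. */
+	static int my_diff_objects(const void *a, const void *b)
+	{
+		unsigned long diff = ((const struct my_obj *)a)->key ^
+				     ((const struct my_obj *)b)->key;
+
+		return diff ? __ffs(diff) : -1;
+	}
+
+	static void my_free_object(void *object)
+	{
+		kfree(object);
+	}
+
+	static const struct assoc_array_ops my_ops = {
+		.get_key_chunk		= my_get_key_chunk,
+		.get_object_key_chunk	= my_get_object_key_chunk,
+		.compare_object		= my_compare_object,
+		.diff_objects		= my_diff_objects,
+		.free_object		= my_free_object,
+	};
+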
+diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
+new file mode 100644
+index 0000000..711275e
+--- /dev/null
++++ b/include/linux/assoc_array_priv.h
+@@ -0,0 +1,182 @@
++/* Private definitions for the generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
++#define _LINUX_ASSOC_ARRAY_PRIV_H
++
++#ifdef CONFIG_ASSOCIATIVE_ARRAY
++
++#include <linux/assoc_array.h>
++
++#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */
++#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1)
++#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT))
++#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1)
++#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
++#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG))
++
++/*
++ * Undefined type representing a pointer with type information in the bottom
++ * two bits.
++ */
++struct assoc_array_ptr;
++
++/*
++ * An N-way node in the tree.
++ *
++ * Each slot contains one of four things:
++ *
++ * (1) Nothing (NULL).
++ *
++ * (2) A leaf object (pointer type 0).
++ *
++ * (3) A next-level node (pointer type 1, subtype 0).
++ *
++ * (4) A shortcut (pointer type 1, subtype 1).
++ *
++ * The tree is optimised for search-by-ID, but permits reasonable iteration
++ * also.
++ *
++ * The tree is navigated by constructing an index key consisting of an array of
++ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
++ *
++ * The segments correspond to levels of the tree (the first segment is used at
++ * level 0, the second at level 1, etc.).
++ */
++struct assoc_array_node {
++ struct assoc_array_ptr *back_pointer;
++ u8 parent_slot;
++ struct assoc_array_ptr *slots[ASSOC_ARRAY_FAN_OUT];
++ unsigned long nr_leaves_on_branch;
++};
++
++/*
++ * A shortcut through the index space out to where a collection of nodes/leaves
++ * with the same IDs live.
++ */
++struct assoc_array_shortcut {
++ struct assoc_array_ptr *back_pointer;
++ int parent_slot;
++ int skip_to_level;
++ struct assoc_array_ptr *next_node;
++ unsigned long index_key[];
++};
++
++/*
++ * Preallocation cache.
++ */
++struct assoc_array_edit {
++ struct rcu_head rcu;
++ struct assoc_array *array;
++ const struct assoc_array_ops *ops;
++ const struct assoc_array_ops *ops_for_excised_subtree;
++ struct assoc_array_ptr *leaf;
++ struct assoc_array_ptr **leaf_p;
++ struct assoc_array_ptr *dead_leaf;
++ struct assoc_array_ptr *new_meta[3];
++ struct assoc_array_ptr *excised_meta[1];
++ struct assoc_array_ptr *excised_subtree;
++ struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT];
++ struct assoc_array_ptr *set_backpointers_to;
++ struct assoc_array_node *adjust_count_on;
++ long adjust_count_by;
++ struct {
++ struct assoc_array_ptr **ptr;
++ struct assoc_array_ptr *to;
++ } set[2];
++ struct {
++ u8 *p;
++ u8 to;
++ } set_parent_slot[1];
++ u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
++};
++
++/*
++ * Internal tree member pointers are marked in the bottom one or two bits to
++ * indicate what type they are so that we don't have to look behind every
++ * pointer to see what it points to.
++ *
++ * We provide functions to test type annotations and to create and translate
++ * the annotated pointers.
++ */
++#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
++#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */
++#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */
++#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL
++#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL
++#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
++
++static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
++{
++ return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
++}
++static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
++{
++ return !assoc_array_ptr_is_meta(x);
++}
++static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
++{
++ return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
++}
++static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
++{
++ return !assoc_array_ptr_is_shortcut(x);
++}
++
++static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
++{
++ return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
++}
++
++static inline
++unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
++{
++ return (unsigned long)x &
++ ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
++}
++static inline
++struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
++{
++ return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
++}
++static inline
++struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
++{
++ return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
++}
++
++static inline
++struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
++{
++ return (struct assoc_array_ptr *)((unsigned long)p | t);
++}
++static inline
++struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
++{
++ return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
++}
++static inline
++struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
++{
++ return __assoc_array_x_to_ptr(
++ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
++}
++static inline
++struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
++{
++ return __assoc_array_x_to_ptr(
++ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
++}
++
++#endif /* CONFIG_ASSOCIATIVE_ARRAY */
++#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
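+
+The type-marking scheme above is why leaf pointers must have their bottom
+bit(s) clear: any kmalloc'd object is at least word-aligned, leaving bits 0-1
+of its address free to carry type information. A quick sketch of the round
+trip (object stands for any suitably aligned pointer):
+
+	struct assoc_array_ptr *p = assoc_array_leaf_to_ptr(object);
+
+	/* Bit 0 is clear, so the pointer still reads as a leaf... */
+	BUG_ON(!assoc_array_ptr_is_leaf(p));
+
+	/* ...and stripping the mark recovers the original pointer. */
+	BUG_ON(assoc_array_ptr_to_leaf(p) != object);
+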
+diff --git a/lib/Kconfig b/lib/Kconfig
+index 35da513..b7d3234 100644
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -312,6 +312,20 @@ config TEXTSEARCH_FSM
+ config BTREE
+ boolean
+
++config ASSOCIATIVE_ARRAY
++ bool
++ help
++ Generic associative array. Can be searched and iterated over whilst
++ it is being modified. It is also reasonably quick to search and
++ modify. The algorithms are non-recursive, and the trees are highly
++ capacious.
++
++ See:
++
++ Documentation/assoc_array.txt
++
++ for more information.
++
+ config HAS_IOMEM
+ boolean
+ depends on !NO_IOMEM
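+
+Note that ASSOCIATIVE_ARRAY has no prompt string, so it cannot be switched on
+from menuconfig; a subsystem wanting the code is expected to pull it in with a
+select statement. A hypothetical example (not a hunk from this series):
+
+	config MY_SUBSYSTEM
+		bool "My subsystem"
+		select ASSOCIATIVE_ARRAY
+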
+diff --git a/lib/Makefile b/lib/Makefile
+index 7baccfd..2c59891 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -49,6 +49,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+ obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+
+ obj-$(CONFIG_BTREE) += btree.o
++obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
+ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+ obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+diff --git a/lib/assoc_array.c b/lib/assoc_array.c
+new file mode 100644
+index 0000000..a095281
+--- /dev/null
++++ b/lib/assoc_array.c
+@@ -0,0 +1,1745 @@
++/* Generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++//#define DEBUG
++#include <linux/slab.h>
++#include <linux/assoc_array_priv.h>
++
++/*
++ * Iterate over an associative array. The caller must hold the RCU read lock
++ * or better.
++ */
++static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
++ const struct assoc_array_ptr *stop,
++ int (*iterator)(const void *leaf,
++ void *iterator_data),
++ void *iterator_data)
++{
++ const struct assoc_array_shortcut *shortcut;
++ const struct assoc_array_node *node;
++ const struct assoc_array_ptr *cursor, *ptr, *parent;
++ unsigned long has_meta;
++ int slot, ret;
++
++ cursor = root;
++
++begin_node:
++ if (assoc_array_ptr_is_shortcut(cursor)) {
++ /* Descend through a shortcut */
++ shortcut = assoc_array_ptr_to_shortcut(cursor);
++ smp_read_barrier_depends();
++ cursor = ACCESS_ONCE(shortcut->next_node);
++ }
++
++ node = assoc_array_ptr_to_node(cursor);
++ smp_read_barrier_depends();
++ slot = 0;
++
++ /* We perform two passes of each node.
++ *
++ * The first pass does all the leaves in this node. This means we
++ * don't miss any leaves if the node is split up by insertion whilst
++ * we're iterating over the branches rooted here (we may, however, see
++ * some leaves twice).
++ */
++ has_meta = 0;
++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = ACCESS_ONCE(node->slots[slot]);
++ has_meta |= (unsigned long)ptr;
++ if (ptr && assoc_array_ptr_is_leaf(ptr)) {
++ /* We need a barrier between the read of the pointer
++ * and dereferencing the pointer - but only if we are
++ * actually going to dereference it.
++ */
++ smp_read_barrier_depends();
++
++ /* Invoke the callback */
++ ret = iterator(assoc_array_ptr_to_leaf(ptr),
++ iterator_data);
++ if (ret)
++ return ret;
++ }
++ }
++
++ /* The second pass attends to all the metadata pointers. If we follow
++ * one of these we may find that we don't come back here, but rather go
++ * back to a replacement node with the leaves in a different layout.
++ *
++ * We are guaranteed to make progress, however, as the slot number for
++ * a particular portion of the key space cannot change - and we
++ * continue at the back pointer + 1.
++ */
++ if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
++ goto finished_node;
++ slot = 0;
++
++continue_node:
++ node = assoc_array_ptr_to_node(cursor);
++ smp_read_barrier_depends();
++
++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = ACCESS_ONCE(node->slots[slot]);
++ if (assoc_array_ptr_is_meta(ptr)) {
++ cursor = ptr;
++ goto begin_node;
++ }
++ }
++
++finished_node:
++ /* Move up to the parent (may need to skip back over a shortcut) */
++ parent = ACCESS_ONCE(node->back_pointer);
++ slot = node->parent_slot;
++ if (parent == stop)
++ return 0;
++
++ if (assoc_array_ptr_is_shortcut(parent)) {
++ shortcut = assoc_array_ptr_to_shortcut(parent);
++ smp_read_barrier_depends();
++ cursor = parent;
++ parent = ACCESS_ONCE(shortcut->back_pointer);
++ slot = shortcut->parent_slot;
++ if (parent == stop)
++ return 0;
++ }
++
++ /* Ascend to next slot in parent node */
++ cursor = parent;
++ slot++;
++ goto continue_node;
++}
++
++/**
++ * assoc_array_iterate - Pass all objects in the array to a callback
++ * @array: The array to iterate over.
++ * @iterator: The callback function.
++ * @iterator_data: Private data for the callback function.
++ *
++ * Iterate over all the objects in an associative array. Each one will be
++ * presented to the iterator function.
++ *
++ * If the array is being modified concurrently with the iteration then it is
++ * possible that some objects in the array will be passed to the iterator
++ * callback more than once - though every object should be passed at least
++ * once. If this is undesirable then the caller must lock against modification
++ * for the duration of this function.
++ *
++ * The function will return 0 if no objects were in the array or else it will
++ * return the result of the last iterator function called. Iteration stops
++ * immediately if any call to the iteration function results in a non-zero
++ * return.
++ *
++ * The caller should hold the RCU read lock or better if concurrent
++ * modification is possible.
++ */
++int assoc_array_iterate(const struct assoc_array *array,
++ int (*iterator)(const void *object,
++ void *iterator_data),
++ void *iterator_data)
++{
++ struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
++
++ if (!root)
++ return 0;
++ return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
++}
++
++enum assoc_array_walk_status {
++ assoc_array_walk_tree_empty,
++ assoc_array_walk_found_terminal_node,
++ assoc_array_walk_found_wrong_shortcut,
++};
++
++struct assoc_array_walk_result {
++ struct {
++ struct assoc_array_node *node; /* Node in which leaf might be found */
++ int level;
++ int slot;
++ } terminal_node;
++ struct {
++ struct assoc_array_shortcut *shortcut;
++ int level;
++ int sc_level;
++ unsigned long sc_segments;
++ unsigned long dissimilarity;
++ } wrong_shortcut;
++};
++
++/*
++ * Navigate through the internal tree looking for the closest node to the key.
++ */
++static enum assoc_array_walk_status
++assoc_array_walk(const struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key,
++ struct assoc_array_walk_result *result)
++{
++ struct assoc_array_shortcut *shortcut;
++ struct assoc_array_node *node;
++ struct assoc_array_ptr *cursor, *ptr;
++ unsigned long sc_segments, dissimilarity;
++ unsigned long segments;
++ int level, sc_level, next_sc_level;
++ int slot;
++
++ pr_devel("-->%s()\n", __func__);
++
++ cursor = ACCESS_ONCE(array->root);
++ if (!cursor)
++ return assoc_array_walk_tree_empty;
++
++ level = 0;
++
++ /* Use segments from the key for the new leaf to navigate through the
++ * internal tree, skipping through nodes and shortcuts that are on
++ * route to the destination. Eventually we'll come to a slot that is
++ * either empty or contains a leaf at which point we've found a node in
++ * which the leaf we're looking for might be found or into which it
++ * should be inserted.
++ */
++jumped:
++ segments = ops->get_key_chunk(index_key, level);
++ pr_devel("segments[%d]: %lx\n", level, segments);
++
++ if (assoc_array_ptr_is_shortcut(cursor))
++ goto follow_shortcut;
++
++consider_node:
++ node = assoc_array_ptr_to_node(cursor);
++ smp_read_barrier_depends();
++
++ slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
++ slot &= ASSOC_ARRAY_FAN_MASK;
++ ptr = ACCESS_ONCE(node->slots[slot]);
++
++ pr_devel("consider slot %x [ix=%d type=%lu]\n",
++ slot, level, (unsigned long)ptr & 3);
++
++ if (!assoc_array_ptr_is_meta(ptr)) {
++ /* The node doesn't have a node/shortcut pointer in the slot
++ * corresponding to the index key that we have to follow.
++ */
++ result->terminal_node.node = node;
++ result->terminal_node.level = level;
++ result->terminal_node.slot = slot;
++ pr_devel("<--%s() = terminal_node\n", __func__);
++ return assoc_array_walk_found_terminal_node;
++ }
++
++ if (assoc_array_ptr_is_node(ptr)) {
++ /* There is a pointer to a node in the slot corresponding to
++ * this index key segment, so we need to follow it.
++ */
++ cursor = ptr;
++ level += ASSOC_ARRAY_LEVEL_STEP;
++ if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
++ goto consider_node;
++ goto jumped;
++ }
++
++ /* There is a shortcut in the slot corresponding to the index key
++ * segment. We follow the shortcut if its partial index key matches
++ * this leaf's. Otherwise we need to split the shortcut.
++ */
++ cursor = ptr;
++follow_shortcut:
++ shortcut = assoc_array_ptr_to_shortcut(cursor);
++ smp_read_barrier_depends();
++ pr_devel("shortcut to %d\n", shortcut->skip_to_level);
++ sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
++ BUG_ON(sc_level > shortcut->skip_to_level);
++
++ do {
++ /* Check the leaf against the shortcut's index key a word at a
++ * time, trimming the final word (the shortcut stores the index
++ * key completely from the root to the shortcut's target).
++ */
++ if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
++ segments = ops->get_key_chunk(index_key, sc_level);
++
++ sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
++ dissimilarity = segments ^ sc_segments;
++
++ if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
++ /* Trim segments that are beyond the shortcut */
++ int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++ dissimilarity &= ~(ULONG_MAX << shift);
++ next_sc_level = shortcut->skip_to_level;
++ } else {
++ next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
++ next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++ }
++
++ if (dissimilarity != 0) {
++ /* This shortcut points elsewhere */
++ result->wrong_shortcut.shortcut = shortcut;
++ result->wrong_shortcut.level = level;
++ result->wrong_shortcut.sc_level = sc_level;
++ result->wrong_shortcut.sc_segments = sc_segments;
++ result->wrong_shortcut.dissimilarity = dissimilarity;
++ return assoc_array_walk_found_wrong_shortcut;
++ }
++
++ sc_level = next_sc_level;
++ } while (sc_level < shortcut->skip_to_level);
++
++ /* The shortcut matches the leaf's index to this point. */
++ cursor = ACCESS_ONCE(shortcut->next_node);
++ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
++ level = sc_level;
++ goto jumped;
++ } else {
++ level = sc_level;
++ goto consider_node;
++ }
++}
++
++/**
++ * assoc_array_find - Find an object by index key
++ * @array: The associative array to search.
++ * @ops: The operations to use.
++ * @index_key: The key to the object.
++ *
++ * Find an object in an associative array by walking through the internal tree
++ * to the node that should contain the object and then searching the leaves
++ * there. NULL is returned if the requested object was not found in the array.
++ *
++ * The caller must hold the RCU read lock or better.
++ */
++void *assoc_array_find(const struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key)
++{
++ struct assoc_array_walk_result result;
++ const struct assoc_array_node *node;
++ const struct assoc_array_ptr *ptr;
++ const void *leaf;
++ int slot;
++
++ if (assoc_array_walk(array, ops, index_key, &result) !=
++ assoc_array_walk_found_terminal_node)
++ return NULL;
++
++ node = result.terminal_node.node;
++ smp_read_barrier_depends();
++
++ /* If the target key is available to us, it has to be pointed to by
++ * the terminal node.
++ */
++ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = ACCESS_ONCE(node->slots[slot]);
++ if (ptr && assoc_array_ptr_is_leaf(ptr)) {
++ /* We need a barrier between the read of the pointer
++ * and dereferencing the pointer - but only if we are
++ * actually going to dereference it.
++ */
++ leaf = assoc_array_ptr_to_leaf(ptr);
++ smp_read_barrier_depends();
++ if (ops->compare_object(leaf, index_key))
++ return (void *)leaf;
++ }
++ }
++
++ return NULL;
++}
++
++/*
++ * Destructively iterate over an associative array. The caller must prevent
++ * other simultaneous accesses.
++ */
++static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
++ const struct assoc_array_ops *ops)
++{
++ struct assoc_array_shortcut *shortcut;
++ struct assoc_array_node *node;
++ struct assoc_array_ptr *cursor, *parent = NULL;
++ int slot = -1;
++
++ pr_devel("-->%s()\n", __func__);
++
++ cursor = root;
++ if (!cursor) {
++ pr_devel("empty\n");
++ return;
++ }
++
++move_to_meta:
++ if (assoc_array_ptr_is_shortcut(cursor)) {
++ /* Descend through a shortcut */
++ pr_devel("[%d] shortcut\n", slot);
++ BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
++ shortcut = assoc_array_ptr_to_shortcut(cursor);
++ BUG_ON(shortcut->back_pointer != parent);
++ BUG_ON(slot != -1 && shortcut->parent_slot != slot);
++ parent = cursor;
++ cursor = shortcut->next_node;
++ slot = -1;
++ BUG_ON(!assoc_array_ptr_is_node(cursor));
++ }
++
++ pr_devel("[%d] node\n", slot);
++ node = assoc_array_ptr_to_node(cursor);
++ BUG_ON(node->back_pointer != parent);
++ BUG_ON(slot != -1 && node->parent_slot != slot);
++ slot = 0;
++
++continue_node:
++ pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ struct assoc_array_ptr *ptr = node->slots[slot];
++ if (!ptr)
++ continue;
++ if (assoc_array_ptr_is_meta(ptr)) {
++ parent = cursor;
++ cursor = ptr;
++ goto move_to_meta;
++ }
++
++ if (ops) {
++ pr_devel("[%d] free leaf\n", slot);
++ ops->free_object(assoc_array_ptr_to_leaf(ptr));
++ }
++ }
++
++ parent = node->back_pointer;
++ slot = node->parent_slot;
++ pr_devel("free node\n");
++ kfree(node);
++ if (!parent)
++ return; /* Done */
++
++ /* Move back up to the parent (may need to free a shortcut on
++ * the way up) */
++ if (assoc_array_ptr_is_shortcut(parent)) {
++ shortcut = assoc_array_ptr_to_shortcut(parent);
++ BUG_ON(shortcut->next_node != cursor);
++ cursor = parent;
++ parent = shortcut->back_pointer;
++ slot = shortcut->parent_slot;
++ pr_devel("free shortcut\n");
++ kfree(shortcut);
++ if (!parent)
++ return;
++
++ BUG_ON(!assoc_array_ptr_is_node(parent));
++ }
++
++ /* Ascend to next slot in parent node */
++ pr_devel("ascend to %p[%d]\n", parent, slot);
++ cursor = parent;
++ node = assoc_array_ptr_to_node(cursor);
++ slot++;
++ goto continue_node;
++}
++
++/**
++ * assoc_array_destroy - Destroy an associative array
++ * @array: The array to destroy.
++ * @ops: The operations to use.
++ *
++ * Discard all metadata and free all objects in an associative array. The
++ * array will be empty and ready to use again upon completion. This function
++ * cannot fail.
++ *
++ * The caller must prevent all other accesses whilst this takes place as no
++ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
++ * accesses to continue. On the other hand, no memory allocation is required.
++ */
++void assoc_array_destroy(struct assoc_array *array,
++ const struct assoc_array_ops *ops)
++{
++ assoc_array_destroy_subtree(array->root, ops);
++ array->root = NULL;
++}
++
++/*
++ * Handle insertion into an empty tree.
++ */
++static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
++{
++ struct assoc_array_node *new_n0;
++
++ pr_devel("-->%s()\n", __func__);
++
++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n0)
++ return false;
++
++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++ edit->leaf_p = &new_n0->slots[0];
++ edit->adjust_count_on = new_n0;
++ edit->set[0].ptr = &edit->array->root;
++ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++
++ pr_devel("<--%s() = ok [no root]\n", __func__);
++ return true;
++}
++
++/*
++ * Handle insertion into a terminal node.
++ */
++static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
++ const struct assoc_array_ops *ops,
++ const void *index_key,
++ struct assoc_array_walk_result *result)
++{
++ struct assoc_array_shortcut *shortcut, *new_s0;
++ struct assoc_array_node *node, *new_n0, *new_n1, *side;
++ struct assoc_array_ptr *ptr;
++ unsigned long dissimilarity, base_seg, blank;
++ size_t keylen;
++ bool have_meta;
++ int level, diff;
++ int slot, next_slot, free_slot, i, j;
++
++ node = result->terminal_node.node;
++ level = result->terminal_node.level;
++ edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
++
++ pr_devel("-->%s()\n", __func__);
++
++ /* We arrived at a node which doesn't have an onward node or shortcut
++ * pointer that we have to follow. This means that (a) the leaf we
++ * want must go here (either by insertion or replacement) or (b) we
++ * need to split this node and insert in one of the fragments.
++ */
++ free_slot = -1;
++
++ /* Firstly, we have to check the leaves in this node to see if there's
++ * a matching one we should replace in place.
++ */
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ ptr = node->slots[i];
++ if (!ptr) {
++ free_slot = i;
++ continue;
++ }
++ if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
++ pr_devel("replace in slot %d\n", i);
++ edit->leaf_p = &node->slots[i];
++ edit->dead_leaf = node->slots[i];
++ pr_devel("<--%s() = ok [replace]\n", __func__);
++ return true;
++ }
++ }
++
++ /* If there is a free slot in this node then we can just insert the
++ * leaf here.
++ */
++ if (free_slot >= 0) {
++ pr_devel("insert in free slot %d\n", free_slot);
++ edit->leaf_p = &node->slots[free_slot];
++ edit->adjust_count_on = node;
++ pr_devel("<--%s() = ok [insert]\n", __func__);
++ return true;
++ }
++
++ /* The node has no spare slots - so we're either going to have to split
++ * it or insert another node before it.
++ *
++ * Whatever, we're going to need at least two new nodes - so allocate
++ * those now. We may also need a new shortcut, but we deal with that
++ * when we need it.
++ */
++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n0)
++ return false;
++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++ new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n1)
++ return false;
++ edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
++
++ /* We need to find out how similar the leaves are. */
++ pr_devel("no spare slots\n");
++ have_meta = false;
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ ptr = node->slots[i];
++ if (assoc_array_ptr_is_meta(ptr)) {
++ edit->segment_cache[i] = 0xff;
++ have_meta = true;
++ continue;
++ }
++ base_seg = ops->get_object_key_chunk(
++ assoc_array_ptr_to_leaf(ptr), level);
++ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++ edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
++ }
++
++ if (have_meta) {
++ pr_devel("have meta\n");
++ goto split_node;
++ }
++
++ /* The node contains only leaves */
++ dissimilarity = 0;
++ base_seg = edit->segment_cache[0];
++ for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
++ dissimilarity |= edit->segment_cache[i] ^ base_seg;
++
++ pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
++
++ if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
++ /* The old leaves all cluster in the same slot. We will need
++ * to insert a shortcut if the new node wants to cluster with them.
++ */
++ if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
++ goto all_leaves_cluster_together;
++
++ /* Otherwise we can just insert a new node ahead of the old
++ * one.
++ */
++ goto present_leaves_cluster_but_not_new_leaf;
++ }
++
++split_node:
++ pr_devel("split node\n");
++
++ /* We need to split the current node; we know that the node doesn't
++ * simply contain a full set of leaves that cluster together (it
++ * contains meta pointers and/or non-clustering leaves).
++ *
++ * We need to expel at least two leaves out of a set consisting of the
++ * leaves in the node and the new leaf.
++ *
++ * We need a new node (n0) to replace the current one and a new node to
++ * take the expelled nodes (n1).
++ */
++ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++ new_n0->back_pointer = node->back_pointer;
++ new_n0->parent_slot = node->parent_slot;
++ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++ new_n1->parent_slot = -1; /* Need to calculate this */
++
++do_split_node:
++ pr_devel("do_split_node\n");
++
++ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++ new_n1->nr_leaves_on_branch = 0;
++
++ /* Begin by finding two matching leaves. There have to be at least two
++ * that match - even if there are meta pointers - because any leaf that
++ * would match a slot with a meta pointer in it must be somewhere
++ * behind that meta pointer and cannot be here. Further, given N
++ * remaining leaf slots, we now have N+1 leaves to go in them.
++ */
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ slot = edit->segment_cache[i];
++ if (slot != 0xff)
++ for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
++ if (edit->segment_cache[j] == slot)
++ goto found_slot_for_multiple_occupancy;
++ }
++found_slot_for_multiple_occupancy:
++ pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
++ BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
++ BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
++ BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
++
++ new_n1->parent_slot = slot;
++
++ /* Metadata pointers cannot change slot */
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
++ if (assoc_array_ptr_is_meta(node->slots[i]))
++ new_n0->slots[i] = node->slots[i];
++ else
++ new_n0->slots[i] = NULL;
++ BUG_ON(new_n0->slots[slot] != NULL);
++ new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
++
++ /* Filter the leaf pointers between the new nodes */
++ free_slot = -1;
++ next_slot = 0;
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ if (assoc_array_ptr_is_meta(node->slots[i]))
++ continue;
++ if (edit->segment_cache[i] == slot) {
++ new_n1->slots[next_slot++] = node->slots[i];
++ new_n1->nr_leaves_on_branch++;
++ } else {
++ do {
++ free_slot++;
++ } while (new_n0->slots[free_slot] != NULL);
++ new_n0->slots[free_slot] = node->slots[i];
++ }
++ }
++
++ pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
++
++ if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
++ do {
++ free_slot++;
++ } while (new_n0->slots[free_slot] != NULL);
++ edit->leaf_p = &new_n0->slots[free_slot];
++ edit->adjust_count_on = new_n0;
++ } else {
++ edit->leaf_p = &new_n1->slots[next_slot++];
++ edit->adjust_count_on = new_n1;
++ }
++
++ BUG_ON(next_slot <= 1);
++
++ edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ if (edit->segment_cache[i] == 0xff) {
++ ptr = node->slots[i];
++ BUG_ON(assoc_array_ptr_is_leaf(ptr));
++ if (assoc_array_ptr_is_node(ptr)) {
++ side = assoc_array_ptr_to_node(ptr);
++ edit->set_backpointers[i] = &side->back_pointer;
++ } else {
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ edit->set_backpointers[i] = &shortcut->back_pointer;
++ }
++ }
++ }
++
++ ptr = node->back_pointer;
++ if (!ptr)
++ edit->set[0].ptr = &edit->array->root;
++ else if (assoc_array_ptr_is_node(ptr))
++ edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
++ else
++ edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
++ edit->excised_meta[0] = assoc_array_node_to_ptr(node);
++ pr_devel("<--%s() = ok [split node]\n", __func__);
++ return true;
++
++present_leaves_cluster_but_not_new_leaf:
++ /* All the old leaves cluster in the same slot, but the new leaf wants
++ * to go into a different slot, so we create a new node to hold the new
++ * leaf and a pointer to a new node holding all the old leaves.
++ */
++ pr_devel("present leaves cluster but not new leaf\n");
++
++ new_n0->back_pointer = node->back_pointer;
++ new_n0->parent_slot = node->parent_slot;
++ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++ new_n1->parent_slot = edit->segment_cache[0];
++ new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
++ edit->adjust_count_on = new_n0;
++
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
++ new_n1->slots[i] = node->slots[i];
++
++ new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
++ edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
++
++ edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
++ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++ edit->excised_meta[0] = assoc_array_node_to_ptr(node);
++ pr_devel("<--%s() = ok [insert node before]\n", __func__);
++ return true;
++
++all_leaves_cluster_together:
++ /* All the leaves, new and old, want to cluster together in this node
++ * in the same slot, so we have to replace this node with a shortcut to
++ * skip over the identical parts of the key and then place a pair of
++ * nodes, one inside the other, at the end of the shortcut and
++ * distribute the keys between them.
++ *
++ * Firstly we need to work out where the leaves start diverging as a
++ * bit position into their keys so that we know how big the shortcut
++ * needs to be.
++ *
++ * We only need to make a single pass of N of the N+1 leaves because if
++ * any keys differ between themselves at bit X then at least one of
++ * them must also differ with the base key at bit X or before.
++ */
++ pr_devel("all leaves cluster together\n");
++ diff = INT_MAX;
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
++ assoc_array_ptr_to_leaf(node->slots[i]));
++ if (x < diff) {
++ BUG_ON(x < 0);
++ diff = x;
++ }
++ }
++ BUG_ON(diff == INT_MAX);
++ BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
++
++ keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++ new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
++ keylen * sizeof(unsigned long), GFP_KERNEL);
++ if (!new_s0)
++ return false;
++ edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
++
++ edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
++ new_s0->back_pointer = node->back_pointer;
++ new_s0->parent_slot = node->parent_slot;
++ new_s0->next_node = assoc_array_node_to_ptr(new_n0);
++ new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
++ new_n0->parent_slot = 0;
++ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++ new_n1->parent_slot = -1; /* Need to calculate this */
++
++ new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
++ pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
++ BUG_ON(level <= 0);
++
++ for (i = 0; i < keylen; i++)
++ new_s0->index_key[i] =
++ ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
++
++ blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
++ pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
++ new_s0->index_key[keylen - 1] &= ~blank;
++
++ /* This now reduces to a node splitting exercise for which we'll need
++ * to regenerate the disparity table.
++ */
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ ptr = node->slots[i];
++ base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
++ level);
++ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++ edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
++ }
++
++ base_seg = ops->get_key_chunk(index_key, level);
++ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++ edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
++ goto do_split_node;
++}
++
++/*
++ * Handle insertion into the middle of a shortcut.
++ */
++static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
++ const struct assoc_array_ops *ops,
++ struct assoc_array_walk_result *result)
++{
++ struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
++ struct assoc_array_node *node, *new_n0, *side;
++ unsigned long sc_segments, dissimilarity, blank;
++ size_t keylen;
++ int level, sc_level, diff;
++ int sc_slot;
++
++ shortcut = result->wrong_shortcut.shortcut;
++ level = result->wrong_shortcut.level;
++ sc_level = result->wrong_shortcut.sc_level;
++ sc_segments = result->wrong_shortcut.sc_segments;
++ dissimilarity = result->wrong_shortcut.dissimilarity;
++
++ pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
++ __func__, level, dissimilarity, sc_level);
++
++ /* We need to split a shortcut and insert a node between the two
++ * pieces. Zero-length pieces will be dispensed with entirely.
++ *
++ * First of all, we need to find out in which level the first
++ * difference was.
++ */
++ diff = __ffs(dissimilarity);
++ diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
++ diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
++ pr_devel("diff=%d\n", diff);
++
++ if (!shortcut->back_pointer) {
++ edit->set[0].ptr = &edit->array->root;
++ } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
++ node = assoc_array_ptr_to_node(shortcut->back_pointer);
++ edit->set[0].ptr = &node->slots[shortcut->parent_slot];
++ } else {
++ BUG();
++ }
++
++ edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
++
++ /* Create a new node now since we're going to need it anyway */
++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n0)
++ return false;
++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++ edit->adjust_count_on = new_n0;
++
++ /* Insert a new shortcut before the new node if this segment isn't of
++ * zero length - otherwise we just connect the new node directly to the
++ * parent.
++ */
++ level += ASSOC_ARRAY_LEVEL_STEP;
++ if (diff > level) {
++ pr_devel("pre-shortcut %d...%d\n", level, diff);
++ keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++ new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
++ keylen * sizeof(unsigned long), GFP_KERNEL);
++ if (!new_s0)
++ return false;
++ edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
++ edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
++ new_s0->back_pointer = shortcut->back_pointer;
++ new_s0->parent_slot = shortcut->parent_slot;
++ new_s0->next_node = assoc_array_node_to_ptr(new_n0);
++ new_s0->skip_to_level = diff;
++
++ new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
++ new_n0->parent_slot = 0;
++
++ memcpy(new_s0->index_key, shortcut->index_key,
++ keylen * sizeof(unsigned long));
++
++ blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
++ pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
++ new_s0->index_key[keylen - 1] &= ~blank;
++ } else {
++ pr_devel("no pre-shortcut\n");
++ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++ new_n0->back_pointer = shortcut->back_pointer;
++ new_n0->parent_slot = shortcut->parent_slot;
++ }
++
++ side = assoc_array_ptr_to_node(shortcut->next_node);
++ new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
++
++ /* We need to know which slot in the new node is going to take a
++ * metadata pointer.
++ */
++ sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
++ sc_slot &= ASSOC_ARRAY_FAN_MASK;
++
++ pr_devel("new slot %lx >> %d -> %d\n",
++ sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
++
++ /* Determine whether we need to follow the new node with a replacement
++ * for the current shortcut. We could in theory reuse the current
++ * shortcut if its parent slot number doesn't change - but that's a
++ * 1-in-16 chance so not worth expending the code upon.
++ */
++ level = diff + ASSOC_ARRAY_LEVEL_STEP;
++ if (level < shortcut->skip_to_level) {
++ pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
++ keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++ new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
++ keylen * sizeof(unsigned long), GFP_KERNEL);
++ if (!new_s1)
++ return false;
++ edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
++
++ new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
++ new_s1->parent_slot = sc_slot;
++ new_s1->next_node = shortcut->next_node;
++ new_s1->skip_to_level = shortcut->skip_to_level;
++
++ new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
++
++ memcpy(new_s1->index_key, shortcut->index_key,
++ keylen * sizeof(unsigned long));
++
++ edit->set[1].ptr = &side->back_pointer;
++ edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
++ } else {
++ pr_devel("no post-shortcut\n");
++
++ /* We don't have to replace the pointed-to node as long as we
++ * use memory barriers to make sure the parent slot number is
++ * changed before the back pointer (the parent slot number is
++ * irrelevant to the old parent shortcut).
++ */
++ new_n0->slots[sc_slot] = shortcut->next_node;
++ edit->set_parent_slot[0].p = &side->parent_slot;
++ edit->set_parent_slot[0].to = sc_slot;
++ edit->set[1].ptr = &side->back_pointer;
++ edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++ }
++
++ /* Install the new leaf in a spare slot in the new node. */
++ if (sc_slot == 0)
++ edit->leaf_p = &new_n0->slots[1];
++ else
++ edit->leaf_p = &new_n0->slots[0];
++
++ pr_devel("<--%s() = ok [split shortcut]\n", __func__);
++ return true;
++}
++
++/**
++ * assoc_array_insert - Script insertion of an object into an associative array
++ * @array: The array to insert into.
++ * @ops: The operations to use.
++ * @index_key: The key to insert at.
++ * @object: The object to insert.
++ *
++ * Precalculate and preallocate a script for the insertion or replacement of an
++ * object in an associative array. This results in an edit script that can
++ * either be applied or cancelled.
++ *
++ * The function returns a pointer to an edit script or -ENOMEM.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key,
++ void *object)
++{
++ struct assoc_array_walk_result result;
++ struct assoc_array_edit *edit;
++
++ pr_devel("-->%s()\n", __func__);
++
++ /* The leaf pointer we're given must not have the bottom bit set as we
++ * use those for type-marking the pointer. NULL pointers are also not
++ * allowed as they indicate an empty slot but we have to allow them
++ * here as they can be updated later.
++ */
++ BUG_ON(assoc_array_ptr_is_meta(object));
++
++ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++ if (!edit)
++ return ERR_PTR(-ENOMEM);
++ edit->array = array;
++ edit->ops = ops;
++ edit->leaf = assoc_array_leaf_to_ptr(object);
++ edit->adjust_count_by = 1;
++
++ switch (assoc_array_walk(array, ops, index_key, &result)) {
++ case assoc_array_walk_tree_empty:
++ /* Allocate a root node if there isn't one yet */
++ if (!assoc_array_insert_in_empty_tree(edit))
++ goto enomem;
++ return edit;
++
++ case assoc_array_walk_found_terminal_node:
++ /* We found a node that doesn't have a node/shortcut pointer in
++ * the slot corresponding to the index key that we have to
++ * follow.
++ */
++ if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
++ &result))
++ goto enomem;
++ return edit;
++
++ case assoc_array_walk_found_wrong_shortcut:
++ /* We found a shortcut that didn't match our key in a slot we
++ * needed to follow.
++ */
++ if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
++ goto enomem;
++ return edit;
++ }
++
++enomem:
++ /* Clean up after an out of memory error */
++ pr_devel("enomem\n");
++ assoc_array_cancel_edit(edit);
++ return ERR_PTR(-ENOMEM);
++}
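++
++/* A sketch of the intended calling pattern (demo_* names hypothetical):
++ *
++ *	mutex_lock(&demo_lock);
++ *	edit = assoc_array_insert(&demo_array, &demo_ops, &key, obj);
++ *	if (!IS_ERR(edit))
++ *		assoc_array_apply_edit(edit);
++ *	mutex_unlock(&demo_lock);
++ *
++ * To back out instead, call assoc_array_cancel_edit(); note that this does
++ * not free obj - the object remains the caller's to dispose of.
++ */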
++
++/**
++ * assoc_array_insert_set_object - Set the new object pointer in an edit script
++ * @edit: The edit script to modify.
++ * @object: The object pointer to set.
++ *
++ * Change the object to be inserted in an edit script. The object pointed to
++ * by the old object is not freed. This must be done prior to applying the
++ * script.
++ */
++void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
++{
++ BUG_ON(!object);
++ edit->leaf = assoc_array_leaf_to_ptr(object);
++}
++
++struct assoc_array_delete_collapse_context {
++ struct assoc_array_node *node;
++ const void *skip_leaf;
++ int slot;
++};
++
++/*
++ * Subtree collapse to node iterator.
++ */
++static int assoc_array_delete_collapse_iterator(const void *leaf,
++ void *iterator_data)
++{
++ struct assoc_array_delete_collapse_context *collapse = iterator_data;
++
++ if (leaf == collapse->skip_leaf)
++ return 0;
++
++ BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
++
++ collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
++ return 0;
++}
++
++/**
++ * assoc_array_delete - Script deletion of an object from an associative array
++ * @array: The array to search.
++ * @ops: The operations to use.
++ * @index_key: The key to the object.
++ *
++ * Precalculate and preallocate a script for the deletion of an object from an
++ * associative array. This results in an edit script that can either be
++ * applied or cancelled.
++ *
++ * The function returns a pointer to an edit script if the object was found,
++ * NULL if the object was not found, or -ENOMEM if out of memory.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ const void *index_key)
++{
++ struct assoc_array_delete_collapse_context collapse;
++ struct assoc_array_walk_result result;
++ struct assoc_array_node *node, *new_n0;
++ struct assoc_array_edit *edit;
++ struct assoc_array_ptr *ptr;
++ bool has_meta;
++ int slot, i;
++
++ pr_devel("-->%s()\n", __func__);
++
++ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++ if (!edit)
++ return ERR_PTR(-ENOMEM);
++ edit->array = array;
++ edit->ops = ops;
++ edit->adjust_count_by = -1;
++
++ switch (assoc_array_walk(array, ops, index_key, &result)) {
++ case assoc_array_walk_found_terminal_node:
++ /* We found a node that should contain the leaf we've been
++ * asked to remove - *if* it's in the tree.
++ */
++ pr_devel("terminal_node\n");
++ node = result.terminal_node.node;
++
++ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = node->slots[slot];
++ if (ptr &&
++ assoc_array_ptr_is_leaf(ptr) &&
++ ops->compare_object(assoc_array_ptr_to_leaf(ptr),
++ index_key))
++ goto found_leaf;
++ }
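++ /* The leaf wasn't in the terminal node, so fall through to the
++ * not-found handling.
++ */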
++ case assoc_array_walk_tree_empty:
++ case assoc_array_walk_found_wrong_shortcut:
++ default:
++ assoc_array_cancel_edit(edit);
++ pr_devel("not found\n");
++ return NULL;
++ }
++
++found_leaf:
++ BUG_ON(array->nr_leaves_on_tree <= 0);
++
++ /* In the simplest form of deletion we just clear the slot and release
++ * the leaf after a suitable interval.
++ */
++ edit->dead_leaf = node->slots[slot];
++ edit->set[0].ptr = &node->slots[slot];
++ edit->set[0].to = NULL;
++ edit->adjust_count_on = node;
++
++ /* If that concludes erasure of the last leaf, then delete the entire
++ * internal array.
++ */
++ if (array->nr_leaves_on_tree == 1) {
++ edit->set[1].ptr = &array->root;
++ edit->set[1].to = NULL;
++ edit->adjust_count_on = NULL;
++ edit->excised_subtree = array->root;
++ pr_devel("all gone\n");
++ return edit;
++ }
++
++ /* However, we'd also like to clear up some metadata blocks if we
++ * possibly can.
++ *
++ * We go for a simple algorithm of: if this node has FAN_OUT or fewer
++ * leaves in it, then attempt to collapse it - and attempt to
++ * recursively collapse up the tree.
++ *
++ * We could also try and collapse in partially filled subtrees to take
++ * up space in this node.
++ */
++ if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++ struct assoc_array_node *parent, *grandparent;
++ struct assoc_array_ptr *ptr;
++
++ /* First of all, we need to know if this node has metadata so
++ * that we don't try collapsing if all the leaves are already
++ * here.
++ */
++ has_meta = false;
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ ptr = node->slots[i];
++ if (assoc_array_ptr_is_meta(ptr)) {
++ has_meta = true;
++ break;
++ }
++ }
++
++ pr_devel("leaves: %ld [m=%d]\n",
++ node->nr_leaves_on_branch - 1, has_meta);
++
++ /* Look further up the tree to see if we can collapse this node
++ * into a more proximal node too.
++ */
++ parent = node;
++ collapse_up:
++ pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
++
++ ptr = parent->back_pointer;
++ if (!ptr)
++ goto do_collapse;
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
++ ptr = s->back_pointer;
++ if (!ptr)
++ goto do_collapse;
++ }
++
++ grandparent = assoc_array_ptr_to_node(ptr);
++ if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++ parent = grandparent;
++ goto collapse_up;
++ }
++
++ do_collapse:
++ /* There's no point collapsing if the original node has no meta
++ * pointers to discard and if we didn't merge into one of that
++ * node's ancestors.
++ */
++ if (has_meta || parent != node) {
++ node = parent;
++
++ /* Create a new node to collapse into */
++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n0)
++ goto enomem;
++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++
++ new_n0->back_pointer = node->back_pointer;
++ new_n0->parent_slot = node->parent_slot;
++ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++ edit->adjust_count_on = new_n0;
++
++ collapse.node = new_n0;
++ collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
++ collapse.slot = 0;
++ assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
++ node->back_pointer,
++ assoc_array_delete_collapse_iterator,
++ &collapse);
++ pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
++ BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
++
++ if (!node->back_pointer) {
++ edit->set[1].ptr = &array->root;
++ } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
++ BUG();
++ } else if (assoc_array_ptr_is_node(node->back_pointer)) {
++ struct assoc_array_node *p =
++ assoc_array_ptr_to_node(node->back_pointer);
++ edit->set[1].ptr = &p->slots[node->parent_slot];
++ } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
++ struct assoc_array_shortcut *s =
++ assoc_array_ptr_to_shortcut(node->back_pointer);
++ edit->set[1].ptr = &s->next_node;
++ }
++ edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++ edit->excised_subtree = assoc_array_node_to_ptr(node);
++ }
++ }
++
++ return edit;
++
++enomem:
++ /* Clean up after an out of memory error */
++ pr_devel("enomem\n");
++ assoc_array_cancel_edit(edit);
++ return ERR_PTR(-ENOMEM);
++}
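++
++/* Sketch of handling the three-way result (demo_* names hypothetical):
++ *
++ *	edit = assoc_array_delete(&demo_array, &demo_ops, &key);
++ *	if (IS_ERR(edit))
++ *		return PTR_ERR(edit);	(allocation failure)
++ *	if (!edit)
++ *		return -ENOENT;		(no matching object)
++ *	assoc_array_apply_edit(edit);	(object freed after an RCU grace period)
++ */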
++
++/**
++ * assoc_array_clear - Script deletion of all objects from an associative array
++ * @array: The array to clear.
++ * @ops: The operations to use.
++ *
++ * Precalculate and preallocate a script for the deletion of all the objects
++ * from an associative array. This results in an edit script that can either
++ * be applied or cancelled.
++ *
++ * The function returns a pointer to an edit script if there are objects to be
++ * deleted, NULL if there are no objects in the array, or -ENOMEM if out of
++ * memory.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
++ const struct assoc_array_ops *ops)
++{
++ struct assoc_array_edit *edit;
++
++ pr_devel("-->%s()\n", __func__);
++
++ if (!array->root)
++ return NULL;
++
++ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++ if (!edit)
++ return ERR_PTR(-ENOMEM);
++ edit->array = array;
++ edit->ops = ops;
++ edit->set[1].ptr = &array->root;
++ edit->set[1].to = NULL;
++ edit->excised_subtree = array->root;
++ edit->ops_for_excised_subtree = ops;
++ pr_devel("all gone\n");
++ return edit;
++}
++
++/*
++ * Handle the deferred destruction after an applied edit.
++ */
++static void assoc_array_rcu_cleanup(struct rcu_head *head)
++{
++ struct assoc_array_edit *edit =
++ container_of(head, struct assoc_array_edit, rcu);
++ int i;
++
++ pr_devel("-->%s()\n", __func__);
++
++ if (edit->dead_leaf)
++ edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
++ for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
++ if (edit->excised_meta[i])
++ kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
++
++ if (edit->excised_subtree) {
++ BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
++ if (assoc_array_ptr_is_node(edit->excised_subtree)) {
++ struct assoc_array_node *n =
++ assoc_array_ptr_to_node(edit->excised_subtree);
++ n->back_pointer = NULL;
++ } else {
++ struct assoc_array_shortcut *s =
++ assoc_array_ptr_to_shortcut(edit->excised_subtree);
++ s->back_pointer = NULL;
++ }
++ assoc_array_destroy_subtree(edit->excised_subtree,
++ edit->ops_for_excised_subtree);
++ }
++
++ kfree(edit);
++}
++
++/**
++ * assoc_array_apply_edit - Apply an edit script to an associative array
++ * @edit: The script to apply.
++ *
++ * Apply an edit script to an associative array to effect an insertion,
++ * deletion or clearance. As the edit script includes preallocated memory,
++ * this is guaranteed not to fail.
++ *
++ * The edit script, dead objects and dead metadata will be scheduled for
++ * destruction after an RCU grace period to permit those doing read-only
++ * accesses on the array to continue to do so under the RCU read lock whilst
++ * the edit is taking place.
++ */
++void assoc_array_apply_edit(struct assoc_array_edit *edit)
++{
++ struct assoc_array_shortcut *shortcut;
++ struct assoc_array_node *node;
++ struct assoc_array_ptr *ptr;
++ int i;
++
++ pr_devel("-->%s()\n", __func__);
++
++ smp_wmb();
++ if (edit->leaf_p)
++ *edit->leaf_p = edit->leaf;
++
++ smp_wmb();
++ for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
++ if (edit->set_parent_slot[i].p)
++ *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
++
++ smp_wmb();
++ for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
++ if (edit->set_backpointers[i])
++ *edit->set_backpointers[i] = edit->set_backpointers_to;
++
++ smp_wmb();
++ for (i = 0; i < ARRAY_SIZE(edit->set); i++)
++ if (edit->set[i].ptr)
++ *edit->set[i].ptr = edit->set[i].to;
++
++ if (edit->array->root == NULL) {
++ edit->array->nr_leaves_on_tree = 0;
++ } else if (edit->adjust_count_on) {
++ node = edit->adjust_count_on;
++ for (;;) {
++ node->nr_leaves_on_branch += edit->adjust_count_by;
++
++ ptr = node->back_pointer;
++ if (!ptr)
++ break;
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ ptr = shortcut->back_pointer;
++ if (!ptr)
++ break;
++ }
++ BUG_ON(!assoc_array_ptr_is_node(ptr));
++ node = assoc_array_ptr_to_node(ptr);
++ }
++
++ edit->array->nr_leaves_on_tree += edit->adjust_count_by;
++ }
++
++ call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
++}
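++
++/* The barriers above pair with the RCU read side, where a lookup such as
++ * assoc_array_find() runs with only (demo_* names hypothetical):
++ *
++ *	rcu_read_lock();
++ *	obj = assoc_array_find(&demo_array, &demo_ops, &key);
++ *	...
++ *	rcu_read_unlock();
++ *
++ * The smp_wmb() calls order the stores so that a new leaf is visible before
++ * any pointer that leads to it; concurrent readers therefore see either the
++ * old tree or the new one, never a partially built hybrid.
++ */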
++
++/**
++ * assoc_array_cancel_edit - Discard an edit script.
++ * @edit: The script to discard.
++ *
++ * Free an edit script and all the preallocated data it holds without making
++ * any changes to the associative array it was intended for.
++ *
++ * NOTE! In the case of an insertion script, this does _not_ release the leaf
++ * that was to be inserted. That is left to the caller.
++ */
++void assoc_array_cancel_edit(struct assoc_array_edit *edit)
++{
++ struct assoc_array_ptr *ptr;
++ int i;
++
++ pr_devel("-->%s()\n", __func__);
++
++ /* Clean up after an out of memory error */
++ for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
++ ptr = edit->new_meta[i];
++ if (ptr) {
++ if (assoc_array_ptr_is_node(ptr))
++ kfree(assoc_array_ptr_to_node(ptr));
++ else
++ kfree(assoc_array_ptr_to_shortcut(ptr));
++ }
++ }
++ kfree(edit);
++}
++
++/**
++ * assoc_array_gc - Garbage collect an associative array.
++ * @array: The array to clean.
++ * @ops: The operations to use.
++ * @iterator: A callback function to pass judgement on each object.
++ * @iterator_data: Private data for the callback function.
++ *
++ * Collect garbage from an associative array and pack down the internal tree to
++ * save memory.
++ *
++ * The iterator function is asked to pass judgement upon each object in the
++ * array. If it returns false, the object is discarded and if it returns true,
++ * the object is kept. If it returns true, it must increment the object's
++ * usage count (or whatever it needs to do to retain it) before returning.
++ *
++ * This function returns 0 if successful or -ENOMEM if out of memory. In the
++ * latter case, the array is not changed.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++int assoc_array_gc(struct assoc_array *array,
++ const struct assoc_array_ops *ops,
++ bool (*iterator)(void *object, void *iterator_data),
++ void *iterator_data)
++{
++ struct assoc_array_shortcut *shortcut, *new_s;
++ struct assoc_array_node *node, *new_n;
++ struct assoc_array_edit *edit;
++ struct assoc_array_ptr *cursor, *ptr;
++ struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
++ unsigned long nr_leaves_on_tree;
++ int keylen, slot, nr_free, next_slot, i;
++
++ pr_devel("-->%s()\n", __func__);
++
++ if (!array->root)
++ return 0;
++
++ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++ if (!edit)
++ return -ENOMEM;
++ edit->array = array;
++ edit->ops = ops;
++ edit->ops_for_excised_subtree = ops;
++ edit->set[0].ptr = &array->root;
++ edit->excised_subtree = array->root;
++
++ new_root = new_parent = NULL;
++ new_ptr_pp = &new_root;
++ cursor = array->root;
++
++descend:
++ /* If this point is a shortcut, then we need to duplicate it and
++ * advance the target cursor.
++ */
++ if (assoc_array_ptr_is_shortcut(cursor)) {
++ shortcut = assoc_array_ptr_to_shortcut(cursor);
++ keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++ new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
++ keylen * sizeof(unsigned long), GFP_KERNEL);
++ if (!new_s)
++ goto enomem;
++ pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
++ memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
++ keylen * sizeof(unsigned long)));
++ new_s->back_pointer = new_parent;
++ new_s->parent_slot = shortcut->parent_slot;
++ *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
++ new_ptr_pp = &new_s->next_node;
++ cursor = shortcut->next_node;
++ }
++
++ /* Duplicate the node at this position */
++ node = assoc_array_ptr_to_node(cursor);
++ new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++ if (!new_n)
++ goto enomem;
++ pr_devel("dup node %p -> %p\n", node, new_n);
++ new_n->back_pointer = new_parent;
++ new_n->parent_slot = node->parent_slot;
++ *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
++ new_ptr_pp = NULL;
++ slot = 0;
++
++continue_node:
++ /* Filter across any leaves and gc any subtrees */
++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = node->slots[slot];
++ if (!ptr)
++ continue;
++
++ if (assoc_array_ptr_is_leaf(ptr)) {
++ if (iterator(assoc_array_ptr_to_leaf(ptr),
++ iterator_data))
++ /* The iterator will have done any reference
++ * counting on the object for us.
++ */
++ new_n->slots[slot] = ptr;
++ continue;
++ }
++
++ new_ptr_pp = &new_n->slots[slot];
++ cursor = ptr;
++ goto descend;
++ }
++
++ pr_devel("-- compress node %p --\n", new_n);
++
++ /* Count up the number of empty slots in this node and work out the
++ * subtree leaf count.
++ */
++ new_n->nr_leaves_on_branch = 0;
++ nr_free = 0;
++ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = new_n->slots[slot];
++ if (!ptr)
++ nr_free++;
++ else if (assoc_array_ptr_is_leaf(ptr))
++ new_n->nr_leaves_on_branch++;
++ }
++ pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
++
++ /* See what we can fold in */
++ next_slot = 0;
++ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ struct assoc_array_shortcut *s;
++ struct assoc_array_node *child;
++
++ ptr = new_n->slots[slot];
++ if (!ptr || assoc_array_ptr_is_leaf(ptr))
++ continue;
++
++ s = NULL;
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ s = assoc_array_ptr_to_shortcut(ptr);
++ ptr = s->next_node;
++ }
++
++ child = assoc_array_ptr_to_node(ptr);
++ new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
++
++ if (child->nr_leaves_on_branch <= nr_free + 1) {
++ /* Fold the child node into this one */
++ pr_devel("[%d] fold node %lu/%d [nx %d]\n",
++ slot, child->nr_leaves_on_branch, nr_free + 1,
++ next_slot);
++
++ /* We would already have reaped an intervening shortcut
++ * on the way back up the tree.
++ */
++ BUG_ON(s);
++
++ new_n->slots[slot] = NULL;
++ nr_free++;
++ if (slot < next_slot)
++ next_slot = slot;
++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++ struct assoc_array_ptr *p = child->slots[i];
++ if (!p)
++ continue;
++ BUG_ON(assoc_array_ptr_is_meta(p));
++ while (new_n->slots[next_slot])
++ next_slot++;
++ BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
++ new_n->slots[next_slot++] = p;
++ nr_free--;
++ }
++ kfree(child);
++ } else {
++ pr_devel("[%d] retain node %lu/%d [nx %d]\n",
++ slot, child->nr_leaves_on_branch, nr_free + 1,
++ next_slot);
++ }
++ }
++
++ pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
++
++ nr_leaves_on_tree = new_n->nr_leaves_on_branch;
++
++ /* Excise this node if it is singly occupied by a shortcut */
++ if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
++ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
++ if ((ptr = new_n->slots[slot]))
++ break;
++
++ if (assoc_array_ptr_is_meta(ptr) &&
++ assoc_array_ptr_is_shortcut(ptr)) {
++ pr_devel("excise node %p with 1 shortcut\n", new_n);
++ new_s = assoc_array_ptr_to_shortcut(ptr);
++ new_parent = new_n->back_pointer;
++ slot = new_n->parent_slot;
++ kfree(new_n);
++ if (!new_parent) {
++ new_s->back_pointer = NULL;
++ new_s->parent_slot = 0;
++ new_root = ptr;
++ goto gc_complete;
++ }
++
++ if (assoc_array_ptr_is_shortcut(new_parent)) {
++ /* We can discard any preceding shortcut also */
++ struct assoc_array_shortcut *s =
++ assoc_array_ptr_to_shortcut(new_parent);
++
++ pr_devel("excise preceding shortcut\n");
++
++ new_parent = new_s->back_pointer = s->back_pointer;
++ slot = new_s->parent_slot = s->parent_slot;
++ kfree(s);
++ if (!new_parent) {
++ new_s->back_pointer = NULL;
++ new_s->parent_slot = 0;
++ new_root = ptr;
++ goto gc_complete;
++ }
++ }
++
++ new_s->back_pointer = new_parent;
++ new_s->parent_slot = slot;
++ new_n = assoc_array_ptr_to_node(new_parent);
++ new_n->slots[slot] = ptr;
++ goto ascend_old_tree;
++ }
++ }
++
++ /* Excise any shortcuts we might encounter that point to nodes that
++ * only contain leaves.
++ */
++ ptr = new_n->back_pointer;
++ if (!ptr)
++ goto gc_complete;
++
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ new_s = assoc_array_ptr_to_shortcut(ptr);
++ new_parent = new_s->back_pointer;
++ slot = new_s->parent_slot;
++
++ if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
++ struct assoc_array_node *n;
++
++ pr_devel("excise shortcut\n");
++ new_n->back_pointer = new_parent;
++ new_n->parent_slot = slot;
++ kfree(new_s);
++ if (!new_parent) {
++ new_root = assoc_array_node_to_ptr(new_n);
++ goto gc_complete;
++ }
++
++ n = assoc_array_ptr_to_node(new_parent);
++ n->slots[slot] = assoc_array_node_to_ptr(new_n);
++ }
++ } else {
++ new_parent = ptr;
++ }
++ new_n = assoc_array_ptr_to_node(new_parent);
++
++ascend_old_tree:
++ ptr = node->back_pointer;
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ slot = shortcut->parent_slot;
++ cursor = shortcut->back_pointer;
++ } else {
++ slot = node->parent_slot;
++ cursor = ptr;
++ }
++ BUG_ON(!ptr);
++ node = assoc_array_ptr_to_node(cursor);
++ slot++;
++ goto continue_node;
++
++gc_complete:
++ edit->set[0].to = new_root;
++ assoc_array_apply_edit(edit);
++ edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
++ return 0;
++
++enomem:
++ pr_devel("enomem\n");
++ assoc_array_destroy_subtree(new_root, edit->ops);
++ kfree(edit);
++ return -ENOMEM;
++}
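++
++/* A hypothetical retention filter for the iterator (demo_* names made up):
++ *
++ *	static bool demo_keep(void *object, void *iterator_data)
++ *	{
++ *		struct demo_obj *obj = object;
++ *
++ *		if (obj->expired)
++ *			return false;		(discarded by the GC)
++ *		atomic_inc(&obj->usage);	(kept: take the extra ref)
++ *		return true;
++ *	}
++ */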
+--
+1.8.3.1
+
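+As a rough end-to-end illustration of the API introduced by this patch (all
+of the demo_* names below are made up for the example; only the assoc_array_*
+calls and the ops structure come from the patch itself), a user with
+single-word index keys might wire it up like this:
+
+	struct demo_obj {
+		unsigned long	key;
+	};
+
+	/* Single-word index keys: one chunk, no divergence past level 0. */
+	static unsigned long demo_get_key_chunk(const void *k, int level)
+	{
+		return *(const unsigned long *)k;
+	}
+
+	static unsigned long demo_get_object_key_chunk(const void *o, int level)
+	{
+		return ((const struct demo_obj *)o)->key;
+	}
+
+	static bool demo_compare_object(const void *o, const void *k)
+	{
+		return ((const struct demo_obj *)o)->key ==
+			*(const unsigned long *)k;
+	}
+
+	/* Bit position of the first difference between two objects' keys. */
+	static int demo_diff_objects(const void *a, const void *b)
+	{
+		unsigned long x = ((const struct demo_obj *)a)->key ^
+				  ((const struct demo_obj *)b)->key;
+		return x ? __ffs(x) : -1;
+	}
+
+	static void demo_free_object(void *o)
+	{
+		kfree(o);
+	}
+
+	static const struct assoc_array_ops demo_ops = {
+		.get_key_chunk		= demo_get_key_chunk,
+		.get_object_key_chunk	= demo_get_object_key_chunk,
+		.compare_object		= demo_compare_object,
+		.diff_objects		= demo_diff_objects,
+		.free_object		= demo_free_object,
+	};
+
+	/* Writers serialise on a lock; readers only need rcu_read_lock(). */
+	static int demo_add(struct assoc_array *array, struct demo_obj *obj)
+	{
+		struct assoc_array_edit *edit;
+
+		edit = assoc_array_insert(array, &demo_ops, &obj->key, obj);
+		if (IS_ERR(edit))
+			return PTR_ERR(edit);
+		assoc_array_apply_edit(edit);
+		return 0;
+	}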
+
+From 03ac60b84587fa8e57e7ec5cd3d59b7fa8d97c79 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 10/10] KEYS: Expand the capacity of a keyring
+
+Expand the capacity of a keyring to be able to hold a lot more keys by using
+the previously added associative array implementation. Currently the maximum
+capacity is:
+
+ (PAGE_SIZE - sizeof(header)) / sizeof(struct key *)
+
+which, on a 64-bit system, is a little more than 500. However, since this is
+being
+used for the NFS uid mapper, we need more than that. The new implementation
+gives us effectively unlimited capacity.
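+
+(For concreteness: with 4096-byte pages, 8-byte key pointers and a
+keyring_list header of roughly 24 bytes, the old ceiling works out at
+(4096 - 24) / 8 = 509 keys per keyring.)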
+
+With some alterations, the keyutils testsuite runs successfully to completion
+after this patch is applied. The alterations are because (a) keyrings that
+are simply added to no longer appear ordered and (b) some of the errors have
+changed a bit.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+---
+ include/keys/keyring-type.h | 17 +-
+ include/linux/key.h | 13 +-
+ lib/assoc_array.c | 1 +
+ security/keys/Kconfig | 1 +
+ security/keys/gc.c | 33 +-
+ security/keys/internal.h | 17 +-
+ security/keys/key.c | 35 +-
+ security/keys/keyring.c | 1436 ++++++++++++++++++++++---------------------
+ security/keys/request_key.c | 12 +-
+ 9 files changed, 803 insertions(+), 762 deletions(-)
+
+diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h
+index cf49159..fca5c62 100644
+--- a/include/keys/keyring-type.h
++++ b/include/keys/keyring-type.h
+@@ -1,6 +1,6 @@
+ /* Keyring key type
+ *
+- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
++ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+@@ -13,19 +13,6 @@
+ #define _KEYS_KEYRING_TYPE_H
+
+ #include <linux/key.h>
+-#include <linux/rcupdate.h>
+-
+-/*
+- * the keyring payload contains a list of the keys to which the keyring is
+- * subscribed
+- */
+-struct keyring_list {
+- struct rcu_head rcu; /* RCU deletion hook */
+- unsigned short maxkeys; /* max keys this list can hold */
+- unsigned short nkeys; /* number of keys currently held */
+- unsigned short delkey; /* key to be unlinked by RCU */
+- struct key __rcu *keys[0];
+-};
+-
++#include <linux/assoc_array.h>
+
+ #endif /* _KEYS_KEYRING_TYPE_H */
+diff --git a/include/linux/key.h b/include/linux/key.h
+index ef596c7..2417f78 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -22,6 +22,7 @@
+ #include <linux/sysctl.h>
+ #include <linux/rwsem.h>
+ #include <linux/atomic.h>
++#include <linux/assoc_array.h>
+
+ #ifdef __KERNEL__
+ #include <linux/uidgid.h>
+@@ -196,11 +197,13 @@ struct key {
+ * whatever
+ */
+ union {
+- unsigned long value;
+- void __rcu *rcudata;
+- void *data;
+- struct keyring_list __rcu *subscriptions;
+- } payload;
++ union {
++ unsigned long value;
++ void __rcu *rcudata;
++ void *data;
++ } payload;
++ struct assoc_array keys;
++ };
+ };
+
+ extern struct key *key_alloc(struct key_type *type,
+diff --git a/lib/assoc_array.c b/lib/assoc_array.c
+index a095281..17edeaf 100644
+--- a/lib/assoc_array.c
++++ b/lib/assoc_array.c
+@@ -12,6 +12,7 @@
+ */
+ //#define DEBUG
+ #include <linux/slab.h>
++#include <linux/err.h>
+ #include <linux/assoc_array_priv.h>
+
+ /*
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index a90d6d3..15e0dfe 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -4,6 +4,7 @@
+
+ config KEYS
+ bool "Enable access key retention support"
++ select ASSOCIATIVE_ARRAY
+ help
+ This option provides support for retaining authentication tokens and
+ access keys in the kernel.
+diff --git a/security/keys/gc.c b/security/keys/gc.c
+index d67c97b..cce621c 100644
+--- a/security/keys/gc.c
++++ b/security/keys/gc.c
+@@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype)
+ kleave("");
+ }
+
++static int key_gc_keyring_func(const void *object, void *iterator_data)
++{
++ const struct key *key = object;
++ time_t *limit = iterator_data;
++ return key_is_dead(key, *limit);
++}
++
+ /*
+ * Garbage collect pointers from a keyring.
+ *
+@@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype)
+ */
+ static void key_gc_keyring(struct key *keyring, time_t limit)
+ {
+- struct keyring_list *klist;
+- int loop;
++ int result;
+
+- kenter("%x", key_serial(keyring));
++ kenter("%x{%s}", keyring->serial, keyring->description ?: "");
+
+ if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED)))
+@@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit)
+
+ /* scan the keyring looking for dead keys */
+ rcu_read_lock();
+- klist = rcu_dereference(keyring->payload.subscriptions);
+- if (!klist)
+- goto unlock_dont_gc;
+-
+- loop = klist->nkeys;
+- smp_rmb();
+- for (loop--; loop >= 0; loop--) {
+- struct key *key = rcu_dereference(klist->keys[loop]);
+- if (key_is_dead(key, limit))
+- goto do_gc;
+- }
+-
+-unlock_dont_gc:
++ result = assoc_array_iterate(&keyring->keys,
++ key_gc_keyring_func, &limit);
+ rcu_read_unlock();
++ if (result == true)
++ goto do_gc;
++
+ dont_gc:
+ kleave(" [no gc]");
+ return;
+
+ do_gc:
+- rcu_read_unlock();
+-
+ keyring_gc(keyring, limit);
+ kleave(" [gc]");
+ }
+@@ -392,7 +388,6 @@ found_unreferenced_key:
+ */
+ found_keyring:
+ spin_unlock(&key_serial_lock);
+- kdebug("scan keyring %d", key->serial);
+ key_gc_keyring(key, limit);
+ goto maybe_resched;
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 73950bf..581c6f6 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -90,20 +90,23 @@ extern void key_type_put(struct key_type *ktype);
+
+ extern int __key_link_begin(struct key *keyring,
+ const struct keyring_index_key *index_key,
+- unsigned long *_prealloc);
++ struct assoc_array_edit **_edit);
+ extern int __key_link_check_live_key(struct key *keyring, struct key *key);
+-extern void __key_link(struct key *keyring, struct key *key,
+- unsigned long *_prealloc);
++extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
+ extern void __key_link_end(struct key *keyring,
+ const struct keyring_index_key *index_key,
+- unsigned long prealloc);
++ struct assoc_array_edit *edit);
+
+-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct keyring_index_key *index_key);
++extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
++ const struct keyring_index_key *index_key);
+
+ extern struct key *keyring_search_instkey(struct key *keyring,
+ key_serial_t target_id);
+
++extern int iterate_over_keyring(const struct key *keyring,
++ int (*func)(const struct key *key, void *data),
++ void *data);
++
+ typedef int (*key_match_func_t)(const struct key *, const void *);
+
+ struct keyring_search_context {
+@@ -119,6 +122,8 @@ struct keyring_search_context {
+ #define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */
+ #define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */
+
++ int (*iterator)(const void *object, void *iterator_data);
++
+ /* Internal stuff */
+ int skipped_ret;
+ bool possessed;
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 7d716b8..a819b5c 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -409,7 +409,7 @@ static int __key_instantiate_and_link(struct key *key,
+ struct key_preparsed_payload *prep,
+ struct key *keyring,
+ struct key *authkey,
+- unsigned long *_prealloc)
++ struct assoc_array_edit **_edit)
+ {
+ int ret, awaken;
+
+@@ -436,7 +436,7 @@ static int __key_instantiate_and_link(struct key *key,
+
+ /* and link it into the destination keyring */
+ if (keyring)
+- __key_link(keyring, key, _prealloc);
++ __key_link(key, _edit);
+
+ /* disable the authorisation key */
+ if (authkey)
+@@ -476,7 +476,7 @@ int key_instantiate_and_link(struct key *key,
+ struct key *authkey)
+ {
+ struct key_preparsed_payload prep;
+- unsigned long prealloc;
++ struct assoc_array_edit *edit;
+ int ret;
+
+ memset(&prep, 0, sizeof(prep));
+@@ -490,16 +490,15 @@ int key_instantiate_and_link(struct key *key,
+ }
+
+ if (keyring) {
+- ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++ ret = __key_link_begin(keyring, &key->index_key, &edit);
+ if (ret < 0)
+ goto error_free_preparse;
+ }
+
+- ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
+- &prealloc);
++ ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
+
+ if (keyring)
+- __key_link_end(keyring, &key->index_key, prealloc);
++ __key_link_end(keyring, &key->index_key, edit);
+
+ error_free_preparse:
+ if (key->type->preparse)
+@@ -537,7 +536,7 @@ int key_reject_and_link(struct key *key,
+ struct key *keyring,
+ struct key *authkey)
+ {
+- unsigned long prealloc;
++ struct assoc_array_edit *edit;
+ struct timespec now;
+ int ret, awaken, link_ret = 0;
+
+@@ -548,7 +547,7 @@ int key_reject_and_link(struct key *key,
+ ret = -EBUSY;
+
+ if (keyring)
+- link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++ link_ret = __key_link_begin(keyring, &key->index_key, &edit);
+
+ mutex_lock(&key_construction_mutex);
+
+@@ -570,7 +569,7 @@ int key_reject_and_link(struct key *key,
+
+ /* and link it into the destination keyring */
+ if (keyring && link_ret == 0)
+- __key_link(keyring, key, &prealloc);
++ __key_link(key, &edit);
+
+ /* disable the authorisation key */
+ if (authkey)
+@@ -580,7 +579,7 @@ int key_reject_and_link(struct key *key,
+ mutex_unlock(&key_construction_mutex);
+
+ if (keyring)
+- __key_link_end(keyring, &key->index_key, prealloc);
++ __key_link_end(keyring, &key->index_key, edit);
+
+ /* wake up anyone waiting for a key to be constructed */
+ if (awaken)
+@@ -783,8 +782,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ .description = description,
+ };
+ struct key_preparsed_payload prep;
++ struct assoc_array_edit *edit;
+ const struct cred *cred = current_cred();
+- unsigned long prealloc;
+ struct key *keyring, *key = NULL;
+ key_ref_t key_ref;
+ int ret;
+@@ -828,7 +827,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ }
+ index_key.desc_len = strlen(index_key.description);
+
+- ret = __key_link_begin(keyring, &index_key, &prealloc);
++ ret = __key_link_begin(keyring, &index_key, &edit);
+ if (ret < 0) {
+ key_ref = ERR_PTR(ret);
+ goto error_free_prep;
+@@ -847,8 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ * update that instead if possible
+ */
+ if (index_key.type->update) {
+- key_ref = __keyring_search_one(keyring_ref, &index_key);
+- if (!IS_ERR(key_ref))
++ key_ref = find_key_to_update(keyring_ref, &index_key);
++ if (key_ref)
+ goto found_matching_key;
+ }
+
+@@ -874,7 +873,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ }
+
+ /* instantiate it and link it into the target keyring */
+- ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
++ ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
+ if (ret < 0) {
+ key_put(key);
+ key_ref = ERR_PTR(ret);
+@@ -884,7 +883,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
+
+ error_link_end:
+- __key_link_end(keyring, &index_key, prealloc);
++ __key_link_end(keyring, &index_key, edit);
+ error_free_prep:
+ if (index_key.type->preparse)
+ index_key.type->free_preparse(&prep);
+@@ -897,7 +896,7 @@ error:
+ /* we found a matching key, so we're going to try to update it
+ * - we can drop the locks first as we have the key pinned
+ */
+- __key_link_end(keyring, &index_key, prealloc);
++ __key_link_end(keyring, &index_key, edit);
+
+ key_ref = __key_update(key_ref, &prep);
+ goto error_free_prep;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index eeef1a0..f7cdea2 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -1,6 +1,6 @@
+ /* Keyring handling
+ *
+- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
++ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+@@ -17,25 +17,11 @@
+ #include <linux/seq_file.h>
+ #include <linux/err.h>
+ #include <keys/keyring-type.h>
++#include <keys/user-type.h>
++#include <linux/assoc_array_priv.h>
+ #include <linux/uaccess.h>
+ #include "internal.h"
+
+-#define rcu_dereference_locked_keyring(keyring) \
+- (rcu_dereference_protected( \
+- (keyring)->payload.subscriptions, \
+- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
+-
+-#define rcu_deref_link_locked(klist, index, keyring) \
+- (rcu_dereference_protected( \
+- (klist)->keys[index], \
+- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
+-
+-#define MAX_KEYRING_LINKS \
+- min_t(size_t, USHRT_MAX - 1, \
+- ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
+-
+-#define KEY_LINK_FIXQUOTA 1UL
+-
+ /*
+ * When plumbing the depths of the key tree, this sets a hard limit
+ * on how deep we're willing to go.
+@@ -47,6 +33,28 @@
+ */
+ #define KEYRING_NAME_HASH_SIZE (1 << 5)
+
++/*
++ * We mark pointers we pass to the associative array with bit 1 set if
++ * they're keyrings and clear otherwise.
++ */
++#define KEYRING_PTR_SUBTYPE 0x2UL
++
++static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
++{
++ return (unsigned long)x & KEYRING_PTR_SUBTYPE;
++}
++static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
++{
++ void *object = assoc_array_ptr_to_leaf(x);
++ return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE);
++}
++static inline void *keyring_key_to_ptr(struct key *key)
++{
++ if (key->type == &key_type_keyring)
++ return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE);
++ return key;
++}
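++
++/* Note: bit 0 of a leaf pointer is already used by the assoc_array code
++ * itself to mark metadata pointers, which is why the keyring marker lives
++ * in bit 1; struct key allocations are at least 4-byte aligned, so both
++ * low bits are free.
++ */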
++
+ static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE];
+ static DEFINE_RWLOCK(keyring_name_lock);
+
+@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
+ */
+ static int keyring_instantiate(struct key *keyring,
+ struct key_preparsed_payload *prep);
+-static int keyring_match(const struct key *keyring, const void *criterion);
+ static void keyring_revoke(struct key *keyring);
+ static void keyring_destroy(struct key *keyring);
+ static void keyring_describe(const struct key *keyring, struct seq_file *m);
+@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
+
+ struct key_type key_type_keyring = {
+ .name = "keyring",
+- .def_datalen = sizeof(struct keyring_list),
++ .def_datalen = 0,
+ .instantiate = keyring_instantiate,
+- .match = keyring_match,
++ .match = user_match,
+ .revoke = keyring_revoke,
+ .destroy = keyring_destroy,
+ .describe = keyring_describe,
+@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
+
+ ret = -EINVAL;
+ if (prep->datalen == 0) {
++ assoc_array_init(&keyring->keys);
+ /* make the keyring available by name if it has one */
+ keyring_publish_name(keyring);
+ ret = 0;
+@@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring,
+ }
+
+ /*
+- * Match keyrings on their name
++ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd
++ * fold the carry back too, but that requires inline asm.
++ */
++static u64 mult_64x32_and_fold(u64 x, u32 y)
++{
++ u64 hi = (u64)(u32)(x >> 32) * y;
++ u64 lo = (u64)(u32)(x) * y;
++ return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
++}
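++
++/* Rationale: writing x = xh * 2^32 + xl gives x * y = (xh * y) * 2^32 +
++ * (xl * y). hi and lo above are those two 64-bit partial products; the low
++ * 32 bits of hi slot in above lo, and the top 32 bits of hi - the part that
++ * would overflow 64 bits - are folded back in at the bottom.
++ */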
++
++/*
++ * Hash a key type and description.
++ */
++static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
++{
++ const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
++ const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
++ const char *description = index_key->description;
++ unsigned long hash, type;
++ u32 piece;
++ u64 acc;
++ int n, desc_len = index_key->desc_len;
++
++ type = (unsigned long)index_key->type;
++
++ acc = mult_64x32_and_fold(type, desc_len + 13);
++ acc = mult_64x32_and_fold(acc, 9207);
++ for (;;) {
++ n = desc_len;
++ if (n <= 0)
++ break;
++ if (n > 4)
++ n = 4;
++ piece = 0;
++ memcpy(&piece, description, n);
++ description += n;
++ desc_len -= n;
++ acc = mult_64x32_and_fold(acc, piece);
++ acc = mult_64x32_and_fold(acc, 9207);
++ }
++
++ /* Fold the hash down to 32 bits if need be. */
++ hash = acc;
++ if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
++ hash ^= acc >> 32;
++
++	/* Squidge all the keyrings into a separate part of the tree from
++ * ordinary keys by making sure the lowest level segment in the hash is
++ * zero for keyrings and non-zero otherwise.
++ */
++ if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
++ return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
++ if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
++ return (hash + (hash << level_shift)) & ~level_mask;
++ return hash;
++}
++
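[editor's note: the last two conditionals above are the load-bearing trick. With 16-way fan-out (the patch later speaks of "root slots 1-15"), forcing the lowest hash segment to zero for keyrings and non-zero for everything else clusters all nested keyrings in root slot 0, which is what lets search_nested_keyrings() below walk only the leftmost branch when hunting subtrees. A sketch of just that fix-up step, assuming 4-bit segments:

    #include <stdio.h>

    #define LEVEL_SHIFT 4          /* bits per tree level, assumed */
    #define LEVEL_MASK  0xfUL
    #define CHUNK_BITS  (sizeof(unsigned long) * 8)

    static unsigned long segregate(unsigned long hash, int is_keyring)
    {
        if (!is_keyring && (hash & LEVEL_MASK) == 0)
            /* Ordinary key hashed to segment 0: force it non-zero. */
            return hash | (hash >> (CHUNK_BITS - LEVEL_SHIFT)) | 1;
        if (is_keyring && (hash & LEVEL_MASK) != 0)
            /* Keyring hashed elsewhere: force its segment to zero. */
            return (hash + (hash << LEVEL_SHIFT)) & ~LEVEL_MASK;
        return hash;
    }

    int main(void)
    {
        printf("key:     %lx\n", segregate(0x1230UL, 0)); /* low nibble made non-zero */
        printf("keyring: %lx\n", segregate(0x1234UL, 1)); /* low nibble made zero */
        return 0;
    }
]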
++/*
++ * Build the next index key chunk.
++ *
++ * On 32-bit systems the index key is laid out as:
++ *
++ * 0 4 5 9...
++ * hash desclen typeptr desc[]
++ *
++ * On 64-bit systems:
++ *
++ * 0 8 9 17...
++ * hash desclen typeptr desc[]
++ *
++ * We return it one word-sized chunk at a time.
+ */
+-static int keyring_match(const struct key *keyring, const void *description)
++static unsigned long keyring_get_key_chunk(const void *data, int level)
++{
++ const struct keyring_index_key *index_key = data;
++ unsigned long chunk = 0;
++ long offset = 0;
++ int desc_len = index_key->desc_len, n = sizeof(chunk);
++
++ level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
++ switch (level) {
++ case 0:
++ return hash_key_type_and_desc(index_key);
++ case 1:
++ return ((unsigned long)index_key->type << 8) | desc_len;
++ case 2:
++ if (desc_len == 0)
++ return (u8)((unsigned long)index_key->type >>
++ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
++ n--;
++ offset = 1;
++ default:
++ offset += sizeof(chunk) - 1;
++ offset += (level - 3) * sizeof(chunk);
++ if (offset >= desc_len)
++ return 0;
++ desc_len -= offset;
++ if (desc_len > n)
++ desc_len = n;
++ offset += desc_len;
++ do {
++ chunk <<= 8;
++ chunk |= ((u8*)index_key->description)[--offset];
++ } while (--desc_len > 0);
++
++ if (level == 2) {
++ chunk <<= 8;
++ chunk |= (u8)((unsigned long)index_key->type >>
++ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
++ }
++ return chunk;
++ }
++}
++
++static unsigned long keyring_get_object_key_chunk(const void *object, int level)
++{
++ const struct key *key = keyring_ptr_to_key(object);
++ return keyring_get_key_chunk(&key->index_key, level);
++}
++
++static bool keyring_compare_object(const void *object, const void *data)
+ {
+- return keyring->description &&
+- strcmp(keyring->description, description) == 0;
++ const struct keyring_index_key *index_key = data;
++ const struct key *key = keyring_ptr_to_key(object);
++
++ return key->index_key.type == index_key->type &&
++ key->index_key.desc_len == index_key->desc_len &&
++ memcmp(key->index_key.description, index_key->description,
++ index_key->desc_len) == 0;
+ }
+
+ /*
++ * Compare the index keys of a pair of objects and determine the bit position
++ * at which they differ - if they differ.
++ */
++static int keyring_diff_objects(const void *_a, const void *_b)
++{
++ const struct key *key_a = keyring_ptr_to_key(_a);
++ const struct key *key_b = keyring_ptr_to_key(_b);
++ const struct keyring_index_key *a = &key_a->index_key;
++ const struct keyring_index_key *b = &key_b->index_key;
++ unsigned long seg_a, seg_b;
++ int level, i;
++
++ level = 0;
++ seg_a = hash_key_type_and_desc(a);
++ seg_b = hash_key_type_and_desc(b);
++ if ((seg_a ^ seg_b) != 0)
++ goto differ;
++
++ /* The number of bits contributed by the hash is controlled by a
++ * constant in the assoc_array headers. Everything else thereafter we
++ * can deal with as being machine word-size dependent.
++ */
++ level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
++ seg_a = a->desc_len;
++ seg_b = b->desc_len;
++ if ((seg_a ^ seg_b) != 0)
++ goto differ;
++
++ /* The next bit may not work on big endian */
++ level++;
++ seg_a = (unsigned long)a->type;
++ seg_b = (unsigned long)b->type;
++ if ((seg_a ^ seg_b) != 0)
++ goto differ;
++
++ level += sizeof(unsigned long);
++ if (a->desc_len == 0)
++ goto same;
++
++ i = 0;
++ if (((unsigned long)a->description | (unsigned long)b->description) &
++ (sizeof(unsigned long) - 1)) {
++ do {
++ seg_a = *(unsigned long *)(a->description + i);
++ seg_b = *(unsigned long *)(b->description + i);
++ if ((seg_a ^ seg_b) != 0)
++ goto differ_plus_i;
++ i += sizeof(unsigned long);
++ } while (i < (a->desc_len & (sizeof(unsigned long) - 1)));
++ }
++
++ for (; i < a->desc_len; i++) {
++ seg_a = *(unsigned char *)(a->description + i);
++ seg_b = *(unsigned char *)(b->description + i);
++ if ((seg_a ^ seg_b) != 0)
++ goto differ_plus_i;
++ }
++
++same:
++ return -1;
++
++differ_plus_i:
++ level += i;
++differ:
++ i = level * 8 + __ffs(seg_a ^ seg_b);
++ return i;
++}
++
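[editor's note: keyring_diff_objects() answers one question for the tree code - at which bit do two index keys first diverge? The idiom is to XOR comparable segments and, on the first non-zero result, turn the byte offset plus the lowest set bit into an absolute bit number. A small sketch of that idiom over two byte strings, using the GCC/Clang __builtin_ctzl in place of the kernel's __ffs:

    #include <stdio.h>

    /* Return the first differing bit between two equal-length buffers,
     * or -1 if identical. Byte-at-a-time for simplicity; the kernel
     * version reads word-sized segments where it can. */
    static int first_diff_bit(const unsigned char *a, const unsigned char *b,
                              size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            unsigned long x = a[i] ^ b[i];
            if (x)
                return (int)(i * 8) + __builtin_ctzl(x);
        }
        return -1;
    }

    int main(void)
    {
        printf("%d\n", first_diff_bit((const unsigned char *)"keyring A",
                                      (const unsigned char *)"keyring B", 9));
        return 0;
    }
]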
++/*
++ * Free an object after stripping the keyring flag off of the pointer.
++ */
++static void keyring_free_object(void *object)
++{
++ key_put(keyring_ptr_to_key(object));
++}
++
++/*
++ * Operations for keyring management by the index-tree routines.
++ */
++static const struct assoc_array_ops keyring_assoc_array_ops = {
++ .get_key_chunk = keyring_get_key_chunk,
++ .get_object_key_chunk = keyring_get_object_key_chunk,
++ .compare_object = keyring_compare_object,
++ .diff_objects = keyring_diff_objects,
++ .free_object = keyring_free_object,
++};
++
++/*
+ * Clean up a keyring when it is destroyed. Unpublish its name if it had one
+ * and dispose of its data.
+ *
+@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
+ */
+ static void keyring_destroy(struct key *keyring)
+ {
+- struct keyring_list *klist;
+- int loop;
+-
+ if (keyring->description) {
+ write_lock(&keyring_name_lock);
+
+@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
+ write_unlock(&keyring_name_lock);
+ }
+
+- klist = rcu_access_pointer(keyring->payload.subscriptions);
+- if (klist) {
+- for (loop = klist->nkeys - 1; loop >= 0; loop--)
+- key_put(rcu_access_pointer(klist->keys[loop]));
+- kfree(klist);
+- }
++ assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
+ }
+
+ /*
+@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
+ */
+ static void keyring_describe(const struct key *keyring, struct seq_file *m)
+ {
+- struct keyring_list *klist;
+-
+ if (keyring->description)
+ seq_puts(m, keyring->description);
+ else
+ seq_puts(m, "[anon]");
+
+ if (key_is_instantiated(keyring)) {
+- rcu_read_lock();
+- klist = rcu_dereference(keyring->payload.subscriptions);
+- if (klist)
+- seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
++ if (keyring->keys.nr_leaves_on_tree != 0)
++ seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
+ else
+ seq_puts(m, ": empty");
+- rcu_read_unlock();
+ }
+ }
+
++struct keyring_read_iterator_context {
++ size_t qty;
++ size_t count;
++ key_serial_t __user *buffer;
++};
++
++static int keyring_read_iterator(const void *object, void *data)
++{
++ struct keyring_read_iterator_context *ctx = data;
++ const struct key *key = keyring_ptr_to_key(object);
++ int ret;
++
++ kenter("{%s,%d},,{%zu/%zu}",
++ key->type->name, key->serial, ctx->count, ctx->qty);
++
++ if (ctx->count >= ctx->qty)
++ return 1;
++
++ ret = put_user(key->serial, ctx->buffer);
++ if (ret < 0)
++ return ret;
++ ctx->buffer++;
++ ctx->count += sizeof(key->serial);
++ return 0;
++}
++
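[editor's note: keyring_read_iterator() above shows the callback-plus-context pattern the new code uses wherever assoc_array_iterate() appears: all state (quota, cursor, user buffer) travels in a context struct, and the callback's return value steers the walk - 0 continues, non-zero stops. A generic userspace sketch of the same contract, with illustrative names rather than the kernel API:

    #include <stdio.h>

    /* Visit every element; stop early if the callback returns non-zero. */
    static int iterate(const int *arr, size_t n,
                       int (*fn)(int value, void *data), void *data)
    {
        for (size_t i = 0; i < n; i++) {
            int ret = fn(arr[i], data);
            if (ret)
                return ret;
        }
        return 0;
    }

    struct sum_ctx { long sum; size_t limit; size_t seen; };

    static int sum_some(int value, void *data)
    {
        struct sum_ctx *ctx = data;

        if (ctx->seen >= ctx->limit)
            return 1;               /* buffer "full": stop the walk */
        ctx->seen++;
        ctx->sum += value;
        return 0;
    }

    int main(void)
    {
        int serials[] = { 11, 22, 33, 44 };
        struct sum_ctx ctx = { .sum = 0, .limit = 3, .seen = 0 };

        iterate(serials, 4, sum_some, &ctx);
        printf("summed %zu values: %ld\n", ctx.seen, ctx.sum);
        return 0;
    }
]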
+ /*
+ * Read a list of key IDs from the keyring's contents in binary form
+ *
+- * The keyring's semaphore is read-locked by the caller.
++ * The keyring's semaphore is read-locked by the caller. This prevents someone
++ * from modifying it under us - which could cause us to read key IDs multiple
++ * times.
+ */
+ static long keyring_read(const struct key *keyring,
+ char __user *buffer, size_t buflen)
+ {
+- struct keyring_list *klist;
+- struct key *key;
+- size_t qty, tmp;
+- int loop, ret;
++ struct keyring_read_iterator_context ctx;
++ unsigned long nr_keys;
++ int ret;
+
+- ret = 0;
+- klist = rcu_dereference_locked_keyring(keyring);
+- if (klist) {
+- /* calculate how much data we could return */
+- qty = klist->nkeys * sizeof(key_serial_t);
+-
+- if (buffer && buflen > 0) {
+- if (buflen > qty)
+- buflen = qty;
+-
+- /* copy the IDs of the subscribed keys into the
+- * buffer */
+- ret = -EFAULT;
+-
+- for (loop = 0; loop < klist->nkeys; loop++) {
+- key = rcu_deref_link_locked(klist, loop,
+- keyring);
+-
+- tmp = sizeof(key_serial_t);
+- if (tmp > buflen)
+- tmp = buflen;
+-
+- if (copy_to_user(buffer,
+- &key->serial,
+- tmp) != 0)
+- goto error;
+-
+- buflen -= tmp;
+- if (buflen == 0)
+- break;
+- buffer += tmp;
+- }
+- }
++ kenter("{%d},,%zu", key_serial(keyring), buflen);
++
++ if (buflen & (sizeof(key_serial_t) - 1))
++ return -EINVAL;
++
++ nr_keys = keyring->keys.nr_leaves_on_tree;
++ if (nr_keys == 0)
++ return 0;
+
+- ret = qty;
++ /* Calculate how much data we could return */
++ ctx.qty = nr_keys * sizeof(key_serial_t);
++
++ if (!buffer || !buflen)
++ return ctx.qty;
++
++ if (buflen > ctx.qty)
++ ctx.qty = buflen;
++
++ /* Copy the IDs of the subscribed keys into the buffer */
++ ctx.buffer = (key_serial_t __user *)buffer;
++ ctx.count = 0;
++ ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
++ if (ret < 0) {
++ kleave(" = %d [iterate]", ret);
++ return ret;
+ }
+
+-error:
+- return ret;
++ kleave(" = %zu [ok]", ctx.count);
++ return ctx.count;
+ }
+
+ /*
+@@ -277,219 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
+ }
+ EXPORT_SYMBOL(keyring_alloc);
+
+-/**
+- * keyring_search_aux - Search a keyring tree for a key matching some criteria
+- * @keyring_ref: A pointer to the keyring with possession indicator.
+- * @ctx: The keyring search context.
+- *
+- * Search the supplied keyring tree for a key that matches the criteria given.
+- * The root keyring and any linked keyrings must grant Search permission to the
+- * caller to be searchable and keys can only be found if they too grant Search
+- * to the caller. The possession flag on the root keyring pointer controls use
+- * of the possessor bits in permissions checking of the entire tree. In
+- * addition, the LSM gets to forbid keyring searches and key matches.
+- *
+- * The search is performed as a breadth-then-depth search up to the prescribed
+- * limit (KEYRING_SEARCH_MAX_DEPTH).
+- *
+- * Keys are matched to the type provided and are then filtered by the match
+- * function, which is given the description to use in any way it sees fit. The
+- * match function may use any attributes of a key that it wishes to to
+- * determine the match. Normally the match function from the key type would be
+- * used.
+- *
+- * RCU is used to prevent the keyring key lists from disappearing without the
+- * need to take lots of locks.
+- *
+- * Returns a pointer to the found key and increments the key usage count if
+- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+- * specified keyring wasn't a keyring.
+- *
+- * In the case of a successful return, the possession attribute from
+- * @keyring_ref is propagated to the returned key reference.
++/*
++ * Iteration function to consider each key found.
+ */
+-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+- struct keyring_search_context *ctx)
++static int keyring_search_iterator(const void *object, void *iterator_data)
+ {
+- struct {
+- /* Need a separate keylist pointer for RCU purposes */
+- struct key *keyring;
+- struct keyring_list *keylist;
+- int kix;
+- } stack[KEYRING_SEARCH_MAX_DEPTH];
+-
+- struct keyring_list *keylist;
+- unsigned long kflags;
+- struct key *keyring, *key;
+- key_ref_t key_ref;
+- long err;
+- int sp, nkeys, kix;
++ struct keyring_search_context *ctx = iterator_data;
++ const struct key *key = keyring_ptr_to_key(object);
++ unsigned long kflags = key->flags;
+
+- keyring = key_ref_to_ptr(keyring_ref);
+- ctx->possessed = is_key_possessed(keyring_ref);
+- key_check(keyring);
++ kenter("{%d}", key->serial);
+
+- /* top keyring must have search permission to begin the search */
+- err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+- if (err < 0) {
+- key_ref = ERR_PTR(err);
+- goto error;
++ /* ignore keys not of this type */
++ if (key->type != ctx->index_key.type) {
++ kleave(" = 0 [!type]");
++ return 0;
+ }
+
+- key_ref = ERR_PTR(-ENOTDIR);
+- if (keyring->type != &key_type_keyring)
+- goto error;
++ /* skip invalidated, revoked and expired keys */
++ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
++ if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
++ (1 << KEY_FLAG_REVOKED))) {
++ ctx->result = ERR_PTR(-EKEYREVOKED);
++ kleave(" = %d [invrev]", ctx->skipped_ret);
++ goto skipped;
++ }
+
+- rcu_read_lock();
++ if (key->expiry && ctx->now.tv_sec >= key->expiry) {
++ ctx->result = ERR_PTR(-EKEYEXPIRED);
++ kleave(" = %d [expire]", ctx->skipped_ret);
++ goto skipped;
++ }
++ }
+
+- ctx->now = current_kernel_time();
+- err = -EAGAIN;
+- sp = 0;
+-
+- /* firstly we should check to see if this top-level keyring is what we
+- * are looking for */
+- key_ref = ERR_PTR(-EAGAIN);
+- kflags = keyring->flags;
+- if (keyring->type == ctx->index_key.type &&
+- ctx->match(keyring, ctx->match_data)) {
+- key = keyring;
+- if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+- goto found;
++ /* keys that don't match */
++ if (!ctx->match(key, ctx->match_data)) {
++ kleave(" = 0 [!match]");
++ return 0;
++ }
+
+- /* check it isn't negative and hasn't expired or been
+- * revoked */
+- if (kflags & (1 << KEY_FLAG_REVOKED))
+- goto error_2;
+- if (key->expiry && ctx->now.tv_sec >= key->expiry)
+- goto error_2;
+- key_ref = ERR_PTR(key->type_data.reject_error);
+- if (kflags & (1 << KEY_FLAG_NEGATIVE))
+- goto error_2;
+- goto found;
++ /* key must have search permissions */
++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
++ key_task_permission(make_key_ref(key, ctx->possessed),
++ ctx->cred, KEY_SEARCH) < 0) {
++ ctx->result = ERR_PTR(-EACCES);
++ kleave(" = %d [!perm]", ctx->skipped_ret);
++ goto skipped;
+ }
+
+- /* otherwise, the top keyring must not be revoked, expired, or
+- * negatively instantiated if we are to search it */
+- key_ref = ERR_PTR(-EAGAIN);
+- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+- (1 << KEY_FLAG_REVOKED) |
+- (1 << KEY_FLAG_NEGATIVE)) ||
+- (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
+- goto error_2;
+-
+- /* start processing a new keyring */
+-descend:
+- kflags = keyring->flags;
+- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+- (1 << KEY_FLAG_REVOKED)))
+- goto not_this_keyring;
++ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
++ /* we set a different error code if we pass a negative key */
++ if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
++ ctx->result = ERR_PTR(key->type_data.reject_error);
++ kleave(" = %d [neg]", ctx->skipped_ret);
++ goto skipped;
++ }
++ }
+
+- keylist = rcu_dereference(keyring->payload.subscriptions);
+- if (!keylist)
+- goto not_this_keyring;
++ /* Found */
++ ctx->result = make_key_ref(key, ctx->possessed);
++ kleave(" = 1 [found]");
++ return 1;
+
+- /* iterate through the keys in this keyring first */
+- nkeys = keylist->nkeys;
+- smp_rmb();
+- for (kix = 0; kix < nkeys; kix++) {
+- key = rcu_dereference(keylist->keys[kix]);
+- kflags = key->flags;
++skipped:
++ return ctx->skipped_ret;
++}
+
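[editor's note: note what "skipped" means above - a revoked, expired, or inaccessible key does not end the search. The iterator records the most informative error seen so far in ctx->result and returns ctx->skipped_ret, so a caller who finds nothing better gets -EKEYREVOKED or -EKEYEXPIRED rather than a bare -EAGAIN. A compact sketch of that record-and-continue pattern, assuming Linux errno values; illustrative, not kernel code:

    #include <errno.h>
    #include <stdio.h>

    struct item { int usable; int err; int value; };

    /* Scan for a usable item; remember the most recent skip reason so
     * the caller learns *why* nothing matched, not just that it didn't. */
    static int find_usable(const struct item *v, size_t n, int *out)
    {
        int err = -EAGAIN;          /* default: nothing matched at all */

        for (size_t i = 0; i < n; i++) {
            if (!v[i].usable) {
                err = v[i].err;     /* more specific than -EAGAIN */
                continue;
            }
            *out = v[i].value;
            return 0;
        }
        return err;
    }

    int main(void)
    {
        struct item v[] = { { 0, -EKEYREVOKED, 0 }, { 0, -EKEYEXPIRED, 0 } };
        int value;

        printf("find_usable = %d\n", find_usable(v, 2, &value));
        return 0;
    }
]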
+- /* ignore keys not of this type */
+- if (key->type != ctx->index_key.type)
+- continue;
++/*
++ * Search inside a keyring for a key. We can search by walking to it
++ * directly based on its index-key or we can iterate over the entire
++ * tree looking for it, based on the match function.
++ */
++static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
++{
++ if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
++ KEYRING_SEARCH_LOOKUP_DIRECT) {
++ const void *object;
++
++ object = assoc_array_find(&keyring->keys,
++ &keyring_assoc_array_ops,
++ &ctx->index_key);
++ return object ? ctx->iterator(object, ctx) : 0;
++ }
++ return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
++}
+
+- /* skip invalidated, revoked and expired keys */
+- if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
+- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+- (1 << KEY_FLAG_REVOKED)))
+- continue;
++/*
++ * Search a tree of keyrings that point to other keyrings up to the maximum
++ * depth.
++ */
++static bool search_nested_keyrings(struct key *keyring,
++ struct keyring_search_context *ctx)
++{
++ struct {
++ struct key *keyring;
++ struct assoc_array_node *node;
++ int slot;
++ } stack[KEYRING_SEARCH_MAX_DEPTH];
+
+- if (key->expiry && ctx->now.tv_sec >= key->expiry)
+- continue;
+- }
++ struct assoc_array_shortcut *shortcut;
++ struct assoc_array_node *node;
++ struct assoc_array_ptr *ptr;
++ struct key *key;
++ int sp = 0, slot;
+
+- /* keys that don't match */
+- if (!ctx->match(key, ctx->match_data))
+- continue;
++ kenter("{%d},{%s,%s}",
++ keyring->serial,
++ ctx->index_key.type->name,
++ ctx->index_key.description);
+
+- /* key must have search permissions */
+- if (key_task_permission(make_key_ref(key, ctx->possessed),
+- ctx->cred, KEY_SEARCH) < 0)
+- continue;
++ if (ctx->index_key.description)
++ ctx->index_key.desc_len = strlen(ctx->index_key.description);
+
+- if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
++ /* Check to see if this top-level keyring is what we are looking for
++ * and whether it is valid or not.
++ */
++ if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
++ keyring_compare_object(keyring, &ctx->index_key)) {
++ ctx->skipped_ret = 2;
++ ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
++ switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
++ case 1:
+ goto found;
+-
+- /* we set a different error code if we pass a negative key */
+- if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+- err = key->type_data.reject_error;
+- continue;
++ case 2:
++ return false;
++ default:
++ break;
+ }
++ }
+
++ ctx->skipped_ret = 0;
++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
++ ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
++
++ /* Start processing a new keyring */
++descend_to_keyring:
++ kdebug("descend to %d", keyring->serial);
++ if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
++ (1 << KEY_FLAG_REVOKED)))
++ goto not_this_keyring;
++
++	/* Search through the keys in this keyring before searching its
++ * subtrees.
++ */
++ if (search_keyring(keyring, ctx))
+ goto found;
+- }
+
+- /* search through the keyrings nested in this one */
+- kix = 0;
+-ascend:
+- nkeys = keylist->nkeys;
+- smp_rmb();
+- for (; kix < nkeys; kix++) {
+- key = rcu_dereference(keylist->keys[kix]);
+- if (key->type != &key_type_keyring)
+- continue;
++ /* Then manually iterate through the keyrings nested in this one.
++ *
++ * Start from the root node of the index tree. Because of the way the
++ * hash function has been set up, keyrings cluster on the leftmost
++ * branch of the root node (root slot 0) or in the root node itself.
++ * Non-keyrings avoid the leftmost branch of the root entirely (root
++ * slots 1-15).
++ */
++ ptr = ACCESS_ONCE(keyring->keys.root);
++ if (!ptr)
++ goto not_this_keyring;
+
+- /* recursively search nested keyrings
+- * - only search keyrings for which we have search permission
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ /* If the root is a shortcut, either the keyring only contains
++ * keyring pointers (everything clusters behind root slot 0) or
++ * doesn't contain any keyring pointers.
+ */
+- if (sp >= KEYRING_SEARCH_MAX_DEPTH)
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ smp_read_barrier_depends();
++ if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
++ goto not_this_keyring;
++
++ ptr = ACCESS_ONCE(shortcut->next_node);
++ node = assoc_array_ptr_to_node(ptr);
++ goto begin_node;
++ }
++
++ node = assoc_array_ptr_to_node(ptr);
++ smp_read_barrier_depends();
++
++ ptr = node->slots[0];
++ if (!assoc_array_ptr_is_meta(ptr))
++ goto begin_node;
++
++descend_to_node:
++ /* Descend to a more distal node in this keyring's content tree and go
++ * through that.
++ */
++ kdebug("descend");
++ if (assoc_array_ptr_is_shortcut(ptr)) {
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ smp_read_barrier_depends();
++ ptr = ACCESS_ONCE(shortcut->next_node);
++ BUG_ON(!assoc_array_ptr_is_node(ptr));
++ node = assoc_array_ptr_to_node(ptr);
++ }
++
++begin_node:
++ kdebug("begin_node");
++ smp_read_barrier_depends();
++ slot = 0;
++ascend_to_node:
++ /* Go through the slots in a node */
++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++ ptr = ACCESS_ONCE(node->slots[slot]);
++
++ if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
++ goto descend_to_node;
++
++ if (!keyring_ptr_is_keyring(ptr))
+ continue;
+
+- if (key_task_permission(make_key_ref(key, ctx->possessed),
++ key = keyring_ptr_to_key(ptr);
++
++ if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
++ if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
++ ctx->result = ERR_PTR(-ELOOP);
++ return false;
++ }
++ goto not_this_keyring;
++ }
++
++ /* Search a nested keyring */
++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
++ key_task_permission(make_key_ref(key, ctx->possessed),
+ ctx->cred, KEY_SEARCH) < 0)
+ continue;
+
+ /* stack the current position */
+ stack[sp].keyring = keyring;
+- stack[sp].keylist = keylist;
+- stack[sp].kix = kix;
++ stack[sp].node = node;
++ stack[sp].slot = slot;
+ sp++;
+
+ /* begin again with the new keyring */
+ keyring = key;
+- goto descend;
++ goto descend_to_keyring;
++ }
++
++ /* We've dealt with all the slots in the current node, so now we need
++ * to ascend to the parent and continue processing there.
++ */
++ ptr = ACCESS_ONCE(node->back_pointer);
++ slot = node->parent_slot;
++
++ if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
++ shortcut = assoc_array_ptr_to_shortcut(ptr);
++ smp_read_barrier_depends();
++ ptr = ACCESS_ONCE(shortcut->back_pointer);
++ slot = shortcut->parent_slot;
++ }
++ if (!ptr)
++ goto not_this_keyring;
++ node = assoc_array_ptr_to_node(ptr);
++ smp_read_barrier_depends();
++ slot++;
++
++ /* If we've ascended to the root (zero backpointer), we must have just
++ * finished processing the leftmost branch rather than the root slots -
++ * so there can't be any more keyrings for us to find.
++ */
++ if (node->back_pointer) {
++ kdebug("ascend %d", slot);
++ goto ascend_to_node;
+ }
+
+- /* the keyring we're looking at was disqualified or didn't contain a
+- * matching key */
++ /* The keyring we're looking at was disqualified or didn't contain a
++ * matching key.
++ */
+ not_this_keyring:
+- if (sp > 0) {
+- /* resume the processing of a keyring higher up in the tree */
+- sp--;
+- keyring = stack[sp].keyring;
+- keylist = stack[sp].keylist;
+- kix = stack[sp].kix + 1;
+- goto ascend;
++ kdebug("not_this_keyring %d", sp);
++ if (sp <= 0) {
++ kleave(" = false");
++ return false;
+ }
+
+- key_ref = ERR_PTR(err);
+- goto error_2;
++ /* Resume the processing of a keyring higher up in the tree */
++ sp--;
++ keyring = stack[sp].keyring;
++ node = stack[sp].node;
++ slot = stack[sp].slot + 1;
++ kdebug("ascend to %d [%d]", keyring->serial, slot);
++ goto ascend_to_node;
+
+- /* we found a viable match */
++ /* We found a viable match */
+ found:
+- __key_get(key);
+- key->last_used_at = ctx->now.tv_sec;
+- keyring->last_used_at = ctx->now.tv_sec;
+- while (sp > 0)
+- stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
++ key = key_ref_to_ptr(ctx->result);
+ key_check(key);
+- key_ref = make_key_ref(key, ctx->possessed);
+-error_2:
++ if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
++ key->last_used_at = ctx->now.tv_sec;
++ keyring->last_used_at = ctx->now.tv_sec;
++ while (sp > 0)
++ stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
++ }
++ kleave(" = true");
++ return true;
++}
++
++/**
++ * keyring_search_aux - Search a keyring tree for a key matching some criteria
++ * @keyring_ref: A pointer to the keyring with possession indicator.
++ * @ctx: The keyring search context.
++ *
++ * Search the supplied keyring tree for a key that matches the criteria given.
++ * The root keyring and any linked keyrings must grant Search permission to the
++ * caller to be searchable and keys can only be found if they too grant Search
++ * to the caller. The possession flag on the root keyring pointer controls use
++ * of the possessor bits in permissions checking of the entire tree. In
++ * addition, the LSM gets to forbid keyring searches and key matches.
++ *
++ * The search is performed as a breadth-then-depth search up to the prescribed
++ * limit (KEYRING_SEARCH_MAX_DEPTH).
++ *
++ * Keys are matched to the type provided and are then filtered by the match
++ * function, which is given the description to use in any way it sees fit. The
++ * match function may use any attributes of a key that it wishes to
++ * determine the match. Normally the match function from the key type would be
++ * used.
++ *
++ * RCU can be used to prevent the keyring key lists from disappearing without
++ * the need to take lots of locks.
++ *
++ * Returns a pointer to the found key and increments the key usage count if
++ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
++ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
++ * specified keyring wasn't a keyring.
++ *
++ * In the case of a successful return, the possession attribute from
++ * @keyring_ref is propagated to the returned key reference.
++ */
++key_ref_t keyring_search_aux(key_ref_t keyring_ref,
++ struct keyring_search_context *ctx)
++{
++ struct key *keyring;
++ long err;
++
++ ctx->iterator = keyring_search_iterator;
++ ctx->possessed = is_key_possessed(keyring_ref);
++ ctx->result = ERR_PTR(-EAGAIN);
++
++ keyring = key_ref_to_ptr(keyring_ref);
++ key_check(keyring);
++
++ if (keyring->type != &key_type_keyring)
++ return ERR_PTR(-ENOTDIR);
++
++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
++ err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
++ if (err < 0)
++ return ERR_PTR(err);
++ }
++
++ rcu_read_lock();
++ ctx->now = current_kernel_time();
++ if (search_nested_keyrings(keyring, ctx))
++ __key_get(key_ref_to_ptr(ctx->result));
+ rcu_read_unlock();
+-error:
+- return key_ref;
++ return ctx->result;
+ }
+
+ /**
+@@ -499,7 +863,7 @@ error:
+ * @description: The name of the keyring we want to find.
+ *
+ * As keyring_search_aux() above, but using the current task's credentials and
+- * type's default matching function.
++ * type's default matching function and preferred search method.
+ */
+ key_ref_t keyring_search(key_ref_t keyring,
+ struct key_type *type,
+@@ -523,58 +887,49 @@ key_ref_t keyring_search(key_ref_t keyring,
+ EXPORT_SYMBOL(keyring_search);
+
+ /*
+- * Search the given keyring only (no recursion).
++ * Search the given keyring for a key that might be updated.
+ *
+ * The caller must guarantee that the keyring is a keyring and that the
+- * permission is granted to search the keyring as no check is made here.
+- *
+- * RCU is used to make it unnecessary to lock the keyring key list here.
++ * permission is granted to modify the keyring as no check is made here. The
++ * caller must also hold a lock on the keyring semaphore.
+ *
+ * Returns a pointer to the found key with usage count incremented if
+- * successful and returns -ENOKEY if not found. Revoked and invalidated keys
+- * are skipped over.
++ * successful and returns NULL if not found. Revoked and invalidated keys are
++ * skipped over.
+ *
+ * If successful, the possession indicator is propagated from the keyring ref
+ * to the returned key reference.
+ */
+-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+- const struct keyring_index_key *index_key)
++key_ref_t find_key_to_update(key_ref_t keyring_ref,
++ const struct keyring_index_key *index_key)
+ {
+- struct keyring_list *klist;
+ struct key *keyring, *key;
+- bool possessed;
+- int nkeys, loop;
++ const void *object;
+
+ keyring = key_ref_to_ptr(keyring_ref);
+- possessed = is_key_possessed(keyring_ref);
+
+- rcu_read_lock();
++ kenter("{%d},{%s,%s}",
++ keyring->serial, index_key->type->name, index_key->description);
+
+- klist = rcu_dereference(keyring->payload.subscriptions);
+- if (klist) {
+- nkeys = klist->nkeys;
+- smp_rmb();
+- for (loop = 0; loop < nkeys ; loop++) {
+- key = rcu_dereference(klist->keys[loop]);
+- if (key->type == index_key->type &&
+- (!key->type->match ||
+- key->type->match(key, index_key->description)) &&
+- !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+- (1 << KEY_FLAG_REVOKED)))
+- )
+- goto found;
+- }
+- }
++ object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
++ index_key);
+
+- rcu_read_unlock();
+- return ERR_PTR(-ENOKEY);
++ if (object)
++ goto found;
++
++ kleave(" = NULL");
++ return NULL;
+
+ found:
++ key = keyring_ptr_to_key(object);
++ if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
++ (1 << KEY_FLAG_REVOKED))) {
++ kleave(" = NULL [x]");
++ return NULL;
++ }
+ __key_get(key);
+- keyring->last_used_at = key->last_used_at =
+- current_kernel_time().tv_sec;
+- rcu_read_unlock();
+- return make_key_ref(key, possessed);
++ kleave(" = {%d}", key->serial);
++ return make_key_ref(key, is_key_possessed(keyring_ref));
+ }
+
+ /*
+@@ -637,6 +992,19 @@ out:
+ return keyring;
+ }
+
++static int keyring_detect_cycle_iterator(const void *object,
++ void *iterator_data)
++{
++ struct keyring_search_context *ctx = iterator_data;
++ const struct key *key = keyring_ptr_to_key(object);
++
++ kenter("{%d}", key->serial);
++
++ BUG_ON(key != ctx->match_data);
++ ctx->result = ERR_PTR(-EDEADLK);
++ return 1;
++}
++
+ /*
+ * See if a cycle will be created by inserting acyclic tree B in acyclic
+ * tree A at the topmost level (ie: as a direct child of A).
+@@ -646,117 +1014,39 @@ out:
+ */
+ static int keyring_detect_cycle(struct key *A, struct key *B)
+ {
+- struct {
+- struct keyring_list *keylist;
+- int kix;
+- } stack[KEYRING_SEARCH_MAX_DEPTH];
+-
+- struct keyring_list *keylist;
+- struct key *subtree, *key;
+- int sp, nkeys, kix, ret;
++ struct keyring_search_context ctx = {
++ .index_key = A->index_key,
++ .match_data = A,
++ .iterator = keyring_detect_cycle_iterator,
++ .flags = (KEYRING_SEARCH_LOOKUP_DIRECT |
++ KEYRING_SEARCH_NO_STATE_CHECK |
++ KEYRING_SEARCH_NO_UPDATE_TIME |
++ KEYRING_SEARCH_NO_CHECK_PERM |
++ KEYRING_SEARCH_DETECT_TOO_DEEP),
++ };
+
+ rcu_read_lock();
+-
+- ret = -EDEADLK;
+- if (A == B)
+- goto cycle_detected;
+-
+- subtree = B;
+- sp = 0;
+-
+- /* start processing a new keyring */
+-descend:
+- if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
+- goto not_this_keyring;
+-
+- keylist = rcu_dereference(subtree->payload.subscriptions);
+- if (!keylist)
+- goto not_this_keyring;
+- kix = 0;
+-
+-ascend:
+- /* iterate through the remaining keys in this keyring */
+- nkeys = keylist->nkeys;
+- smp_rmb();
+- for (; kix < nkeys; kix++) {
+- key = rcu_dereference(keylist->keys[kix]);
+-
+- if (key == A)
+- goto cycle_detected;
+-
+- /* recursively check nested keyrings */
+- if (key->type == &key_type_keyring) {
+- if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+- goto too_deep;
+-
+- /* stack the current position */
+- stack[sp].keylist = keylist;
+- stack[sp].kix = kix;
+- sp++;
+-
+- /* begin again with the new keyring */
+- subtree = key;
+- goto descend;
+- }
+- }
+-
+- /* the keyring we're looking at was disqualified or didn't contain a
+- * matching key */
+-not_this_keyring:
+- if (sp > 0) {
+- /* resume the checking of a keyring higher up in the tree */
+- sp--;
+- keylist = stack[sp].keylist;
+- kix = stack[sp].kix + 1;
+- goto ascend;
+- }
+-
+- ret = 0; /* no cycles detected */
+-
+-error:
++ search_nested_keyrings(B, &ctx);
+ rcu_read_unlock();
+- return ret;
+-
+-too_deep:
+- ret = -ELOOP;
+- goto error;
+-
+-cycle_detected:
+- ret = -EDEADLK;
+- goto error;
+-}
+-
+-/*
+- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
+- * key
+- */
+-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
+-{
+- struct keyring_list *klist =
+- container_of(rcu, struct keyring_list, rcu);
+-
+- if (klist->delkey != USHRT_MAX)
+- key_put(rcu_access_pointer(klist->keys[klist->delkey]));
+- kfree(klist);
++ return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
+ }
+
+ /*
+ * Preallocate memory so that a key can be linked into a keyring.
+ */
+-int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
+- unsigned long *_prealloc)
++int __key_link_begin(struct key *keyring,
++ const struct keyring_index_key *index_key,
++ struct assoc_array_edit **_edit)
+ __acquires(&keyring->sem)
+ __acquires(&keyring_serialise_link_sem)
+ {
+- struct keyring_list *klist, *nklist;
+- unsigned long prealloc;
+- unsigned max;
+- time_t lowest_lru;
+- size_t size;
+- int loop, lru, ret;
++ struct assoc_array_edit *edit;
++ int ret;
+
+ kenter("%d,%s,%s,",
+- key_serial(keyring), index_key->type->name, index_key->description);
++ keyring->serial, index_key->type->name, index_key->description);
++
++ BUG_ON(index_key->desc_len == 0);
+
+ if (keyring->type != &key_type_keyring)
+ return -ENOTDIR;
+@@ -772,88 +1062,25 @@ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_
+ if (index_key->type == &key_type_keyring)
+ down_write(&keyring_serialise_link_sem);
+
+- klist = rcu_dereference_locked_keyring(keyring);
+-
+- /* see if there's a matching key we can displace */
+- lru = -1;
+- if (klist && klist->nkeys > 0) {
+- lowest_lru = TIME_T_MAX;
+- for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+- struct key *key = rcu_deref_link_locked(klist, loop,
+- keyring);
+- if (key->type == index_key->type &&
+- strcmp(key->description, index_key->description) == 0) {
+- /* Found a match - we'll replace the link with
+- * one to the new key. We record the slot
+- * position.
+- */
+- klist->delkey = loop;
+- prealloc = 0;
+- goto done;
+- }
+- if (key->last_used_at < lowest_lru) {
+- lowest_lru = key->last_used_at;
+- lru = loop;
+- }
+- }
+- }
+-
+- /* If the keyring is full then do an LRU discard */
+- if (klist &&
+- klist->nkeys == klist->maxkeys &&
+- klist->maxkeys >= MAX_KEYRING_LINKS) {
+- kdebug("LRU discard %d\n", lru);
+- klist->delkey = lru;
+- prealloc = 0;
+- goto done;
+- }
+-
+ /* check that we aren't going to overrun the user's quota */
+ ret = key_payload_reserve(keyring,
+ keyring->datalen + KEYQUOTA_LINK_BYTES);
+ if (ret < 0)
+ goto error_sem;
+
+- if (klist && klist->nkeys < klist->maxkeys) {
+- /* there's sufficient slack space to append directly */
+- klist->delkey = klist->nkeys;
+- prealloc = KEY_LINK_FIXQUOTA;
+- } else {
+- /* grow the key list */
+- max = 4;
+- if (klist) {
+- max += klist->maxkeys;
+- if (max > MAX_KEYRING_LINKS)
+- max = MAX_KEYRING_LINKS;
+- BUG_ON(max <= klist->maxkeys);
+- }
+-
+- size = sizeof(*klist) + sizeof(struct key *) * max;
+-
+- ret = -ENOMEM;
+- nklist = kmalloc(size, GFP_KERNEL);
+- if (!nklist)
+- goto error_quota;
+-
+- nklist->maxkeys = max;
+- if (klist) {
+- memcpy(nklist->keys, klist->keys,
+- sizeof(struct key *) * klist->nkeys);
+- nklist->delkey = klist->nkeys;
+- nklist->nkeys = klist->nkeys + 1;
+- klist->delkey = USHRT_MAX;
+- } else {
+- nklist->nkeys = 1;
+- nklist->delkey = 0;
+- }
+-
+- /* add the key into the new space */
+- RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
+- prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
++ /* Create an edit script that will insert/replace the key in the
++ * keyring tree.
++ */
++ edit = assoc_array_insert(&keyring->keys,
++ &keyring_assoc_array_ops,
++ index_key,
++ NULL);
++ if (IS_ERR(edit)) {
++ ret = PTR_ERR(edit);
++ goto error_quota;
+ }
+
+-done:
+- *_prealloc = prealloc;
++ *_edit = edit;
+ kleave(" = 0");
+ return 0;
+
+@@ -893,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
+ * holds at most one link to any given key of a particular type+description
+ * combination.
+ */
+-void __key_link(struct key *keyring, struct key *key,
+- unsigned long *_prealloc)
++void __key_link(struct key *key, struct assoc_array_edit **_edit)
+ {
+- struct keyring_list *klist, *nklist;
+- struct key *discard;
+-
+- nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
+- *_prealloc = 0;
+-
+- kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
+-
+- klist = rcu_dereference_locked_keyring(keyring);
+-
+ __key_get(key);
+- keyring->last_used_at = key->last_used_at =
+- current_kernel_time().tv_sec;
+-
+- /* there's a matching key we can displace or an empty slot in a newly
+- * allocated list we can fill */
+- if (nklist) {
+- kdebug("reissue %hu/%hu/%hu",
+- nklist->delkey, nklist->nkeys, nklist->maxkeys);
+-
+- RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
+-
+- rcu_assign_pointer(keyring->payload.subscriptions, nklist);
+-
+- /* dispose of the old keyring list and, if there was one, the
+- * displaced key */
+- if (klist) {
+- kdebug("dispose %hu/%hu/%hu",
+- klist->delkey, klist->nkeys, klist->maxkeys);
+- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+- }
+- } else if (klist->delkey < klist->nkeys) {
+- kdebug("replace %hu/%hu/%hu",
+- klist->delkey, klist->nkeys, klist->maxkeys);
+-
+- discard = rcu_dereference_protected(
+- klist->keys[klist->delkey],
+- rwsem_is_locked(&keyring->sem));
+- rcu_assign_pointer(klist->keys[klist->delkey], key);
+- /* The garbage collector will take care of RCU
+- * synchronisation */
+- key_put(discard);
+- } else {
+- /* there's sufficient slack space to append directly */
+- kdebug("append %hu/%hu/%hu",
+- klist->delkey, klist->nkeys, klist->maxkeys);
+-
+- RCU_INIT_POINTER(klist->keys[klist->delkey], key);
+- smp_wmb();
+- klist->nkeys++;
+- }
++ assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
++ assoc_array_apply_edit(*_edit);
++ *_edit = NULL;
+ }
+
+ /*
+@@ -956,23 +1135,20 @@ void __key_link(struct key *keyring, struct key *key,
+ */
+ void __key_link_end(struct key *keyring,
+ const struct keyring_index_key *index_key,
+- unsigned long prealloc)
++ struct assoc_array_edit *edit)
+ __releases(&keyring->sem)
+ __releases(&keyring_serialise_link_sem)
+ {
+ BUG_ON(index_key->type == NULL);
+- BUG_ON(index_key->type->name == NULL);
+- kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
++ kenter("%d,%s,", keyring->serial, index_key->type->name);
+
+ if (index_key->type == &key_type_keyring)
+ up_write(&keyring_serialise_link_sem);
+
+- if (prealloc) {
+- if (prealloc & KEY_LINK_FIXQUOTA)
+- key_payload_reserve(keyring,
+- keyring->datalen -
+- KEYQUOTA_LINK_BYTES);
+- kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
++ if (edit) {
++ key_payload_reserve(keyring,
++ keyring->datalen - KEYQUOTA_LINK_BYTES);
++ assoc_array_cancel_edit(edit);
+ }
+ up_write(&keyring->sem);
+ }
+@@ -999,20 +1175,24 @@ void __key_link_end(struct key *keyring,
+ */
+ int key_link(struct key *keyring, struct key *key)
+ {
+- unsigned long prealloc;
++ struct assoc_array_edit *edit;
+ int ret;
+
++ kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
++
+ key_check(keyring);
+ key_check(key);
+
+- ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++ ret = __key_link_begin(keyring, &key->index_key, &edit);
+ if (ret == 0) {
++ kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+ ret = __key_link_check_live_key(keyring, key);
+ if (ret == 0)
+- __key_link(keyring, key, &prealloc);
+- __key_link_end(keyring, &key->index_key, prealloc);
++ __key_link(key, &edit);
++ __key_link_end(keyring, &key->index_key, edit);
+ }
+
++ kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
+ return ret;
+ }
+ EXPORT_SYMBOL(key_link);
+@@ -1036,90 +1216,36 @@ EXPORT_SYMBOL(key_link);
+ */
+ int key_unlink(struct key *keyring, struct key *key)
+ {
+- struct keyring_list *klist, *nklist;
+- int loop, ret;
++ struct assoc_array_edit *edit;
++ int ret;
+
+ key_check(keyring);
+ key_check(key);
+
+- ret = -ENOTDIR;
+ if (keyring->type != &key_type_keyring)
+- goto error;
++ return -ENOTDIR;
+
+ down_write(&keyring->sem);
+
+- klist = rcu_dereference_locked_keyring(keyring);
+- if (klist) {
+- /* search the keyring for the key */
+- for (loop = 0; loop < klist->nkeys; loop++)
+- if (rcu_access_pointer(klist->keys[loop]) == key)
+- goto key_is_present;
++ edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
++ &key->index_key);
++ if (IS_ERR(edit)) {
++ ret = PTR_ERR(edit);
++ goto error;
+ }
+-
+- up_write(&keyring->sem);
+ ret = -ENOENT;
+- goto error;
+-
+-key_is_present:
+- /* we need to copy the key list for RCU purposes */
+- nklist = kmalloc(sizeof(*klist) +
+- sizeof(struct key *) * klist->maxkeys,
+- GFP_KERNEL);
+- if (!nklist)
+- goto nomem;
+- nklist->maxkeys = klist->maxkeys;
+- nklist->nkeys = klist->nkeys - 1;
+-
+- if (loop > 0)
+- memcpy(&nklist->keys[0],
+- &klist->keys[0],
+- loop * sizeof(struct key *));
+-
+- if (loop < nklist->nkeys)
+- memcpy(&nklist->keys[loop],
+- &klist->keys[loop + 1],
+- (nklist->nkeys - loop) * sizeof(struct key *));
+-
+- /* adjust the user's quota */
+- key_payload_reserve(keyring,
+- keyring->datalen - KEYQUOTA_LINK_BYTES);
+-
+- rcu_assign_pointer(keyring->payload.subscriptions, nklist);
+-
+- up_write(&keyring->sem);
+-
+- /* schedule for later cleanup */
+- klist->delkey = loop;
+- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
++ if (edit == NULL)
++ goto error;
+
++ assoc_array_apply_edit(edit);
+ ret = 0;
+
+ error:
+- return ret;
+-nomem:
+- ret = -ENOMEM;
+ up_write(&keyring->sem);
+- goto error;
++ return ret;
+ }
+ EXPORT_SYMBOL(key_unlink);
+
+-/*
+- * Dispose of a keyring list after the RCU grace period, releasing the keys it
+- * links to.
+- */
+-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
+-{
+- struct keyring_list *klist;
+- int loop;
+-
+- klist = container_of(rcu, struct keyring_list, rcu);
+-
+- for (loop = klist->nkeys - 1; loop >= 0; loop--)
+- key_put(rcu_access_pointer(klist->keys[loop]));
+-
+- kfree(klist);
+-}
+-
+ /**
+ * keyring_clear - Clear a keyring
+ * @keyring: The keyring to clear.
+@@ -1130,33 +1256,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
+ */
+ int keyring_clear(struct key *keyring)
+ {
+- struct keyring_list *klist;
++ struct assoc_array_edit *edit;
+ int ret;
+
+- ret = -ENOTDIR;
+- if (keyring->type == &key_type_keyring) {
+- /* detach the pointer block with the locks held */
+- down_write(&keyring->sem);
+-
+- klist = rcu_dereference_locked_keyring(keyring);
+- if (klist) {
+- /* adjust the quota */
+- key_payload_reserve(keyring,
+- sizeof(struct keyring_list));
+-
+- rcu_assign_pointer(keyring->payload.subscriptions,
+- NULL);
+- }
+-
+- up_write(&keyring->sem);
++ if (keyring->type != &key_type_keyring)
++ return -ENOTDIR;
+
+- /* free the keys after the locks have been dropped */
+- if (klist)
+- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
++ down_write(&keyring->sem);
+
++ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
++ if (IS_ERR(edit)) {
++ ret = PTR_ERR(edit);
++ } else {
++ if (edit)
++ assoc_array_apply_edit(edit);
++ key_payload_reserve(keyring, 0);
+ ret = 0;
+ }
+
++ up_write(&keyring->sem);
+ return ret;
+ }
+ EXPORT_SYMBOL(keyring_clear);
+@@ -1168,17 +1286,25 @@ EXPORT_SYMBOL(keyring_clear);
+ */
+ static void keyring_revoke(struct key *keyring)
+ {
+- struct keyring_list *klist;
++ struct assoc_array_edit *edit;
+
+- klist = rcu_dereference_locked_keyring(keyring);
++ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
++ if (!IS_ERR(edit)) {
++ if (edit)
++ assoc_array_apply_edit(edit);
++ key_payload_reserve(keyring, 0);
++ }
++}
+
+- /* adjust the quota */
+- key_payload_reserve(keyring, 0);
++static bool gc_iterator(void *object, void *iterator_data)
++{
++ struct key *key = keyring_ptr_to_key(object);
++ time_t *limit = iterator_data;
+
+- if (klist) {
+- rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+- }
++ if (key_is_dead(key, *limit))
++ return false;
++ key_get(key);
++ return true;
+ }
+
+ /*
+@@ -1191,88 +1317,12 @@ static void keyring_revoke(struct key *keyring)
+ */
+ void keyring_gc(struct key *keyring, time_t limit)
+ {
+- struct keyring_list *klist, *new;
+- struct key *key;
+- int loop, keep, max;
+-
+ kenter("{%x,%s}", key_serial(keyring), keyring->description);
+
+ down_write(&keyring->sem);
+-
+- klist = rcu_dereference_locked_keyring(keyring);
+- if (!klist)
+- goto no_klist;
+-
+- /* work out how many subscriptions we're keeping */
+- keep = 0;
+- for (loop = klist->nkeys - 1; loop >= 0; loop--)
+- if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
+- limit))
+- keep++;
+-
+- if (keep == klist->nkeys)
+- goto just_return;
+-
+- /* allocate a new keyring payload */
+- max = roundup(keep, 4);
+- new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
+- GFP_KERNEL);
+- if (!new)
+- goto nomem;
+- new->maxkeys = max;
+- new->nkeys = 0;
+- new->delkey = 0;
+-
+- /* install the live keys
+- * - must take care as expired keys may be updated back to life
+- */
+- keep = 0;
+- for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+- key = rcu_deref_link_locked(klist, loop, keyring);
+- if (!key_is_dead(key, limit)) {
+- if (keep >= max)
+- goto discard_new;
+- RCU_INIT_POINTER(new->keys[keep++], key_get(key));
+- }
+- }
+- new->nkeys = keep;
+-
+- /* adjust the quota */
+- key_payload_reserve(keyring,
+- sizeof(struct keyring_list) +
+- KEYQUOTA_LINK_BYTES * keep);
+-
+- if (keep == 0) {
+- rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+- kfree(new);
+- } else {
+- rcu_assign_pointer(keyring->payload.subscriptions, new);
+- }
+-
+- up_write(&keyring->sem);
+-
+- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+- kleave(" [yes]");
+- return;
+-
+-discard_new:
+- new->nkeys = keep;
+- keyring_clear_rcu_disposal(&new->rcu);
++ assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
++ gc_iterator, &limit);
+ up_write(&keyring->sem);
+- kleave(" [discard]");
+- return;
+
+-just_return:
+- up_write(&keyring->sem);
+- kleave(" [no dead]");
+- return;
+-
+-no_klist:
+- up_write(&keyring->sem);
+- kleave(" [no_klist]");
+- return;
+-
+-nomem:
+- up_write(&keyring->sem);
+- kleave(" [oom]");
++ kleave("");
+ }
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index ab75df4..df94827 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -351,7 +351,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ struct key_user *user,
+ struct key **_key)
+ {
+- unsigned long prealloc;
++ struct assoc_array_edit *edit;
+ struct key *key;
+ key_perm_t perm;
+ key_ref_t key_ref;
+@@ -380,7 +380,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+
+ if (dest_keyring) {
+- ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
++ ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
+ if (ret < 0)
+ goto link_prealloc_failed;
+ }
+@@ -395,11 +395,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ goto key_already_present;
+
+ if (dest_keyring)
+- __key_link(dest_keyring, key, &prealloc);
++ __key_link(key, &edit);
+
+ mutex_unlock(&key_construction_mutex);
+ if (dest_keyring)
+- __key_link_end(dest_keyring, &ctx->index_key, prealloc);
++ __key_link_end(dest_keyring, &ctx->index_key, edit);
+ mutex_unlock(&user->cons_lock);
+ *_key = key;
+ kleave(" = 0 [%d]", key_serial(key));
+@@ -414,8 +414,8 @@ key_already_present:
+ if (dest_keyring) {
+ ret = __key_link_check_live_key(dest_keyring, key);
+ if (ret == 0)
+- __key_link(dest_keyring, key, &prealloc);
+- __key_link_end(dest_keyring, &ctx->index_key, prealloc);
++ __key_link(key, &edit);
++ __key_link_end(dest_keyring, &ctx->index_key, edit);
+ if (ret < 0)
+ goto link_check_failed;
+ }
+--
+1.8.3.1
+
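[editor's note: taken together, the patch above replaces the old flat keyring payload - capped at roughly a page of links by the removed MAX_KEYRING_LINKS macro - with an associative array, which is what krb5 needs to hold many tickets per keyring. A userspace smoke test of the new capacity using libkeyutils (link with -lkeyutils; the count of 1000 is arbitrary, and the per-user quota in /proc/sys/kernel/keys/maxkeys may need raising first):

    #include <stdio.h>
    #include <string.h>
    #include <keyutils.h>

    int main(void)
    {
        key_serial_t ring, key;
        char desc[32], payload[16];

        ring = add_key("keyring", "bigring", NULL, 0,
                       KEY_SPEC_SESSION_KEYRING);
        if (ring < 0) {
            perror("add_key(keyring)");
            return 1;
        }

        /* The old code topped out around a page's worth of links; the
         * assoc_array version should take this in stride, subject to
         * the per-user key quota. */
        for (int i = 0; i < 1000; i++) {
            snprintf(desc, sizeof(desc), "tkt%d", i);
            snprintf(payload, sizeof(payload), "blob%d", i);
            key = add_key("user", desc, payload, strlen(payload), ring);
            if (key < 0) {
                perror("add_key(user)");
                return 1;
            }
        }
        printf("linked 1000 keys into keyring %d\n", ring);
        return 0;
    }
]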
diff --git a/keys-krb-support.patch b/keys-krb-support.patch
new file mode 100644
index 000000000..07a909daa
--- /dev/null
+++ b/keys-krb-support.patch
@@ -0,0 +1,747 @@
+From 64160c504842a359801cff17464931fa028ff164 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 1/2] KEYS: Implement a big key type that can save to tmpfs
+
+Implement a big key type that can save its contents to tmpfs and thus
+swapspace when memory is tight. This is useful for Kerberos ticket caches.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Simo Sorce <simo@redhat.com>
+---
+ include/keys/big_key-type.h | 25 ++++++
+ include/linux/key.h | 1 +
+ security/keys/Kconfig | 11 +++
+ security/keys/Makefile | 1 +
+ security/keys/big_key.c | 204 ++++++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 242 insertions(+)
+ create mode 100644 include/keys/big_key-type.h
+ create mode 100644 security/keys/big_key.c
+
+diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
+new file mode 100644
+index 0000000..d69bc8a
+--- /dev/null
++++ b/include/keys/big_key-type.h
+@@ -0,0 +1,25 @@
++/* Big capacity key type.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#ifndef _KEYS_BIG_KEY_TYPE_H
++#define _KEYS_BIG_KEY_TYPE_H
++
++#include <linux/key-type.h>
++
++extern struct key_type key_type_big_key;
++
++extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
++extern void big_key_revoke(struct key *key);
++extern void big_key_destroy(struct key *key);
++extern void big_key_describe(const struct key *big_key, struct seq_file *m);
++extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
++
++#endif /* _KEYS_BIG_KEY_TYPE_H */
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 2417f78..010dbb6 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -201,6 +201,7 @@ struct key {
+ unsigned long value;
+ void __rcu *rcudata;
+ void *data;
++ void *data2[2];
+ } payload;
+ struct assoc_array keys;
+ };
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index 15e0dfe..b563622 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -20,6 +20,17 @@ config KEYS
+
+ If you are unsure as to whether this is required, answer N.
+
++config BIG_KEYS
++ tristate "Large payload keys"
++ depends on KEYS
++ depends on TMPFS
++ help
++ This option provides support for holding large keys within the kernel
++ (for example Kerberos ticket caches). The data may be stored out to
++ swapspace by tmpfs.
++
++ If you are unsure as to whether this is required, answer N.
++
+ config TRUSTED_KEYS
+ tristate "TRUSTED KEYS"
+ depends on KEYS && TCG_TPM
+diff --git a/security/keys/Makefile b/security/keys/Makefile
+index 504aaa0..c487c77 100644
+--- a/security/keys/Makefile
++++ b/security/keys/Makefile
+@@ -22,5 +22,6 @@ obj-$(CONFIG_SYSCTL) += sysctl.o
+ #
+ # Key types
+ #
++obj-$(CONFIG_BIG_KEYS) += big_key.o
+ obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
+ obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
+diff --git a/security/keys/big_key.c b/security/keys/big_key.c
+new file mode 100644
+index 0000000..5f9defc
+--- /dev/null
++++ b/security/keys/big_key.c
+@@ -0,0 +1,204 @@
++/* Large capacity key type
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/seq_file.h>
++#include <linux/file.h>
++#include <linux/shmem_fs.h>
++#include <linux/err.h>
++#include <keys/user-type.h>
++#include <keys/big_key-type.h>
++
++MODULE_LICENSE("GPL");
++
++/*
++ * If the data is under this limit, there's no point creating a shm file to
++ * hold it as the permanently resident metadata for the shmem fs will be at
++ * least as large as the data.
++ */
++#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
++
++/*
++ * big_key defined keys take an arbitrary string as the description and an
++ * arbitrary blob of data as the payload
++ */
++struct key_type key_type_big_key = {
++ .name = "big_key",
++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
++ .instantiate = big_key_instantiate,
++ .match = user_match,
++ .revoke = big_key_revoke,
++ .destroy = big_key_destroy,
++ .describe = big_key_describe,
++ .read = big_key_read,
++};
++
++/*
++ * Instantiate a big key
++ */
++int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
++{
++ struct path *path = (struct path *)&key->payload.data2;
++ struct file *file;
++ ssize_t written;
++ size_t datalen = prep->datalen;
++ int ret;
++
++ ret = -EINVAL;
++ if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
++ goto error;
++
++ /* Set an arbitrary quota */
++ ret = key_payload_reserve(key, 16);
++ if (ret < 0)
++ goto error;
++
++ key->type_data.x[1] = datalen;
++
++ if (datalen > BIG_KEY_FILE_THRESHOLD) {
++ /* Create a shmem file to store the data in. This will permit the data
++ * to be swapped out if needed.
++ *
++ * TODO: Encrypt the stored data with a temporary key.
++ */
++ file = shmem_file_setup("", datalen, 0);
++ if (IS_ERR(file))
++ goto err_quota;
++
++ written = kernel_write(file, prep->data, prep->datalen, 0);
++ if (written != datalen) {
++ if (written >= 0)
++ ret = -ENOMEM;
++ goto err_fput;
++ }
++
++ /* Pin the mount and dentry to the key so that we can open it again
++ * later
++ */
++ *path = file->f_path;
++ path_get(path);
++ fput(file);
++ } else {
++ /* Just store the data in a buffer */
++ void *data = kmalloc(datalen, GFP_KERNEL);
++ if (!data) {
++ ret = -ENOMEM;
++ goto err_quota;
++ }
++
++ key->payload.data = memcpy(data, prep->data, prep->datalen);
++ }
++ return 0;
++
++err_fput:
++ fput(file);
++err_quota:
++ key_payload_reserve(key, 0);
++error:
++ return ret;
++}
++
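[editor's note: the instantiate path above makes a size-based decision - payloads big enough to outweigh their own shmem metadata go to a tmpfs-backed file, where they can be paged out to swap, while small ones stay in a kmalloc buffer. A rough userspace analogue of the "spill large blobs to a swappable anonymous file" half, using memfd_create; that call is a Linux 3.17+/glibc 2.27+ assumption and an anachronism relative to this 2013 patch, so purely illustrative:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Store a blob in an anonymous tmpfs file, mirroring big_key's use
     * of shmem_file_setup(): the file's pages are swappable, where a
     * kmalloc'd kernel buffer is not. Returns an fd or -1. */
    static int spill_blob(const void *data, size_t len)
    {
        int fd = memfd_create("blob", MFD_CLOEXEC);

        if (fd < 0)
            return -1;
        if (write(fd, data, len) != (ssize_t)len) {
            close(fd);
            return -1;
        }
        return fd;
    }

    int main(void)
    {
        const char ticket[] = "pretend this is a large krb5 ccache";
        int fd = spill_blob(ticket, sizeof(ticket));

        if (fd < 0) {
            perror("spill_blob");
            return 1;
        }
        printf("blob stored in memfd %d\n", fd);
        close(fd);
        return 0;
    }
]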
++/*
++ * dispose of the data backing a revoked big_key
++ * - called with the key sem write-locked
++ */
++void big_key_revoke(struct key *key)
++{
++ struct path *path = (struct path *)&key->payload.data2;
++
++ /* clear the quota */
++ key_payload_reserve(key, 0);
++ if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
++ vfs_truncate(path, 0);
++}
++
++/*
++ * dispose of the data dangling from the corpse of a big_key key
++ */
++void big_key_destroy(struct key *key)
++{
++ if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) {
++ struct path *path = (struct path *)&key->payload.data2;
++ path_put(path);
++ path->mnt = NULL;
++ path->dentry = NULL;
++ } else {
++ kfree(key->payload.data);
++ key->payload.data = NULL;
++ }
++}
++
++/*
++ * describe the big_key key
++ */
++void big_key_describe(const struct key *key, struct seq_file *m)
++{
++ unsigned long datalen = key->type_data.x[1];
++
++ seq_puts(m, key->description);
++
++ if (key_is_instantiated(key))
++ seq_printf(m, ": %lu [%s]",
++ datalen,
++ datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
++}
++
++/*
++ * read the key data
++ * - the key's semaphore is read-locked
++ */
++long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
++{
++ unsigned long datalen = key->type_data.x[1];
++ long ret;
++
++ if (!buffer || buflen < datalen)
++ return datalen;
++
++ if (datalen > BIG_KEY_FILE_THRESHOLD) {
++ struct path *path = (struct path *)&key->payload.data2;
++ struct file *file;
++ loff_t pos;
++
++ file = dentry_open(path, O_RDONLY, current_cred());
++ if (IS_ERR(file))
++ return PTR_ERR(file);
++
++ pos = 0;
++ ret = vfs_read(file, buffer, datalen, &pos);
++ fput(file);
++ if (ret >= 0 && ret != datalen)
++ ret = -EIO;
++ } else {
++ ret = datalen;
++ if (copy_to_user(buffer, key->payload.data, datalen) != 0)
++ ret = -EFAULT;
++ }
++
++ return ret;
++}
++
++/*
++ * Module stuff
++ */
++static int __init big_key_init(void)
++{
++ return register_key_type(&key_type_big_key);
++}
++
++static void __exit big_key_cleanup(void)
++{
++ unregister_key_type(&key_type_big_key);
++}
++
++module_init(big_key_init);
++module_exit(big_key_cleanup);
+--
+1.8.3.1
+
+
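[editor's note: once this patch is applied and CONFIG_BIG_KEYS is enabled, the type is exercised from userspace like any other key type; only the type name is new. A sketch storing and reading back a blob with libkeyutils (link with -lkeyutils); the 4 KiB size is chosen to exceed BIG_KEY_FILE_THRESHOLD so the shmem path is taken:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <keyutils.h>

    int main(void)
    {
        char blob[4096];            /* > threshold: stored in a shmem file */
        key_serial_t key;
        void *readback;
        long n;

        memset(blob, 'K', sizeof(blob));
        key = add_key("big_key", "tkt785", blob, sizeof(blob),
                      KEY_SPEC_SESSION_KEYRING);
        if (key < 0) {
            perror("add_key(big_key)");  /* e.g. ENODEV if type not built */
            return 1;
        }

        n = keyctl_read_alloc(key, &readback);
        if (n < 0) {
            perror("keyctl_read_alloc");
            return 1;
        }
        printf("stored and read back %ld bytes\n", n);
        free(readback);
        return 0;
    }
]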
+From b1e5b74e060add16de8d6005802644fa1700167f Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 2/2] KEYS: Add per-user_namespace registers for persistent
+ per-UID kerberos caches
+
+Add support for per-user_namespace registers of persistent per-UID kerberos
+caches held within the kernel.
+
+This allows the kerberos cache to be retained beyond the life of all a user's
+processes so that the user's cron jobs can work.
+
+The kerberos cache is envisioned as a keyring/key tree looking something like:
+
+ struct user_namespace
+ \___ .krb_cache keyring - The register
+ \___ _krb.0 keyring - Root's Kerberos cache
+ \___ _krb.5000 keyring - User 5000's Kerberos cache
+ \___ _krb.5001 keyring - User 5001's Kerberos cache
+ \___ tkt785 big_key - A ccache blob
+ \___ tkt12345 big_key - Another ccache blob
+
+Or possibly:
+
+ struct user_namespace
+ \___ .krb_cache keyring - The register
+ \___ _krb.0 keyring - Root's Kerberos cache
+ \___ _krb.5000 keyring - User 5000's Kerberos cache
+ \___ _krb.5001 keyring - User 5001's Kerberos cache
+ \___ tkt785 keyring - A ccache
+ \___ krbtgt/REDHAT.COM@REDHAT.COM big_key
+ \___ http/REDHAT.COM@REDHAT.COM user
+ \___ afs/REDHAT.COM@REDHAT.COM user
+ \___ nfs/REDHAT.COM@REDHAT.COM user
+ \___ krbtgt/KERNEL.ORG@KERNEL.ORG big_key
+ \___ http/KERNEL.ORG@KERNEL.ORG big_key
+
+What goes into a particular Kerberos cache is entirely up to userspace. Kernel
+support is limited to giving you the Kerberos cache keyring that you want.
+
+The user asks for their Kerberos cache via the new KEYCTL_GET_PERSISTENT
+operation:
+
+	krb_cache = keyctl(KEYCTL_GET_PERSISTENT, uid, dest_keyring);
+
+The uid is -1 (or the caller's own UID) to get the caller's own cache, or the
+UID of some other user to get that user's cache (the latter requires
+CAP_SETUID). This permits rpc.gssd or similar daemons to manage another
+user's cache.
+
+The cache returned is a keyring named "_persistent.<uid>" that the possessor
+can read, search, clear, invalidate, unlink from and add links to. Active LSMs
+get a chance to rule on whether the caller is permitted to make a link.
+
+Each uid's cache keyring is created when it is first accessed and is given a
+timeout that is extended each time the keyring is requested, so that an unused
+keyring goes away after a while. The timeout is configurable by sysctl and
+defaults to three days.
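+
+For example, shortening the expiry to one day (the sysctl sits under
+/proc/sys/kernel/keys/ alongside the existing key sysctls; the value is in
+seconds):
+
+	echo 86400 >/proc/sys/kernel/keys/persistent_keyring_expiry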
+
+Each user_namespace struct gets a lazily-created keyring that serves as the
+register. The cache keyrings are added to it. This means that standard key
+search and garbage collection facilities are available.
+
+The user_namespace struct's register goes away when the namespace does, and
+anything left in it is then automatically gc'd.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Simo Sorce <simo@redhat.com>
+cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+cc: Eric W. Biederman <ebiederm@xmission.com>
+---
+ include/linux/user_namespace.h | 6 ++
+ include/uapi/linux/keyctl.h | 1 +
+ kernel/user.c | 4 +
+ kernel/user_namespace.c | 6 ++
+ security/keys/Kconfig | 17 +++++
+ security/keys/Makefile | 1 +
+ security/keys/compat.c | 3 +
+ security/keys/internal.h | 9 +++
+ security/keys/keyctl.c | 3 +
+ security/keys/persistent.c | 169 +++++++++++++++++++++++++++++++++++++++++
+ security/keys/sysctl.c | 11 +++
+ 11 files changed, 230 insertions(+)
+ create mode 100644 security/keys/persistent.c
+
+diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
+index b6b215f..cf21958 100644
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -28,6 +28,12 @@ struct user_namespace {
+ unsigned int proc_inum;
+ bool may_mount_sysfs;
+ bool may_mount_proc;
++
++ /* Register of per-UID persistent keyrings for this namespace */
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++ struct key *persistent_keyring_register;
++ struct rw_semaphore persistent_keyring_register_sem;
++#endif
+ };
+
+ extern struct user_namespace init_user_ns;
+diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
+index c9b7f4fa..840cb99 100644
+--- a/include/uapi/linux/keyctl.h
++++ b/include/uapi/linux/keyctl.h
+@@ -56,5 +56,6 @@
+ #define KEYCTL_REJECT 19 /* reject a partially constructed key */
+ #define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */
+ #define KEYCTL_INVALIDATE 21 /* invalidate a key */
++#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */
+
+ #endif /* _LINUX_KEYCTL_H */
+diff --git a/kernel/user.c b/kernel/user.c
+index 69b4c3d..6c9e1b9 100644
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -53,6 +53,10 @@ struct user_namespace init_user_ns = {
+ .proc_inum = PROC_USER_INIT_INO,
+ .may_mount_sysfs = true,
+ .may_mount_proc = true,
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	.persistent_keyring_register_sem =
++	__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
++#endif
+ };
+ EXPORT_SYMBOL_GPL(init_user_ns);
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index d8c30db..ef7985e 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -99,6 +99,9 @@ int create_user_ns(struct cred *new)
+
+ update_mnt_policy(ns);
+
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	init_rwsem(&ns->persistent_keyring_register_sem);
++#endif
+ return 0;
+ }
+
+@@ -123,6 +126,9 @@ void free_user_ns(struct user_namespace *ns)
+
+ do {
+ parent = ns->parent;
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++ key_put(ns->persistent_keyring_register);
++#endif
+ proc_free_inum(ns->proc_inum);
+ kmem_cache_free(user_ns_cachep, ns);
+ ns = parent;
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index b563622..53d8748 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -20,6 +20,23 @@ config KEYS
+
+ If you are unsure as to whether this is required, answer N.
+
++config PERSISTENT_KEYRINGS
++ bool "Enable register of persistent per-UID keyrings"
++ depends on KEYS
++ help
++ This option provides a register of persistent per-UID keyrings,
++ primarily aimed at Kerberos key storage. The keyrings are persistent
++ in the sense that they stay around after all processes of that UID
++ have exited, not that they survive the machine being rebooted.
++
++	  A particular keyring may be accessed either by the user whose
++	  keyring it is or by a process with administrative privileges. The
++	  active LSMs get to rule on which admin-level processes may access
++	  the cache.
++
++ Keyrings are created and added into the register upon demand and get
++ removed if they expire (a default timeout is set upon creation).
++
+ config BIG_KEYS
+ tristate "Large payload keys"
+ depends on KEYS
+diff --git a/security/keys/Makefile b/security/keys/Makefile
+index c487c77..dfb3a7b 100644
+--- a/security/keys/Makefile
++++ b/security/keys/Makefile
+@@ -18,6 +18,7 @@ obj-y := \
+ obj-$(CONFIG_KEYS_COMPAT) += compat.o
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_SYSCTL) += sysctl.o
++obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
+
+ #
+ # Key types
+diff --git a/security/keys/compat.c b/security/keys/compat.c
+index d65fa7f..bbd32c7 100644
+--- a/security/keys/compat.c
++++ b/security/keys/compat.c
+@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
+ case KEYCTL_INVALIDATE:
+ return keyctl_invalidate_key(arg2);
+
++ case KEYCTL_GET_PERSISTENT:
++ return keyctl_get_persistent(arg2, arg3);
++
+ default:
+ return -EOPNOTSUPP;
+ }
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 581c6f6..80b2aac 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -255,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
+ extern long keyctl_instantiate_key_common(key_serial_t,
+ const struct iovec *,
+ unsigned, size_t, key_serial_t);
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++extern long keyctl_get_persistent(uid_t, key_serial_t);
++extern unsigned persistent_keyring_expiry;
++#else
++static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
++{
++ return -EOPNOTSUPP;
++}
++#endif
+
+ /*
+ * Debugging key validation
+diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
+index 33cfd27..cee72ce 100644
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ case KEYCTL_INVALIDATE:
+ return keyctl_invalidate_key((key_serial_t) arg2);
+
++ case KEYCTL_GET_PERSISTENT:
++ return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
++
+ default:
+ return -EOPNOTSUPP;
+ }
+diff --git a/security/keys/persistent.c b/security/keys/persistent.c
+new file mode 100644
+index 0000000..631a022
+--- /dev/null
++++ b/security/keys/persistent.c
+@@ -0,0 +1,169 @@
++/* General persistent per-UID keyrings register
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells@redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#include <linux/user_namespace.h>
++#include "internal.h"
++
++unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
++
++/*
++ * Create the persistent keyring register for the current user namespace.
++ *
++ * Called with the namespace's sem locked for writing.
++ */
++static int key_create_persistent_register(struct user_namespace *ns)
++{
++ struct key *reg = keyring_alloc(".persistent_register",
++ KUIDT_INIT(0), KGIDT_INIT(0),
++ current_cred(),
++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
++ KEY_USR_VIEW | KEY_USR_READ),
++ KEY_ALLOC_NOT_IN_QUOTA, NULL);
++ if (IS_ERR(reg))
++ return PTR_ERR(reg);
++
++ ns->persistent_keyring_register = reg;
++ return 0;
++}
++
++/*
++ * Create the persistent keyring for the specified user.
++ *
++ * Called with the namespace's sem locked for writing.
++ */
++static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
++ struct keyring_index_key *index_key)
++{
++ struct key *persistent;
++ key_ref_t reg_ref, persistent_ref;
++
++ if (!ns->persistent_keyring_register) {
++ long err = key_create_persistent_register(ns);
++ if (err < 0)
++ return ERR_PTR(err);
++ } else {
++ reg_ref = make_key_ref(ns->persistent_keyring_register, true);
++ persistent_ref = find_key_to_update(reg_ref, index_key);
++ if (persistent_ref)
++ return persistent_ref;
++ }
++
++ persistent = keyring_alloc(index_key->description,
++ uid, INVALID_GID, current_cred(),
++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
++ KEY_USR_VIEW | KEY_USR_READ),
++ KEY_ALLOC_NOT_IN_QUOTA,
++ ns->persistent_keyring_register);
++ if (IS_ERR(persistent))
++ return ERR_CAST(persistent);
++
++ return make_key_ref(persistent, true);
++}
++
++/*
++ * Get the persistent keyring for a specific UID and link it to the nominated
++ * keyring.
++ */
++static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
++ key_ref_t dest_ref)
++{
++ struct keyring_index_key index_key;
++ struct key *persistent;
++ key_ref_t reg_ref, persistent_ref;
++ char buf[32];
++ long ret;
++
++ /* Look in the register if it exists */
++ index_key.type = &key_type_keyring;
++ index_key.description = buf;
++ index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
++
++ if (ns->persistent_keyring_register) {
++ reg_ref = make_key_ref(ns->persistent_keyring_register, true);
++ down_read(&ns->persistent_keyring_register_sem);
++ persistent_ref = find_key_to_update(reg_ref, &index_key);
++ up_read(&ns->persistent_keyring_register_sem);
++
++ if (persistent_ref)
++ goto found;
++ }
++
++ /* It wasn't in the register, so we'll need to create it. We might
++ * also need to create the register.
++ */
++ down_write(&ns->persistent_keyring_register_sem);
++ persistent_ref = key_create_persistent(ns, uid, &index_key);
++ up_write(&ns->persistent_keyring_register_sem);
++ if (!IS_ERR(persistent_ref))
++ goto found;
++
++ return PTR_ERR(persistent_ref);
++
++found:
++ ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
++ if (ret == 0) {
++ persistent = key_ref_to_ptr(persistent_ref);
++ ret = key_link(key_ref_to_ptr(dest_ref), persistent);
++ if (ret == 0) {
++ key_set_timeout(persistent, persistent_keyring_expiry);
++ ret = persistent->serial;
++ }
++ }
++
++ key_ref_put(persistent_ref);
++ return ret;
++}
++
++/*
++ * Get the persistent keyring for a specific UID and link it to the nominated
++ * keyring.
++ */
++long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
++{
++ struct user_namespace *ns = current_user_ns();
++ key_ref_t dest_ref;
++ kuid_t uid;
++ long ret;
++
++ /* -1 indicates the current user */
++ if (_uid == (uid_t)-1) {
++ uid = current_uid();
++ } else {
++ uid = make_kuid(ns, _uid);
++ if (!uid_valid(uid))
++ return -EINVAL;
++
++ /* You can only see your own persistent cache if you're not
++ * sufficiently privileged.
++ */
++		if (!uid_eq(uid, current_uid()) &&
++		    !uid_eq(uid, current_suid()) &&
++		    !uid_eq(uid, current_euid()) &&
++		    !uid_eq(uid, current_fsuid()) &&
++		    !ns_capable(ns, CAP_SETUID))
++ return -EPERM;
++ }
++
++ /* There must be a destination keyring */
++ dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
++ if (IS_ERR(dest_ref))
++ return PTR_ERR(dest_ref);
++ if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
++ ret = -ENOTDIR;
++ goto out_put_dest;
++ }
++
++ ret = key_get_persistent(ns, uid, dest_ref);
++
++out_put_dest:
++ key_ref_put(dest_ref);
++ return ret;
++}
+diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
+index ee32d18..8c0af08 100644
+--- a/security/keys/sysctl.c
++++ b/security/keys/sysctl.c
+@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
+ .extra1 = (void *) &zero,
+ .extra2 = (void *) &max,
+ },
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++ {
++ .procname = "persistent_keyring_expiry",
++ .data = &persistent_keyring_expiry,
++ .maxlen = sizeof(unsigned),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = (void *) &zero,
++ .extra2 = (void *) &max,
++ },
++#endif
+ { }
+ };
+--
+1.8.3.1
+
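+Similarly, a sketch of the new persistent-keyring operation from userspace
+(illustrative only; if the installed keyutils.h predates this interface, the
+command value is taken from the uapi header change above):
+
+	#include <keyutils.h>
+	#include <stdio.h>
+	#include <sys/types.h>
+
+	#ifndef KEYCTL_GET_PERSISTENT
+	#define KEYCTL_GET_PERSISTENT 22
+	#endif
+
+	int main(void)
+	{
+		/* uid -1 means the caller's own persistent keyring; link
+		 * it into the session keyring so that it can be searched.
+		 */
+		long id = keyctl(KEYCTL_GET_PERSISTENT, (uid_t)-1,
+				 KEY_SPEC_SESSION_KEYRING);
+		if (id < 0) {
+			perror("keyctl(KEYCTL_GET_PERSISTENT)");
+			return 1;
+		}
+		printf("persistent keyring: %ld\n", id);
+		return 0;
+	}
+
+Each call also refreshes the keyring's expiry timeout, as implemented in
+key_get_persistent() above.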