From 2480f57fb3023eb047c5f2d6dfefef41ab9b893c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Dec 2013 11:24:18 +0000 Subject: [PATCH 1/7] KEYS: Pre-clear struct key on allocation The second word of key->payload does not get initialised in key_alloc(), but the big_key type is relying on it having been cleared. The problem comes when big_key fails to instantiate a large key and doesn't then set the payload. The big_key_destroy() op is called from the garbage collector and this assumes that the dentry pointer stored in the second word will be NULL if instantiation did not complete. Therefore just pre-clear the entire struct key on allocation rather than trying to be clever and only initialising to 0 only those bits that aren't otherwise initialised. The lack of initialisation can lead to a bug report like the following if big_key failed to initialise its file: general protection fault: 0000 [#1] SMP Modules linked in: ... CPU: 0 PID: 51 Comm: kworker/0:1 Not tainted 3.10.0-53.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge 1955/0HC513, BIOS 1.4.4 12/09/2008 Workqueue: events key_garbage_collector task: ffff8801294f5680 ti: ffff8801296e2000 task.ti: ffff8801296e2000 RIP: 0010:[] dput+0x21/0x2d0 ... Call Trace: [] path_put+0x16/0x30 [] big_key_destroy+0x44/0x60 [] key_gc_unused_keys.constprop.2+0x5b/0xe0 [] key_garbage_collector+0x1df/0x3c0 [] process_one_work+0x17b/0x460 [] worker_thread+0x11b/0x400 [] ? rescuer_thread+0x3e0/0x3e0 [] kthread+0xc0/0xd0 [] ? kthread_create_on_node+0x110/0x110 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x110/0x110 Reported-by: Patrik Kis Signed-off-by: David Howells Reviewed-by: Stephen Gallagher --- security/keys/key.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/security/keys/key.c b/security/keys/key.c index 55d110f0aced..6e21c11e48bc 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -272,7 +272,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, } /* allocate and initialise the key and its description */ - key = kmem_cache_alloc(key_jar, GFP_KERNEL); + key = kmem_cache_zalloc(key_jar, GFP_KERNEL); if (!key) goto no_memory_2; @@ -293,18 +293,12 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->uid = uid; key->gid = gid; key->perm = perm; - key->flags = 0; - key->expiry = 0; - key->payload.data = NULL; - key->security = NULL; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_TRUSTED) key->flags |= 1 << KEY_FLAG_TRUSTED; - memset(&key->type_data, 0, sizeof(key->type_data)); - #ifdef KEY_DEBUGGING key->magic = KEY_DEBUG_MAGIC; #endif From d54e58b7f01552b0eb7d445f4b58de4499ad5ea6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Dec 2013 11:24:18 +0000 Subject: [PATCH 2/7] KEYS: Fix the keyring hash function The keyring hash function (used by the associative array) is supposed to clear the bottommost nibble of the index key (where the hash value resides) for keyrings and make sure it is non-zero for non-keyrings. This is done to make keyrings cluster together on one branch of the tree separately to other keys. Unfortunately, the wrong mask is used, so only the bottom two bits are examined and cleared and not the whole bottom nibble. This means that keys and keyrings can still be successfully searched for under most circumstances as the hash is consistent in its miscalculation, but if a keyring's associative array bottom node gets filled up then approx 75% of the keyrings will not be put into the 0 branch. The consequence of this is that a key in a keyring linked to by another keyring, ie. keyring A -> keyring B -> key may not be found if the search starts at keyring A and then descends into keyring B because search_nested_keyrings() only searches up the 0 branch (as it "knows" all keyrings must be there and not elsewhere in the tree). The fix is to use the right mask. This can be tested with: r=`keyctl newring sandbox @s` for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done for ((i=0; i<=16; i++)); do keyctl add user a$i a %:ring$i; done for ((i=0; i<=16; i++)); do keyctl search $r user a$i; done This creates a sandbox keyring, then creates 17 keyrings therein (labelled ring0..ring16). This causes the root node of the sandbox's associative array to overflow and for the tree to have extra nodes inserted. Each keyring then is given a user key (labelled aN for ringN) for us to search for. We then search for the user keys we added, starting from the sandbox. If working correctly, it should return the same ordered list of key IDs as for...keyctl add... did. Without this patch, it reports ENOKEY "Required key not available" for some of the keys. Just which keys get this depends as the kernel pointer to the key type forms part of the hash function. Reported-by: Nalin Dahyabhai Signed-off-by: David Howells Tested-by: Stephen Gallagher --- security/keys/keyring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 69f0cb7bab7e..0adbc77a59b9 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -160,7 +160,7 @@ static u64 mult_64x32_and_fold(u64 x, u32 y) static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) { const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; - const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK; + const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK; const char *description = index_key->description; unsigned long hash, type; u32 piece; @@ -194,10 +194,10 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde * ordinary keys by making sure the lowest level segment in the hash is * zero for keyrings and non-zero otherwise. */ - if (index_key->type != &key_type_keyring && (hash & level_mask) == 0) + if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0) return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; - if (index_key->type == &key_type_keyring && (hash & level_mask) != 0) - return (hash + (hash << level_shift)) & ~level_mask; + if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0) + return (hash + (hash << level_shift)) & ~fan_mask; return hash; } From 23fd78d76415729b338ff1802a0066b4a62f7fb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Dec 2013 11:24:18 +0000 Subject: [PATCH 3/7] KEYS: Fix multiple key add into associative array If sufficient keys (or keyrings) are added into a keyring such that a node in the associative array's tree overflows (each node has a capacity N, currently 16) and such that all N+1 keys have the same index key segment for that level of the tree (the level'th nibble of the index key), then assoc_array_insert() calls ops->diff_objects() to indicate at which bit position the two index keys vary. However, __key_link_begin() passes a NULL object to assoc_array_insert() with the intention of supplying the correct pointer later before we commit the change. This means that keyring_diff_objects() is given a NULL pointer as one of its arguments which it does not expect. This results in an oops like the attached. With the previous patch to fix the keyring hash function, this can be forced much more easily by creating a keyring and only adding keyrings to it. Add any other sort of key and a different insertion path is taken - all 16+1 objects must want to cluster in the same node slot. This can be tested by: r=`keyctl newring sandbox @s` for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done This should work fine, but oopses when the 17th keyring is added. Since ops->diff_objects() is always called with the first pointer pointing to the object to be inserted (ie. the NULL pointer), we can fix the problem by changing the to-be-inserted object pointer to point to the index key passed into assoc_array_insert() instead. Whilst we're at it, we also switch the arguments so that they are the same as for ->compare_object(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 IP: [] hash_key_type_and_desc+0x18/0xb0 ... RIP: 0010:[] hash_key_type_and_desc+0x18/0xb0 ... Call Trace: [] keyring_diff_objects+0x21/0xd2 [] assoc_array_insert+0x3b6/0x908 [] __key_link_begin+0x78/0xe5 [] key_create_or_update+0x17d/0x36a [] SyS_add_key+0x123/0x183 [] tracesys+0xdd/0xe2 Signed-off-by: David Howells Tested-by: Stephen Gallagher --- Documentation/assoc_array.txt | 6 +++--- include/linux/assoc_array.h | 6 +++--- lib/assoc_array.c | 4 ++-- security/keys/keyring.c | 7 +++---- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt index f4faec0f66e4..2f2c6cdd73c0 100644 --- a/Documentation/assoc_array.txt +++ b/Documentation/assoc_array.txt @@ -164,10 +164,10 @@ This points to a number of methods, all of which need to be provided: (4) Diff the index keys of two objects. - int (*diff_objects)(const void *a, const void *b); + int (*diff_objects)(const void *object, const void *index_key); - Return the bit position at which the index keys of two objects differ or - -1 if they are the same. + Return the bit position at which the index key of the specified object + differs from the given index key or -1 if they are the same. (5) Free an object. diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h index 9a193b84238a..a89df3be1686 100644 --- a/include/linux/assoc_array.h +++ b/include/linux/assoc_array.h @@ -41,10 +41,10 @@ struct assoc_array_ops { /* Is this the object we're looking for? */ bool (*compare_object)(const void *object, const void *index_key); - /* How different are two objects, to a bit position in their keys? (or - * -1 if they're the same) + /* How different is an object from an index key, to a bit position in + * their keys? (or -1 if they're the same) */ - int (*diff_objects)(const void *a, const void *b); + int (*diff_objects)(const void *object, const void *index_key); /* Method to free an object. */ void (*free_object)(void *object); diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 17edeaf19180..1b6a44f1ec3e 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -759,8 +759,8 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, pr_devel("all leaves cluster together\n"); diff = INT_MAX; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { - int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf), - assoc_array_ptr_to_leaf(node->slots[i])); + int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), + index_key); if (x < diff) { BUG_ON(x < 0); diff = x; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 0adbc77a59b9..3dd8445cd489 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -279,12 +279,11 @@ static bool keyring_compare_object(const void *object, const void *data) * Compare the index keys of a pair of objects and determine the bit position * at which they differ - if they differ. */ -static int keyring_diff_objects(const void *_a, const void *_b) +static int keyring_diff_objects(const void *object, const void *data) { - const struct key *key_a = keyring_ptr_to_key(_a); - const struct key *key_b = keyring_ptr_to_key(_b); + const struct key *key_a = keyring_ptr_to_key(object); const struct keyring_index_key *a = &key_a->index_key; - const struct keyring_index_key *b = &key_b->index_key; + const struct keyring_index_key *b = data; unsigned long seg_a, seg_b; int level, i; From 9c5e45df215b4788f7a41c983ce862d08a083c2d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Dec 2013 11:24:19 +0000 Subject: [PATCH 4/7] KEYS: Fix searching of nested keyrings If a keyring contains more than 16 keyrings (the capacity of a single node in the associative array) then those keyrings are split over multiple nodes arranged as a tree. If search_nested_keyrings() is called to search the keyring then it will attempt to manually walk over just the 0 branch of the associative array tree where all the keyring links are stored. This works provided the key is found before the algorithm steps from one node containing keyrings to a child node or if there are sufficiently few keyring links that the keyrings are all in one node. However, if the algorithm does need to step from a node to a child node, it doesn't change the node pointer unless a shortcut also gets transited. This means that the algorithm will keep scanning the same node over and over again without terminating and without returning. To fix this, move the internal-pointer-to-node translation from inside the shortcut transit handler so that it applies it to node arrival as well. This can be tested by: r=`keyctl newring sandbox @s` for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done for ((i=0; i<=16; i++)); do keyctl add user a$i a %:ring$i; done for ((i=0; i<=16; i++)); do keyctl search $r user a$i; done for ((i=17; i<=20; i++)); do keyctl search $r user a$i; done The searches should all complete successfully (or with an error for 17-20), but instead one or more of them will hang. Signed-off-by: David Howells Tested-by: Stephen Gallagher --- security/keys/keyring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 3dd8445cd489..d46cbc5e335e 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -690,8 +690,8 @@ static bool search_nested_keyrings(struct key *keyring, smp_read_barrier_depends(); ptr = ACCESS_ONCE(shortcut->next_node); BUG_ON(!assoc_array_ptr_is_node(ptr)); - node = assoc_array_ptr_to_node(ptr); } + node = assoc_array_ptr_to_node(ptr); begin_node: kdebug("begin_node"); From c7277090927a5e71871e799a355ed2940f6c8fc6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 2 Dec 2013 11:24:19 +0000 Subject: [PATCH 5/7] security: shmem: implement kernel private shmem inodes We have a problem where the big_key key storage implementation uses a shmem backed inode to hold the key contents. Because of this detail of implementation LSM checks are being done between processes trying to read the keys and the tmpfs backed inode. The LSM checks are already being handled on the key interface level and should not be enforced at the inode level (since the inode is an implementation detail, not a part of the security model) This patch implements a new function shmem_kernel_file_setup() which returns the equivalent to shmem_file_setup() only the underlying inode has S_PRIVATE set. This means that all LSM checks for the inode in question are skipped. It should only be used for kernel internal operations where the inode is not exposed to userspace without proper LSM checking. It is possible that some other users of shmem_file_setup() should use the new interface, but this has not been explored. Reproducing this bug is a little bit difficult. The steps I used on Fedora are: (1) Turn off selinux enforcing: setenforce 0 (2) Create a huge key k=`dd if=/dev/zero bs=8192 count=1 | keyctl padd big_key test-key @s` (3) Access the key in another context: runcon system_u:system_r:httpd_t:s0-s0:c0.c1023 keyctl print $k >/dev/null (4) Examine the audit logs: ausearch -m AVC -i --subject httpd_t | audit2allow If the last command's output includes a line that looks like: allow httpd_t user_tmpfs_t:file { open read }; There was an inode check between httpd and the tmpfs filesystem. With this patch no such denial will be seen. (NOTE! you should clear your audit log if you have tested for this previously) (Please return you box to enforcing) Signed-off-by: Eric Paris Signed-off-by: David Howells cc: Hugh Dickins cc: linux-mm@kvack.org --- include/linux/shmem_fs.h | 2 ++ mm/shmem.c | 36 +++++++++++++++++++++++++++++------- security/keys/big_key.c | 2 +- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 30aa0dc60d75..9d55438bc4ad 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -47,6 +47,8 @@ extern int shmem_init(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); +extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, + unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); extern void shmem_unlock_mapping(struct address_space *mapping); diff --git a/mm/shmem.c b/mm/shmem.c index 8297623fcaed..902a14842b74 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2918,13 +2918,8 @@ static struct dentry_operations anon_ops = { .d_dname = simple_dname }; -/** - * shmem_file_setup - get an unlinked file living in tmpfs - * @name: name for dentry (to be seen in /proc//maps - * @size: size to be set for the file - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size - */ -struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) +static struct file *__shmem_file_setup(const char *name, loff_t size, + unsigned long flags, unsigned int i_flags) { struct file *res; struct inode *inode; @@ -2957,6 +2952,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags if (!inode) goto put_dentry; + inode->i_flags |= i_flags; d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); /* It is unlinked */ @@ -2977,6 +2973,32 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags shmem_unacct_size(flags, size); return res; } + +/** + * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be + * kernel internal. There will be NO LSM permission checks against the + * underlying inode. So users of this interface must do LSM checks at a + * higher layer. The one user is the big_key implementation. LSM checks + * are provided at the key level rather than the inode level. + * @name: name for dentry (to be seen in /proc//maps + * @size: size to be set for the file + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size + */ +struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) +{ + return __shmem_file_setup(name, size, flags, S_PRIVATE); +} + +/** + * shmem_file_setup - get an unlinked file living in tmpfs + * @name: name for dentry (to be seen in /proc//maps + * @size: size to be set for the file + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size + */ +struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) +{ + return __shmem_file_setup(name, size, flags, 0); +} EXPORT_SYMBOL_GPL(shmem_file_setup); /** diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 7f44c3207a9b..8137b27d641d 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -70,7 +70,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep) * * TODO: Encrypt the stored data with a temporary key. */ - file = shmem_file_setup("", datalen, 0); + file = shmem_kernel_file_setup("", datalen, 0); if (IS_ERR(file)) { ret = PTR_ERR(file); goto err_quota; From 7cfe5b3310a1b45f385ff18647bddb487a6c5525 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 10 Dec 2013 17:42:50 +1030 Subject: [PATCH 6/7] Ignore generated file kernel/x509_certificate_list $ git status # On branch pending-rebases # Untracked files: # (use "git add ..." to include in what will be committed) # # kernel/x509_certificate_list nothing added to commit but untracked files present (use "git add" to track) $ Signed-off-by: Rusty Russell Signed-off-by: David Howells --- kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/.gitignore b/kernel/.gitignore index b3097bde4e9c..790d83c7d160 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -5,3 +5,4 @@ config_data.h config_data.gz timeconst.h hz.bc +x509_certificate_list From 62226983da070f7e51068ec2e3a4da34672964c7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 14:48:22 +0100 Subject: [PATCH 7/7] KEYS: correct alignment of system_certificate_list content in assembly file Apart from data-type specific alignment constraints, there are also architecture-specific alignment requirements. For example, on s390 symbols must be on even addresses implying a 2-byte alignment. If the system_certificate_list_end symbol is on an odd address and if this address is loaded, the least-significant bit is ignored. As a result, the load_system_certificate_list() fails to load the certificates because of a wrong certificate length calculation. To be safe, align system_certificate_list on an 8-byte boundary. Also improve the length calculation of the system_certificate_list content. Introduce a system_certificate_list_size (8-byte aligned because of unsigned long) variable that stores the length. Let the linker calculate this size by introducing a start and end label for the certificate content. Signed-off-by: Hendrik Brueckner Signed-off-by: David Howells --- kernel/system_certificates.S | 14 ++++++++++++-- kernel/system_keyring.c | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S index 4aef390671cb..3e9868d47535 100644 --- a/kernel/system_certificates.S +++ b/kernel/system_certificates.S @@ -3,8 +3,18 @@ __INITRODATA + .align 8 .globl VMLINUX_SYMBOL(system_certificate_list) VMLINUX_SYMBOL(system_certificate_list): +__cert_list_start: .incbin "kernel/x509_certificate_list" - .globl VMLINUX_SYMBOL(system_certificate_list_end) -VMLINUX_SYMBOL(system_certificate_list_end): +__cert_list_end: + + .align 8 + .globl VMLINUX_SYMBOL(system_certificate_list_size) +VMLINUX_SYMBOL(system_certificate_list_size): +#ifdef CONFIG_64BIT + .quad __cert_list_end - __cert_list_start +#else + .long __cert_list_end - __cert_list_start +#endif diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c index 564dd93430a2..52ebc70263f4 100644 --- a/kernel/system_keyring.c +++ b/kernel/system_keyring.c @@ -22,7 +22,7 @@ struct key *system_trusted_keyring; EXPORT_SYMBOL_GPL(system_trusted_keyring); extern __initconst const u8 system_certificate_list[]; -extern __initconst const u8 system_certificate_list_end[]; +extern __initconst const unsigned long system_certificate_list_size; /* * Load the compiled-in keys @@ -60,8 +60,8 @@ static __init int load_system_certificate_list(void) pr_notice("Loading compiled-in X.509 certificates\n"); - end = system_certificate_list_end; p = system_certificate_list; + end = p + system_certificate_list_size; while (p < end) { /* Each cert begins with an ASN.1 SEQUENCE tag and must be more * than 256 bytes in size.