summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/Kconfig11
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/include/asm/processor.h10
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/processor.h10
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/processor.h8
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--drivers/misc/lkdtm.h4
-rw-r--r--drivers/misc/lkdtm_core.c4
-rw-r--r--drivers/misc/lkdtm_usercopy.c101
-rw-r--r--drivers/scsi/scsi_lib.c9
-rw-r--r--fs/befs/linuxvfs.c14
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/exofs/super.c7
-rw-r--r--fs/ext2/super.c12
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/fhandle.c3
-rw-r--r--fs/freevxfs/vxfs_super.c8
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/orangefs/super.c15
-rw-r--r--fs/ufs/super.c13
-rw-r--r--include/linux/sched/task.h14
-rw-r--r--include/linux/slab.h41
-rw-r--r--include/linux/slab_def.h3
-rw-r--r--include/linux/slub_def.h3
-rw-r--r--include/linux/stddef.h10
-rw-r--r--include/linux/uaccess.h8
-rw-r--r--include/net/sctp/structs.h9
-rw-r--r--include/net/sock.h2
-rw-r--r--kernel/fork.c31
-rw-r--r--mm/slab.c36
-rw-r--r--mm/slab.h8
-rw-r--r--mm/slab_common.c62
-rw-r--r--mm/slub.c49
-rw-r--r--mm/usercopy.c133
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/core/sock.c4
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/sctp/socket.c10
-rw-r--r--security/Kconfig14
-rw-r--r--tools/objtool/check.c1
-rw-r--r--virt/kvm/kvm_main.c8
45 files changed, 515 insertions, 215 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index d007b2a15b22..467dfa35bf96 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -245,6 +245,17 @@ config ARCH_TASK_STRUCT_ON_STACK
config ARCH_TASK_STRUCT_ALLOCATOR
bool
+config HAVE_ARCH_THREAD_STRUCT_WHITELIST
+ bool
+ depends on !ARCH_TASK_STRUCT_ALLOCATOR
+ help
+ An architecture should select this to provide hardened usercopy
+ knowledge about what region of the thread_struct should be
+ whitelisted for copying to userspace. Normally this is only the
+ FPU registers. Specifically, arch_thread_struct_whitelist()
+ should be implemented. Without this, the entire thread_struct
+ field in task_struct will be left whitelisted.
+
# Select if arch has its private alloc_thread_stack() function
config ARCH_THREAD_STACK_ALLOCATOR
bool
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4daddc53e7bc..7e3d53575486 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -51,6 +51,7 @@ config ARM
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 338cbe0a18ef..1bf65b47808a 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -45,6 +45,16 @@ struct thread_struct {
struct debug_info debug;
};
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = *size = 0;
+}
+
#define INIT_THREAD { }
#define start_thread(regs,pc,sp) \
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b2b95f79c746..53612879fe56 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -91,6 +91,7 @@ config ARM64
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index cee4ae25a5d1..6db43ebd648d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -113,6 +113,16 @@ struct thread_struct {
struct debug_info debug; /* debugging */
};
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = *size = 0;
+}
+
#ifdef CONFIG_COMPAT
#define task_user_tls(t) \
({ \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72d5149bcaa1..b0771ceabb4b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -116,6 +116,7 @@ config X86
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index efbde088a718..6207f2f740b6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -507,6 +507,14 @@ struct thread_struct {
*/
};
+/* Whitelist the FPU state from the task_struct for hardened usercopy. */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fpu.state);
+ *size = fpu_kernel_xstate_size;
+}
+
/*
* Thread-synchronous status.
*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c53298dfbf50..5040ff9de6ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4237,13 +4237,14 @@ set_identity_unlock:
mutex_unlock(&kvm->lock);
break;
case KVM_XEN_HVM_CONFIG: {
+ struct kvm_xen_hvm_config xhc;
r = -EFAULT;
- if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
- sizeof(struct kvm_xen_hvm_config)))
+ if (copy_from_user(&xhc, argp, sizeof(xhc)))
goto out;
r = -EINVAL;
- if (kvm->arch.xen_hvm_config.flags)
+ if (xhc.flags)
goto out;
+ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
r = 0;
break;
}
diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h
index 687a0dbbe199..9e513dcfd809 100644
--- a/drivers/misc/lkdtm.h
+++ b/drivers/misc/lkdtm.h
@@ -76,8 +76,8 @@ void __init lkdtm_usercopy_init(void);
void __exit lkdtm_usercopy_exit(void);
void lkdtm_USERCOPY_HEAP_SIZE_TO(void);
void lkdtm_USERCOPY_HEAP_SIZE_FROM(void);
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void);
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void);
void lkdtm_USERCOPY_STACK_FRAME_TO(void);
void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
void lkdtm_USERCOPY_STACK_BEYOND(void);
diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c
index 4942da93d066..2154d1bfd18b 100644
--- a/drivers/misc/lkdtm_core.c
+++ b/drivers/misc/lkdtm_core.c
@@ -177,8 +177,8 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(ATOMIC_TIMING),
CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
- CRASHTYPE(USERCOPY_HEAP_FLAG_TO),
- CRASHTYPE(USERCOPY_HEAP_FLAG_FROM),
+ CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
+ CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
CRASHTYPE(USERCOPY_STACK_FRAME_TO),
CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
CRASHTYPE(USERCOPY_STACK_BEYOND),
diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c
index a64372cc148d..9725aed305bb 100644
--- a/drivers/misc/lkdtm_usercopy.c
+++ b/drivers/misc/lkdtm_usercopy.c
@@ -20,7 +20,7 @@
*/
static volatile size_t unconst = 0;
static volatile size_t cache_size = 1024;
-static struct kmem_cache *bad_cache;
+static struct kmem_cache *whitelist_cache;
static const unsigned char test_text[] = "This is a test.\n";
@@ -115,10 +115,16 @@ free_user:
vm_munmap(user_addr, PAGE_SIZE);
}
+/*
+ * This checks for whole-object size validation with hardened usercopy,
+ * with or without usercopy whitelisting.
+ */
static void do_usercopy_heap_size(bool to_user)
{
unsigned long user_addr;
unsigned char *one, *two;
+ void __user *test_user_addr;
+ void *test_kern_addr;
size_t size = unconst + 1024;
one = kmalloc(size, GFP_KERNEL);
@@ -139,27 +145,30 @@ static void do_usercopy_heap_size(bool to_user)
memset(one, 'A', size);
memset(two, 'B', size);
+ test_user_addr = (void __user *)(user_addr + 16);
+ test_kern_addr = one + 16;
+
if (to_user) {
pr_info("attempting good copy_to_user of correct size\n");
- if (copy_to_user((void __user *)user_addr, one, size)) {
+ if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) {
pr_warn("copy_to_user failed unexpectedly?!\n");
goto free_user;
}
pr_info("attempting bad copy_to_user of too large size\n");
- if (copy_to_user((void __user *)user_addr, one, 2 * size)) {
+ if (copy_to_user(test_user_addr, test_kern_addr, size)) {
pr_warn("copy_to_user failed, but lacked Oops\n");
goto free_user;
}
} else {
pr_info("attempting good copy_from_user of correct size\n");
- if (copy_from_user(one, (void __user *)user_addr, size)) {
+ if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) {
pr_warn("copy_from_user failed unexpectedly?!\n");
goto free_user;
}
pr_info("attempting bad copy_from_user of too large size\n");
- if (copy_from_user(one, (void __user *)user_addr, 2 * size)) {
+ if (copy_from_user(test_kern_addr, test_user_addr, size)) {
pr_warn("copy_from_user failed, but lacked Oops\n");
goto free_user;
}
@@ -172,77 +181,79 @@ free_kernel:
kfree(two);
}
-static void do_usercopy_heap_flag(bool to_user)
+/*
+ * This checks for the specific whitelist window within an object. If this
+ * test passes, then do_usercopy_heap_size() tests will pass too.
+ */
+static void do_usercopy_heap_whitelist(bool to_user)
{
- unsigned long user_addr;
- unsigned char *good_buf = NULL;
- unsigned char *bad_buf = NULL;
+ unsigned long user_alloc;
+ unsigned char *buf = NULL;
+ unsigned char __user *user_addr;
+ size_t offset, size;
/* Make sure cache was prepared. */
- if (!bad_cache) {
+ if (!whitelist_cache) {
pr_warn("Failed to allocate kernel cache\n");
return;
}
/*
- * Allocate one buffer from each cache (kmalloc will have the
- * SLAB_USERCOPY flag already, but "bad_cache" won't).
+ * Allocate a buffer with a whitelisted window in the buffer.
*/
- good_buf = kmalloc(cache_size, GFP_KERNEL);
- bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL);
- if (!good_buf || !bad_buf) {
- pr_warn("Failed to allocate buffers from caches\n");
+ buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL);
+ if (!buf) {
+ pr_warn("Failed to allocate buffer from whitelist cache\n");
goto free_alloc;
}
/* Allocate user memory we'll poke at. */
- user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+ user_alloc = vm_mmap(NULL, 0, PAGE_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANONYMOUS | MAP_PRIVATE, 0);
- if (user_addr >= TASK_SIZE) {
+ if (user_alloc >= TASK_SIZE) {
pr_warn("Failed to allocate user memory\n");
goto free_alloc;
}
+ user_addr = (void __user *)user_alloc;
- memset(good_buf, 'A', cache_size);
- memset(bad_buf, 'B', cache_size);
+ memset(buf, 'B', cache_size);
+
+ /* Whitelisted window in buffer, from kmem_cache_create_usercopy. */
+ offset = (cache_size / 4) + unconst;
+ size = (cache_size / 16) + unconst;
if (to_user) {
- pr_info("attempting good copy_to_user with SLAB_USERCOPY\n");
- if (copy_to_user((void __user *)user_addr, good_buf,
- cache_size)) {
+ pr_info("attempting good copy_to_user inside whitelist\n");
+ if (copy_to_user(user_addr, buf + offset, size)) {
pr_warn("copy_to_user failed unexpectedly?!\n");
goto free_user;
}
- pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n");
- if (copy_to_user((void __user *)user_addr, bad_buf,
- cache_size)) {
+ pr_info("attempting bad copy_to_user outside whitelist\n");
+ if (copy_to_user(user_addr, buf + offset - 1, size)) {
pr_warn("copy_to_user failed, but lacked Oops\n");
goto free_user;
}
} else {
- pr_info("attempting good copy_from_user with SLAB_USERCOPY\n");
- if (copy_from_user(good_buf, (void __user *)user_addr,
- cache_size)) {
+ pr_info("attempting good copy_from_user inside whitelist\n");
+ if (copy_from_user(buf + offset, user_addr, size)) {
pr_warn("copy_from_user failed unexpectedly?!\n");
goto free_user;
}
- pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n");
- if (copy_from_user(bad_buf, (void __user *)user_addr,
- cache_size)) {
+ pr_info("attempting bad copy_from_user outside whitelist\n");
+ if (copy_from_user(buf + offset - 1, user_addr, size)) {
pr_warn("copy_from_user failed, but lacked Oops\n");
goto free_user;
}
}
free_user:
- vm_munmap(user_addr, PAGE_SIZE);
+ vm_munmap(user_alloc, PAGE_SIZE);
free_alloc:
- if (bad_buf)
- kmem_cache_free(bad_cache, bad_buf);
- kfree(good_buf);
+ if (buf)
+ kmem_cache_free(whitelist_cache, buf);
}
/* Callable tests. */
@@ -256,14 +267,14 @@ void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
do_usercopy_heap_size(false);
}
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
{
- do_usercopy_heap_flag(true);
+ do_usercopy_heap_whitelist(true);
}
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
{
- do_usercopy_heap_flag(false);
+ do_usercopy_heap_whitelist(false);
}
void lkdtm_USERCOPY_STACK_FRAME_TO(void)
@@ -314,11 +325,15 @@ free_user:
void __init lkdtm_usercopy_init(void)
{
/* Prepare cache that lacks SLAB_USERCOPY flag. */
- bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0,
- 0, NULL);
+ whitelist_cache =
+ kmem_cache_create_usercopy("lkdtm-usercopy", cache_size,
+ 0, 0,
+ cache_size / 4,
+ cache_size / 16,
+ NULL);
}
void __exit lkdtm_usercopy_exit(void)
{
- kmem_cache_destroy(bad_cache);
+ kmem_cache_destroy(whitelist_cache);
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 976c936029cb..8efe4731ed89 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -79,14 +79,15 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
if (shost->unchecked_isa_dma) {
scsi_sense_isadma_cache =
kmem_cache_create("scsi_sense_cache(DMA)",
- SCSI_SENSE_BUFFERSIZE, 0,
- SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
+ SCSI_SENSE_BUFFERSIZE, 0,
+ SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
if (!scsi_sense_isadma_cache)
ret = -ENOMEM;
} else {
scsi_sense_cache =
- kmem_cache_create("scsi_sense_cache",
- SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+ kmem_cache_create_usercopy("scsi_sense_cache",
+ SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
+ 0, SCSI_SENSE_BUFFERSIZE, NULL);
if (!scsi_sense_cache)
ret = -ENOMEM;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index ee236231cafa..af2832aaeec5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -444,11 +444,15 @@ unacquire_none:
static int __init
befs_init_inodecache(void)
{
- befs_inode_cachep = kmem_cache_create("befs_inode_cache",
- sizeof (struct befs_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
- init_once);
+ befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache",
+ sizeof(struct befs_inode_info), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ offsetof(struct befs_inode_info,
+ i_data.symlink),
+ sizeof_field(struct befs_inode_info,
+ i_data.symlink),
+ init_once);
if (befs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a7be591d8e18..32cdea67bbfd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1239,9 +1239,11 @@ cifs_init_request_bufs(void)
cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n",
CIFSMaxBufSize, CIFSMaxBufSize);
*/
- cifs_req_cachep = kmem_cache_create("cifs_request",
+ cifs_req_cachep = kmem_cache_create_usercopy("cifs_request",
CIFSMaxBufSize + max_hdr_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN, 0,
+ CIFSMaxBufSize + max_hdr_size,
+ NULL);
if (cifs_req_cachep == NULL)
return -ENOMEM;
@@ -1267,9 +1269,9 @@ cifs_init_request_bufs(void)
more SMBs to use small buffer alloc and is still much more
efficient to alloc 1 per page off the slab compared to 17K (5page)
alloc of large cifs buffers even when page debugging is on */
- cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
+ cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq",
MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL);
if (cifs_sm_req_cachep == NULL) {
mempool_destroy(cifs_req_poolp);
kmem_cache_destroy(cifs_req_cachep);
diff --git a/fs/dcache.c b/fs/dcache.c
index 5b207e0de459..cca2b377ff0a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3602,8 +3602,9 @@ static void __init dcache_init(void)
* but it is probably not worth it because of the cache nature
* of the dcache.
*/
- dentry_cache = KMEM_CACHE(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
+ dentry_cache = KMEM_CACHE_USERCOPY(dentry,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+ d_iname);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
@@ -3641,8 +3642,8 @@ void __init vfs_caches_init_early(void)
void __init vfs_caches_init(void)
{
- names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
dcache_init();
inode_init();
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 7e244093c0e5..179cd5c2f52a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -193,10 +193,13 @@ static void exofs_init_once(void *foo)
*/
static int init_inodecache(void)
{
- exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
+ exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache",
sizeof(struct exofs_i_info), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
- SLAB_ACCOUNT, exofs_init_once);
+ SLAB_ACCOUNT,
+ offsetof(struct exofs_i_info, i_data),
+ sizeof_field(struct exofs_i_info, i_data),
+ exofs_init_once);
if (exofs_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 554c98b8a93a..db5f9daa7780 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -221,11 +221,13 @@ static void init_once(void *foo)
static int __init init_inodecache(void)
{
- ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
- sizeof(struct ext2_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
- init_once);
+ ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache",
+ sizeof(struct ext2_inode_info), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ offsetof(struct ext2_inode_info, i_data),
+ sizeof_field(struct ext2_inode_info, i_data),
+ init_once);
if (ext2_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 82eead1c8b84..899e6faf3381 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1038,11 +1038,13 @@ static void init_once(void *foo)
static int __init init_inodecache(void)
{
- ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
- sizeof(struct ext4_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
- init_once);
+ ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
+ sizeof(struct ext4_inode_info), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ offsetof(struct ext4_inode_info, i_data),
+ sizeof_field(struct ext4_inode_info, i_data),
+ init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 0ace128f5d23..0ee727485615 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -69,8 +69,7 @@ static long do_sys_name_to_handle(struct path *path,
} else
retval = 0;
/* copy the mount id */
- if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
- sizeof(*mnt_id)) ||
+ if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
copy_to_user(ufh, handle,
sizeof(struct file_handle) + handle_bytes))
retval = -EFAULT;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index f989efa051a0..48b24bb50d02 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -332,9 +332,13 @@ vxfs_init(void)
{
int rv;
- vxfs_inode_cachep = kmem_cache_create("vxfs_inode",
+ vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode",
sizeof(struct vxfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ offsetof(struct vxfs_inode_info, vii_immed.vi_immed),
+ sizeof_field(struct vxfs_inode_info,
+ vii_immed.vi_immed),
+ NULL);
if (!vxfs_inode_cachep)
return -ENOMEM;
rv = register_filesystem(&vxfs_fs_type);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 90373aebfdca..1b9264fd54b6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -965,9 +965,11 @@ static int __init init_jfs_fs(void)
int rc;
jfs_inode_cachep =
- kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
- init_once);
+ kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
+ 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+ offsetof(struct jfs_inode_info, i_inline),
+ sizeof_field(struct jfs_inode_info, i_inline),
+ init_once);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 36f1390b5ed7..62d49e53061c 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -610,11 +610,16 @@ void orangefs_kill_sb(struct super_block *sb)
int orangefs_inode_cache_initialize(void)
{
- orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache",
- sizeof(struct orangefs_inode_s),
- 0,
- ORANGEFS_CACHE_CREATE_FLAGS,
- orangefs_inode_cache_ctor);
+ orangefs_inode_cache = kmem_cache_create_usercopy(
+ "orangefs_inode_cache",
+ sizeof(struct orangefs_inode_s),
+ 0,
+ ORANGEFS_CACHE_CREATE_FLAGS,
+ offsetof(struct orangefs_inode_s,
+ link_target),
+ sizeof_field(struct orangefs_inode_s,
+ link_target),
+ orangefs_inode_cache_ctor);
if (!orangefs_inode_cache) {
gossip_err("Cannot create orangefs_inode_cache\n");
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b6ba80e05bff..8254b8b3690f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1467,11 +1467,14 @@ static void init_once(void *foo)
static int __init init_inodecache(void)
{
- ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
- sizeof(struct ufs_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
- init_once);
+ ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache",
+ sizeof(struct ufs_inode_info), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ offsetof(struct ufs_inode_info, i_u1.i_symlink),
+ sizeof_field(struct ufs_inode_info,
+ i_u1.i_symlink),
+ init_once);
if (ufs_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 05b8650f06f5..5be31eb7b266 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -104,6 +104,20 @@ extern int arch_task_struct_size __read_mostly;
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
+#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST
+/*
+ * If an architecture has not declared a thread_struct whitelist we
+ * must assume something there may need to be copied to userspace.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = 0;
+ /* Handle dynamically sized thread_struct. */
+ *size = arch_task_struct_size - offsetof(struct task_struct, thread);
+}
+#endif
+
#ifdef CONFIG_VMAP_STACK
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
{
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 50697a1d6621..231abc8976c5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -135,9 +135,15 @@ struct mem_cgroup;
void __init kmem_cache_init(void);
bool slab_is_available(void);
-struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
- slab_flags_t,
- void (*)(void *));
+extern bool usercopy_fallback;
+
+struct kmem_cache *kmem_cache_create(const char *name, size_t size,
+ size_t align, slab_flags_t flags,
+ void (*ctor)(void *));
+struct kmem_cache *kmem_cache_create_usercopy(const char *name,
+ size_t size, size_t align, slab_flags_t flags,
+ size_t useroffset, size_t usersize,
+ void (*ctor)(void *));
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
@@ -153,9 +159,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *);
* f.e. add ____cacheline_aligned_in_smp to the struct declaration
* then the objects will be properly aligned in SMP configurations.
*/
-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
- sizeof(struct __struct), __alignof__(struct __struct),\
- (__flags), NULL)
+#define KMEM_CACHE(__struct, __flags) \
+ kmem_cache_create(#__struct, sizeof(struct __struct), \
+ __alignof__(struct __struct), (__flags), NULL)
+
+/*
+ * To whitelist a single field for copying to/from usercopy, use this
+ * macro instead for KMEM_CACHE() above.
+ */
+#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \
+ kmem_cache_create_usercopy(#__struct, \
+ sizeof(struct __struct), \
+ __alignof__(struct __struct), (__flags), \
+ offsetof(struct __struct, __field), \
+ sizeof_field(struct __struct, __field), NULL)
/*
* Common kmalloc functions provided by all allocators
@@ -167,15 +184,11 @@ void kzfree(const void *);
size_t ksize(const void *);
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
-const char *__check_heap_object(const void *ptr, unsigned long n,
- struct page *page);
+void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
+ bool to_user);
#else
-static inline const char *__check_heap_object(const void *ptr,
- unsigned long n,
- struct page *page)
-{
- return NULL;
-}
+static inline void __check_heap_object(const void *ptr, unsigned long n,
+ struct page *page, bool to_user) { }
#endif
/*
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 072e46e9e1d5..7385547c04b1 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -85,6 +85,9 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
+ size_t useroffset; /* Usercopy region offset */
+ size_t usersize; /* Usercopy region size */
+
struct kmem_cache_node *node[MAX_NUMNODES];
};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0adae162dc8f..8ad99c47b19c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -135,6 +135,9 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
+ size_t useroffset; /* Usercopy region offset */
+ size_t usersize; /* Usercopy region size */
+
struct kmem_cache_node *node[MAX_NUMNODES];
};
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 2181719fd907..998a4ba28eba 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -20,12 +20,20 @@ enum {
#endif
/**
+ * sizeof_field(TYPE, MEMBER)
+ *
+ * @TYPE: The structure containing the field of interest
+ * @MEMBER: The field to return the size of
+ */
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+/**
* offsetofend(TYPE, MEMBER)
*
* @TYPE: The type of the structure
* @MEMBER: The member within the structure to get the end offset of
*/
#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
#endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 251e655d407f..efe79c1cdd47 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -273,4 +273,12 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
+void usercopy_warn(const char *name, const char *detail, bool to_user,
+ unsigned long offset, unsigned long len);
+void __noreturn usercopy_abort(const char *name, const char *detail,
+ bool to_user, unsigned long offset,
+ unsigned long len);
+#endif
+
#endif /* __LINUX_UACCESS_H__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 02369e379d35..03e92dda1813 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -203,12 +203,17 @@ struct sctp_sock {
/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
__u32 param_flags;
- struct sctp_initmsg initmsg;
struct sctp_rtoinfo rtoinfo;
struct sctp_paddrparams paddrparam;
- struct sctp_event_subscribe subscribe;
struct sctp_assocparams assocparams;
+ /*
+ * These two structures must be grouped together for the usercopy
+ * whitelist region.
+ */
+ struct sctp_event_subscribe subscribe;
+ struct sctp_initmsg initmsg;
+
int user_frag;
__u32 autoclose;
diff --git a/include/net/sock.h b/include/net/sock.h
index 63731289186a..169c92afcafa 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1109,6 +1109,8 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
slab_flags_t slab_flags;
+ size_t useroffset; /* Usercopy region offset */
+ size_t usersize; /* Usercopy region size */
struct percpu_counter *orphan_count;
diff --git a/kernel/fork.c b/kernel/fork.c
index 5e6cf0dd031c..5c372c954f3b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -283,8 +283,9 @@ static void free_thread_stack(struct task_struct *tsk)
void thread_stack_cache_init(void)
{
- thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
- THREAD_SIZE, 0, NULL);
+ thread_stack_cache = kmem_cache_create_usercopy("thread_stack",
+ THREAD_SIZE, THREAD_SIZE, 0, 0,
+ THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
# endif
@@ -693,6 +694,21 @@ static void set_max_threads(unsigned int max_threads_suggested)
int arch_task_struct_size __read_mostly;
#endif
+static void task_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+ /* Fetch thread_struct whitelist for the architecture. */
+ arch_thread_struct_whitelist(offset, size);
+
+ /*
+ * Handle zero-sized whitelist or empty thread_struct, otherwise
+ * adjust offset to position of thread_struct in task_struct.
+ */
+ if (unlikely(*size == 0))
+ *offset = 0;
+ else
+ *offset += offsetof(struct task_struct, thread);
+}
+
void __init fork_init(void)
{
int i;
@@ -701,11 +717,14 @@ void __init fork_init(void)
#define ARCH_MIN_TASKALIGN 0
#endif
int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+ unsigned long useroffset, usersize;
/* create a slab on which task_structs can be allocated */
- task_struct_cachep = kmem_cache_create("task_struct",
+ task_struct_whitelist(&useroffset, &usersize);
+ task_struct_cachep = kmem_cache_create_usercopy("task_struct",
arch_task_struct_size, align,
- SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT,
+ useroffset, usersize, NULL);
#endif
/* do the arch specific task caches init */
@@ -2248,9 +2267,11 @@ void __init proc_caches_init(void)
* maximum number of CPU's we can ever have. The cpumask_allocation
* is at the end of the structure, exactly for that reason.
*/
- mm_cachep = kmem_cache_create("mm_struct",
+ mm_cachep = kmem_cache_create_usercopy("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+ offsetof(struct mm_struct, saved_auxv),
+ sizeof_field(struct mm_struct, saved_auxv),
NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
mmap_init();
diff --git a/mm/slab.c b/mm/slab.c
index 226906294183..cd86f15071ad 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1281,7 +1281,7 @@ void __init kmem_cache_init(void)
create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *),
- SLAB_HWCACHE_ALIGN);
+ SLAB_HWCACHE_ALIGN, 0, 0);
list_add(&kmem_cache->list, &slab_caches);
slab_state = PARTIAL;
@@ -1291,7 +1291,8 @@ void __init kmem_cache_init(void)
*/
kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
kmalloc_info[INDEX_NODE].name,
- kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
+ kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS,
+ 0, kmalloc_size(INDEX_NODE));
slab_state = PARTIAL_NODE;
setup_kmalloc_cache_index_table();
@@ -4385,13 +4386,15 @@ module_init(slab_proc_init);
#ifdef CONFIG_HARDENED_USERCOPY
/*
- * Rejects objects that are incorrectly sized.
+ * Rejects incorrectly sized objects and objects that are to be copied
+ * to/from userspace but do not fall entirely within the containing slab
+ * cache's usercopy region.
*
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
-const char *__check_heap_object(const void *ptr, unsigned long n,
- struct page *page)
+void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
+ bool to_user)
{
struct kmem_cache *cachep;
unsigned int objnr;
@@ -4405,11 +4408,26 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
/* Find offset within object. */
offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
- /* Allow address range falling entirely within object size. */
- if (offset <= cachep->object_size && n <= cachep->object_size - offset)
- return NULL;
+ /* Allow address range falling entirely within usercopy region. */
+ if (offset >= cachep->useroffset &&
+ offset - cachep->useroffset <= cachep->usersize &&
+ n <= cachep->useroffset - offset + cachep->usersize)
+ return;
+
+ /*
+ * If the copy is still within the allocated object, produce
+ * a warning instead of rejecting the copy. This is intended
+ * to be a temporary method to find any missing usercopy
+ * whitelists.
+ */
+ if (usercopy_fallback &&
+ offset <= cachep->object_size &&
+ n <= cachep->object_size - offset) {
+ usercopy_warn("SLAB object", cachep->name, to_user, offset, n);
+ return;
+ }
- return cachep->name;
+ usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
}
#endif /* CONFIG_HARDENED_USERCOPY */
diff --git a/mm/slab.h b/mm/slab.h
index e8e2095a6185..51813236e773 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -22,6 +22,8 @@ struct kmem_cache {
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
+ size_t useroffset; /* Usercopy region offset */
+ size_t usersize; /* Usercopy region size */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
@@ -92,9 +94,11 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t);
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
- slab_flags_t flags);
+ slab_flags_t flags, size_t useroffset,
+ size_t usersize);
extern void create_boot_cache(struct kmem_cache *, const char *name,
- size_t size, slab_flags_t flags);
+ size_t size, slab_flags_t flags, size_t useroffset,
+ size_t usersize);
int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(size_t size, size_t align,
diff --git a/mm/slab_common.c b/mm/slab_common.c
index deeddf95cdcf..10f127b2de7c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -31,6 +31,14 @@ LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;
+#ifdef CONFIG_HARDENED_USERCOPY
+bool usercopy_fallback __ro_after_init =
+ IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
+module_param(usercopy_fallback, bool, 0400);
+MODULE_PARM_DESC(usercopy_fallback,
+ "WARN instead of reject usercopy whitelist violations");
+#endif
+
static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
@@ -310,6 +318,9 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->ctor)
return 1;
+ if (s->usersize)
+ return 1;
+
/*
* We may have set a slab to be unmergeable during bootstrap.
*/
@@ -368,12 +379,16 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
static struct kmem_cache *create_cache(const char *name,
size_t object_size, size_t size, size_t align,
- slab_flags_t flags, void (*ctor)(void *),
+ slab_flags_t flags, size_t useroffset,
+ size_t usersize, void (*ctor)(void *),
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
struct kmem_cache *s;
int err;
+ if (WARN_ON(useroffset + usersize > object_size))
+ useroffset = usersize = 0;
+
err = -ENOMEM;
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
if (!s)
@@ -384,6 +399,8 @@ static struct kmem_cache *create_cache(const char *name,
s->size = size;
s->align = align;
s->ctor = ctor;
+ s->useroffset = useroffset;
+ s->usersize = usersize;
err = init_memcg_params(s, memcg, root_cache);
if (err)
@@ -408,11 +425,13 @@ out_free_cache:
}
/*
- * kmem_cache_create - Create a cache.
+ * kmem_cache_create_usercopy - Create a cache.
* @name: A string which is used in /proc/slabinfo to identify this cache.
* @size: The size of objects to be created in this cache.
* @align: The required alignment for the objects.
* @flags: SLAB flags
+ * @useroffset: Usercopy region offset
+ * @usersize: Usercopy region size
* @ctor: A constructor for the objects.
*
* Returns a ptr to the cache on success, NULL on failure.
@@ -432,8 +451,9 @@ out_free_cache:
* as davem.
*/
struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t align,
- slab_flags_t flags, void (*ctor)(void *))
+kmem_cache_create_usercopy(const char *name, size_t size, size_t align,
+ slab_flags_t flags, size_t useroffset, size_t usersize,
+ void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
const char *cache_name;
@@ -464,7 +484,13 @@ kmem_cache_create(const char *name, size_t size, size_t align,
*/
flags &= CACHE_CREATE_MASK;
- s = __kmem_cache_alias(name, size, align, flags, ctor);
+ /* Fail closed on bad usersize of useroffset values. */
+ if (WARN_ON(!usersize && useroffset) ||
+ WARN_ON(size < usersize || size - usersize < useroffset))
+ usersize = useroffset = 0;
+
+ if (!usersize)
+ s = __kmem_cache_alias(name, size, align, flags, ctor);
if (s)
goto out_unlock;
@@ -476,7 +502,7 @@ kmem_cache_create(const char *name, size_t size, size_t align,
s = create_cache(cache_name, size, size,
calculate_alignment(flags, align, size),
- flags, ctor, NULL, NULL);
+ flags, useroffset, usersize, ctor, NULL, NULL);
if (IS_ERR(s)) {
err = PTR_ERR(s);
kfree_const(cache_name);
@@ -502,6 +528,15 @@ out_unlock:
}
return s;
}
+EXPORT_SYMBOL(kmem_cache_create_usercopy);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+ slab_flags_t flags, void (*ctor)(void *))
+{
+ return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
+ ctor);
+}
EXPORT_SYMBOL(kmem_cache_create);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
@@ -614,6 +649,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
s = create_cache(cache_name, root_cache->object_size,
root_cache->size, root_cache->align,
root_cache->flags & CACHE_CREATE_MASK,
+ root_cache->useroffset, root_cache->usersize,
root_cache->ctor, memcg, root_cache);
/*
* If we could not create a memcg cache, do not complain, because
@@ -881,13 +917,15 @@ bool slab_is_available(void)
#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
- slab_flags_t flags)
+ slab_flags_t flags, size_t useroffset, size_t usersize)
{
int err;
s->name = name;
s->size = s->object_size = size;
s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+ s->useroffset = useroffset;
+ s->usersize = usersize;
slab_init_memcg_params(s);
@@ -901,14 +939,15 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
}
struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
- slab_flags_t flags)
+ slab_flags_t flags, size_t useroffset,
+ size_t usersize)
{
struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
if (!s)
panic("Out of memory when creating slab %s\n", name);
- create_boot_cache(s, name, size, flags);
+ create_boot_cache(s, name, size, flags, useroffset, usersize);
list_add(&s->list, &slab_caches);
memcg_link_cache(s);
s->refcount = 1;
@@ -1062,7 +1101,8 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
{
kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
- kmalloc_info[idx].size, flags);
+ kmalloc_info[idx].size, flags, 0,
+ kmalloc_info[idx].size);
}
/*
@@ -1103,7 +1143,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
BUG_ON(!n);
kmalloc_dma_caches[i] = create_kmalloc_cache(n,
- size, SLAB_CACHE_DMA | flags);
+ size, SLAB_CACHE_DMA | flags, 0, 0);
}
}
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 693b7074bc53..cc71176c6eef 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3813,13 +3813,15 @@ EXPORT_SYMBOL(__kmalloc_node);
#ifdef CONFIG_HARDENED_USERCOPY
/*
- * Rejects objects that are incorrectly sized.
+ * Rejects incorrectly sized objects and objects that are to be copied
+ * to/from userspace but do not fall entirely within the containing slab
+ * cache's usercopy region.
*
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
-const char *__check_heap_object(const void *ptr, unsigned long n,
- struct page *page)
+void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
+ bool to_user)
{
struct kmem_cache *s;
unsigned long offset;
@@ -3827,11 +3829,11 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
/* Find object and usable object size. */
s = page->slab_cache;
- object_size = slab_ksize(s);
/* Reject impossible pointers. */
if (ptr < page_address(page))
- return s->name;
+ usercopy_abort("SLUB object not in SLUB page?!", NULL,
+ to_user, 0, n);
/* Find offset within object. */
offset = (ptr - page_address(page)) % s->size;
@@ -3839,15 +3841,31 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
/* Adjust for redzone and reject if within the redzone. */
if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
if (offset < s->red_left_pad)
- return s->name;
+ usercopy_abort("SLUB object in left red zone",
+ s->name, to_user, offset, n);
offset -= s->red_left_pad;
}
- /* Allow address range falling entirely within object size. */
- if (offset <= object_size && n <= object_size - offset)
- return NULL;
+ /* Allow address range falling entirely within usercopy region. */
+ if (offset >= s->useroffset &&
+ offset - s->useroffset <= s->usersize &&
+ n <= s->useroffset - offset + s->usersize)
+ return;
+
+ /*
+ * If the copy is still within the allocated object, produce
+ * a warning instead of rejecting the copy. This is intended
+ * to be a temporary method to find any missing usercopy
+ * whitelists.
+ */
+ object_size = slab_ksize(s);
+ if (usercopy_fallback &&
+ offset <= object_size && n <= object_size - offset) {
+ usercopy_warn("SLUB object", s->name, to_user, offset, n);
+ return;
+ }
- return s->name;
+ usercopy_abort("SLUB object", s->name, to_user, offset, n);
}
#endif /* CONFIG_HARDENED_USERCOPY */
@@ -4181,7 +4199,7 @@ void __init kmem_cache_init(void)
kmem_cache = &boot_kmem_cache;
create_boot_cache(kmem_cache_node, "kmem_cache_node",
- sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
+ sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
register_hotmemory_notifier(&slab_memory_callback_nb);
@@ -4191,7 +4209,7 @@ void __init kmem_cache_init(void)
create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *),
- SLAB_HWCACHE_ALIGN);
+ SLAB_HWCACHE_ALIGN, 0, 0);
kmem_cache = bootstrap(&boot_kmem_cache);
@@ -5061,6 +5079,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
SLAB_ATTR_RO(cache_dma);
#endif
+static ssize_t usersize_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%zu\n", s->usersize);
+}
+SLAB_ATTR_RO(usersize);
+
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
@@ -5435,6 +5459,7 @@ static struct attribute *slab_attrs[] = {
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
#endif
+ &usersize_attr.attr,
NULL
};
diff --git a/mm/usercopy.c b/mm/usercopy.c
index a9852b24715d..e9e9325f7638 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -58,12 +58,40 @@ static noinline int check_stack_object(const void *obj, unsigned long len)
return GOOD_STACK;
}
-static void report_usercopy(const void *ptr, unsigned long len,
- bool to_user, const char *type)
+/*
+ * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found
+ * an unexpected state during a copy_from_user() or copy_to_user() call.
+ * There are several checks being performed on the buffer by the
+ * __check_object_size() function. Normal stack buffer usage should never
+ * trip the checks, and kernel text addressing will always trip the check.
+ * For cache objects, it is checking that only the whitelisted range of
+ * bytes for a given cache is being accessed (via the cache's usersize and
+ * useroffset fields). To adjust a cache whitelist, use the usercopy-aware
+ * kmem_cache_create_usercopy() function to create the cache (and
+ * carefully audit the whitelist range).
+ */
+void usercopy_warn(const char *name, const char *detail, bool to_user,
+ unsigned long offset, unsigned long len)
+{
+ WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
+ to_user ? "exposure" : "overwrite",
+ to_user ? "from" : "to",
+ name ? : "unknown?!",
+ detail ? " '" : "", detail ? : "", detail ? "'" : "",
+ offset, len);
+}
+
+void __noreturn usercopy_abort(const char *name, const char *detail,
+ bool to_user, unsigned long offset,
+ unsigned long len)
{
- pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
- to_user ? "exposure" : "overwrite",
- to_user ? "from" : "to", ptr, type ? : "unknown", len);
+ pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
+ to_user ? "exposure" : "overwrite",
+ to_user ? "from" : "to",
+ name ? : "unknown?!",
+ detail ? " '" : "", detail ? : "", detail ? "'" : "",
+ offset, len);
+
/*
* For greater effect, it would be nice to do do_group_exit(),
* but BUG() actually hooks all the lock-breaking and per-arch
@@ -73,10 +101,10 @@ static void report_usercopy(const void *ptr, unsigned long len,
}
/* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */
-static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
- unsigned long high)
+static bool overlaps(const unsigned long ptr, unsigned long n,
+ unsigned long low, unsigned long high)
{
- unsigned long check_low = (uintptr_t)ptr;
+ const unsigned long check_low = ptr;
unsigned long check_high = check_low + n;
/* Does not overlap if entirely above or entirely below. */
@@ -87,15 +115,15 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
}
/* Is this address range in the kernel text area? */
-static inline const char *check_kernel_text_object(const void *ptr,
- unsigned long n)
+static inline void check_kernel_text_object(const unsigned long ptr,
+ unsigned long n, bool to_user)
{
unsigned long textlow = (unsigned long)_stext;
unsigned long texthigh = (unsigned long)_etext;
unsigned long textlow_linear, texthigh_linear;
if (overlaps(ptr, n, textlow, texthigh))
- return "<kernel text>";
+ usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n);
/*
* Some architectures have virtual memory mappings with a secondary
@@ -108,32 +136,30 @@ static inline const char *check_kernel_text_object(const void *ptr,
textlow_linear = (unsigned long)lm_alias(textlow);
/* No different mapping: we're done. */
if (textlow_linear == textlow)
- return NULL;
+ return;
/* Check the secondary mapping... */
texthigh_linear = (unsigned long)lm_alias(texthigh);
if (overlaps(ptr, n, textlow_linear, texthigh_linear))
- return "<linear kernel text>";
-
- return NULL;
+ usercopy_abort("linear kernel text", NULL, to_user,
+ ptr - textlow_linear, n);
}
-static inline const char *check_bogus_address(const void *ptr, unsigned long n)
+static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
+ bool to_user)
{
/* Reject if object wraps past end of memory. */
- if ((unsigned long)ptr + n < (unsigned long)ptr)
- return "<wrapped address>";
+ if (ptr + n < ptr)
+ usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n);
/* Reject if NULL or ZERO-allocation. */
if (ZERO_OR_NULL_PTR(ptr))
- return "<null>";
-
- return NULL;
+ usercopy_abort("null address", NULL, to_user, ptr, n);
}
/* Checks for allocs that are marked in some way as spanning multiple pages. */
-static inline const char *check_page_span(const void *ptr, unsigned long n,
- struct page *page, bool to_user)
+static inline void check_page_span(const void *ptr, unsigned long n,
+ struct page *page, bool to_user)
{
#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
const void *end = ptr + n - 1;
@@ -150,28 +176,28 @@ static inline const char *check_page_span(const void *ptr, unsigned long n,
if (ptr >= (const void *)__start_rodata &&
end <= (const void *)__end_rodata) {
if (!to_user)
- return "<rodata>";
- return NULL;
+ usercopy_abort("rodata", NULL, to_user, 0, n);
+ return;
}
/* Allow kernel data region (if not marked as Reserved). */
if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
- return NULL;
+ return;
/* Allow kernel bss region (if not marked as Reserved). */
if (ptr >= (const void *)__bss_start &&
end <= (const void *)__bss_stop)
- return NULL;
+ return;
/* Is the object wholly within one base page? */
if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
((unsigned long)end & (unsigned long)PAGE_MASK)))
- return NULL;
+ return;
/* Allow if fully inside the same compound (__GFP_COMP) page. */
endpage = virt_to_head_page(end);
if (likely(endpage == page))
- return NULL;
+ return;
/*
* Reject if range is entirely either Reserved (i.e. special or
@@ -181,36 +207,37 @@ static inline const char *check_page_span(const void *ptr, unsigned long n,
is_reserved = PageReserved(page);
is_cma = is_migrate_cma_page(page);
if (!is_reserved && !is_cma)
- return "<spans multiple pages>";
+ usercopy_abort("spans multiple pages", NULL, to_user, 0, n);
for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
page = virt_to_head_page(ptr);
if (is_reserved && !PageReserved(page))
- return "<spans Reserved and non-Reserved pages>";
+ usercopy_abort("spans Reserved and non-Reserved pages",
+ NULL, to_user, 0, n);
if (is_cma && !is_migrate_cma_page(page))
- return "<spans CMA and non-CMA pages>";
+ usercopy_abort("spans CMA and non-CMA pages", NULL,
+ to_user, 0, n);
}
#endif
-
- return NULL;
}
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
- bool to_user)
+static inline void check_heap_object(const void *ptr, unsigned long n,
+ bool to_user)
{
struct page *page;
if (!virt_addr_valid(ptr))
- return NULL;
+ return;
page = virt_to_head_page(ptr);
- /* Check slab allocator for flags and size. */
- if (PageSlab(page))
- return __check_heap_object(ptr, n, page);
-
- /* Verify object does not incorrectly span multiple pages. */
- return check_page_span(ptr, n, page, to_user);
+ if (PageSlab(page)) {
+ /* Check slab allocator for flags and size. */
+ __check_heap_object(ptr, n, page, to_user);
+ } else {
+ /* Verify object does not incorrectly span multiple pages. */
+ check_page_span(ptr, n, page, to_user);
+ }
}
/*
@@ -221,21 +248,15 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
*/
void __check_object_size(const void *ptr, unsigned long n, bool to_user)
{
- const char *err;
-
/* Skip all tests if size is zero. */
if (!n)
return;
/* Check for invalid addresses. */
- err = check_bogus_address(ptr, n);
- if (err)
- goto report;
+ check_bogus_address((const unsigned long)ptr, n, to_user);
/* Check for bad heap object. */
- err = check_heap_object(ptr, n, to_user);
- if (err)
- goto report;
+ check_heap_object(ptr, n, to_user);
/* Check for bad stack object. */
switch (check_stack_object(ptr, n)) {
@@ -251,16 +272,10 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
*/
return;
default:
- err = "<process stack>";
- goto report;
+ usercopy_abort("process stack", NULL, to_user, 0, n);
}
/* Check for object in kernel to avoid text exposure. */
- err = check_kernel_text_object(ptr, n);
- if (!err)
- return;
-
-report:
- report_usercopy(ptr, n, to_user, err);
+ check_kernel_text_object((const unsigned long)ptr, n, to_user);
}
EXPORT_SYMBOL(__check_object_size);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 64048cec41e0..b109445a1df9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1032,6 +1032,8 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
static struct proto prot = {.name = "PF_CAIF",
.owner = THIS_MODULE,
.obj_size = sizeof(struct caifsock),
+ .useroffset = offsetof(struct caifsock, conn_req.param),
+ .usersize = sizeof_field(struct caifsock, conn_req.param)
};
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
diff --git a/net/core/sock.c b/net/core/sock.c
index e50e7b3f2223..b026e1717df4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3191,8 +3191,10 @@ static int req_prot_init(const struct proto *prot)
int proto_register(struct proto *prot, int alloc_slab)
{
if (alloc_slab) {
- prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+ prot->slab = kmem_cache_create_usercopy(prot->name,
+ prot->obj_size, 0,
SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ prot->useroffset, prot->usersize,
NULL);
if (prot->slab == NULL) {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7c509697ebc7..9b367fc48d7d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -990,6 +990,8 @@ struct proto raw_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw_sock),
+ .useroffset = offsetof(struct raw_sock, filter),
+ .usersize = sizeof_field(struct raw_sock, filter),
.h.raw_hash = &raw_v4_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_raw_setsockopt,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ddda7eb3c623..4c25339b1984 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1272,6 +1272,8 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .useroffset = offsetof(struct raw6_sock, filter),
+ .usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_rawv6_setsockopt,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 356e387f82e7..ebb8cb9eb0bd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5053,7 +5053,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
+ if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval))
return -EFAULT;
return 0;
}
@@ -8552,6 +8552,10 @@ struct proto sctp_prot = {
.unhash = sctp_unhash,
.get_port = sctp_get_port,
.obj_size = sizeof(struct sctp_sock),
+ .useroffset = offsetof(struct sctp_sock, subscribe),
+ .usersize = offsetof(struct sctp_sock, initmsg) -
+ offsetof(struct sctp_sock, subscribe) +
+ sizeof_field(struct sctp_sock, initmsg),
.sysctl_mem = sysctl_sctp_mem,
.sysctl_rmem = sysctl_sctp_rmem,
.sysctl_wmem = sysctl_sctp_wmem,
@@ -8591,6 +8595,10 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.get_port = sctp_get_port,
.obj_size = sizeof(struct sctp6_sock),
+ .useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
+ .usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
+ offsetof(struct sctp6_sock, sctp.subscribe) +
+ sizeof_field(struct sctp6_sock, sctp.initmsg),
.sysctl_mem = sysctl_sctp_mem,
.sysctl_rmem = sysctl_sctp_rmem,
.sysctl_wmem = sysctl_sctp_wmem,
diff --git a/security/Kconfig b/security/Kconfig
index 3709db95027f..c4302067a3ad 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -164,6 +164,20 @@ config HARDENED_USERCOPY
or are part of the kernel text. This kills entire classes
of heap overflow exploits and similar kernel memory exposures.
+config HARDENED_USERCOPY_FALLBACK
+ bool "Allow usercopy whitelist violations to fallback to object size"
+ depends on HARDENED_USERCOPY
+ default y
+ help
+ This is a temporary option that allows missing usercopy whitelists
+ to be discovered via a WARN() to the kernel log, instead of
+ rejecting the copy, falling back to non-whitelisted hardened
+ usercopy that checks the slab allocation size instead of the
+ whitelist size. This option will be removed once it seems like
+ all missing usercopy whitelists have been identified and fixed.
+ Booting with "slab_common.usercopy_fallback=Y/N" can change
+ this setting.
+
config HARDENED_USERCOPY_PAGESPAN
bool "Refuse to copy allocations that span multiple pages"
depends on HARDENED_USERCOPY
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f40d46e24bcc..524becf88e04 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
"__reiserfs_panic",
"lbug_with_loc",
"fortify_panic",
+ "usercopy_abort",
};
if (func->bind == STB_WEAK)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 35db929f92f0..001085b611ad 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4019,8 +4019,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
/* A kmem cache lets us meet the alignment requirements of fx_save. */
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
- SLAB_ACCOUNT, NULL);
+ kvm_vcpu_cache =
+ kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
+ SLAB_ACCOUNT,
+ offsetof(struct kvm_vcpu, arch),
+ sizeof_field(struct kvm_vcpu, arch),
+ NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
goto out_free_3;