Blame - arch/x86/mm/mpx.c - kernel/msm-4.19

blob: e500949bae24534ce7fea11d73bddb2810affa99 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	2	/*
				3	* mpx.c - Memory Protection eXtensions
				4	*
				5	* Copyright (c) 2014, Intel Corporation.
				6	* Qiaowei Ren <qiaowei.ren@intel.com>
				7	* Dave Hansen <dave.hansen@intel.com>
				8	*/
				9	#include <linux/kernel.h>
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	10	#include <linux/slab.h>
Ingo Molnar	589ee62	2017-02-04 00:16:44 +0100	[diff] [blame]	11	#include <linux/mm_types.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	12	#include <linux/syscalls.h>
				13	#include <linux/sched/sysctl.h>
				14
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	15	#include <asm/insn.h>
Ricardo Neri	32542ee	2017-10-27 13:25:36 -0700	[diff] [blame]	16	#include <asm/insn-eval.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	17	#include <asm/mman.h>
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	18	#include <asm/mmu_context.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	19	#include <asm/mpx.h>
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	20	#include <asm/processor.h>
Ingo Molnar	78f7f1e	2015-04-24 02:54:44 +0200	[diff] [blame]	21	#include <asm/fpu/internal.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	22
Dave Hansen	e7126cf	2015-06-07 11:37:03 -0700	[diff] [blame]	23	#define CREATE_TRACE_POINTS
				24	#include <asm/trace/mpx.h>
				25
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	26	static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
				27	{
				28	if (is_64bit_mm(mm))
				29	return MPX_BD_SIZE_BYTES_64;
				30	else
				31	return MPX_BD_SIZE_BYTES_32;
				32	}
				33
				34	static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
				35	{
				36	if (is_64bit_mm(mm))
				37	return MPX_BT_SIZE_BYTES_64;
				38	else
				39	return MPX_BT_SIZE_BYTES_32;
				40	}
				41
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	42	/*
				43	* This is really a simplified "vm_mmap". it only handles MPX
				44	* bounds tables (the bounds directory is user-allocated).
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	45	*/
				46	static unsigned long mpx_mmap(unsigned long len)
				47	{
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	48	struct mm_struct *mm = current->mm;
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	49	unsigned long addr, populate;
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	50
Dave Hansen	eb099e5	2015-06-07 11:37:02 -0700	[diff] [blame]	51	/* Only bounds table can be allocated here */
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	52	if (len != mpx_bt_size_bytes(mm))
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	53	return -EINVAL;
				54
				55	down_write(&mm->mmap_sem);
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	56	addr = do_mmap(NULL, 0, len, PROT_READ \| PROT_WRITE,
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	57	MAP_ANONYMOUS \| MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	58	up_write(&mm->mmap_sem);
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	59	if (populate)
				60	mm_populate(addr, populate);
				61
				62	return addr;
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	63	}
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	64
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	65	static int mpx_insn_decode(struct insn *insn,
				66	struct pt_regs *regs)
				67	{
				68	unsigned char buf[MAX_INSN_SIZE];
				69	int x86_64 = !test_thread_flag(TIF_IA32);
				70	int not_copied;
				71	int nr_copied;
				72
				73	not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
				74	nr_copied = sizeof(buf) - not_copied;
				75	/*
				76	* The decoder _should_ fail nicely if we pass it a short buffer.
				77	* But, let's not depend on that implementation detail. If we
				78	* did not get anything, just error out now.
				79	*/
				80	if (!nr_copied)
				81	return -EFAULT;
				82	insn_init(insn, buf, nr_copied, x86_64);
				83	insn_get_length(insn);
				84	/*
				85	* copy_from_user() tries to get as many bytes as we could see in
				86	* the largest possible instruction. If the instruction we are
				87	* after is shorter than that _and_ we attempt to copy from
				88	* something unreadable, we might get a short read. This is OK
				89	* as long as the read did not stop in the middle of the
				90	* instruction. Check to see if we got a partial instruction.
				91	*/
				92	if (nr_copied < insn->length)
				93	return -EFAULT;
				94
				95	insn_get_opcode(insn);
				96	/*
				97	* We only _really_ need to decode bndcl/bndcn/bndcu
				98	* Error out on anything else.
				99	*/
				100	if (insn->opcode.bytes[0] != 0x0f)
				101	goto bad_opcode;
				102	if ((insn->opcode.bytes[1] != 0x1a) &&
				103	(insn->opcode.bytes[1] != 0x1b))
				104	goto bad_opcode;
				105
				106	return 0;
				107	bad_opcode:
				108	return -EINVAL;
				109	}
				110
				111	/*
				112	* If a bounds overflow occurs then a #BR is generated. This
				113	* function decodes MPX instructions to get violation address
				114	* and set this address into extended struct siginfo.
				115	*
				116	* Note that this is not a super precise way of doing this.
				117	* Userspace could have, by the time we get here, written
				118	* anything it wants in to the instructions. We can not
				119	* trust anything about it. They might not be valid
				120	* instructions or might encode invalid registers, etc...
				121	*
				122	* The caller is expected to kfree() the returned siginfo_t.
				123	*/
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	124	siginfo_t mpx_generate_siginfo(struct pt_regs regs)
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	125	{
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	126	const struct mpx_bndreg_state *bndregs;
				127	const struct mpx_bndreg *bndreg;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	128	siginfo_t *info = NULL;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	129	struct insn insn;
				130	uint8_t bndregno;
				131	int err;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	132
				133	err = mpx_insn_decode(&insn, regs);
				134	if (err)
				135	goto err_out;
				136
				137	/*
				138	* We know at this point that we are only dealing with
				139	* MPX instructions.
				140	*/
				141	insn_get_modrm(&insn);
				142	bndregno = X86_MODRM_REG(insn.modrm.value);
				143	if (bndregno > 3) {
				144	err = -EINVAL;
				145	goto err_out;
				146	}
Dave Hansen	a84eeaa	2015-06-07 11:37:01 -0700	[diff] [blame]	147	/* get bndregs field from current task's xsave area */
Dave Hansen	d91cab7	2015-09-02 16:31:26 -0700	[diff] [blame]	148	bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	149	if (!bndregs) {
				150	err = -EINVAL;
				151	goto err_out;
				152	}
				153	/* now go select the individual register in the set of 4 */
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	154	bndreg = &bndregs->bndreg[bndregno];
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	155
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	156	info = kzalloc(sizeof(*info), GFP_KERNEL);
				157	if (!info) {
				158	err = -ENOMEM;
				159	goto err_out;
				160	}
				161	/*
				162	* The registers are always 64-bit, but the upper 32
				163	* bits are ignored in 32-bit mode. Also, note that the
				164	* upper bounds are architecturally represented in 1's
				165	* complement form.
				166	*
				167	* The 'unsigned long' cast is because the compiler
				168	* complains when casting from integers to different-size
				169	* pointers.
				170	*/
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	171	info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound;
				172	info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	173	info->si_addr_lsb = 0;
				174	info->si_signo = SIGSEGV;
				175	info->si_errno = 0;
				176	info->si_code = SEGV_BNDERR;
Ricardo Neri	32542ee	2017-10-27 13:25:36 -0700	[diff] [blame]	177	info->si_addr = insn_get_addr_ref(&insn, regs);
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	178	/*
				179	* We were not able to extract an address from the instruction,
				180	* probably because there was something invalid in it.
				181	*/
Tobias Klauser	4538286	2017-01-12 16:53:11 +0100	[diff] [blame]	182	if (info->si_addr == (void __user *)-1) {
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	183	err = -EINVAL;
				184	goto err_out;
				185	}
Dave Hansen	97efebf	2015-06-07 11:37:03 -0700	[diff] [blame]	186	trace_mpx_bounds_register_exception(info->si_addr, bndreg);
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	187	return info;
				188	err_out:
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	189	/* info might be NULL, but kfree() handles that */
				190	kfree(info);
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	191	return ERR_PTR(err);
				192	}
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	193
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	194	static __user void *mpx_get_bounds_dir(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	195	{
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	196	const struct mpx_bndcsr *bndcsr;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	197
				198	if (!cpu_feature_enabled(X86_FEATURE_MPX))
				199	return MPX_INVALID_BOUNDS_DIR;
				200
				201	/*
				202	* The bounds directory pointer is stored in a register
				203	* only accessible if we first do an xsave.
				204	*/
Dave Hansen	d91cab7	2015-09-02 16:31:26 -0700	[diff] [blame]	205	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	206	if (!bndcsr)
				207	return MPX_INVALID_BOUNDS_DIR;
				208
				209	/*
				210	* Make sure the register looks valid by checking the
				211	* enable bit.
				212	*/
				213	if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
				214	return MPX_INVALID_BOUNDS_DIR;
				215
				216	/*
				217	* Lastly, mask off the low bits used for configuration
				218	* flags, and return the address of the bounds table.
				219	*/
				220	return (void __user *)(unsigned long)
				221	(bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
				222	}
				223
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	224	int mpx_enable_management(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	225	{
				226	void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	227	struct mm_struct *mm = current->mm;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	228	int ret = 0;
				229
				230	/*
				231	* runtime in the userspace will be responsible for allocation of
				232	* the bounds directory. Then, it will save the base of the bounds
				233	* directory into XSAVE/XRSTOR Save Area and enable MPX through
				234	* XRSTOR instruction.
				235	*
Dave Hansen	a84eeaa	2015-06-07 11:37:01 -0700	[diff] [blame]	236	* The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
				237	* expected to be relatively expensive. Storing the bounds
				238	* directory here means that we do not have to do xsave in the
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	239	* unmap path; we can just use mm->context.bd_addr instead.
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	240	*/
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	241	bd_base = mpx_get_bounds_dir();
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	242	down_write(&mm->mmap_sem);
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	243
				244	/* MPX doesn't support addresses above 47 bits yet. */
				245	if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
				246	pr_warn_once("%s (%d): MPX cannot handle addresses "
				247	"above 47-bits. Disabling.",
				248	current->comm, current->pid);
				249	ret = -ENXIO;
				250	goto out;
				251	}
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	252	mm->context.bd_addr = bd_base;
				253	if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	254	ret = -ENXIO;
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	255	out:
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	256	up_write(&mm->mmap_sem);
				257	return ret;
				258	}
				259
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	260	int mpx_disable_management(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	261	{
				262	struct mm_struct *mm = current->mm;
				263
				264	if (!cpu_feature_enabled(X86_FEATURE_MPX))
				265	return -ENXIO;
				266
				267	down_write(&mm->mmap_sem);
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	268	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	269	up_write(&mm->mmap_sem);
				270	return 0;
				271	}
				272
Dave Hansen	6ac52bb	2015-06-07 11:37:05 -0700	[diff] [blame]	273	static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
				274	unsigned long *curval,
				275	unsigned long __user *addr,
				276	unsigned long old_val, unsigned long new_val)
				277	{
				278	int ret;
				279	/*
				280	* user_atomic_cmpxchg_inatomic() actually uses sizeof()
				281	* the pointer that we pass to it to figure out how much
				282	* data to cmpxchg. We have to be careful here not to
				283	* pass a pointer to a 64-bit data type when we only want
				284	* a 32-bit copy.
				285	*/
				286	if (is_64bit_mm(mm)) {
				287	ret = user_atomic_cmpxchg_inatomic(curval,
				288	addr, old_val, new_val);
				289	} else {
				290	u32 uninitialized_var(curval_32);
				291	u32 old_val_32 = old_val;
				292	u32 new_val_32 = new_val;
				293	u32 __user addr_32 = (u32 __user )addr;
				294
				295	ret = user_atomic_cmpxchg_inatomic(&curval_32,
				296	addr_32, old_val_32, new_val_32);
				297	*curval = curval_32;
				298	}
				299	return ret;
				300	}
				301
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	302	/*
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	303	* With 32-bit mode, a bounds directory is 4MB, and the size of each
				304	* bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	305	* and the size of each bounds table is 4MB.
				306	*/
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	307	static int allocate_bt(struct mm_struct mm, long __user bd_entry)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	308	{
				309	unsigned long expected_old_val = 0;
				310	unsigned long actual_old_val = 0;
				311	unsigned long bt_addr;
Dave Hansen	a1149fc	2015-06-07 11:37:04 -0700	[diff] [blame]	312	unsigned long bd_new_entry;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	313	int ret = 0;
				314
				315	/*
				316	* Carve the virtual space out of userspace for the new
				317	* bounds table:
				318	*/
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	319	bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	320	if (IS_ERR((void *)bt_addr))
				321	return PTR_ERR((void *)bt_addr);
				322	/*
				323	* Set the valid flag (kinda like _PAGE_PRESENT in a pte)
				324	*/
Dave Hansen	a1149fc	2015-06-07 11:37:04 -0700	[diff] [blame]	325	bd_new_entry = bt_addr \| MPX_BD_ENTRY_VALID_FLAG;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	326
				327	/*
				328	* Go poke the address of the new bounds table in to the
				329	* bounds directory entry out in userspace memory. Note:
				330	* we may race with another CPU instantiating the same table.
				331	* In that case the cmpxchg will see an unexpected
				332	* 'actual_old_val'.
				333	*
				334	* This can fault, but that's OK because we do not hold
				335	* mmap_sem at this point, unlike some of the other part
				336	* of the MPX code that have to pagefault_disable().
				337	*/
Dave Hansen	6ac52bb	2015-06-07 11:37:05 -0700	[diff] [blame]	338	ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
				339	expected_old_val, bd_new_entry);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	340	if (ret)
				341	goto out_unmap;
				342
				343	/*
				344	* The user_atomic_cmpxchg_inatomic() will only return nonzero
				345	* for faults, not if the cmpxchg itself fails. Now we must
				346	* verify that the cmpxchg itself completed successfully.
				347	*/
				348	/*
				349	* We expected an empty 'expected_old_val', but instead found
				350	* an apparently valid entry. Assume we raced with another
				351	* thread to instantiate this table and desclare succecss.
				352	*/
				353	if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
				354	ret = 0;
				355	goto out_unmap;
				356	}
				357	/*
				358	* We found a non-empty bd_entry but it did not have the
				359	* VALID_FLAG set. Return an error which will result in
				360	* a SEGV since this probably means that somebody scribbled
				361	* some invalid data in to a bounds table.
				362	*/
				363	if (expected_old_val != actual_old_val) {
				364	ret = -EINVAL;
				365	goto out_unmap;
				366	}
Dave Hansen	cd4996d	2015-06-07 11:37:04 -0700	[diff] [blame]	367	trace_mpx_new_bounds_table(bt_addr);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	368	return 0;
				369	out_unmap:
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	370	vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	371	return ret;
				372	}
				373
				374	/*
				375	* When a BNDSTX instruction attempts to save bounds to a bounds
				376	* table, it will first attempt to look up the table in the
				377	* first-level bounds directory. If it does not find a table in
				378	* the directory, a #BR is generated and we get here in order to
				379	* allocate a new table.
				380	*
				381	* With 32-bit mode, the size of BD is 4MB, and the size of each
				382	* bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
				383	* and the size of each bound table is 4MB.
				384	*/
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	385	static int do_mpx_bt_fault(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	386	{
				387	unsigned long bd_entry, bd_base;
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	388	const struct mpx_bndcsr *bndcsr;
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	389	struct mm_struct *mm = current->mm;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	390
Dave Hansen	d91cab7	2015-09-02 16:31:26 -0700	[diff] [blame]	391	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	392	if (!bndcsr)
				393	return -EINVAL;
				394	/*
				395	* Mask off the preserve and enable bits
				396	*/
				397	bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
				398	/*
				399	* The hardware provides the address of the missing or invalid
				400	* entry via BNDSTATUS, so we don't have to go look it up.
				401	*/
				402	bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
				403	/*
				404	* Make sure the directory entry is within where we think
				405	* the directory is.
				406	*/
				407	if ((bd_entry < bd_base) \|\|
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	408	(bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	409	return -EINVAL;
				410
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	411	return allocate_bt(mm, (long __user *)bd_entry);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	412	}
				413
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	414	int mpx_handle_bd_fault(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	415	{
				416	/*
				417	* Userspace never asked us to manage the bounds tables,
				418	* so refuse to help.
				419	*/
				420	if (!kernel_managing_mpx_tables(current->mm))
				421	return -EINVAL;
				422
Joerg Roedel	5ed386e	2017-04-06 16:19:22 +0200	[diff] [blame]	423	return do_mpx_bt_fault();
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	424	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	425
				426	/*
				427	* A thin wrapper around get_user_pages(). Returns 0 if the
				428	* fault was resolved or -errno if not.
				429	*/
				430	static int mpx_resolve_fault(long __user *addr, int write)
				431	{
				432	long gup_ret;
				433	int nr_pages = 1;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	434
Lorenzo Stoakes	768ae30	2016-10-13 01:20:16 +0100	[diff] [blame]	435	gup_ret = get_user_pages((unsigned long)addr, nr_pages,
				436	write ? FOLL_WRITE : 0, NULL, NULL);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	437	/*
				438	* get_user_pages() returns number of pages gotten.
				439	* 0 means we failed to fault in and get anything,
				440	* probably because 'addr' is bad.
				441	*/
				442	if (!gup_ret)
				443	return -EFAULT;
				444	/* Other error, return it */
				445	if (gup_ret < 0)
				446	return gup_ret;
				447	/* must have gup'd a page and gup_ret>0, success */
				448	return 0;
				449	}
				450
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	451	static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
				452	unsigned long bd_entry)
				453	{
				454	unsigned long bt_addr = bd_entry;
				455	int align_to_bytes;
				456	/*
				457	* Bit 0 in a bt_entry is always the valid bit.
				458	*/
				459	bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
				460	/*
				461	* Tables are naturally aligned at 8-byte boundaries
				462	* on 64-bit and 4-byte boundaries on 32-bit. The
				463	* documentation makes it appear that the low bits
				464	* are ignored by the hardware, so we do the same.
				465	*/
				466	if (is_64bit_mm(mm))
				467	align_to_bytes = 8;
				468	else
				469	align_to_bytes = 4;
				470	bt_addr &= ~(align_to_bytes-1);
				471	return bt_addr;
				472	}
				473
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	474	/*
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	475	* We only want to do a 4-byte get_user() on 32-bit. Otherwise,
				476	* we might run off the end of the bounds table if we are on
				477	* a 64-bit kernel and try to get 8 bytes.
				478	*/
Tobias Klauser	6bce725	2017-03-08 14:30:34 +0100	[diff] [blame]	479	static int get_user_bd_entry(struct mm_struct mm, unsigned long bd_entry_ret,
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	480	long __user *bd_entry_ptr)
				481	{
				482	u32 bd_entry_32;
				483	int ret;
				484
				485	if (is_64bit_mm(mm))
				486	return get_user(*bd_entry_ret, bd_entry_ptr);
				487
				488	/*
				489	* Note that get_user() uses the type of the pointer to
				490	* establish the size of the get, not the destination.
				491	*/
				492	ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
				493	*bd_entry_ret = bd_entry_32;
				494	return ret;
				495	}
				496
				497	/*
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	498	* Get the base of bounds tables pointed by specific bounds
				499	* directory entry.
				500	*/
				501	static int get_bt_addr(struct mm_struct *mm,
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	502	long __user *bd_entry_ptr,
				503	unsigned long *bt_addr_result)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	504	{
				505	int ret;
				506	int valid_bit;
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	507	unsigned long bd_entry;
				508	unsigned long bt_addr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	509
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	510	if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	511	return -EFAULT;
				512
				513	while (1) {
				514	int need_write = 0;
				515
				516	pagefault_disable();
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	517	ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	518	pagefault_enable();
				519	if (!ret)
				520	break;
				521	if (ret == -EFAULT)
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	522	ret = mpx_resolve_fault(bd_entry_ptr, need_write);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	523	/*
				524	* If we could not resolve the fault, consider it
				525	* userspace's fault and error out.
				526	*/
				527	if (ret)
				528	return ret;
				529	}
				530
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	531	valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
				532	bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	533
				534	/*
				535	* When the kernel is managing bounds tables, a bounds directory
				536	* entry will either have a valid address (plus the valid bit)
				537	* OR be completely empty. If we see a !valid entry and some
				538	* data in the address field, we know something is wrong. This
				539	* -EINVAL return will cause a SIGSEGV.
				540	*/
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	541	if (!valid_bit && bt_addr)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	542	return -EINVAL;
				543	/*
				544	* Do we have an completely zeroed bt entry? That is OK. It
				545	* just means there was no bounds table for this memory. Make
				546	* sure to distinguish this from -EINVAL, which will cause
				547	* a SEGV.
				548	*/
				549	if (!valid_bit)
				550	return -ENOENT;
				551
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	552	*bt_addr_result = bt_addr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	553	return 0;
				554	}
				555
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	556	static inline int bt_entry_size_bytes(struct mm_struct *mm)
				557	{
				558	if (is_64bit_mm(mm))
				559	return MPX_BT_ENTRY_BYTES_64;
				560	else
				561	return MPX_BT_ENTRY_BYTES_32;
				562	}
				563
				564	/*
				565	* Take a virtual address and turns it in to the offset in bytes
				566	* inside of the bounds table where the bounds table entry
				567	* controlling 'addr' can be found.
				568	*/
				569	static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
				570	unsigned long addr)
				571	{
				572	unsigned long bt_table_nr_entries;
				573	unsigned long offset = addr;
				574
				575	if (is_64bit_mm(mm)) {
				576	/* Bottom 3 bits are ignored on 64-bit */
				577	offset >>= 3;
				578	bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
				579	} else {
				580	/* Bottom 2 bits are ignored on 32-bit */
				581	offset >>= 2;
				582	bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
				583	}
				584	/*
				585	* We know the size of the table in to which we are
				586	* indexing, and we have eliminated all the low bits
				587	* which are ignored for indexing.
				588	*
				589	* Mask out all the high bits which we do not need
				590	* to index in to the table. Note that the tables
				591	* are always powers of two so this gives us a proper
				592	* mask.
				593	*/
				594	offset &= (bt_table_nr_entries-1);
				595	/*
				596	* We now have an entry offset in terms of entries in
				597	* the table. We need to scale it back up to bytes.
				598	*/
				599	offset *= bt_entry_size_bytes(mm);
				600	return offset;
				601	}
				602
				603	/*
				604	* How much virtual address space does a single bounds
				605	* directory entry cover?
				606	*
				607	* Note, we need a long long because 4GB doesn't fit in
				608	* to a long on 32-bit.
				609	*/
				610	static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
				611	{
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	612	unsigned long long virt_space;
				613	unsigned long long GB = (1ULL << 30);
				614
				615	/*
				616	* This covers 32-bit emulation as well as 32-bit kernels
Adam Buchbinder	6a6256f	2016-02-23 15:34:30 -0800	[diff] [blame]	617	* running on 64-bit hardware.
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	618	*/
				619	if (!is_64bit_mm(mm))
				620	return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
				621
				622	/*
				623	* 'x86_virt_bits' returns what the hardware is capable
Adam Buchbinder	6a6256f	2016-02-23 15:34:30 -0800	[diff] [blame]	624	* of, and returns the full >32-bit address space when
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	625	* running 32-bit kernels on 64-bit hardware.
				626	*/
				627	virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
				628	return virt_space / MPX_BD_NR_ENTRIES_64;
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	629	}
				630
				631	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	632	* Free the backing physical pages of bounds table 'bt_addr'.
				633	* Assume start...end is within that bounds table.
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	634	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	635	static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
				636	unsigned long bt_addr,
				637	unsigned long start_mapping, unsigned long end_mapping)
				638	{
				639	struct vm_area_struct *vma;
				640	unsigned long addr, len;
				641	unsigned long start;
				642	unsigned long end;
				643
				644	/*
				645	* if we 'end' on a boundary, the offset will be 0 which
				646	* is not what we want. Back it up a byte to get the
				647	* last bt entry. Then once we have the entry itself,
				648	* move 'end' back up by the table entry size.
				649	*/
				650	start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
				651	end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
				652	/*
				653	* Move end back up by one entry. Among other things
				654	* this ensures that it remains page-aligned and does
				655	* not screw up zap_page_range()
				656	*/
				657	end += bt_entry_size_bytes(mm);
				658
				659	/*
				660	* Find the first overlapping vma. If vma->vm_start > start, there
				661	* will be a hole in the bounds table. This -EINVAL return will
				662	* cause a SIGSEGV.
				663	*/
				664	vma = find_vma(mm, start);
				665	if (!vma \|\| vma->vm_start > start)
				666	return -EINVAL;
				667
				668	/*
				669	* A NUMA policy on a VM_MPX VMA could cause this bounds table to
				670	* be split. So we need to look across the entire 'start -> end'
				671	* range of this bounds table, find all of the VM_MPX VMAs, and
				672	* zap only those.
				673	*/
				674	addr = start;
				675	while (vma && vma->vm_start < end) {
				676	/*
				677	* We followed a bounds directory entry down
				678	* here. If we find a non-MPX VMA, that's bad,
				679	* so stop immediately and return an error. This
				680	* probably results in a SIGSEGV.
				681	*/
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	682	if (!(vma->vm_flags & VM_MPX))
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	683	return -EINVAL;
				684
				685	len = min(vma->vm_end, end) - addr;
Kirill A. Shutemov	ecf1385	2017-02-22 15:46:37 -0800	[diff] [blame]	686	zap_page_range(vma, addr, len);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	687	trace_mpx_unmap_zap(addr, addr+len);
				688
				689	vma = vma->vm_next;
				690	addr = vma->vm_start;
				691	}
				692	return 0;
				693	}
				694
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	695	static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
				696	unsigned long addr)
				697	{
				698	/*
				699	* There are several ways to derive the bd offsets. We
				700	* use the following approach here:
				701	* 1. We know the size of the virtual address space
				702	* 2. We know the number of entries in a bounds table
				703	* 3. We know that each entry covers a fixed amount of
				704	* virtual address space.
				705	* So, we can just divide the virtual address by the
				706	* virtual space used by one entry to determine which
				707	* entry "controls" the given virtual address.
				708	*/
				709	if (is_64bit_mm(mm)) {
				710	int bd_entry_size = 8; /* 64-bit pointer */
				711	/*
				712	* Take the 64-bit addressing hole in to account.
				713	*/
				714	addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
				715	return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
				716	} else {
				717	int bd_entry_size = 4; /* 32-bit pointer */
				718	/*
				719	* 32-bit has no hole so this case needs no mask
				720	*/
				721	return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
				722	}
				723	/*
				724	* The two return calls above are exact copies. If we
				725	* pull out a single copy and put it in here, gcc won't
				726	* realize that we're doing a power-of-2 divide and use
				727	* shifts. It uses a real divide. If we put them up
				728	* there, it manages to figure it out (gcc 4.8.3).
				729	*/
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	730	}
				731
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	732	static int unmap_entire_bt(struct mm_struct *mm,
				733	long __user *bd_entry, unsigned long bt_addr)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	734	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	735	unsigned long expected_old_val = bt_addr \| MPX_BD_ENTRY_VALID_FLAG;
				736	unsigned long uninitialized_var(actual_old_val);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	737	int ret;
				738
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	739	while (1) {
				740	int need_write = 1;
				741	unsigned long cleared_bd_entry = 0;
				742
				743	pagefault_disable();
				744	ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
				745	bd_entry, expected_old_val, cleared_bd_entry);
				746	pagefault_enable();
				747	if (!ret)
				748	break;
				749	if (ret == -EFAULT)
				750	ret = mpx_resolve_fault(bd_entry, need_write);
				751	/*
				752	* If we could not resolve the fault, consider it
				753	* userspace's fault and error out.
				754	*/
				755	if (ret)
				756	return ret;
				757	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	758	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	759	* The cmpxchg was performed, check the results.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	760	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	761	if (actual_old_val != expected_old_val) {
				762	/*
				763	* Someone else raced with us to unmap the table.
				764	* That is OK, since we were both trying to do
				765	* the same thing. Declare success.
				766	*/
				767	if (!actual_old_val)
				768	return 0;
				769	/*
				770	* Something messed with the bounds directory
				771	* entry. We hold mmap_sem for read or write
				772	* here, so it could not be a _new_ bounds table
				773	* that someone just allocated. Something is
				774	* wrong, so pass up the error and SIGSEGV.
				775	*/
				776	return -EINVAL;
				777	}
				778	/*
				779	* Note, we are likely being called under do_munmap() already. To
				780	* avoid recursion, do_munmap() will check whether it comes
				781	* from one bounds table through VM_MPX flag.
				782	*/
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	783	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	784	}
				785
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	786	static int try_unmap_single_bt(struct mm_struct *mm,
				787	unsigned long start, unsigned long end)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	788	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	789	struct vm_area_struct *next;
				790	struct vm_area_struct *prev;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	791	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	792	* "bta" == Bounds Table Area: the area controlled by the
				793	* bounds table that we are unmapping.
				794	*/
				795	unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
				796	unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
				797	unsigned long uninitialized_var(bt_addr);
				798	void __user *bde_vaddr;
				799	int ret;
				800	/*
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	801	* We already unlinked the VMAs from the mm's rbtree so 'start'
				802	* is guaranteed to be in a hole. This gets us the first VMA
				803	* before the hole in to 'prev' and the next VMA after the hole
				804	* in to 'next'.
				805	*/
				806	next = find_vma_prev(mm, start, &prev);
				807	/*
				808	* Do not count other MPX bounds table VMAs as neighbors.
				809	* Although theoretically possible, we do not allow bounds
				810	* tables for bounds tables so our heads do not explode.
				811	* If we count them as neighbors here, we may end up with
				812	* lots of tables even though we have no actual table
				813	* entries in use.
				814	*/
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	815	while (next && (next->vm_flags & VM_MPX))
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	816	next = next->vm_next;
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	817	while (prev && (prev->vm_flags & VM_MPX))
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	818	prev = prev->vm_prev;
				819	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	820	* We know 'start' and 'end' lie within an area controlled
				821	* by a single bounds table. See if there are any other
				822	* VMAs controlled by that bounds table. If there are not
				823	* then we can "expand" the are we are unmapping to possibly
				824	* cover the entire table.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	825	*/
				826	next = find_vma_prev(mm, start, &prev);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	827	if ((!prev \|\| prev->vm_end <= bta_start_vaddr) &&
				828	(!next \|\| next->vm_start >= bta_end_vaddr)) {
				829	/*
				830	* No neighbor VMAs controlled by same bounds
				831	* table. Try to unmap the whole thing
				832	*/
				833	start = bta_start_vaddr;
				834	end = bta_end_vaddr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	835	}
				836
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	837	bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	838	ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	839	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	840	* No bounds table there, so nothing to unmap.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	841	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	842	if (ret == -ENOENT) {
				843	ret = 0;
				844	return 0;
				845	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	846	if (ret)
				847	return ret;
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	848	/*
				849	* We are unmapping an entire table. Either because the
				850	* unmap that started this whole process was large enough
				851	* to cover an entire table, or that the unmap was small
				852	* but was the area covered by a bounds table.
				853	*/
				854	if ((start == bta_start_vaddr) &&
				855	(end == bta_end_vaddr))
				856	return unmap_entire_bt(mm, bde_vaddr, bt_addr);
				857	return zap_bt_entries_mapping(mm, bt_addr, start, end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	858	}
				859
				860	static int mpx_unmap_tables(struct mm_struct *mm,
				861	unsigned long start, unsigned long end)
				862	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	863	unsigned long one_unmap_start;
Dave Hansen	2a1dcb1	2015-06-07 11:37:03 -0700	[diff] [blame]	864	trace_mpx_unmap_search(start, end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	865
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	866	one_unmap_start = start;
				867	while (one_unmap_start < end) {
				868	int ret;
				869	unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
				870	bd_entry_virt_space(mm));
				871	unsigned long one_unmap_end = end;
				872	/*
				873	* if the end is beyond the current bounds table,
				874	* move it back so we only deal with a single one
				875	* at a time
				876	*/
				877	if (one_unmap_end > next_unmap_start)
				878	one_unmap_end = next_unmap_start;
				879	ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	880	if (ret)
				881	return ret;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	882
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	883	one_unmap_start = next_unmap_start;
				884	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	885	return 0;
				886	}
				887
				888	/*
				889	* Free unused bounds tables covered in a virtual address region being
				890	* munmap()ed. Assume end > start.
				891	*
				892	* This function will be called by do_munmap(), and the VMAs covering
				893	* the virtual address region start...end have already been split if
				894	* necessary, and the 'vma' is the first vma in this range (start -> end).
				895	*/
				896	void mpx_notify_unmap(struct mm_struct mm, struct vm_area_struct vma,
				897	unsigned long start, unsigned long end)
				898	{
				899	int ret;
				900
				901	/*
				902	* Refuse to do anything unless userspace has asked
				903	* the kernel to help manage the bounds tables,
				904	*/
				905	if (!kernel_managing_mpx_tables(current->mm))
				906	return;
				907	/*
				908	* This will look across the entire 'start -> end' range,
				909	* and find all of the non-VM_MPX VMAs.
				910	*
				911	* To avoid recursion, if a VM_MPX vma is found in the range
				912	* (start->end), we will not continue follow-up work. This
				913	* recursion represents having bounds tables for bounds tables,
				914	* which should not occur normally. Being strict about it here
				915	* helps ensure that we do not have an exploitable stack overflow.
				916	*/
				917	do {
				918	if (vma->vm_flags & VM_MPX)
				919	return;
				920	vma = vma->vm_next;
				921	} while (vma && vma->vm_start < end);
				922
				923	ret = mpx_unmap_tables(mm, start, end);
				924	if (ret)
				925	force_sig(SIGSEGV, current);
				926	}
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	927
				928	/* MPX cannot handle addresses above 47 bits yet. */
				929	unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
				930	unsigned long flags)
				931	{
				932	if (!kernel_managing_mpx_tables(current->mm))
				933	return addr;
				934	if (addr + len <= DEFAULT_MAP_WINDOW)
				935	return addr;
				936	if (flags & MAP_FIXED)
				937	return -ENOMEM;
				938
				939	/*
				940	* Requested len is larger than the whole area we're allowed to map in.
				941	* Resetting hinting address wouldn't do much good -- fail early.
				942	*/
				943	if (len > DEFAULT_MAP_WINDOW)
				944	return -ENOMEM;
				945
				946	/* Look for unmap area within DEFAULT_MAP_WINDOW */
				947	return 0;
				948	}