Blame - arch/x86/mm/tlb.c - kernel/msm-5.4

blob: 92e46f4c058c64a0bf68f52212a70e5401752acf [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Paul Gortmaker	4b599fe	2016-07-13 20:18:55 -0400	[diff] [blame]	7	#include <linux/export.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	9	#include <linux/debugfs.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	12	#include <asm/mmu_context.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	13	#include <asm/nospec-branch.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	14	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	15	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	16	#include <asm/uv/uv.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	17
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	18	/*
Andy Lutomirski	ce4a4e56	2017-05-28 10:00:14 -0700	[diff] [blame]	19	* TLB flushing, formerly SMP-only
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	20	* c/o Linus Torvalds.
				21	*
				22	* These mean you can really definitely utterly forget about
				23	* writing to user space from interrupts. (It's not allowed anyway).
				24	*
				25	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				26	*
				27	* More scalable flush, from Andi Kleen
				28	*
Alex Shi	52aec33	2012-06-28 09:02:23 +0800	[diff] [blame]	29	* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	30	*/
				31
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	32	/*
				33	* We get here when we do something requiring a TLB invalidation
				34	* but could not go invalidate all of the contexts. We do the
				35	* necessary invalidation by clearing out the 'ctx_id' which
				36	* forces a TLB flush when the context is loaded.
				37	*/
zhong jiang	387048f	2018-07-21 15:55:32 +0800	[diff] [blame]	38	static void clear_asid_other(void)
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	39	{
				40	u16 asid;
				41
				42	/*
				43	* This is only expected to be set if we have disabled
				44	* kernel _PAGE_GLOBAL pages.
				45	*/
				46	if (!static_cpu_has(X86_FEATURE_PTI)) {
				47	WARN_ON_ONCE(1);
				48	return;
				49	}
				50
				51	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				52	/* Do not need to flush the current asid */
				53	if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
				54	continue;
				55	/*
				56	* Make sure the next time we go to switch to
				57	* this asid, we do a flush:
				58	*/
				59	this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
				60	}
				61	this_cpu_write(cpu_tlbstate.invalidate_other, false);
				62	}
				63
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	64	atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
				65
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	66
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	67	static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
				68	u16 new_asid, bool need_flush)
				69	{
				70	u16 asid;
				71
				72	if (!static_cpu_has(X86_FEATURE_PCID)) {
				73	*new_asid = 0;
				74	*need_flush = true;
				75	return;
				76	}
				77
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	78	if (this_cpu_read(cpu_tlbstate.invalidate_other))
				79	clear_asid_other();
				80
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	81	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				82	if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
				83	next->context.ctx_id)
				84	continue;
				85
				86	*new_asid = asid;
				87	*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
				88	next_tlb_gen);
				89	return;
				90	}
				91
				92	/*
				93	* We don't currently own an ASID slot on this CPU.
				94	* Allocate a slot.
				95	*/
				96	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
				97	if (*new_asid >= TLB_NR_DYN_ASIDS) {
				98	*new_asid = 0;
				99	this_cpu_write(cpu_tlbstate.next_asid, 1);
				100	}
				101	*need_flush = true;
				102	}
				103
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	104	static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
				105	{
				106	unsigned long new_mm_cr3;
				107
				108	if (need_flush) {
Peter Zijlstra	6fd166a	2017-12-04 15:07:59 +0100	[diff] [blame]	109	invalidate_user_asid(new_asid);
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	110	new_mm_cr3 = build_cr3(pgdir, new_asid);
				111	} else {
				112	new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
				113	}
				114
				115	/*
				116	* Caution: many callers of this function expect
				117	* that load_cr3() is serializing and orders TLB
				118	* fills with respect to the mm_cpumask writes.
				119	*/
				120	write_cr3(new_mm_cr3);
				121	}
				122
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	123	void leave_mm(int cpu)
				124	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	125	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				126
				127	/*
				128	* It's plausible that we're in lazy TLB mode while our mm is init_mm.
				129	* If so, our callers still expect us to flush the TLB, but there
				130	* aren't any user TLB entries in init_mm to worry about.
				131	*
				132	* This needs to happen before any other sanity checks due to
				133	* intel_idle's shenanigans.
				134	*/
				135	if (loaded_mm == &init_mm)
				136	return;
				137
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	138	/* Warn if we're not lazy. */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	139	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	140
				141	switch_mm(NULL, &init_mm, NULL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	142	}
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame]	143	EXPORT_SYMBOL_GPL(leave_mm);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	144
/*
 * Switch this CPU to @next from a context where interrupts may be enabled.
 *
 * @prev: previous mm, forwarded unchanged (leave_mm() passes NULL).
 * @next: mm to load.
 * @tsk:  task being switched to, forwarded unchanged (may be NULL).
 *
 * Thin wrapper: disables local interrupts around switch_mm_irqs_off()
 * and restores the previous interrupt state afterwards.
 */
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}
				154
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	155	static void sync_current_stack_to_mm(struct mm_struct *mm)
				156	{
				157	unsigned long sp = current_stack_pointer;
				158	pgd_t *pgd = pgd_offset(mm, sp);
				159
Kirill A. Shutemov	ed7588d	2018-05-18 13:35:24 +0300	[diff] [blame]	160	if (pgtable_l5_enabled()) {
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	161	if (unlikely(pgd_none(*pgd))) {
				162	pgd_t *pgd_ref = pgd_offset_k(sp);
				163
				164	set_pgd(pgd, *pgd_ref);
				165	}
				166	} else {
				167	/*
				168	* "pgd" is faked. The top level entries are "p4d"s, so sync
				169	* the p4d. This compiles to approximately the same code as
				170	* the 5-level case.
				171	*/
				172	p4d_t *p4d = p4d_offset(pgd, sp);
				173
				174	if (unlikely(p4d_none(*p4d))) {
				175	pgd_t *pgd_ref = pgd_offset_k(sp);
				176	p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
				177
				178	set_p4d(p4d, *p4d_ref);
				179	}
				180	}
				181	}
				182
Andy Lutomirski	078194f	2016-04-26 09:39:09 -0700	[diff] [blame]	183	void switch_mm_irqs_off(struct mm_struct prev, struct mm_struct next,
				184	struct task_struct *tsk)
				185	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	186	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	187	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	188	unsigned cpu = smp_processor_id();
				189	u64 next_tlb_gen;
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	190	bool need_flush;
				191	u16 new_asid;
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	192
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	193	/*
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	194	* NB: The scheduler will call us with prev == next when switching
				195	* from lazy TLB mode to normal mode if active_mm isn't changing.
				196	* When this happens, we don't assume that CR3 (and hence
				197	* cpu_tlbstate.loaded_mm) matches next.
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	198	*
				199	* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
				200	*/
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	201
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	202	/* We don't want flush_tlb_func_* to run concurrently with us. */
				203	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
				204	WARN_ON_ONCE(!irqs_disabled());
				205
				206	/*
				207	* Verify that CR3 is what we think it is. This will catch
				208	* hypothetical buggy code that directly switches to swapper_pg_dir
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	209	* without going through leave_mm() / switch_mm_irqs_off() or that
				210	* does something like write_cr3(read_cr3_pa()).
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	211	*
				212	* Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
				213	* isn't free.
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	214	*/
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	215	#ifdef CONFIG_DEBUG_VM
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	216	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	217	/*
				218	* If we were to BUG here, we'd be very likely to kill
				219	* the system so hard that we don't see the call trace.
				220	* Try to recover instead by ignoring the error and doing
				221	* a global flush to minimize the chance of corruption.
				222	*
				223	* (This is far from being a fully correct recovery.
				224	* Architecturally, the CPU could prefetch something
				225	* back into an incorrect ASID slot and leave it there
				226	* to cause trouble down the road. It's better than
				227	* nothing, though.)
				228	*/
				229	__flush_tlb_all();
				230	}
				231	#endif
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	232	this_cpu_write(cpu_tlbstate.is_lazy, false);
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	233
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	234	/*
Mathieu Desnoyers	10bcc80	2018-01-29 15:20:18 -0500	[diff] [blame]	235	* The membarrier system call requires a full memory barrier and
				236	* core serialization before returning to user-space, after
				237	* storing to rq->curr. Writing to CR3 provides that full
				238	* memory barrier and core serializing instruction.
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	239	*/
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	240	if (real_prev == next) {
Andy Lutomirski	e8b9b0c	2017-10-14 09:59:49 -0700	[diff] [blame]	241	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
				242	next->context.ctx_id);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	243
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	244	/*
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	245	* We don't currently support having a real mm loaded without
				246	* our cpu set in mm_cpumask(). We have all the bookkeeping
				247	* in place to figure out whether we would need to flush
				248	* if our cpu were cleared in mm_cpumask(), but we don't
				249	* currently use it.
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	250	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	251	if (WARN_ON_ONCE(real_prev != &init_mm &&
				252	!cpumask_test_cpu(cpu, mm_cpumask(next))))
				253	cpumask_set_cpu(cpu, mm_cpumask(next));
				254
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	255	return;
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	256	} else {
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	257	u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
				258
				259	/*
				260	* Avoid user/user BTB poisoning by flushing the branch
				261	* predictor when switching between processes. This stops
				262	* one process from doing Spectre-v2 attacks on another.
				263	*
				264	* As an optimization, flush indirect branches only when
				265	* switching into processes that disable dumping. This
				266	* protects high value processes like gpg, without having
				267	* too high performance overhead. IBPB is expensive!
				268	*
				269	* This will not flush branches when switching into kernel
				270	* threads. It will also not flush if we switch to idle
				271	* thread and back to the same process. It will flush if we
				272	* switch to a different non-dumpable process.
				273	*/
				274	if (tsk && tsk->mm &&
				275	tsk->mm->context.ctx_id != last_ctx_id &&
				276	get_dumpable(tsk->mm) != SUID_DUMP_USER)
				277	indirect_branch_prediction_barrier();
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	278
				279	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
				280	/*
				281	* If our current stack is in vmalloc space and isn't
				282	* mapped in the new pgd, we'll double-fault. Forcibly
				283	* map it.
				284	*/
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	285	sync_current_stack_to_mm(next);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	286	}
				287
Rik van Riel	e9d8c61	2018-07-16 15:03:37 -0400	[diff] [blame]	288	/*
				289	* Stop remote flushes for the previous mm.
				290	* Skip kernel threads; we never send init_mm TLB flushing IPIs,
				291	* but the bitmap manipulation can cause cache line contention.
				292	*/
				293	if (real_prev != &init_mm) {
				294	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
				295	mm_cpumask(real_prev)));
				296	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
				297	}
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	298
				299	/*
				300	* Start remote flushes and then read tlb_gen.
				301	*/
Rik van Riel	e9d8c61	2018-07-16 15:03:37 -0400	[diff] [blame]	302	if (next != &init_mm)
				303	cpumask_set_cpu(cpu, mm_cpumask(next));
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	304	next_tlb_gen = atomic64_read(&next->context.tlb_gen);
				305
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	306	choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	307
Andy Lutomirski	4012e77	2018-08-29 08:47:18 -0700	[diff] [blame]	308	/* Let nmi_uaccess_okay() know that we're changing CR3. */
				309	this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
				310	barrier();
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	311	}
Andy Lutomirski	4012e77	2018-08-29 08:47:18 -0700	[diff] [blame]	312
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	313	if (need_flush) {
				314	this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
				315	this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
				316	load_new_mm_cr3(next->pgd, new_asid, true);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	317
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	318	/*
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	319	* NB: This gets called via leave_mm() in the idle path
				320	* where RCU functions differently. Tracing normally
				321	* uses RCU, so we need to use the _rcuidle variant.
				322	*
				323	* (There is no good reason for this. The idle code should
				324	* be rearranged to call this before rcu_idle_enter().)
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	325	*/
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	326	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
				327	} else {
				328	/* The new ASID is already up to date. */
				329	load_new_mm_cr3(next->pgd, new_asid, false);
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	330
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	331	/* See above wrt _rcuidle. */
				332	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	333	}
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	334
Rik van Riel	12c4d97	2018-09-25 23:58:39 -0400	[diff] [blame]	335	/*
				336	* Record last user mm's context id, so we can avoid
				337	* flushing branch buffer with IBPB if we switch back
				338	* to the same user.
				339	*/
				340	if (next != &init_mm)
				341	this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
				342
				343	/* Make sure we write CR3 before loaded_mm. */
				344	barrier();
				345
				346	this_cpu_write(cpu_tlbstate.loaded_mm, next);
				347	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
				348
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	349	load_mm_cr4(next);
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	350	switch_ldt(real_prev, next);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	351	}
				352
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	353	/*
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	354	* Please ignore the name of this function. It should be called
				355	* switch_to_kernel_thread().
				356	*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	357	* enter_lazy_tlb() is a hint from the scheduler that we are entering a
				358	* kernel thread or other context without an mm. Acceptable implementations
				359	* include doing nothing whatsoever, switching to init_mm, or various clever
				360	* lazy tricks to try to minimize TLB flushes.
				361	*
				362	* The scheduler reserves the right to call enter_lazy_tlb() several times
				363	* in a row. It will notify us that we're going back to a real mm by
				364	* calling switch_mm_irqs_off().
				365	*/
				366	void enter_lazy_tlb(struct mm_struct mm, struct task_struct tsk)
				367	{
				368	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
				369	return;
				370
Rik van Riel	5462bc3	2018-09-25 23:58:38 -0400	[diff] [blame]	371	this_cpu_write(cpu_tlbstate.is_lazy, true);
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	372	}
				373
				374	/*
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	375	* Call this when reinitializing a CPU. It fixes the following potential
				376	* problems:
				377	*
				378	* - The ASID changed from what cpu_tlbstate thinks it is (most likely
				379	* because the CPU was taken down and came back up with CR3's PCID
				380	* bits clear. CPU hotplug can do this.
				381	*
				382	* - The TLB contains junk in slots corresponding to inactive ASIDs.
				383	*
				384	* - The CPU went so far out to lunch that it may have missed a TLB
				385	* flush.
				386	*/
				387	void initialize_tlbstate_and_flush(void)
				388	{
				389	int i;
				390	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				391	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
				392	unsigned long cr3 = __read_cr3();
				393
				394	/* Assert that CR3 already references the right mm. */
				395	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
				396
				397	/*
				398	* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
				399	* doesn't work like other CR4 bits because it can only be set from
				400	* long mode.)
				401	*/
Andy Lutomirski	7898f79	2017-09-10 08:52:58 -0700	[diff] [blame]	402	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	403	!(cr4_read_shadow() & X86_CR4_PCIDE));
				404
				405	/* Force ASID 0 and force a TLB flush. */
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	406	write_cr3(build_cr3(mm->pgd, 0));
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	407
				408	/* Reinitialize tlbstate. */
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	409	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	410	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
				411	this_cpu_write(cpu_tlbstate.next_asid, 1);
				412	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
				413	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
				414
				415	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
				416	this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
				417	}
				418
				419	/*
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	420	* flush_tlb_func_common()'s memory ordering requirement is that any
				421	* TLB fills that happen after we flush the TLB are ordered after we
				422	* read active_mm's tlb_gen. We don't need any explicit barriers
				423	* because all x86 flush operations are serializing and the
				424	* atomic64_read operation won't be reordered by the compiler.
				425	*/
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	426	static void flush_tlb_func_common(const struct flush_tlb_info *f,
				427	bool local, enum tlb_flush_reason reason)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	428	{
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	429	/*
				430	* We have three different tlb_gen values in here. They are:
				431	*
				432	* - mm_tlb_gen: the latest generation.
				433	* - local_tlb_gen: the generation that this CPU has already caught
				434	* up to.
				435	* - f->new_tlb_gen: the generation that the requester of the flush
				436	* wants us to catch up to.
				437	*/
				438	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	439	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	440	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	441	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	442
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	443	/* This code cannot presently handle being reentered. */
				444	VM_WARN_ON(!irqs_disabled());
				445
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	446	if (unlikely(loaded_mm == &init_mm))
				447	return;
				448
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	449	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	450	loaded_mm->context.ctx_id);
				451
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	452	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	453	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	454	* We're in lazy mode. We need to at least flush our
				455	* paging-structure cache to avoid speculatively reading
				456	* garbage into our TLB. Since switching to init_mm is barely
				457	* slower than a minimal flush, just switch to init_mm.
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	458	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	459	switch_mm_irqs_off(NULL, &init_mm, NULL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	460	return;
				461	}
				462
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	463	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
				464	/*
				465	* There's nothing to do: we're already up to date. This can
				466	* happen if two concurrent flushes happen -- the first flush to
				467	* be handled can catch us all the way up, leaving no work for
				468	* the second flush.
				469	*/
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	470	trace_tlb_flush(reason, 0);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	471	return;
				472	}
				473
				474	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
				475	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
				476
				477	/*
				478	* If we get to this point, we know that our TLB is out of date.
				479	* This does not strictly imply that we need to flush (it's
				480	* possible that f->new_tlb_gen <= local_tlb_gen), but we're
				481	* going to need to flush in the very near future, so we might
				482	* as well get it over with.
				483	*
				484	* The only question is whether to do a full or partial flush.
				485	*
				486	* We do a partial flush if requested and two extra conditions
				487	* are met:
				488	*
				489	* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
				490	* we've always done all needed flushes to catch up to
				491	* local_tlb_gen. If, for example, local_tlb_gen == 2 and
				492	* f->new_tlb_gen == 3, then we know that the flush needed to bring
				493	* us up to date for tlb_gen 3 is the partial flush we're
				494	* processing.
				495	*
				496	* As an example of why this check is needed, suppose that there
				497	* are two concurrent flushes. The first is a full flush that
				498	* changes context.tlb_gen from 1 to 2. The second is a partial
				499	* flush that changes context.tlb_gen from 2 to 3. If they get
				500	* processed on this CPU in reverse order, we'll see
				501	* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	502	* If we were to use __flush_tlb_one_user() and set local_tlb_gen to
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	503	* 3, we'd be break the invariant: we'd update local_tlb_gen above
				504	* 1 without the full flush that's needed for tlb_gen 2.
				505	*
				506	* 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimiation.
				507	* Partial TLB flushes are not all that much cheaper than full TLB
				508	* flushes, so it seems unlikely that it would be a performance win
				509	* to do a partial flush if that won't bring our TLB fully up to
				510	* date. By doing a full flush instead, we can increase
				511	* local_tlb_gen all the way to mm_tlb_gen and we can probably
				512	* avoid another flush in the very near future.
				513	*/
				514	if (f->end != TLB_FLUSH_ALL &&
				515	f->new_tlb_gen == local_tlb_gen + 1 &&
				516	f->new_tlb_gen == mm_tlb_gen) {
				517	/* Partial flush */
Peter Zijlstra	a31acd3	2018-08-26 12:56:48 +0200	[diff] [blame]	518	unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
				519	unsigned long addr = f->start;
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	520
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	521	while (addr < f->end) {
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	522	__flush_tlb_one_user(addr);
Peter Zijlstra	a31acd3	2018-08-26 12:56:48 +0200	[diff] [blame]	523	addr += 1UL << f->stride_shift;
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	524	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	525	if (local)
Peter Zijlstra	a31acd3	2018-08-26 12:56:48 +0200	[diff] [blame]	526	count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
				527	trace_tlb_flush(reason, nr_invalidate);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	528	} else {
				529	/* Full flush. */
				530	local_flush_tlb();
				531	if (local)
				532	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
				533	trace_tlb_flush(reason, TLB_FLUSH_ALL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	534	}
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	535
				536	/* Both paths above update our state to mm_tlb_gen. */
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	537	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	538	}
				539
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	540	static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
				541	{
				542	const struct flush_tlb_info *f = info;
				543
				544	flush_tlb_func_common(f, true, reason);
				545	}
				546
				547	static void flush_tlb_func_remote(void *info)
				548	{
				549	const struct flush_tlb_info *f = info;
				550
				551	inc_irq_stat(irq_tlb_count);
				552
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	553	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	554	return;
				555
				556	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
				557	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
				558	}
				559
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	560	void native_flush_tlb_others(const struct cpumask *cpumask,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	561	const struct flush_tlb_info *info)
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	562	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	563	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	564	if (info->end == TLB_FLUSH_ALL)
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	565	trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
				566	else
				567	trace_tlb_flush(TLB_REMOTE_SEND_IPI,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	568	(info->end - info->start) >> PAGE_SHIFT);
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	569
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	570	if (is_uv_system()) {
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	571	/*
				572	* This whole special case is confused. UV has a "Broadcast
				573	* Assist Unit", which seems to be a fancy way to send IPIs.
				574	* Back when x86 used an explicit TLB flush IPI, UV was
				575	* optimized to use its own mechanism. These days, x86 uses
				576	* smp_call_function_many(), but UV still uses a manual IPI,
				577	* and that IPI's action is out of date -- it does a manual
				578	* flush instead of calling flush_tlb_func_remote(). This
				579	* means that the percpu tlb_gen variables won't be updated
				580	* and we'll do pointless flushes on future context switches.
				581	*
				582	* Rather than hooking native_flush_tlb_others() here, I think
				583	* that UV should be updated so that smp_call_function_many(),
				584	* etc, are optimal on UV.
				585	*/
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	586	unsigned int cpu;
				587
Xiao Guangrong	25542c6	2011-03-15 09:57:37 +0800	[diff] [blame]	588	cpu = smp_processor_id();
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	589	cpumask = uv_flush_tlb_others(cpumask, info);
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	590	if (cpumask)
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	591	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	592	(void *)info, 1);
Mike Travis	0e21990	2009-01-10 21:58:10 -0800	[diff] [blame]	593	return;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	594	}
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	595	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	596	(void *)info, 1);
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	597	}
				598
Dave Hansen	a510247	2014-07-31 08:41:03 -0700	[diff] [blame]	599	/*
				600	* See Documentation/x86/tlb.txt for details. We choose 33
				601	* because it is large enough to cover the vast majority (at
				602	* least 95%) of allocations, and is small enough that we are
				603	* confident it will not cause too much overhead. Each single
				604	* flush is about 100 ns, so this caps the maximum overhead at
				605	* _about_ 3,000 ns.
				606	*
				607	* This is in units of pages.
				608	*/
Jeremiah Mahler	8642685	2014-08-09 00:38:33 -0700	[diff] [blame]	609	static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	610
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	611	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
Rik van Riel	016c4d9	2018-09-25 23:58:42 -0400	[diff] [blame]	612	unsigned long end, unsigned int stride_shift,
				613	bool freed_tables)
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	614	{
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	615	int cpu;
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	616
Nadav Amit	515ab7c	2018-01-31 13:19:12 -0800	[diff] [blame]	617	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	618	.mm = mm,
Peter Zijlstra	a31acd3	2018-08-26 12:56:48 +0200	[diff] [blame]	619	.stride_shift = stride_shift,
Rik van Riel	9780781	2018-09-25 23:58:43 -0400	[diff] [blame^]	620	.freed_tables = freed_tables,
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	621	};
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	622
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	623	cpu = get_cpu();
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	624
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	625	/* This is also a barrier that synchronizes with switch_mm(). */
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	626	info.new_tlb_gen = inc_mm_tlb_gen(mm);
Andy Lutomirski	71b3c12	2016-01-06 12:21:01 -0800	[diff] [blame]	627
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	628	/* Should we flush just the requested range? */
				629	if ((end != TLB_FLUSH_ALL) &&
Peter Zijlstra	a31acd3	2018-08-26 12:56:48 +0200	[diff] [blame]	630	((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	631	info.start = start;
				632	info.end = end;
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	633	} else {
				634	info.start = 0UL;
				635	info.end = TLB_FLUSH_ALL;
Dave Hansen	4995ab9	2014-07-31 08:40:54 -0700	[diff] [blame]	636	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	637
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	638	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
				639	VM_WARN_ON(irqs_disabled());
				640	local_irq_disable();
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	641	flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	642	local_irq_enable();
				643	}
				644
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	645	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	646	flush_tlb_others(mm_cpumask(mm), &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	647
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	648	put_cpu();
Alex Shi	e7b52ff	2012-06-28 09:02:17 +0800	[diff] [blame]	649	}
				650
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	651
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	652	static void do_flush_tlb_all(void *info)
				653	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	654	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	655	__flush_tlb_all();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	656	}
				657
				658	void flush_tlb_all(void)
				659	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	660	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	661	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	662	}
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	663
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	664	static void do_kernel_range_flush(void *info)
				665	{
				666	struct flush_tlb_info *f = info;
				667	unsigned long addr;
				668
				669	/* flush range by one by one 'invlpg' */
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	670	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	671	__flush_tlb_one_kernel(addr);
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	672	}
				673
				674	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
				675	{
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	676
				677	/* Balance as user space task's flush, a bit conservative */
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	678	if (end == TLB_FLUSH_ALL \|\|
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	679	(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	680	on_each_cpu(do_flush_tlb_all, NULL, 1);
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	681	} else {
				682	struct flush_tlb_info info;
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	683	info.start = start;
				684	info.end = end;
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	685	on_each_cpu(do_kernel_range_flush, &info, 1);
				686	}
				687	}
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	688
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	689	void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
				690	{
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	691	struct flush_tlb_info info = {
				692	.mm = NULL,
				693	.start = 0UL,
				694	.end = TLB_FLUSH_ALL,
				695	};
				696
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	697	int cpu = get_cpu();
				698
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	699	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
				700	VM_WARN_ON(irqs_disabled());
				701	local_irq_disable();
Andy Lutomirski	3f79e4c	2017-05-28 10:00:13 -0700	[diff] [blame]	702	flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	703	local_irq_enable();
				704	}
				705
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	706	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	707	flush_tlb_others(&batch->cpumask, &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	708
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	709	cpumask_clear(&batch->cpumask);
				710
				711	put_cpu();
				712	}
				713
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	714	static ssize_t tlbflush_read_file(struct file file, char __user user_buf,
				715	size_t count, loff_t *ppos)
				716	{
				717	char buf[32];
				718	unsigned int len;
				719
				720	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
				721	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
				722	}
				723
				724	static ssize_t tlbflush_write_file(struct file *file,
				725	const char __user user_buf, size_t count, loff_t ppos)
				726	{
				727	char buf[32];
				728	ssize_t len;
				729	int ceiling;
				730
				731	len = min(count, sizeof(buf) - 1);
				732	if (copy_from_user(buf, user_buf, len))
				733	return -EFAULT;
				734
				735	buf[len] = '\0';
				736	if (kstrtoint(buf, 0, &ceiling))
				737	return -EINVAL;
				738
				739	if (ceiling < 0)
				740	return -EINVAL;
				741
				742	tlb_single_page_flush_ceiling = ceiling;
				743	return count;
				744	}
				745
				746	static const struct file_operations fops_tlbflush = {
				747	.read = tlbflush_read_file,
				748	.write = tlbflush_write_file,
				749	.llseek = default_llseek,
				750	};
				751
				752	static int __init create_tlb_single_page_flush_ceiling(void)
				753	{
				754	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR \| S_IWUSR,
				755	arch_debugfs_dir, NULL, &fops_tlbflush);
				756	return 0;
				757	}
				758	late_initcall(create_tlb_single_page_flush_ceiling);