Blame - drivers/misc/sgi-gru/grutlbpurge.c - kernel/msm-4.9

blob: 1d125091f5e721f773e0c10597b2d5a232f18a49 [file] [log] [blame]

Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	1	/*
				2	* SN Platform GRU Driver
				3	*
				4	* MMUOPS callbacks + TLB flushing
				5	*
				6	* This file handles mmu notifier callbacks from the core kernel. The callbacks
				7	* are used to update the TLB in the GRU as a result of changes in the
				8	* state of a process address space. This file also handles TLB invalidates
				9	* from the GRU driver.
				10	*
				11	* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
				12	*
				13	* This program is free software; you can redistribute it and/or modify
				14	* it under the terms of the GNU General Public License as published by
				15	* the Free Software Foundation; either version 2 of the License, or
				16	* (at your option) any later version.
				17	*
				18	* This program is distributed in the hope that it will be useful,
				19	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				20	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				21	* GNU General Public License for more details.
				22	*
				23	* You should have received a copy of the GNU General Public License
				24	* along with this program; if not, write to the Free Software
				25	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				26	*/
				27
				28	#include <linux/kernel.h>
				29	#include <linux/list.h>
				30	#include <linux/spinlock.h>
				31	#include <linux/mm.h>
				32	#include <linux/slab.h>
				33	#include <linux/device.h>
				34	#include <linux/hugetlb.h>
				35	#include <linux/delay.h>
				36	#include <linux/timex.h>
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	37	#include <linux/srcu.h>
				38	#include <asm/processor.h>
				39	#include "gru.h"
				40	#include "grutables.h"
				41	#include <asm/uv/uv_hub.h>
				42
				43	#define gru_random() get_cycles()
				44
				45	/* ---------------------------------- TLB Invalidation functions --------
				46	* get_tgh_handle
				47	*
				48	* Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
				49	* local blade, use a fixed TGH that is a function of the blade-local cpu
				50	* number. Normally, this TGH is private to the cpu & no contention occurs for
				51	* the TGH. For offblade GRUs, select a random TGH in the range above the
				52	* private TGHs. A spinlock is required to access this TGH & the lock must be
				53	* released when the invalidate completes. This sucks, but it is the best we
				54	* can do.
				55	*
				56	* Note that the spinlock is IN the TGH handle so locking does not involve
				57	* additional cache lines.
				58	*
				59	*/
				60	static inline int get_off_blade_tgh(struct gru_state *gru)
				61	{
				62	int n;
				63
				64	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
				65	n = gru_random() % n;
				66	n += gru->gs_tgh_first_remote;
				67	return n;
				68	}
				69
				70	static inline int get_on_blade_tgh(struct gru_state *gru)
				71	{
				72	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
				73	}
				74
				75	static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
				76	*gru)
				77	{
				78	struct gru_tlb_global_handle *tgh;
				79	int n;
				80
				81	preempt_disable();
				82	if (uv_numa_blade_id() == gru->gs_blade_id)
				83	n = get_on_blade_tgh(gru);
				84	else
				85	n = get_off_blade_tgh(gru);
				86	tgh = get_tgh_by_index(gru, n);
				87	lock_tgh_handle(tgh);
				88
				89	return tgh;
				90	}
				91
				92	static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
				93	{
				94	unlock_tgh_handle(tgh);
				95	preempt_enable();
				96	}
				97
				98	/*
				99	* gru_flush_tlb_range
				100	*
				101	* General purpose TLB invalidation function. This function scans every GRU in
				102	* the ENTIRE system (partition) looking for GRUs where the specified MM has
				103	* been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
				104	* the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
				105	* on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
				106	* cost of (possibly) a large number of future TLBmisses.
				107	*
				108	* The current algorithm is optimized based on the following (somewhat true)
				109	* assumptions:
				110	* - GRU contexts are not loaded into a GRU unless a reference is made to
				111	* the data segment or control block (this is true, not an assumption).
				112	* If a DS/CB is referenced, the user will also issue instructions that
				113	* cause TLBmisses. It is not necessary to optimize for the case where
				114	* contexts are loaded but no instructions cause TLB misses. (I know
				115	* this will happen but I'm not optimizing for it).
				116	* - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
				117	* a few usec but in unusual cases, it could be longer. Avoid if
				118	* possible.
				119	* - intrablade process migration between cpus is not frequent but is
				120	* common.
				121	* - a GRU context is not typically migrated to a different GRU on the
				122	* blade because of intrablade migration
				123	* - interblade migration is rare. Processes migrate their GRU context to
				124	* the new blade.
				125	* - if interblade migration occurs, migration back to the original blade
				126	* is very very rare (ie., no optimization for this case)
				127	* - most GRU instruction operate on a subset of the user REGIONS. Code
				128	* & shared library regions are not likely targets of GRU instructions.
				129	*
				130	* To help improve the efficiency of TLB invalidation, the GMS data
				131	* structure is maintained for EACH address space (MM struct). The GMS is
				132	* also the structure that contains the pointer to the mmu callout
				133	* functions. This structure is linked to the mm_struct for the address space
				134	* using the mmu "register" function. The mmu interfaces are used to
				135	* provide the callbacks for TLB invalidation. The GMS contains:
				136	*
				137	* - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
				138	* loaded into the GRU.
				139	* - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
				140	* the above array
				141	* - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
				142	* in the GRU for the address space. This bitmap must be passed to the
				143	* GRU to do an invalidate.
				144	*
				145	* The current algorithm for invalidating TLBs is:
				146	* - scan the asidmap for GRUs where the context has been loaded, ie,
				147	* asid is non-zero.
				148	* - for each gru found:
				149	* - if the ctxtmap is non-zero, there are active contexts in the
				150	* GRU. TLB invalidate instructions must be issued to the GRU.
				151	* - if the ctxtmap is zero, no context is active. Set the ASID to
				152	* zero to force a full TLB invalidation. This is fast but will
				153	* cause a lot of TLB misses if the context is reloaded onto the
				154	* GRU
				155	*
				156	*/
				157
				158	void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
				159	unsigned long len)
				160	{
				161	struct gru_state *gru;
				162	struct gru_mm_tracker *asids;
				163	struct gru_tlb_global_handle *tgh;
				164	unsigned long num;
				165	int grupagesize, pagesize, pageshift, gid, asid;
				166
				167	/* ZZZ TODO - handle huge pages */
				168	pageshift = PAGE_SHIFT;
				169	pagesize = (1UL << pageshift);
				170	grupagesize = GRU_PAGESIZE(pageshift);
				171	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
				172
				173	STAT(flush_tlb);
				174	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
				175	start, len, gms->ms_asidmap[0]);
				176
				177	spin_lock(&gms->ms_asid_lock);
				178	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
				179	STAT(flush_tlb_gru);
				180	gru = GID_TO_GRU(gid);
				181	asids = gms->ms_asids + gid;
				182	asid = asids->mt_asid;
				183	if (asids->mt_ctxbitmap && asid) {
				184	STAT(flush_tlb_gru_tgh);
				185	asid = GRUASID(asid, start);
				186	gru_dbg(grudev,
				187	" FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
				188	gid, asid, num, asids->mt_ctxbitmap);
				189	tgh = get_lock_tgh_handle(gru);
Jack Steiner	fe5bb6b	2009-04-02 16:59:04 -0700	[diff] [blame]	190	tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	191	num - 1, asids->mt_ctxbitmap);
				192	get_unlock_tgh_handle(tgh);
				193	} else {
				194	STAT(flush_tlb_gru_zero_asid);
				195	asids->mt_asid = 0;
				196	__clear_bit(gru->gs_gid, gms->ms_asidmap);
				197	gru_dbg(grudev,
				198	" CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
				199	gid, asid, asids->mt_ctxbitmap,
				200	gms->ms_asidmap[0]);
				201	}
				202	}
				203	spin_unlock(&gms->ms_asid_lock);
				204	}
				205
				206	/*
				207	* Flush the entire TLB on a chiplet.
				208	*/
				209	void gru_flush_all_tlb(struct gru_state *gru)
				210	{
				211	struct gru_tlb_global_handle *tgh;
				212
Jack Steiner	4388460	2009-04-02 16:59:05 -0700	[diff] [blame]	213	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	214	tgh = get_lock_tgh_handle(gru);
Jack Steiner	fe5bb6b	2009-04-02 16:59:04 -0700	[diff] [blame]	215	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	216	get_unlock_tgh_handle(tgh);
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	217	}
				218
				219	/*
				220	* MMUOPS notifier callout functions
				221	*/
				222	static void gru_invalidate_range_start(struct mmu_notifier *mn,
				223	struct mm_struct *mm,
				224	unsigned long start, unsigned long end)
				225	{
				226	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
				227	ms_notifier);
				228
				229	STAT(mmu_invalidate_range);
				230	atomic_inc(&gms->ms_range_active);
				231	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
				232	start, end, atomic_read(&gms->ms_range_active));
				233	gru_flush_tlb_range(gms, start, end - start);
				234	}
				235
				236	static void gru_invalidate_range_end(struct mmu_notifier *mn,
				237	struct mm_struct *mm, unsigned long start,
				238	unsigned long end)
				239	{
				240	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
				241	ms_notifier);
				242
Jack Steiner	9ca8e40c1	2008-07-29 22:34:02 -0700	[diff] [blame]	243	/* ..._and_test() provides needed barrier */
				244	(void)atomic_dec_and_test(&gms->ms_range_active);
				245
Jack Steiner	ee5b8fe	2008-07-29 22:33:59 -0700	[diff] [blame]	246	wake_up_all(&gms->ms_wait_queue);
				247	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
				248	}
				249
				250	static void gru_invalidate_page(struct mmu_notifier mn, struct mm_struct mm,
				251	unsigned long address)
				252	{
				253	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
				254	ms_notifier);
				255
				256	STAT(mmu_invalidate_page);
				257	gru_flush_tlb_range(gms, address, PAGE_SIZE);
				258	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
				259	}
				260
				261	static void gru_release(struct mmu_notifier mn, struct mm_struct mm)
				262	{
				263	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
				264	ms_notifier);
				265
				266	gms->ms_released = 1;
				267	gru_dbg(grudev, "gms %p\n", gms);
				268	}
				269
				270
				271	static const struct mmu_notifier_ops gru_mmuops = {
				272	.invalidate_page = gru_invalidate_page,
				273	.invalidate_range_start = gru_invalidate_range_start,
				274	.invalidate_range_end = gru_invalidate_range_end,
				275	.release = gru_release,
				276	};
				277
				278	/* Move this to the basic mmu_notifier file. But for now... */
				279	static struct mmu_notifier mmu_find_ops(struct mm_struct mm,
				280	const struct mmu_notifier_ops *ops)
				281	{
				282	struct mmu_notifier mn, gru_mn = NULL;
				283	struct hlist_node *n;
				284
				285	if (mm->mmu_notifier_mm) {
				286	rcu_read_lock();
				287	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
				288	hlist)
				289	if (mn->ops == ops) {
				290	gru_mn = mn;
				291	break;
				292	}
				293	rcu_read_unlock();
				294	}
				295	return gru_mn;
				296	}
				297
				298	struct gru_mm_struct *gru_register_mmu_notifier(void)
				299	{
				300	struct gru_mm_struct *gms;
				301	struct mmu_notifier *mn;
				302
				303	mn = mmu_find_ops(current->mm, &gru_mmuops);
				304	if (mn) {
				305	gms = container_of(mn, struct gru_mm_struct, ms_notifier);
				306	atomic_inc(&gms->ms_refcnt);
				307	} else {
				308	gms = kzalloc(sizeof(*gms), GFP_KERNEL);
				309	if (gms) {
				310	spin_lock_init(&gms->ms_asid_lock);
				311	gms->ms_notifier.ops = &gru_mmuops;
				312	atomic_set(&gms->ms_refcnt, 1);
				313	init_waitqueue_head(&gms->ms_wait_queue);
				314	__mmu_notifier_register(&gms->ms_notifier, current->mm);
				315	}
				316	}
				317	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
				318	atomic_read(&gms->ms_refcnt));
				319	return gms;
				320	}
				321
				322	void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
				323	{
				324	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
				325	atomic_read(&gms->ms_refcnt), gms->ms_released);
				326	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
				327	if (!gms->ms_released)
				328	mmu_notifier_unregister(&gms->ms_notifier, current->mm);
				329	kfree(gms);
				330	}
				331	}
				332
				333	/*
				334	* Setup TGH parameters. There are:
				335	* - 24 TGH handles per GRU chiplet
				336	* - a portion (MAX_LOCAL_TGH) of the handles are reserved for
				337	* use by blade-local cpus
				338	* - the rest are used by off-blade cpus. This usage is
				339	* less frequent than blade-local usage.
				340	*
				341	* For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
				342	* has less than or equal to 16 cpus, each cpu has a unique handle that it can
				343	* use.
				344	*/
				345	#define MAX_LOCAL_TGH 16
				346
				347	void gru_tgh_flush_init(struct gru_state *gru)
				348	{
				349	int cpus, shift = 0, n;
				350
				351	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
				352
				353	/* n = cpus rounded up to next power of 2 */
				354	if (cpus) {
				355	n = 1 << fls(cpus - 1);
				356
				357	/*
				358	* shift count for converting local cpu# to TGH index
				359	* 0 if cpus <= MAX_LOCAL_TGH,
				360	* 1 if cpus <= 2*MAX_LOCAL_TGH,
				361	* etc
				362	*/
				363	shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
				364	}
				365	gru->gs_tgh_local_shift = shift;
				366
				367	/* first starting TGH index to use for remote purges */
				368	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
				369
				370	}