Blame - arch/sh/mm/cache-sh5.c - kernel/msm-4.19

blob: 4617e3aeee7335f19187f3dc440f75055a94d4c6 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Paul Mundt	a23ba43	2007-11-28 20:19:38 +0900	[diff] [blame^]	2	* arch/sh/mm/cache-sh5.c
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* Original version Copyright (C) 2000, 2001 Paolo Alberelli
				5	* Second version Copyright (C) benedict.gaster@superh.com 2002
				6	* Third version Copyright Richard.Curnow@superh.com 2003
				7	* Hacks to third version Copyright (C) 2003 Paul Mundt
Paul Mundt	a23ba43	2007-11-28 20:19:38 +0900	[diff] [blame^]	8	*
				9	* This file is subject to the terms and conditions of the GNU General Public
				10	* License. See the file "COPYING" in the main directory of this archive
				11	* for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	12	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	#include <linux/init.h>
				14	#include <linux/mman.h>
				15	#include <linux/mm.h>
				16	#include <linux/threads.h>
				17	#include <asm/page.h>
				18	#include <asm/pgtable.h>
				19	#include <asm/processor.h>
				20	#include <asm/cache.h>
				21	#include <asm/tlb.h>
				22	#include <asm/io.h>
				23	#include <asm/uaccess.h>
				24	#include <asm/mmu_context.h>
				25	#include <asm/pgalloc.h> /* for flush_itlb_range */
				26
				27	#include <linux/proc_fs.h>
				28
				29	/* This function is in entry.S */
				30	extern unsigned long switch_and_save_asid(unsigned long new_asid);
				31
				32	/* Wired TLB entry for the D-cache */
				33	static unsigned long long dtlb_cache_slot;
				34
				35	/**
				36	* sh64_cache_init()
				37	*
				38	* This is pretty much just a straightforward clone of the SH
				39	* detect_cpu_and_cache_system().
				40	*
				41	* This function is responsible for setting up all of the cache
				42	* info dynamically as well as taking care of CPU probing and
				43	* setting up the relevant subtype data.
				44	*
				45	* FIXME: For the time being, we only really support the SH5-101
				46	* out of the box, and don't support dynamic probing for things
				47	* like the SH5-103 or even cut2 of the SH5-101. Implement this
				48	* later!
				49	*/
				50	int __init sh64_cache_init(void)
				51	{
				52	/*
				53	* First, setup some sane values for the I-cache.
				54	*/
				55	cpu_data->icache.ways = 4;
				56	cpu_data->icache.sets = 256;
				57	cpu_data->icache.linesz = L1_CACHE_BYTES;
				58
				59	/*
				60	* FIXME: This can probably be cleaned up a bit as well.. for example,
				61	* do we really need the way shift _and_ the way_step_shift ?? Judging
				62	* by the existing code, I would guess no.. is there any valid reason
				63	* why we need to be tracking this around?
				64	*/
				65	cpu_data->icache.way_shift = 13;
				66	cpu_data->icache.entry_shift = 5;
				67	cpu_data->icache.set_shift = 4;
				68	cpu_data->icache.way_step_shift = 16;
				69	cpu_data->icache.asid_shift = 2;
				70
				71	/*
				72	* way offset = cache size / associativity, so just don't factor in
				73	* associativity in the first place..
				74	*/
				75	cpu_data->icache.way_ofs = cpu_data->icache.sets *
				76	cpu_data->icache.linesz;
				77
				78	cpu_data->icache.asid_mask = 0x3fc;
				79	cpu_data->icache.idx_mask = 0x1fe0;
				80	cpu_data->icache.epn_mask = 0xffffe000;
				81	cpu_data->icache.flags = 0;
				82
				83	/*
				84	* Next, setup some sane values for the D-cache.
				85	*
				86	* On the SH5, these are pretty consistent with the I-cache settings,
				87	* so we just copy over the existing definitions.. these can be fixed
				88	* up later, especially if we add runtime CPU probing.
				89	*
				90	* Though in the meantime it saves us from having to duplicate all of
				91	* the above definitions..
				92	*/
				93	cpu_data->dcache = cpu_data->icache;
				94
				95	/*
				96	* Setup any cache-related flags here
				97	*/
				98	#if defined(CONFIG_DCACHE_WRITE_THROUGH)
				99	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
				100	#elif defined(CONFIG_DCACHE_WRITE_BACK)
				101	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
				102	#endif
				103
				104	/*
				105	* We also need to reserve a slot for the D-cache in the DTLB, so we
				106	* do this now ..
				107	*/
				108	dtlb_cache_slot = sh64_get_wired_dtlb_entry();
				109
				110	return 0;
				111	}
				112
				113	#ifdef CONFIG_DCACHE_DISABLED
				114	#define sh64_dcache_purge_all() do { } while (0)
				115	#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0)
				116	#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
				117	#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
				118	#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
				119	#define sh64_dcache_purge_kernel_range(start, end) do { } while (0)
				120	#define sh64_dcache_wback_current_user_range(start, end) do { } while (0)
				121	#endif
				122
/*##########################################################################*/
				124
				125	/* From here onwards, a rewrite of the implementation,
				126	by Richard.Curnow@superh.com.
				127
				128	The major changes in this compared to the old version are;
				129	1. use more selective purging through OCBP instead of using ALLOCO to purge
				130	by natural replacement. This avoids purging out unrelated cache lines
				131	that happen to be in the same set.
				132	2. exploit the APIs copy_user_page and clear_user_page better
				133	3. be more selective about I-cache purging, in particular use invalidate_all
				134	more sparingly.
				135
				136	*/
				137
				138	/*##########################################################################
				139	SUPPORT FUNCTIONS
				140	##########################################################################*/
				141
				142	/****************************************************************************/
				143	/* The following group of functions deal with mapping and unmapping a temporary
				144	page into the DTLB slot that have been set aside for our exclusive use. */
				145	/* In order to accomplish this, we use the generic interface for adding and
Paul Mundt	a23ba43	2007-11-28 20:19:38 +0900	[diff] [blame^]	146	removing a wired slot entry as defined in arch/sh/mm/tlb-sh5.c */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	147	/****************************************************************************/
				148
				149	static unsigned long slot_own_flags;
				150
				151	static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
				152	{
				153	local_irq_save(slot_own_flags);
				154	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
				155	}
				156
				157	static inline void sh64_teardown_dtlb_cache_slot(void)
				158	{
				159	sh64_teardown_tlb_slot(dtlb_cache_slot);
				160	local_irq_restore(slot_own_flags);
				161	}
				162
				163	/****************************************************************************/
				164
				165	#ifndef CONFIG_ICACHE_DISABLED
				166
				167	static void __inline__ sh64_icache_inv_all(void)
				168	{
				169	unsigned long long addr, flag, data;
				170	unsigned int flags;
				171
				172	addr=ICCR0;
				173	flag=ICCR0_ICI;
				174	data=0;
				175
				176	/* Make this a critical section for safety (probably not strictly necessary.) */
				177	local_irq_save(flags);
				178
				179	/* Without %1 it gets unexplicably wrong */
				180	asm volatile("getcfg %3, 0, %0\n\t"
				181	"or %0, %2, %0\n\t"
				182	"putcfg %3, 0, %0\n\t"
				183	"synci"
				184	: "=&r" (data)
				185	: "0" (data), "r" (flag), "r" (addr));
				186
				187	local_irq_restore(flags);
				188	}
				189
				190	static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
				191	{
				192	/* Invalidate range of addresses [start,end] from the I-cache, where
				193	* the addresses lie in the kernel superpage. */
				194
				195	unsigned long long ullend, addr, aligned_start;
				196	#if (NEFF == 32)
				197	aligned_start = (unsigned long long)(signed long long)(signed long) start;
				198	#else
				199	#error "NEFF != 32"
				200	#endif
				201	aligned_start &= L1_CACHE_ALIGN_MASK;
				202	addr = aligned_start;
				203	#if (NEFF == 32)
				204	ullend = (unsigned long long) (signed long long) (signed long) end;
				205	#else
				206	#error "NEFF != 32"
				207	#endif
				208	while (addr <= ullend) {
				209	asm __volatile__ ("icbi %0, 0" : : "r" (addr));
				210	addr += L1_CACHE_BYTES;
				211	}
				212	}
				213
				214	static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
				215	{
				216	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
				217	Also, eaddr is page-aligned. */
				218
				219	unsigned long long addr, end_addr;
				220	unsigned long flags = 0;
				221	unsigned long running_asid, vma_asid;
				222	addr = eaddr;
				223	end_addr = addr + PAGE_SIZE;
				224
				225	/* Check whether we can use the current ASID for the I-cache
				226	invalidation. For example, if we're called via
				227	access_process_vm->flush_cache_page->here, (e.g. when reading from
				228	/proc), 'running_asid' will be that of the reader, not of the
				229	victim.
				230
				231	Also, note the risk that we might get pre-empted between the ASID
				232	compare and blocking IRQs, and before we regain control, the
				233	pid->ASID mapping changes. However, the whole cache will get
				234	invalidated when the mapping is renewed, so the worst that can
				235	happen is that the loop below ends up invalidating somebody else's
				236	cache entries.
				237	*/
				238
				239	running_asid = get_asid();
				240	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
				241	if (running_asid != vma_asid) {
				242	local_irq_save(flags);
				243	switch_and_save_asid(vma_asid);
				244	}
				245	while (addr < end_addr) {
				246	/* Worth unrolling a little */
				247	asm __volatile__("icbi %0, 0" : : "r" (addr));
				248	asm __volatile__("icbi %0, 32" : : "r" (addr));
				249	asm __volatile__("icbi %0, 64" : : "r" (addr));
				250	asm __volatile__("icbi %0, 96" : : "r" (addr));
				251	addr += 128;
				252	}
				253	if (running_asid != vma_asid) {
				254	switch_and_save_asid(running_asid);
				255	local_irq_restore(flags);
				256	}
				257	}
				258
				259	/****************************************************************************/
				260
				261	static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
				262	unsigned long start, unsigned long end)
				263	{
				264	/* Used for invalidating big chunks of I-cache, i.e. assume the range
				265	is whole pages. If 'start' or 'end' is not page aligned, the code
				266	is conservative and invalidates to the ends of the enclosing pages.
				267	This is functionally OK, just a performance loss. */
				268
				269	/* See the comments below in sh64_dcache_purge_user_range() regarding
				270	the choice of algorithm. However, for the I-cache option (2) isn't
				271	available because there are no physical tags so aliases can't be
				272	resolved. The icbi instruction has to be used through the user
				273	mapping. Because icbi is cheaper than ocbp on a cache hit, it
				274	would be cheaper to use the selective code for a large range than is
				275	possible with the D-cache. Just assume 64 for now as a working
				276	figure.
				277	*/
				278
				279	int n_pages;
				280
				281	if (!mm) return;
				282
				283	n_pages = ((end - start) >> PAGE_SHIFT);
				284	if (n_pages >= 64) {
				285	sh64_icache_inv_all();
				286	} else {
				287	unsigned long aligned_start;
				288	unsigned long eaddr;
				289	unsigned long after_last_page_start;
				290	unsigned long mm_asid, current_asid;
				291	unsigned long long flags = 0ULL;
				292
				293	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				294	current_asid = get_asid();
				295
				296	if (mm_asid != current_asid) {
				297	/* Switch ASID and run the invalidate loop under cli */
				298	local_irq_save(flags);
				299	switch_and_save_asid(mm_asid);
				300	}
				301
				302	aligned_start = start & PAGE_MASK;
				303	after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
				304
				305	while (aligned_start < after_last_page_start) {
				306	struct vm_area_struct *vma;
				307	unsigned long vma_end;
				308	vma = find_vma(mm, aligned_start);
				309	if (!vma \|\| (aligned_start <= vma->vm_end)) {
				310	/* Avoid getting stuck in an error condition */
				311	aligned_start += PAGE_SIZE;
				312	continue;
				313	}
				314	vma_end = vma->vm_end;
				315	if (vma->vm_flags & VM_EXEC) {
				316	/* Executable */
				317	eaddr = aligned_start;
				318	while (eaddr < vma_end) {
				319	sh64_icache_inv_user_page(vma, eaddr);
				320	eaddr += PAGE_SIZE;
				321	}
				322	}
				323	aligned_start = vma->vm_end; /* Skip to start of next region */
				324	}
				325	if (mm_asid != current_asid) {
				326	switch_and_save_asid(current_asid);
				327	local_irq_restore(flags);
				328	}
				329	}
				330	}
				331
				332	static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
				333	unsigned long start, int len)
				334	{
				335
				336	/* Invalidate a small range of user context I-cache, not necessarily
				337	page (or even cache-line) aligned. */
				338
				339	unsigned long long eaddr = start;
				340	unsigned long long eaddr_end = start + len;
				341	unsigned long current_asid, mm_asid;
				342	unsigned long long flags;
				343	unsigned long long epage_start;
				344
				345	/* Since this is used inside ptrace, the ASID in the mm context
				346	typically won't match current_asid. We'll have to switch ASID to do
				347	this. For safety, and given that the range will be small, do all
				348	this under cli.
				349
				350	Note, there is a hazard that the ASID in mm->context is no longer
				351	actually associated with mm, i.e. if the mm->context has started a
				352	new cycle since mm was last active. However, this is just a
				353	performance issue: all that happens is that we invalidate lines
				354	belonging to another mm, so the owning process has to refill them
				355	when that mm goes live again. mm itself can't have any cache
				356	entries because there will have been a flush_cache_all when the new
				357	mm->context cycle started. */
				358
				359	/* Align to start of cache line. Otherwise, suppose len==8 and start
				360	was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
				361	eaddr = start & L1_CACHE_ALIGN_MASK;
				362	eaddr_end = start + len;
				363
				364	local_irq_save(flags);
				365	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				366	current_asid = switch_and_save_asid(mm_asid);
				367
				368	epage_start = eaddr & PAGE_MASK;
				369
				370	while (eaddr < eaddr_end)
				371	{
				372	asm __volatile__("icbi %0, 0" : : "r" (eaddr));
				373	eaddr += L1_CACHE_BYTES;
				374	}
				375	switch_and_save_asid(current_asid);
				376	local_irq_restore(flags);
				377	}
				378
				379	static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
				380	{
				381	/* The icbi instruction never raises ITLBMISS. i.e. if there's not a
				382	cache hit on the virtual tag the instruction ends there, without a
				383	TLB lookup. */
				384
				385	unsigned long long aligned_start;
				386	unsigned long long ull_end;
				387	unsigned long long addr;
				388
				389	ull_end = end;
				390
				391	/* Just invalidate over the range using the natural addresses. TLB
				392	miss handling will be OK (TBC). Since it's for the current process,
				393	either we're already in the right ASID context, or the ASIDs have
				394	been recycled since we were last active in which case we might just
				395	invalidate another processes I-cache entries : no worries, just a
				396	performance drop for him. */
				397	aligned_start = start & L1_CACHE_ALIGN_MASK;
				398	addr = aligned_start;
				399	while (addr < ull_end) {
				400	asm __volatile__ ("icbi %0, 0" : : "r" (addr));
				401	asm __volatile__ ("nop");
				402	asm __volatile__ ("nop");
				403	addr += L1_CACHE_BYTES;
				404	}
				405	}
				406
				407	#endif /* !CONFIG_ICACHE_DISABLED */
				408
				409	/****************************************************************************/
				410
				411	#ifndef CONFIG_DCACHE_DISABLED
				412
				413	/* Buffer used as the target of alloco instructions to purge data from cache
				414	sets by natural eviction. -- RPC */
				415	#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
				416	static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
				417
				418	/****************************************************************************/
				419
				420	static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
				421	{
				422	/* Purge all ways in a particular block of sets, specified by the base
				423	set number and number of sets. Can handle wrap-around, if that's
				424	needed. */
				425
				426	int dummy_buffer_base_set;
				427	unsigned long long eaddr, eaddr0, eaddr1;
				428	int j;
				429	int set_offset;
				430
				431	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
				432	set_offset = sets_to_purge_base - dummy_buffer_base_set;
				433
				434	for (j=0; j<n_sets; j++, set_offset++) {
				435	set_offset &= (cpu_data->dcache.sets - 1);
				436	eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
				437
				438	/* Do one alloco which hits the required set per cache way. For
				439	write-back mode, this will purge the #ways resident lines. There's
				440	little point unrolling this loop because the allocos stall more if
				441	they're too close together. */
				442	eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
				443	for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
				444	asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
				445	asm __volatile__ ("synco"); /* TAKum03020 */
				446	}
				447
				448	eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
				449	for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
				450	/* Load from each address. Required because alloco is a NOP if
				451	the cache is write-through. Write-through is a config option. */
				452	if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				453	(volatile unsigned char )(int)eaddr;
				454	}
				455	}
				456
				457	/* Don't use OCBI to invalidate the lines. That costs cycles directly.
				458	If the dummy block is just left resident, it will naturally get
				459	evicted as required. */
				460
				461	return;
				462	}
				463
				464	/****************************************************************************/
				465
				466	static void sh64_dcache_purge_all(void)
				467	{
				468	/* Purge the entire contents of the dcache. The most efficient way to
				469	achieve this is to use alloco instructions on a region of unused
				470	memory equal in size to the cache, thereby causing the current
				471	contents to be discarded by natural eviction. The alternative,
				472	namely reading every tag, setting up a mapping for the corresponding
				473	page and doing an OCBP for the line, would be much more expensive.
				474	*/
				475
				476	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
				477
				478	return;
				479
				480	}
				481
				482	/****************************************************************************/
				483
				484	static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
				485	{
				486	/* Purge the range of addresses [start,end] from the D-cache. The
				487	addresses lie in the superpage mapping. There's no harm if we
				488	overpurge at either end - just a small performance loss. */
				489	unsigned long long ullend, addr, aligned_start;
				490	#if (NEFF == 32)
				491	aligned_start = (unsigned long long)(signed long long)(signed long) start;
				492	#else
				493	#error "NEFF != 32"
				494	#endif
				495	aligned_start &= L1_CACHE_ALIGN_MASK;
				496	addr = aligned_start;
				497	#if (NEFF == 32)
				498	ullend = (unsigned long long) (signed long long) (signed long) end;
				499	#else
				500	#error "NEFF != 32"
				501	#endif
				502	while (addr <= ullend) {
				503	asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
				504	addr += L1_CACHE_BYTES;
				505	}
				506	return;
				507	}
				508
				509	/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
				510	anything else in the kernel */
				511	#define MAGIC_PAGE0_START 0xffffffffec000000ULL
				512
				513	static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
				514	{
				515	/* Purge the physical page 'paddr' from the cache. It's known that any
				516	cache lines requiring attention have the same page colour as the the
				517	address 'eaddr'.
				518
				519	This relies on the fact that the D-cache matches on physical tags
				520	when no virtual tag matches. So we create an alias for the original
				521	page and purge through that. (Alternatively, we could have done
				522	this by switching ASID to match the original mapping and purged
				523	through that, but that involves ASID switching cost + probably a
				524	TLBMISS + refill anyway.)
				525	*/
				526
				527	unsigned long long magic_page_start;
				528	unsigned long long magic_eaddr, magic_eaddr_end;
				529
				530	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
				531
				532	/* As long as the kernel is not pre-emptible, this doesn't need to be
				533	under cli/sti. */
				534
				535	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
				536
				537	magic_eaddr = magic_page_start;
				538	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
				539	while (magic_eaddr < magic_eaddr_end) {
				540	/* Little point in unrolling this loop - the OCBPs are blocking
				541	and won't go any quicker (i.e. the loop overhead is parallel
				542	to part of the OCBP execution.) */
				543	asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
				544	magic_eaddr += L1_CACHE_BYTES;
				545	}
				546
				547	sh64_teardown_dtlb_cache_slot();
				548	}
				549
				550	/****************************************************************************/
				551
				552	static void sh64_dcache_purge_phy_page(unsigned long paddr)
				553	{
				554	/* Pure a page given its physical start address, by creating a
				555	temporary 1 page mapping and purging across that. Even if we know
				556	the virtual address (& vma or mm) of the page, the method here is
				557	more elegant because it avoids issues of coping with page faults on
				558	the purge instructions (i.e. no special-case code required in the
				559	critical path in the TLB miss handling). */
				560
				561	unsigned long long eaddr_start, eaddr, eaddr_end;
				562	int i;
				563
				564	/* As long as the kernel is not pre-emptible, this doesn't need to be
				565	under cli/sti. */
				566
				567	eaddr_start = MAGIC_PAGE0_START;
				568	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
				569	sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
				570
				571	eaddr = eaddr_start;
				572	eaddr_end = eaddr + PAGE_SIZE;
				573	while (eaddr < eaddr_end) {
				574	asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
				575	eaddr += L1_CACHE_BYTES;
				576	}
				577
				578	sh64_teardown_dtlb_cache_slot();
				579	eaddr_start += PAGE_SIZE;
				580	}
				581	}
				582
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	583	static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				584	unsigned long addr, unsigned long end)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	585	{
				586	pgd_t *pgd;
				587	pmd_t *pmd;
				588	pte_t *pte;
				589	pte_t entry;
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	590	spinlock_t *ptl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	591	unsigned long paddr;
				592
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	593	if (!mm)
				594	return; /* No way to find physical address of page */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	595
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	596	pgd = pgd_offset(mm, addr);
				597	if (pgd_bad(*pgd))
				598	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	599
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	600	pmd = pmd_offset(pgd, addr);
				601	if (pmd_none(pmd) \|\| pmd_bad(pmd))
				602	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	603
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	604	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
				605	do {
				606	entry = *pte;
				607	if (pte_none(entry) \|\| !pte_present(entry))
				608	continue;
				609	paddr = pte_val(entry) & PAGE_MASK;
				610	sh64_dcache_purge_coloured_phy_page(paddr, addr);
				611	} while (pte++, addr += PAGE_SIZE, addr != end);
				612	pte_unmap_unlock(pte - 1, ptl);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	}
				614	/****************************************************************************/
				615
				616	static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				617	unsigned long start, unsigned long end)
				618	{
				619	/* There are at least 5 choices for the implementation of this, with
				620	pros (+), cons(-), comments(*):
				621
				622	1. ocbp each line in the range through the original user's ASID
				623	+ no lines spuriously evicted
				624	- tlbmiss handling (must either handle faults on demand => extra
				625	special-case code in tlbmiss critical path), or map the page in
				626	advance (=> flush_tlb_range in advance to avoid multiple hits)
				627	- ASID switching
				628	- expensive for large ranges
				629
				630	2. temporarily map each page in the range to a special effective
				631	address and ocbp through the temporary mapping; relies on the
				632	fact that SH-5 OCB* always do TLB lookup and match on ptags (they
				633	never look at the etags)
				634	+ no spurious evictions
				635	- expensive for large ranges
				636	* surely cheaper than (1)
				637
				638	3. walk all the lines in the cache, check the tags, if a match
				639	occurs create a page mapping to ocbp the line through
				640	+ no spurious evictions
				641	- tag inspection overhead
				642	- (especially for small ranges)
				643	- potential cost of setting up/tearing down page mapping for
				644	every line that matches the range
				645	* cost partly independent of range size
				646
				647	4. walk all the lines in the cache, check the tags, if a match
				648	occurs use 4 * alloco to purge the line (+3 other probably
				649	innocent victims) by natural eviction
				650	+ no tlb mapping overheads
				651	- spurious evictions
				652	- tag inspection overhead
				653
				654	5. implement like flush_cache_all
				655	+ no tag inspection overhead
				656	- spurious evictions
				657	- bad for small ranges
				658
				659	(1) can be ruled out as more expensive than (2). (2) appears best
				660	for small ranges. The choice between (3), (4) and (5) for large
				661	ranges and the range size for the large/small boundary need
				662	benchmarking to determine.
				663
				664	For now use approach (2) for small ranges and (5) for large ones.
				665
				666	*/
				667
				668	int n_pages;
				669
				670	n_pages = ((end - start) >> PAGE_SHIFT);
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	671	if (n_pages >= 64 \|\| ((start ^ (end - 1)) & PMD_MASK)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	672	#if 1
				673	sh64_dcache_purge_all();
				674	#else
				675	unsigned long long set, way;
				676	unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				677	for (set = 0; set < cpu_data->dcache.sets; set++) {
				678	unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
				679	for (way = 0; way < cpu_data->dcache.ways; way++) {
				680	unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				681	unsigned long long tag0;
				682	unsigned long line_valid;
				683
				684	asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				685	line_valid = tag0 & SH_CACHE_VALID;
				686	if (line_valid) {
				687	unsigned long cache_asid;
				688	unsigned long epn;
				689
				690	cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
				691	/* The next line needs some
				692	explanation. The virtual tags
				693	encode bits [31:13] of the virtual
				694	address, bit [12] of the 'tag' being
				695	implied by the cache set index. */
				696	epn = (tag0 & cpu_data->dcache.epn_mask) \| ((set & 0x80) << cpu_data->dcache.entry_shift);
				697
				698	if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
				699	/* TODO : could optimise this
				700	call by batching multiple
				701	adjacent sets together. */
				702	sh64_dcache_purge_sets(set, 1);
				703	break; /* Don't waste time inspecting other ways for this set */
				704	}
				705	}
				706	}
				707	}
				708	#endif
				709	} else {
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	710	/* Small range, covered by a single page table page */
				711	start &= PAGE_MASK; /* should already be so */
				712	end = PAGE_ALIGN(end); /* should already be so */
				713	sh64_dcache_purge_user_pages(mm, start, end);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	714	}
				715	return;
				716	}
				717
				718	static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
				719	{
				720	unsigned long long aligned_start;
				721	unsigned long long ull_end;
				722	unsigned long long addr;
				723
				724	ull_end = end;
				725
				726	/* Just wback over the range using the natural addresses. TLB miss
				727	handling will be OK (TBC) : the range has just been written to by
				728	the signal frame setup code, so the PTEs must exist.
				729
				730	Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
				731	it doesn't matter, even if the pid->ASID mapping changes whilst
				732	we're away. In that case the cache will have been flushed when the
				733	mapping was renewed. So the writebacks below will be nugatory (and
				734	we'll doubtless have to fault the TLB entry/ies in again with the
				735	new ASID), but it's a rare case.
				736	*/
				737	aligned_start = start & L1_CACHE_ALIGN_MASK;
				738	addr = aligned_start;
				739	while (addr < ull_end) {
				740	asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
				741	addr += L1_CACHE_BYTES;
				742	}
				743	}
				744
				745	/****************************************************************************/
				746
				747	/* These MUST lie in an area of virtual address space that's otherwise unused. */
				748	#define UNIQUE_EADDR_START 0xe0000000UL
				749	#define UNIQUE_EADDR_END 0xe8000000UL
				750
				751	static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
				752	{
				753	/* Given a physical address paddr, and a user virtual address
				754	user_eaddr which will eventually be mapped to it, create a one-off
				755	kernel-private eaddr mapped to the same paddr. This is used for
				756	creating special destination pages for copy_user_page and
				757	clear_user_page */
				758
				759	static unsigned long current_pointer = UNIQUE_EADDR_START;
				760	unsigned long coloured_pointer;
				761
				762	if (current_pointer == UNIQUE_EADDR_END) {
				763	sh64_dcache_purge_all();
				764	current_pointer = UNIQUE_EADDR_START;
				765	}
				766
				767	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) \| (user_eaddr & CACHE_OC_SYN_MASK);
				768	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
				769
				770	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
				771
				772	return coloured_pointer;
				773	}
				774
				775	/****************************************************************************/
				776
				777	static void sh64_copy_user_page_coloured(void to, void from, unsigned long address)
				778	{
				779	void *coloured_to;
				780
				781	/* Discard any existing cache entries of the wrong colour. These are
				782	present quite often, if the kernel has recently used the page
				783	internally, then given it up, then it's been allocated to the user.
				784	*/
				785	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
				786
				787	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
				788	sh64_page_copy(from, coloured_to);
				789
				790	sh64_teardown_dtlb_cache_slot();
				791	}
				792
				793	static void sh64_clear_user_page_coloured(void *to, unsigned long address)
				794	{
				795	void *coloured_to;
				796
				797	/* Discard any existing kernel-originated lines of the wrong colour (as
				798	above) */
				799	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
				800
				801	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
				802	sh64_page_clear(coloured_to);
				803
				804	sh64_teardown_dtlb_cache_slot();
				805	}
				806
				807	#endif /* !CONFIG_DCACHE_DISABLED */
				808
				809	/****************************************************************************/
				810
				811	/*##########################################################################
				812	EXTERNALLY CALLABLE API.
				813	##########################################################################*/
				814
				815	/* These functions are described in Documentation/cachetlb.txt.
				816	Each one of these functions varies in behaviour depending on whether the
				817	I-cache and/or D-cache are configured out.
				818
				819	Note that the Linux term 'flush' corresponds to what is termed 'purge' in
				820	the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
				821	invalidate the cache lines, and 'invalidate' for the I-cache.
				822	*/
				823
				824	#undef FLUSH_TRACE
				825
				826	void flush_cache_all(void)
				827	{
				828	/* Invalidate the entire contents of both caches, after writing back to
				829	memory any dirty data from the D-cache. */
				830	sh64_dcache_purge_all();
				831	sh64_icache_inv_all();
				832	}
				833
				834	/****************************************************************************/
				835
				836	void flush_cache_mm(struct mm_struct *mm)
				837	{
				838	/* Invalidate an entire user-address space from both caches, after
				839	writing back dirty data (e.g. for shared mmap etc). */
				840
				841	/* This could be coded selectively by inspecting all the tags then
				842	doing 4*alloco on any set containing a match (as for
				843	flush_cache_range), but fork/exit/execve (where this is called from)
				844	are expensive anyway. */
				845
				846	/* Have to do a purge here, despite the comments re I-cache below.
				847	There could be odd-coloured dirty data associated with the mm still
				848	in the cache - if this gets written out through natural eviction
				849	after the kernel has reused the page there will be chaos.
				850	*/
				851
				852	sh64_dcache_purge_all();
				853
				854	/* The mm being torn down won't ever be active again, so any Icache
				855	lines tagged with its ASID won't be visible for the rest of the
				856	lifetime of this ASID cycle. Before the ASID gets reused, there
				857	will be a flush_cache_all. Hence we don't need to touch the
				858	I-cache. This is similar to the lack of action needed in
				859	flush_tlb_mm - see fault.c. */
				860	}
				861
				862	/****************************************************************************/
				863
				864	void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
				865	unsigned long end)
				866	{
				867	struct mm_struct *mm = vma->vm_mm;
				868
				869	/* Invalidate (from both caches) the range [start,end) of virtual
				870	addresses from the user address space specified by mm, after writing
				871	back any dirty data.
				872
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	873	Note, 'end' is 1 byte beyond the end of the range to flush. */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	874
				875	sh64_dcache_purge_user_range(mm, start, end);
				876	sh64_icache_inv_user_page_range(mm, start, end);
				877	}
				878
				879	/****************************************************************************/
				880
				881	void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
				882	{
				883	/* Invalidate any entries in either cache for the vma within the user
				884	address space vma->vm_mm for the page starting at virtual address
				885	'eaddr'. This seems to be used primarily in breaking COW. Note,
				886	the I-cache must be searched too in case the page in question is
				887	both writable and being executed from (e.g. stack trampolines.)
				888
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	889	Note, this is called with pte lock held.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	890	*/
				891
				892	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
				893
				894	if (vma->vm_flags & VM_EXEC) {
				895	sh64_icache_inv_user_page(vma, eaddr);
				896	}
				897	}
				898
				899	/****************************************************************************/
				900
				901	#ifndef CONFIG_DCACHE_DISABLED
				902
				903	void copy_user_page(void to, void from, unsigned long address, struct page *page)
				904	{
				905	/* 'from' and 'to' are kernel virtual addresses (within the superpage
				906	mapping of the physical RAM). 'address' is the user virtual address
				907	where the copy 'to' will be mapped after. This allows a custom
				908	mapping to be used to ensure that the new copy is placed in the
				909	right cache sets for the user to see it without having to bounce it
				910	out via memory. Note however : the call to flush_page_to_ram in
				911	(generic)/mm/memory.c:(break_cow) undoes all this good work in that one
				912	very important case!
				913
				914	TBD : can we guarantee that on every call, any cache entries for
				915	'from' are in the same colour sets as 'address' also? i.e. is this
				916	always used just to deal with COW? (I suspect not). */
				917
				918	/* There are two possibilities here for when the page 'from' was last accessed:
				919	* by the kernel : this is OK, no purge required.
				920	* by the/a user (e.g. for break_COW) : need to purge.
				921
				922	If the potential user mapping at 'address' is the same colour as
				923	'from' there is no need to purge any cache lines from the 'from'
				924	page mapped into cache sets of colour 'address'. (The copy will be
				925	accessing the page through 'from').
				926	*/
				927
				928	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
				929	sh64_dcache_purge_coloured_phy_page(__pa(from), address);
				930	}
				931
				932	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
				933	/* No synonym problem on destination */
				934	sh64_page_copy(from, to);
				935	} else {
				936	sh64_copy_user_page_coloured(to, from, address);
				937	}
				938
				939	/* Note, don't need to flush 'from' page from the cache again - it's
				940	done anyway by the generic code */
				941	}
				942
				943	void clear_user_page(void to, unsigned long address, struct page page)
				944	{
				945	/* 'to' is a kernel virtual address (within the superpage
				946	mapping of the physical RAM). 'address' is the user virtual address
				947	where the 'to' page will be mapped after. This allows a custom
				948	mapping to be used to ensure that the new copy is placed in the
				949	right cache sets for the user to see it without having to bounce it
				950	out via memory.
				951	*/
				952
				953	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
				954	/* No synonym problem on destination */
				955	sh64_page_clear(to);
				956	} else {
				957	sh64_clear_user_page_coloured(to, address);
				958	}
				959	}
				960
				961	#endif /* !CONFIG_DCACHE_DISABLED */
				962
				963	/****************************************************************************/
				964
				965	void flush_dcache_page(struct page *page)
				966	{
				967	sh64_dcache_purge_phy_page(page_to_phys(page));
				968	wmb();
				969	}
				970
				971	/****************************************************************************/
				972
				973	void flush_icache_range(unsigned long start, unsigned long end)
				974	{
				975	/* Flush the range [start,end] of kernel virtual adddress space from
				976	the I-cache. The corresponding range must be purged from the
				977	D-cache also because the SH-5 doesn't have cache snooping between
				978	the caches. The addresses will be visible through the superpage
				979	mapping, therefore it's guaranteed that there no cache entries for
				980	the range in cache sets of the wrong colour.
				981
				982	Primarily used for cohering the I-cache after a module has
				983	been loaded. */
				984
				985	/* We also make sure to purge the same range from the D-cache since
				986	flush_page_to_ram() won't be doing this for us! */
				987
				988	sh64_dcache_purge_kernel_range(start, end);
				989	wmb();
				990	sh64_icache_inv_kernel_range(start, end);
				991	}
				992
				993	/****************************************************************************/
				994
				995	void flush_icache_user_range(struct vm_area_struct *vma,
				996	struct page *page, unsigned long addr, int len)
				997	{
				998	/* Flush the range of user (defined by vma->vm_mm) address space
				999	starting at 'addr' for 'len' bytes from the cache. The range does
				1000	not straddle a page boundary, the unique physical page containing
				1001	the range is 'page'. This seems to be used mainly for invalidating
				1002	an address range following a poke into the program text through the
				1003	ptrace() call from another process (e.g. for BRK instruction
				1004	insertion). */
				1005
				1006	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
				1007	mb();
				1008
				1009	if (vma->vm_flags & VM_EXEC) {
				1010	sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
				1011	}
				1012	}
				1013
				1014	/*##########################################################################
				1015	ARCH/SH64 PRIVATE CALLABLE API.
				1016	##########################################################################*/
				1017
				1018	void flush_cache_sigtramp(unsigned long start, unsigned long end)
				1019	{
				1020	/* For the address range [start,end), write back the data from the
				1021	D-cache and invalidate the corresponding region of the I-cache for
				1022	the current process. Used to flush signal trampolines on the stack
				1023	to make them executable. */
				1024
				1025	sh64_dcache_wback_current_user_range(start, end);
				1026	wmb();
				1027	sh64_icache_inv_current_user_range(start, end);
				1028	}
				1029