Blame - arch/sh64/mm/cache.c - kernel/msm-4.9

blob: 421487cfff4cb77a30ca570b142f281ac6e51218 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* This file is subject to the terms and conditions of the GNU General Public
				3	* License. See the file "COPYING" in the main directory of this archive
				4	* for more details.
				5	*
				6	* arch/sh64/mm/cache.c
				7	*
				8	* Original version Copyright (C) 2000, 2001 Paolo Alberelli
				9	* Second version Copyright (C) benedict.gaster@superh.com 2002
				10	* Third version Copyright Richard.Curnow@superh.com 2003
				11	* Hacks to third version Copyright (C) 2003 Paul Mundt
				12	*/
				13
				14	/****************************************************************************/
				15
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	16	#include <linux/init.h>
				17	#include <linux/mman.h>
				18	#include <linux/mm.h>
				19	#include <linux/threads.h>
				20	#include <asm/page.h>
				21	#include <asm/pgtable.h>
				22	#include <asm/processor.h>
				23	#include <asm/cache.h>
				24	#include <asm/tlb.h>
				25	#include <asm/io.h>
				26	#include <asm/uaccess.h>
				27	#include <asm/mmu_context.h>
				28	#include <asm/pgalloc.h> /* for flush_itlb_range */
				29
				30	#include <linux/proc_fs.h>
				31
				32	/* This function is in entry.S */
				33	extern unsigned long switch_and_save_asid(unsigned long new_asid);
				34
				35	/* Wired TLB entry for the D-cache */
				36	static unsigned long long dtlb_cache_slot;
				37
				38	/**
				39	* sh64_cache_init()
				40	*
				41	* This is pretty much just a straightforward clone of the SH
				42	* detect_cpu_and_cache_system().
				43	*
				44	* This function is responsible for setting up all of the cache
				45	* info dynamically as well as taking care of CPU probing and
				46	* setting up the relevant subtype data.
				47	*
				48	* FIXME: For the time being, we only really support the SH5-101
				49	* out of the box, and don't support dynamic probing for things
				50	* like the SH5-103 or even cut2 of the SH5-101. Implement this
				51	* later!
				52	*/
				53	int __init sh64_cache_init(void)
				54	{
				55	/*
				56	* First, setup some sane values for the I-cache.
				57	*/
				58	cpu_data->icache.ways = 4;
				59	cpu_data->icache.sets = 256;
				60	cpu_data->icache.linesz = L1_CACHE_BYTES;
				61
				62	/*
				63	* FIXME: This can probably be cleaned up a bit as well.. for example,
				64	* do we really need the way shift _and_ the way_step_shift ?? Judging
				65	* by the existing code, I would guess no.. is there any valid reason
				66	* why we need to be tracking this around?
				67	*/
				68	cpu_data->icache.way_shift = 13;
				69	cpu_data->icache.entry_shift = 5;
				70	cpu_data->icache.set_shift = 4;
				71	cpu_data->icache.way_step_shift = 16;
				72	cpu_data->icache.asid_shift = 2;
				73
				74	/*
				75	* way offset = cache size / associativity, so just don't factor in
				76	* associativity in the first place..
				77	*/
				78	cpu_data->icache.way_ofs = cpu_data->icache.sets *
				79	cpu_data->icache.linesz;
				80
				81	cpu_data->icache.asid_mask = 0x3fc;
				82	cpu_data->icache.idx_mask = 0x1fe0;
				83	cpu_data->icache.epn_mask = 0xffffe000;
				84	cpu_data->icache.flags = 0;
				85
				86	/*
				87	* Next, setup some sane values for the D-cache.
				88	*
				89	* On the SH5, these are pretty consistent with the I-cache settings,
				90	* so we just copy over the existing definitions.. these can be fixed
				91	* up later, especially if we add runtime CPU probing.
				92	*
				93	* Though in the meantime it saves us from having to duplicate all of
				94	* the above definitions..
				95	*/
				96	cpu_data->dcache = cpu_data->icache;
				97
				98	/*
				99	* Setup any cache-related flags here
				100	*/
				101	#if defined(CONFIG_DCACHE_WRITE_THROUGH)
				102	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
				103	#elif defined(CONFIG_DCACHE_WRITE_BACK)
				104	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
				105	#endif
				106
				107	/*
				108	* We also need to reserve a slot for the D-cache in the DTLB, so we
				109	* do this now ..
				110	*/
				111	dtlb_cache_slot = sh64_get_wired_dtlb_entry();
				112
				113	return 0;
				114	}
				115
				116	#ifdef CONFIG_DCACHE_DISABLED
				117	#define sh64_dcache_purge_all() do { } while (0)
				118	#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0)
				119	#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
				120	#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
				121	#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
				122	#define sh64_dcache_purge_kernel_range(start, end) do { } while (0)
				123	#define sh64_dcache_wback_current_user_range(start, end) do { } while (0)
				124	#endif
				125
				126	/*##########################################################################*/
				127
				128	/* From here onwards, a rewrite of the implementation,
				129	by Richard.Curnow@superh.com.
				130
				131	The major changes in this compared to the old version are;
				132	1. use more selective purging through OCBP instead of using ALLOCO to purge
				133	by natural replacement. This avoids purging out unrelated cache lines
				134	that happen to be in the same set.
				135	2. exploit the APIs copy_user_page and clear_user_page better
				136	3. be more selective about I-cache purging, in particular use invalidate_all
				137	more sparingly.
				138
				139	*/
				140
				141	/*##########################################################################
				142	SUPPORT FUNCTIONS
				143	##########################################################################*/
				144
				145	/****************************************************************************/
				146	/* The following group of functions deal with mapping and unmapping a temporary
				147	page into the DTLB slot that have been set aside for our exclusive use. */
				148	/* In order to accomplish this, we use the generic interface for adding and
				149	removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
				150	/****************************************************************************/
				151
				152	static unsigned long slot_own_flags;
				153
				154	static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
				155	{
				156	local_irq_save(slot_own_flags);
				157	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
				158	}
				159
				160	static inline void sh64_teardown_dtlb_cache_slot(void)
				161	{
				162	sh64_teardown_tlb_slot(dtlb_cache_slot);
				163	local_irq_restore(slot_own_flags);
				164	}
				165
				166	/****************************************************************************/
				167
				168	#ifndef CONFIG_ICACHE_DISABLED
				169
				170	static void __inline__ sh64_icache_inv_all(void)
				171	{
				172	unsigned long long addr, flag, data;
				173	unsigned int flags;
				174
				175	addr=ICCR0;
				176	flag=ICCR0_ICI;
				177	data=0;
				178
				179	/* Make this a critical section for safety (probably not strictly necessary.) */
				180	local_irq_save(flags);
				181
				182	/* Without %1 it gets unexplicably wrong */
				183	asm volatile("getcfg %3, 0, %0\n\t"
				184	"or %0, %2, %0\n\t"
				185	"putcfg %3, 0, %0\n\t"
				186	"synci"
				187	: "=&r" (data)
				188	: "0" (data), "r" (flag), "r" (addr));
				189
				190	local_irq_restore(flags);
				191	}
				192
				193	static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
				194	{
				195	/* Invalidate range of addresses [start,end] from the I-cache, where
				196	* the addresses lie in the kernel superpage. */
				197
				198	unsigned long long ullend, addr, aligned_start;
				199	#if (NEFF == 32)
				200	aligned_start = (unsigned long long)(signed long long)(signed long) start;
				201	#else
				202	#error "NEFF != 32"
				203	#endif
				204	aligned_start &= L1_CACHE_ALIGN_MASK;
				205	addr = aligned_start;
				206	#if (NEFF == 32)
				207	ullend = (unsigned long long) (signed long long) (signed long) end;
				208	#else
				209	#error "NEFF != 32"
				210	#endif
				211	while (addr <= ullend) {
				212	asm __volatile__ ("icbi %0, 0" : : "r" (addr));
				213	addr += L1_CACHE_BYTES;
				214	}
				215	}
				216
				217	static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
				218	{
				219	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
				220	Also, eaddr is page-aligned. */
				221
				222	unsigned long long addr, end_addr;
				223	unsigned long flags = 0;
				224	unsigned long running_asid, vma_asid;
				225	addr = eaddr;
				226	end_addr = addr + PAGE_SIZE;
				227
				228	/* Check whether we can use the current ASID for the I-cache
				229	invalidation. For example, if we're called via
				230	access_process_vm->flush_cache_page->here, (e.g. when reading from
				231	/proc), 'running_asid' will be that of the reader, not of the
				232	victim.
				233
				234	Also, note the risk that we might get pre-empted between the ASID
				235	compare and blocking IRQs, and before we regain control, the
				236	pid->ASID mapping changes. However, the whole cache will get
				237	invalidated when the mapping is renewed, so the worst that can
				238	happen is that the loop below ends up invalidating somebody else's
				239	cache entries.
				240	*/
				241
				242	running_asid = get_asid();
				243	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
				244	if (running_asid != vma_asid) {
				245	local_irq_save(flags);
				246	switch_and_save_asid(vma_asid);
				247	}
				248	while (addr < end_addr) {
				249	/* Worth unrolling a little */
				250	asm __volatile__("icbi %0, 0" : : "r" (addr));
				251	asm __volatile__("icbi %0, 32" : : "r" (addr));
				252	asm __volatile__("icbi %0, 64" : : "r" (addr));
				253	asm __volatile__("icbi %0, 96" : : "r" (addr));
				254	addr += 128;
				255	}
				256	if (running_asid != vma_asid) {
				257	switch_and_save_asid(running_asid);
				258	local_irq_restore(flags);
				259	}
				260	}
				261
				262	/****************************************************************************/
				263
				264	static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
				265	unsigned long start, unsigned long end)
				266	{
				267	/* Used for invalidating big chunks of I-cache, i.e. assume the range
				268	is whole pages. If 'start' or 'end' is not page aligned, the code
				269	is conservative and invalidates to the ends of the enclosing pages.
				270	This is functionally OK, just a performance loss. */
				271
				272	/* See the comments below in sh64_dcache_purge_user_range() regarding
				273	the choice of algorithm. However, for the I-cache option (2) isn't
				274	available because there are no physical tags so aliases can't be
				275	resolved. The icbi instruction has to be used through the user
				276	mapping. Because icbi is cheaper than ocbp on a cache hit, it
				277	would be cheaper to use the selective code for a large range than is
				278	possible with the D-cache. Just assume 64 for now as a working
				279	figure.
				280	*/
				281
				282	int n_pages;
				283
				284	if (!mm) return;
				285
				286	n_pages = ((end - start) >> PAGE_SHIFT);
				287	if (n_pages >= 64) {
				288	sh64_icache_inv_all();
				289	} else {
				290	unsigned long aligned_start;
				291	unsigned long eaddr;
				292	unsigned long after_last_page_start;
				293	unsigned long mm_asid, current_asid;
				294	unsigned long long flags = 0ULL;
				295
				296	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				297	current_asid = get_asid();
				298
				299	if (mm_asid != current_asid) {
				300	/* Switch ASID and run the invalidate loop under cli */
				301	local_irq_save(flags);
				302	switch_and_save_asid(mm_asid);
				303	}
				304
				305	aligned_start = start & PAGE_MASK;
				306	after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
				307
				308	while (aligned_start < after_last_page_start) {
				309	struct vm_area_struct *vma;
				310	unsigned long vma_end;
				311	vma = find_vma(mm, aligned_start);
				312	if (!vma \|\| (aligned_start <= vma->vm_end)) {
				313	/* Avoid getting stuck in an error condition */
				314	aligned_start += PAGE_SIZE;
				315	continue;
				316	}
				317	vma_end = vma->vm_end;
				318	if (vma->vm_flags & VM_EXEC) {
				319	/* Executable */
				320	eaddr = aligned_start;
				321	while (eaddr < vma_end) {
				322	sh64_icache_inv_user_page(vma, eaddr);
				323	eaddr += PAGE_SIZE;
				324	}
				325	}
				326	aligned_start = vma->vm_end; /* Skip to start of next region */
				327	}
				328	if (mm_asid != current_asid) {
				329	switch_and_save_asid(current_asid);
				330	local_irq_restore(flags);
				331	}
				332	}
				333	}
				334
				335	static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
				336	unsigned long start, int len)
				337	{
				338
				339	/* Invalidate a small range of user context I-cache, not necessarily
				340	page (or even cache-line) aligned. */
				341
				342	unsigned long long eaddr = start;
				343	unsigned long long eaddr_end = start + len;
				344	unsigned long current_asid, mm_asid;
				345	unsigned long long flags;
				346	unsigned long long epage_start;
				347
				348	/* Since this is used inside ptrace, the ASID in the mm context
				349	typically won't match current_asid. We'll have to switch ASID to do
				350	this. For safety, and given that the range will be small, do all
				351	this under cli.
				352
				353	Note, there is a hazard that the ASID in mm->context is no longer
				354	actually associated with mm, i.e. if the mm->context has started a
				355	new cycle since mm was last active. However, this is just a
				356	performance issue: all that happens is that we invalidate lines
				357	belonging to another mm, so the owning process has to refill them
				358	when that mm goes live again. mm itself can't have any cache
				359	entries because there will have been a flush_cache_all when the new
				360	mm->context cycle started. */
				361
				362	/* Align to start of cache line. Otherwise, suppose len==8 and start
				363	was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
				364	eaddr = start & L1_CACHE_ALIGN_MASK;
				365	eaddr_end = start + len;
				366
				367	local_irq_save(flags);
				368	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				369	current_asid = switch_and_save_asid(mm_asid);
				370
				371	epage_start = eaddr & PAGE_MASK;
				372
				373	while (eaddr < eaddr_end)
				374	{
				375	asm __volatile__("icbi %0, 0" : : "r" (eaddr));
				376	eaddr += L1_CACHE_BYTES;
				377	}
				378	switch_and_save_asid(current_asid);
				379	local_irq_restore(flags);
				380	}
				381
				382	static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
				383	{
				384	/* The icbi instruction never raises ITLBMISS. i.e. if there's not a
				385	cache hit on the virtual tag the instruction ends there, without a
				386	TLB lookup. */
				387
				388	unsigned long long aligned_start;
				389	unsigned long long ull_end;
				390	unsigned long long addr;
				391
				392	ull_end = end;
				393
				394	/* Just invalidate over the range using the natural addresses. TLB
				395	miss handling will be OK (TBC). Since it's for the current process,
				396	either we're already in the right ASID context, or the ASIDs have
				397	been recycled since we were last active in which case we might just
				398	invalidate another processes I-cache entries : no worries, just a
				399	performance drop for him. */
				400	aligned_start = start & L1_CACHE_ALIGN_MASK;
				401	addr = aligned_start;
				402	while (addr < ull_end) {
				403	asm __volatile__ ("icbi %0, 0" : : "r" (addr));
				404	asm __volatile__ ("nop");
				405	asm __volatile__ ("nop");
				406	addr += L1_CACHE_BYTES;
				407	}
				408	}
				409
				410	#endif /* !CONFIG_ICACHE_DISABLED */
				411
				412	/****************************************************************************/
				413
				414	#ifndef CONFIG_DCACHE_DISABLED
				415
				416	/* Buffer used as the target of alloco instructions to purge data from cache
				417	sets by natural eviction. -- RPC */
				418	#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
				419	static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
				420
				421	/****************************************************************************/
				422
				423	static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
				424	{
				425	/* Purge all ways in a particular block of sets, specified by the base
				426	set number and number of sets. Can handle wrap-around, if that's
				427	needed. */
				428
				429	int dummy_buffer_base_set;
				430	unsigned long long eaddr, eaddr0, eaddr1;
				431	int j;
				432	int set_offset;
				433
				434	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
				435	set_offset = sets_to_purge_base - dummy_buffer_base_set;
				436
				437	for (j=0; j<n_sets; j++, set_offset++) {
				438	set_offset &= (cpu_data->dcache.sets - 1);
				439	eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
				440
				441	/* Do one alloco which hits the required set per cache way. For
				442	write-back mode, this will purge the #ways resident lines. There's
				443	little point unrolling this loop because the allocos stall more if
				444	they're too close together. */
				445	eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
				446	for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
				447	asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
				448	asm __volatile__ ("synco"); /* TAKum03020 */
				449	}
				450
				451	eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
				452	for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
				453	/* Load from each address. Required because alloco is a NOP if
				454	the cache is write-through. Write-through is a config option. */
				455	if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				456	(volatile unsigned char )(int)eaddr;
				457	}
				458	}
				459
				460	/* Don't use OCBI to invalidate the lines. That costs cycles directly.
				461	If the dummy block is just left resident, it will naturally get
				462	evicted as required. */
				463
				464	return;
				465	}
				466
				467	/****************************************************************************/
				468
				469	static void sh64_dcache_purge_all(void)
				470	{
				471	/* Purge the entire contents of the dcache. The most efficient way to
				472	achieve this is to use alloco instructions on a region of unused
				473	memory equal in size to the cache, thereby causing the current
				474	contents to be discarded by natural eviction. The alternative,
				475	namely reading every tag, setting up a mapping for the corresponding
				476	page and doing an OCBP for the line, would be much more expensive.
				477	*/
				478
				479	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
				480
				481	return;
				482
				483	}
				484
				485	/****************************************************************************/
				486
				487	static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
				488	{
				489	/* Purge the range of addresses [start,end] from the D-cache. The
				490	addresses lie in the superpage mapping. There's no harm if we
				491	overpurge at either end - just a small performance loss. */
				492	unsigned long long ullend, addr, aligned_start;
				493	#if (NEFF == 32)
				494	aligned_start = (unsigned long long)(signed long long)(signed long) start;
				495	#else
				496	#error "NEFF != 32"
				497	#endif
				498	aligned_start &= L1_CACHE_ALIGN_MASK;
				499	addr = aligned_start;
				500	#if (NEFF == 32)
				501	ullend = (unsigned long long) (signed long long) (signed long) end;
				502	#else
				503	#error "NEFF != 32"
				504	#endif
				505	while (addr <= ullend) {
				506	asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
				507	addr += L1_CACHE_BYTES;
				508	}
				509	return;
				510	}
				511
				512	/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
				513	anything else in the kernel */
				514	#define MAGIC_PAGE0_START 0xffffffffec000000ULL
				515
				516	static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
				517	{
				518	/* Purge the physical page 'paddr' from the cache. It's known that any
				519	cache lines requiring attention have the same page colour as the the
				520	address 'eaddr'.
				521
				522	This relies on the fact that the D-cache matches on physical tags
				523	when no virtual tag matches. So we create an alias for the original
				524	page and purge through that. (Alternatively, we could have done
				525	this by switching ASID to match the original mapping and purged
				526	through that, but that involves ASID switching cost + probably a
				527	TLBMISS + refill anyway.)
				528	*/
				529
				530	unsigned long long magic_page_start;
				531	unsigned long long magic_eaddr, magic_eaddr_end;
				532
				533	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
				534
				535	/* As long as the kernel is not pre-emptible, this doesn't need to be
				536	under cli/sti. */
				537
				538	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
				539
				540	magic_eaddr = magic_page_start;
				541	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
				542	while (magic_eaddr < magic_eaddr_end) {
				543	/* Little point in unrolling this loop - the OCBPs are blocking
				544	and won't go any quicker (i.e. the loop overhead is parallel
				545	to part of the OCBP execution.) */
				546	asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
				547	magic_eaddr += L1_CACHE_BYTES;
				548	}
				549
				550	sh64_teardown_dtlb_cache_slot();
				551	}
				552
				553	/****************************************************************************/
				554
				555	static void sh64_dcache_purge_phy_page(unsigned long paddr)
				556	{
				557	/* Pure a page given its physical start address, by creating a
				558	temporary 1 page mapping and purging across that. Even if we know
				559	the virtual address (& vma or mm) of the page, the method here is
				560	more elegant because it avoids issues of coping with page faults on
				561	the purge instructions (i.e. no special-case code required in the
				562	critical path in the TLB miss handling). */
				563
				564	unsigned long long eaddr_start, eaddr, eaddr_end;
				565	int i;
				566
				567	/* As long as the kernel is not pre-emptible, this doesn't need to be
				568	under cli/sti. */
				569
				570	eaddr_start = MAGIC_PAGE0_START;
				571	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
				572	sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
				573
				574	eaddr = eaddr_start;
				575	eaddr_end = eaddr + PAGE_SIZE;
				576	while (eaddr < eaddr_end) {
				577	asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
				578	eaddr += L1_CACHE_BYTES;
				579	}
				580
				581	sh64_teardown_dtlb_cache_slot();
				582	eaddr_start += PAGE_SIZE;
				583	}
				584	}
				585
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	586	static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				587	unsigned long addr, unsigned long end)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	588	{
				589	pgd_t *pgd;
				590	pmd_t *pmd;
				591	pte_t *pte;
				592	pte_t entry;
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	593	spinlock_t *ptl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	594	unsigned long paddr;
				595
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	596	if (!mm)
				597	return; /* No way to find physical address of page */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	598
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	599	pgd = pgd_offset(mm, addr);
				600	if (pgd_bad(*pgd))
				601	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	602
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	603	pmd = pmd_offset(pgd, addr);
				604	if (pmd_none(pmd) \|\| pmd_bad(pmd))
				605	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	606
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	607	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
				608	do {
				609	entry = *pte;
				610	if (pte_none(entry) \|\| !pte_present(entry))
				611	continue;
				612	paddr = pte_val(entry) & PAGE_MASK;
				613	sh64_dcache_purge_coloured_phy_page(paddr, addr);
				614	} while (pte++, addr += PAGE_SIZE, addr != end);
				615	pte_unmap_unlock(pte - 1, ptl);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	}
				617	/****************************************************************************/
				618
				619	static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				620	unsigned long start, unsigned long end)
				621	{
				622	/* There are at least 5 choices for the implementation of this, with
				623	pros (+), cons(-), comments(*):
				624
				625	1. ocbp each line in the range through the original user's ASID
				626	+ no lines spuriously evicted
				627	- tlbmiss handling (must either handle faults on demand => extra
				628	special-case code in tlbmiss critical path), or map the page in
				629	advance (=> flush_tlb_range in advance to avoid multiple hits)
				630	- ASID switching
				631	- expensive for large ranges
				632
				633	2. temporarily map each page in the range to a special effective
				634	address and ocbp through the temporary mapping; relies on the
				635	fact that SH-5 OCB* always do TLB lookup and match on ptags (they
				636	never look at the etags)
				637	+ no spurious evictions
				638	- expensive for large ranges
				639	* surely cheaper than (1)
				640
				641	3. walk all the lines in the cache, check the tags, if a match
				642	occurs create a page mapping to ocbp the line through
				643	+ no spurious evictions
				644	- tag inspection overhead
				645	- (especially for small ranges)
				646	- potential cost of setting up/tearing down page mapping for
				647	every line that matches the range
				648	* cost partly independent of range size
				649
				650	4. walk all the lines in the cache, check the tags, if a match
				651	occurs use 4 * alloco to purge the line (+3 other probably
				652	innocent victims) by natural eviction
				653	+ no tlb mapping overheads
				654	- spurious evictions
				655	- tag inspection overhead
				656
				657	5. implement like flush_cache_all
				658	+ no tag inspection overhead
				659	- spurious evictions
				660	- bad for small ranges
				661
				662	(1) can be ruled out as more expensive than (2). (2) appears best
				663	for small ranges. The choice between (3), (4) and (5) for large
				664	ranges and the range size for the large/small boundary need
				665	benchmarking to determine.
				666
				667	For now use approach (2) for small ranges and (5) for large ones.
				668
				669	*/
				670
				671	int n_pages;
				672
				673	n_pages = ((end - start) >> PAGE_SHIFT);
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	674	if (n_pages >= 64 \|\| ((start ^ (end - 1)) & PMD_MASK)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	675	#if 1
				676	sh64_dcache_purge_all();
				677	#else
				678	unsigned long long set, way;
				679	unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
				680	for (set = 0; set < cpu_data->dcache.sets; set++) {
				681	unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
				682	for (way = 0; way < cpu_data->dcache.ways; way++) {
				683	unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				684	unsigned long long tag0;
				685	unsigned long line_valid;
				686
				687	asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				688	line_valid = tag0 & SH_CACHE_VALID;
				689	if (line_valid) {
				690	unsigned long cache_asid;
				691	unsigned long epn;
				692
				693	cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
				694	/* The next line needs some
				695	explanation. The virtual tags
				696	encode bits [31:13] of the virtual
				697	address, bit [12] of the 'tag' being
				698	implied by the cache set index. */
				699	epn = (tag0 & cpu_data->dcache.epn_mask) \| ((set & 0x80) << cpu_data->dcache.entry_shift);
				700
				701	if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
				702	/* TODO : could optimise this
				703	call by batching multiple
				704	adjacent sets together. */
				705	sh64_dcache_purge_sets(set, 1);
				706	break; /* Don't waste time inspecting other ways for this set */
				707	}
				708	}
				709	}
				710	}
				711	#endif
				712	} else {
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	713	/* Small range, covered by a single page table page */
				714	start &= PAGE_MASK; /* should already be so */
				715	end = PAGE_ALIGN(end); /* should already be so */
				716	sh64_dcache_purge_user_pages(mm, start, end);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	717	}
				718	return;
				719	}
				720
				721	static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
				722	{
				723	unsigned long long aligned_start;
				724	unsigned long long ull_end;
				725	unsigned long long addr;
				726
				727	ull_end = end;
				728
				729	/* Just wback over the range using the natural addresses. TLB miss
				730	handling will be OK (TBC) : the range has just been written to by
				731	the signal frame setup code, so the PTEs must exist.
				732
				733	Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
				734	it doesn't matter, even if the pid->ASID mapping changes whilst
				735	we're away. In that case the cache will have been flushed when the
				736	mapping was renewed. So the writebacks below will be nugatory (and
				737	we'll doubtless have to fault the TLB entry/ies in again with the
				738	new ASID), but it's a rare case.
				739	*/
				740	aligned_start = start & L1_CACHE_ALIGN_MASK;
				741	addr = aligned_start;
				742	while (addr < ull_end) {
				743	asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
				744	addr += L1_CACHE_BYTES;
				745	}
				746	}
				747
				748	/****************************************************************************/
				749
				750	/* These MUST lie in an area of virtual address space that's otherwise unused. */
				751	#define UNIQUE_EADDR_START 0xe0000000UL
				752	#define UNIQUE_EADDR_END 0xe8000000UL
				753
				754	static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
				755	{
				756	/* Given a physical address paddr, and a user virtual address
				757	user_eaddr which will eventually be mapped to it, create a one-off
				758	kernel-private eaddr mapped to the same paddr. This is used for
				759	creating special destination pages for copy_user_page and
				760	clear_user_page.
					Returns the coloured kernel eaddr. The D-TLB cache slot set up here is
					not torn down by this function; the callers in this file call
					sh64_teardown_dtlb_cache_slot() once they are done with the mapping. */
				761
				762	static unsigned long current_pointer = UNIQUE_EADDR_START;
				763	unsigned long coloured_pointer;
				764
					/* Window exhausted: purge the D-cache and restart from the bottom.
					Test with >= rather than == so the wrap still happens even if the
					step below ever fails to land exactly on UNIQUE_EADDR_END. */
				765	if (current_pointer >= UNIQUE_EADDR_END) {
				766	sh64_dcache_purge_all();
				767	current_pointer = UNIQUE_EADDR_START;
				768	}
				769
					/* Keep the window address but take the synonym bits from user_eaddr. */
				770	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
				771	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
				772
				773	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
				774
				775	return coloured_pointer;
				776	}
				777
				778	/****************************************************************************/
				779
				780	static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
				781	{
					/* Copy page 'from' into page 'to', writing the destination through a
					temporary kernel mapping of 'to' whose colour is taken from the user
					address 'address'. 'to' and 'from' are kernel virtual addresses. */
				782	void *coloured_to;
				783
				784	/* Discard any existing cache entries of the wrong colour. These are
				785	present quite often, if the kernel has recently used the page
				786	internally, then given it up, then it's been allocated to the user.
				787	*/
				788	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
				789
				790	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
				791	sh64_page_copy(from, coloured_to);
				792
					/* Release the D-TLB cache slot that sh64_make_unique_eaddr() set up. */
				793	sh64_teardown_dtlb_cache_slot();
				794	}
				795
				796	static void sh64_clear_user_page_coloured(void *to, unsigned long address)
				797	{
					/* Clear page 'to' through a temporary kernel mapping of its physical
					page whose colour is taken from the user address 'address'. */
				798	void *coloured_to;
				799
				800	/* Discard any existing kernel-originated lines of the wrong colour
				801	before the page is written through the coloured mapping. */
				802	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
				803
				804	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
				805	sh64_page_clear(coloured_to);
				806
					/* Release the D-TLB cache slot that sh64_make_unique_eaddr() set up. */
				807	sh64_teardown_dtlb_cache_slot();
				808	}
				809
				810	#endif /* !CONFIG_DCACHE_DISABLED */
				811
				812	/****************************************************************************/
				813
				814	/*##########################################################################
				815	EXTERNALLY CALLABLE API.
				816	##########################################################################*/
				817
				818	/* These functions are described in Documentation/cachetlb.txt.
				819	Each one of these functions varies in behaviour depending on whether the
				820	I-cache and/or D-cache are configured out.
				821
				822	Note that the Linux term 'flush' corresponds to what is termed 'purge' in
				823	the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
				824	invalidate the cache lines, and 'invalidate' for the I-cache.
				825	*/
				826
				827	#undef FLUSH_TRACE
				828
				829	void flush_cache_all(void)
				830	{
				831	/* Invalidate the entire contents of both caches, after writing back to
				832	memory any dirty data from the D-cache. The D-cache purge is issued
					before the I-cache invalidate. */
				833	sh64_dcache_purge_all();
				834	sh64_icache_inv_all();
				835	}
				836
				837	/****************************************************************************/
				838
				839	void flush_cache_mm(struct mm_struct *mm)
				840	{
				841	/* Invalidate an entire user-address space from both caches, after
				842	writing back dirty data (e.g. for shared mmap etc). */
				843
				844	/* This could be coded selectively by inspecting all the tags then
				845	doing 4*alloco on any set containing a match (as for
				846	flush_cache_range), but fork/exit/execve (where this is called from)
				847	are expensive anyway. */
				848
				849	/* Have to do a purge here, despite the comments re I-cache below.
				850	There could be odd-coloured dirty data associated with the mm still
				851	in the cache - if this gets written out through natural eviction
				852	after the kernel has reused the page there will be chaos.
				853	*/
				854
					/* 'mm' is not consulted here: the whole D-cache is purged. */
				855	sh64_dcache_purge_all();
				856
				857	/* The mm being torn down won't ever be active again, so any Icache
				858	lines tagged with its ASID won't be visible for the rest of the
				859	lifetime of this ASID cycle. Before the ASID gets reused, there
				860	will be a flush_cache_all. Hence we don't need to touch the
				861	I-cache. This is similar to the lack of action needed in
				862	flush_tlb_mm - see fault.c. */
				863	}
				864
				865	/****************************************************************************/
				866
				867	void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
				868	unsigned long end)
				869	{
					/* The address space to operate on is the one that owns 'vma'. */
				870	struct mm_struct *mm = vma->vm_mm;
				871
				872	/* Invalidate (from both caches) the range [start,end) of virtual
				873	addresses from the user address space specified by mm, after writing
				874	back any dirty data.
				875
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	876	Note, 'end' is 1 byte beyond the end of the range to flush. */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877
					/* D-cache purge first, then I-cache invalidate, over the same range. */
				878	sh64_dcache_purge_user_range(mm, start, end);
				879	sh64_icache_inv_user_page_range(mm, start, end);
				880	}
				881
				882	/****************************************************************************/
				883
				884	void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
				885	{
				886	/* Invalidate any entries in either cache for the vma within the user
				887	address space vma->vm_mm for the page starting at virtual address
				888	'eaddr'. This seems to be used primarily in breaking COW. Note,
				889	the I-cache must be searched too in case the page in question is
				890	both writable and being executed from (e.g. stack trampolines.)
				891
Hugh Dickins	60ec558	2005-10-29 18:16:34 -0700	[diff] [blame]	892	Note, this is called with pte lock held.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	893	*/
				894
					/* The D-cache is purged by physical address, derived from 'pfn'. */
				895	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
				896
					/* The I-cache is only touched for mappings marked executable. */
				897	if (vma->vm_flags & VM_EXEC) {
				898	sh64_icache_inv_user_page(vma, eaddr);
				899	}
				900	}
				901
				902	/****************************************************************************/
				903
				904	#ifndef CONFIG_DCACHE_DISABLED
				905
				906	void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
				907	{
				908	/* 'from' and 'to' are kernel virtual addresses (within the superpage
				909	mapping of the physical RAM). 'address' is the user virtual address
				910	where the copy 'to' will be mapped after. This allows a custom
				911	mapping to be used to ensure that the new copy is placed in the
				912	right cache sets for the user to see it without having to bounce it
				913	out via memory. Note however : the call to flush_page_to_ram in
				914	(generic)/mm/memory.c:(break_cow) undoes all this good work in that one
				915	very important case!
				916
				917	TBD : can we guarantee that on every call, any cache entries for
				918	'from' are in the same colour sets as 'address' also? i.e. is this
				919	always used just to deal with COW? (I suspect not). */
				920
				921	/* There are two possibilities here for when the page 'from' was last accessed:
				922	* by the kernel : this is OK, no purge required.
				923	* by the/a user (e.g. for break_COW) : need to purge.
				924
				925	If the potential user mapping at 'address' is the same colour as
				926	'from' there is no need to purge any cache lines from the 'from'
				927	page mapped into cache sets of colour 'address'. (The copy will be
				928	accessing the page through 'from').
				929	*/
				930
				931	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
				932	sh64_dcache_purge_coloured_phy_page(__pa(from), address);
				933	}
				934
				935	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
				936	/* No synonym problem on destination */
				937	sh64_page_copy(from, to);
				938	} else {
				939	sh64_copy_user_page_coloured(to, from, address);
				940	}
				941
				942	/* Note, don't need to flush 'from' page from the cache again - it's
				943	done anyway by the generic code */
				944	}
				945
				946	void clear_user_page(void *to, unsigned long address, struct page *page)
				947	{
				948	/* 'to' is a kernel virtual address (within the superpage
				949	mapping of the physical RAM). 'address' is the user virtual address
				950	where the 'to' page will be mapped after. This allows a custom
				951	mapping to be used to ensure that the new copy is placed in the
				952	right cache sets for the user to see it without having to bounce it
				953	out via memory.
				954	*/
				955
				956	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
				957	/* No synonym problem on destination */
				958	sh64_page_clear(to);
				959	} else {
				960	sh64_clear_user_page_coloured(to, address);
				961	}
				962	}
				963
				964	#endif /* !CONFIG_DCACHE_DISABLED */
				965
				966	/****************************************************************************/
				967
				968	void flush_dcache_page(struct page *page)
				969	{
					/* Purge the physical page backing 'page' from the D-cache, then issue
					a write barrier. */
				970	sh64_dcache_purge_phy_page(page_to_phys(page));
				971	wmb();
				972	}
				973
				974	/****************************************************************************/
				975
				976	void flush_icache_range(unsigned long start, unsigned long end)
				977	{
				978	/* Flush the range [start,end] of kernel virtual address space from
				979	the I-cache. The corresponding range must be purged from the
				980	D-cache also because the SH-5 doesn't have cache snooping between
				981	the caches. The addresses will be visible through the superpage
				982	mapping, therefore it's guaranteed that there are no cache entries for
				983	the range in cache sets of the wrong colour.
				984
				985	Primarily used for cohering the I-cache after a module has
				986	been loaded. */
				987
				988	/* We also make sure to purge the same range from the D-cache since
				989	flush_page_to_ram() won't be doing this for us! */
				990
				991	sh64_dcache_purge_kernel_range(start, end);
				992	wmb();
				993	sh64_icache_inv_kernel_range(start, end);
				994	}
				995
				996	/****************************************************************************/
				997
				998	void flush_icache_user_range(struct vm_area_struct *vma,
				999	struct page *page, unsigned long addr, int len)
				1000	{
				1001	/* Flush the range of user (defined by vma->vm_mm) address space
				1002	starting at 'addr' for 'len' bytes from the cache. The range does
				1003	not straddle a page boundary, the unique physical page containing
				1004	the range is 'page'. This seems to be used mainly for invalidating
				1005	an address range following a poke into the program text through the
				1006	ptrace() call from another process (e.g. for BRK instruction
				1007	insertion). */
				1008
					/* The D-cache purge is keyed on the physical page and 'addr' only;
					'len' is used just for the I-cache invalidate below. */
				1009	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
				1010	mb();
				1011
					/* The I-cache is only touched for mappings marked executable. */
				1012	if (vma->vm_flags & VM_EXEC) {
				1013	sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
				1014	}
				1015	}
				1016
				1017	/*##########################################################################
				1018	ARCH/SH64 PRIVATE CALLABLE API.
				1019	##########################################################################*/
				1020
				1021	void flush_cache_sigtramp(unsigned long start, unsigned long end)
				1022	{
				1023	/* For the address range [start,end), write back the data from the
				1024	D-cache and invalidate the corresponding region of the I-cache for
				1025	the current process. Used to flush signal trampolines on the stack
				1026	to make them executable. */
				1027
					/* Write-back, then a write barrier, then the I-cache invalidate. */
				1028	sh64_dcache_wback_current_user_range(start, end);
				1029	wmb();
				1030	sh64_icache_inv_current_user_range(start, end);
				1031	}
				1032