
/arch/sh64/mm/cache.c

https://bitbucket.org/evzijst/gittest
C | 1041 lines | 473 code | 171 blank | 397 comment | 68 complexity | 7539b69553018f3dd68b6eec59f2bfc7 MD5
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * arch/sh64/mm/cache.c
   7 *
   8 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
   9 * Second version Copyright (C) benedict.gaster@superh.com 2002
  10 * Third version Copyright Richard.Curnow@superh.com 2003
  11 * Hacks to third version Copyright (C) 2003 Paul Mundt
  12 */
  13
  14/****************************************************************************/
  15
  16#include <linux/config.h>
  17#include <linux/init.h>
  18#include <linux/mman.h>
  19#include <linux/mm.h>
  20#include <linux/threads.h>
  21#include <asm/page.h>
  22#include <asm/pgtable.h>
  23#include <asm/processor.h>
  24#include <asm/cache.h>
  25#include <asm/tlb.h>
  26#include <asm/io.h>
  27#include <asm/uaccess.h>
  28#include <asm/mmu_context.h>
  29#include <asm/pgalloc.h> /* for flush_itlb_range */
  30
  31#include <linux/proc_fs.h>
  32
  33/* This function is in entry.S */
  34extern unsigned long switch_and_save_asid(unsigned long new_asid);
  35
  36/* Wired TLB entry for the D-cache */
  37static unsigned long long dtlb_cache_slot;
  38
  39/**
  40 * sh64_cache_init()
  41 *
  42 * This is pretty much just a straightforward clone of the SH
  43 * detect_cpu_and_cache_system().
  44 *
  45 * This function is responsible for setting up all of the cache
  46 * info dynamically as well as taking care of CPU probing and
  47 * setting up the relevant subtype data.
  48 *
  49 * FIXME: For the time being, we only really support the SH5-101
  50 * out of the box, and don't support dynamic probing for things
  51 * like the SH5-103 or even cut2 of the SH5-101. Implement this
  52 * later!
  53 */
  54int __init sh64_cache_init(void)
  55{
  56	/*
  57	 * First, setup some sane values for the I-cache.
  58	 */
  59	cpu_data->icache.ways		= 4;
  60	cpu_data->icache.sets		= 256;
  61	cpu_data->icache.linesz		= L1_CACHE_BYTES;
  62
  63	/*
  64	 * FIXME: This can probably be cleaned up a bit as well.. for example,
  65	 * do we really need the way shift _and_ the way_step_shift ?? Judging
  66	 * by the existing code, I would guess no.. is there any valid reason
  67	 * why we need to be tracking this around?
  68	 */
  69	cpu_data->icache.way_shift	= 13;
  70	cpu_data->icache.entry_shift	= 5;
  71	cpu_data->icache.set_shift	= 4;
  72	cpu_data->icache.way_step_shift	= 16;
  73	cpu_data->icache.asid_shift	= 2;
  74
  75	/*
  76	 * way offset = cache size / associativity, so just don't factor in
  77	 * associativity in the first place..
  78	 */
  79	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
  80					  cpu_data->icache.linesz;
  81
  82	cpu_data->icache.asid_mask	= 0x3fc;
  83	cpu_data->icache.idx_mask	= 0x1fe0;
  84	cpu_data->icache.epn_mask	= 0xffffe000;
  85	cpu_data->icache.flags		= 0;
  86
  87	/*
  88	 * Next, setup some sane values for the D-cache.
  89	 *
  90	 * On the SH5, these are pretty consistent with the I-cache settings,
  91	 * so we just copy over the existing definitions.. these can be fixed
  92	 * up later, especially if we add runtime CPU probing.
  93	 *
  94	 * Though in the meantime it saves us from having to duplicate all of
  95	 * the above definitions..
  96	 */
  97	cpu_data->dcache		= cpu_data->icache;
  98
  99	/*
 100	 * Setup any cache-related flags here
 101	 */
 102#if defined(CONFIG_DCACHE_WRITE_THROUGH)
 103	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
 104#elif defined(CONFIG_DCACHE_WRITE_BACK)
 105	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
 106#endif
 107
 108	/*
 109	 * We also need to reserve a slot for the D-cache in the DTLB, so we
 110	 * do this now ..
 111	 */
 112	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();
 113
 114	return 0;
 115}
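/* A worked example of the geometry set up above (assuming L1_CACHE_BYTES == 32,
   which is consistent with the entry_shift of 5 and way_shift of 13 used here):

     ways = 4, sets = 256, linesz = 32
       => way_ofs = sets * linesz = 8192 bytes  (i.e. 1 << way_shift)
       => total cache size = ways * way_ofs = 32 Kbytes

     idx_mask  = 0x1fe0     selects bits [12:5], the 8-bit set index
     epn_mask  = 0xffffe000 keeps bits [31:13] of the effective address
     asid_mask = 0x3fc      selects the 8-bit ASID field at asid_shift = 2
   */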
 116
 117#ifdef CONFIG_DCACHE_DISABLED
 118#define sh64_dcache_purge_all()					do { } while (0)
 119#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
 120#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
 121#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
 122#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
 123#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
 124#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
 125#endif
 126
 127/*##########################################################################*/
 128
 129/* From here onwards, a rewrite of the implementation,
 130   by Richard.Curnow@superh.com.
 131
  132   The major changes in this version compared to the old one are:
 133   1. use more selective purging through OCBP instead of using ALLOCO to purge
 134      by natural replacement.  This avoids purging out unrelated cache lines
 135      that happen to be in the same set.
 136   2. exploit the APIs copy_user_page and clear_user_page better
 137   3. be more selective about I-cache purging, in particular use invalidate_all
 138      more sparingly.
 139
 140   */
 141
 142/*##########################################################################
 143			       SUPPORT FUNCTIONS
 144  ##########################################################################*/
 145
 146/****************************************************************************/
  147/* The following group of functions deals with mapping and unmapping a temporary
  148   page into the DTLB slot that has been set aside for our exclusive use. */
 149/* In order to accomplish this, we use the generic interface for adding and
 150   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
 151/****************************************************************************/
 152
 153static unsigned long slot_own_flags;
 154
 155static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
 156{
 157	local_irq_save(slot_own_flags);
 158	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
 159}
 160
 161static inline void sh64_teardown_dtlb_cache_slot(void)
 162{
 163	sh64_teardown_tlb_slot(dtlb_cache_slot);
 164	local_irq_restore(slot_own_flags);
 165}
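/* These two are always used as a bracketing pair around a purge or copy loop.
   A sketch of the pattern (names here are illustrative; see
   sh64_dcache_purge_coloured_phy_page below for a real instance):

     sh64_setup_dtlb_cache_slot(alias_eaddr, get_asid(), paddr);
     for (addr = alias_eaddr; addr < alias_eaddr + PAGE_SIZE; addr += L1_CACHE_BYTES)
             asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
     sh64_teardown_dtlb_cache_slot();

   Note the saved IRQ flags live in the file-scope 'slot_own_flags', so the pair
   cannot be nested. */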
 166
 167/****************************************************************************/
 168
 169#ifndef CONFIG_ICACHE_DISABLED
 170
 171static void __inline__ sh64_icache_inv_all(void)
 172{
 173	unsigned long long addr, flag, data;
 174	unsigned int flags;
 175
 176	addr=ICCR0;
 177	flag=ICCR0_ICI;
 178	data=0;
 179
 180	/* Make this a critical section for safety (probably not strictly necessary.) */
 181	local_irq_save(flags);
 182
  183	/* Without %1 it gets inexplicably wrong */
 184	asm volatile("getcfg	%3, 0, %0\n\t"
 185			"or	%0, %2, %0\n\t"
 186			"putcfg	%3, 0, %0\n\t"
 187			"synci"
 188			: "=&r" (data)
 189			: "0" (data), "r" (flag), "r" (addr));
 190
 191	local_irq_restore(flags);
 192}
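/* The asm above is a read-modify-write of the ICCR0 configuration register:
   getcfg reads the current value, ICCR0_ICI is OR'd in, putcfg writes it back,
   and synci makes sure the invalidation is seen before any subsequent
   instruction fetch. */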
 193
 194static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
 195{
 196	/* Invalidate range of addresses [start,end] from the I-cache, where
 197	 * the addresses lie in the kernel superpage. */
 198
 199	unsigned long long ullend, addr, aligned_start;
 200#if (NEFF == 32)
 201	aligned_start = (unsigned long long)(signed long long)(signed long) start;
 202#else
 203#error "NEFF != 32"
 204#endif
 205	aligned_start &= L1_CACHE_ALIGN_MASK;
 206	addr = aligned_start;
 207#if (NEFF == 32)
 208	ullend = (unsigned long long) (signed long long) (signed long) end;
 209#else
 210#error "NEFF != 32"
 211#endif
 212	while (addr <= ullend) {
 213		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
 214		addr += L1_CACHE_BYTES;
 215	}
 216}
 217
 218static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
 219{
 220	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
 221	   Also, eaddr is page-aligned. */
 222
 223	unsigned long long addr, end_addr;
 224	unsigned long flags = 0;
 225	unsigned long running_asid, vma_asid;
 226	addr = eaddr;
 227	end_addr = addr + PAGE_SIZE;
 228
 229	/* Check whether we can use the current ASID for the I-cache
 230	   invalidation.  For example, if we're called via
 231	   access_process_vm->flush_cache_page->here, (e.g. when reading from
 232	   /proc), 'running_asid' will be that of the reader, not of the
 233	   victim.
 234
 235	   Also, note the risk that we might get pre-empted between the ASID
 236	   compare and blocking IRQs, and before we regain control, the
 237	   pid->ASID mapping changes.  However, the whole cache will get
 238	   invalidated when the mapping is renewed, so the worst that can
 239	   happen is that the loop below ends up invalidating somebody else's
 240	   cache entries.
 241	*/
 242
 243	running_asid = get_asid();
 244	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
 245	if (running_asid != vma_asid) {
 246		local_irq_save(flags);
 247		switch_and_save_asid(vma_asid);
 248	}
 249	while (addr < end_addr) {
 250		/* Worth unrolling a little */
 251		asm __volatile__("icbi %0,  0" : : "r" (addr));
 252		asm __volatile__("icbi %0, 32" : : "r" (addr));
 253		asm __volatile__("icbi %0, 64" : : "r" (addr));
 254		asm __volatile__("icbi %0, 96" : : "r" (addr));
 255		addr += 128;
 256	}
 257	if (running_asid != vma_asid) {
 258		switch_and_save_asid(running_asid);
 259		local_irq_restore(flags);
 260	}
 261}
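/* The unrolled loop above issues one icbi per 32-byte cache line, four lines
   per iteration (offsets 0/32/64/96), hence the 128-byte stride: PAGE_SIZE/128
   iterations cover the page. */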
 262
 263/****************************************************************************/
 264
 265static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
 266			  unsigned long start, unsigned long end)
 267{
 268	/* Used for invalidating big chunks of I-cache, i.e. assume the range
 269	   is whole pages.  If 'start' or 'end' is not page aligned, the code
 270	   is conservative and invalidates to the ends of the enclosing pages.
 271	   This is functionally OK, just a performance loss. */
 272
 273	/* See the comments below in sh64_dcache_purge_user_range() regarding
 274	   the choice of algorithm.  However, for the I-cache option (2) isn't
 275	   available because there are no physical tags so aliases can't be
 276	   resolved.  The icbi instruction has to be used through the user
  277	   mapping.   Because icbi is cheaper than ocbp on a cache hit, the
  278	   selective code stays worthwhile up to a larger range than it does
  279	   for the D-cache.  Just assume 64 pages for now as a working
  280	   figure.
 281	   */
 282
 283	int n_pages;
 284
 285	if (!mm) return;
 286
 287	n_pages = ((end - start) >> PAGE_SHIFT);
 288	if (n_pages >= 64) {
 289		sh64_icache_inv_all();
 290	} else {
 291		unsigned long aligned_start;
 292		unsigned long eaddr;
 293		unsigned long after_last_page_start;
 294		unsigned long mm_asid, current_asid;
 295		unsigned long long flags = 0ULL;
 296
 297		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 298		current_asid = get_asid();
 299
 300		if (mm_asid != current_asid) {
 301			/* Switch ASID and run the invalidate loop under cli */
 302			local_irq_save(flags);
 303			switch_and_save_asid(mm_asid);
 304		}
 305
 306		aligned_start = start & PAGE_MASK;
 307		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
 308
 309		while (aligned_start < after_last_page_start) {
 310			struct vm_area_struct *vma;
 311			unsigned long vma_end;
 312			vma = find_vma(mm, aligned_start);
  313			if (!vma || (aligned_start < vma->vm_start)) {
 314				/* Avoid getting stuck in an error condition */
 315				aligned_start += PAGE_SIZE;
 316				continue;
 317			}
 318			vma_end = vma->vm_end;
 319			if (vma->vm_flags & VM_EXEC) {
 320				/* Executable */
 321				eaddr = aligned_start;
 322				while (eaddr < vma_end) {
 323					sh64_icache_inv_user_page(vma, eaddr);
 324					eaddr += PAGE_SIZE;
 325				}
 326			}
 327			aligned_start = vma->vm_end; /* Skip to start of next region */
 328		}
 329		if (mm_asid != current_asid) {
 330			switch_and_save_asid(current_asid);
 331			local_irq_restore(flags);
 332		}
 333	}
 334}
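/* Rough justification for the 64-page threshold above (an estimate, assuming
   4 Kbyte pages and the 32 Kbyte I-cache set up in sh64_cache_init()): 64 pages
   is 256 Kbytes of text, several times the cache size, so a blanket
   sh64_icache_inv_all() is cheaper than walking that many lines with icbi. */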
 335
 336static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
 337						unsigned long start, int len)
 338{
 339
 340	/* Invalidate a small range of user context I-cache, not necessarily
 341	   page (or even cache-line) aligned. */
 342
 343	unsigned long long eaddr = start;
 344	unsigned long long eaddr_end = start + len;
 345	unsigned long current_asid, mm_asid;
 346	unsigned long long flags;
 347	unsigned long long epage_start;
 348
 349	/* Since this is used inside ptrace, the ASID in the mm context
 350	   typically won't match current_asid.  We'll have to switch ASID to do
 351	   this.  For safety, and given that the range will be small, do all
 352	   this under cli.
 353
 354	   Note, there is a hazard that the ASID in mm->context is no longer
 355	   actually associated with mm, i.e. if the mm->context has started a
 356	   new cycle since mm was last active.  However, this is just a
 357	   performance issue: all that happens is that we invalidate lines
 358	   belonging to another mm, so the owning process has to refill them
 359	   when that mm goes live again.  mm itself can't have any cache
 360	   entries because there will have been a flush_cache_all when the new
 361	   mm->context cycle started. */
 362
 363	/* Align to start of cache line.  Otherwise, suppose len==8 and start
 364	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
 365	eaddr = start & L1_CACHE_ALIGN_MASK;
 366	eaddr_end = start + len;
 367
 368	local_irq_save(flags);
 369	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 370	current_asid = switch_and_save_asid(mm_asid);
 371
 372	epage_start = eaddr & PAGE_MASK;
 373
 374	while (eaddr < eaddr_end)
 375	{
 376		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
 377		eaddr += L1_CACHE_BYTES;
 378	}
 379	switch_and_save_asid(current_asid);
 380	local_irq_restore(flags);
 381}
 382
 383static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
 384{
 385	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
 386	   cache hit on the virtual tag the instruction ends there, without a
 387	   TLB lookup. */
 388
 389	unsigned long long aligned_start;
 390	unsigned long long ull_end;
 391	unsigned long long addr;
 392
 393	ull_end = end;
 394
 395	/* Just invalidate over the range using the natural addresses.  TLB
 396	   miss handling will be OK (TBC).  Since it's for the current process,
 397	   either we're already in the right ASID context, or the ASIDs have
 398	   been recycled since we were last active in which case we might just
  399	   invalidate another process's I-cache entries : no worries, just a
 400	   performance drop for him. */
 401	aligned_start = start & L1_CACHE_ALIGN_MASK;
 402	addr = aligned_start;
 403	while (addr < ull_end) {
 404		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
 405		asm __volatile__ ("nop");
 406		asm __volatile__ ("nop");
 407		addr += L1_CACHE_BYTES;
 408	}
 409}
 410
 411#endif /* !CONFIG_ICACHE_DISABLED */
 412
 413/****************************************************************************/
 414
 415#ifndef CONFIG_DCACHE_DISABLED
 416
 417/* Buffer used as the target of alloco instructions to purge data from cache
 418   sets by natural eviction. -- RPC */
  419#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
 420static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
 421
 422/****************************************************************************/
 423
 424static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 425{
 426	/* Purge all ways in a particular block of sets, specified by the base
 427	   set number and number of sets.  Can handle wrap-around, if that's
 428	   needed.  */
 429
 430	int dummy_buffer_base_set;
 431	unsigned long long eaddr, eaddr0, eaddr1;
 432	int j;
 433	int set_offset;
 434
 435	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
 436	set_offset = sets_to_purge_base - dummy_buffer_base_set;
 437
 438	for (j=0; j<n_sets; j++, set_offset++) {
 439		set_offset &= (cpu_data->dcache.sets - 1);
 440		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
 441
 442		/* Do one alloco which hits the required set per cache way.  For
 443		   write-back mode, this will purge the #ways resident lines.   There's
 444		   little point unrolling this loop because the allocos stall more if
 445		   they're too close together. */
 446		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
 447		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
 448			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
 449			asm __volatile__ ("synco"); /* TAKum03020 */
 450		}
 451
 452		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
 453		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
 454			/* Load from each address.  Required because alloco is a NOP if
 455			   the cache is write-through.  Write-through is a config option. */
 456			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
 457				*(volatile unsigned char *)(int)eaddr;
 458		}
 459	}
 460
 461	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
 462	   If the dummy block is just left resident, it will naturally get
 463	   evicted as required.  */
 464
 465	return;
 466}
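/* To make the arithmetic above concrete (using the geometry from
   sh64_cache_init(): way_ofs = 8192, ways = 4): for each target set the first
   inner loop issues four allocos at 8 Kbyte strides.  All four addresses index
   the same set (only bits [12:5] select the set), so between them they displace
   the up-to-4 resident lines of that set in write-back mode; in write-through
   mode the follow-up loads do the displacement instead, since alloco is a no-op
   there. */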
 467
 468/****************************************************************************/
 469
 470static void sh64_dcache_purge_all(void)
 471{
 472	/* Purge the entire contents of the dcache.  The most efficient way to
 473	   achieve this is to use alloco instructions on a region of unused
 474	   memory equal in size to the cache, thereby causing the current
 475	   contents to be discarded by natural eviction.  The alternative,
 476	   namely reading every tag, setting up a mapping for the corresponding
 477	   page and doing an OCBP for the line, would be much more expensive.
 478	   */
 479
 480	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
 481
 482	return;
 483
 484}
 485
 486/****************************************************************************/
 487
 488static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
 489{
 490	/* Purge the range of addresses [start,end] from the D-cache.  The
 491	   addresses lie in the superpage mapping.  There's no harm if we
 492	   overpurge at either end - just a small performance loss. */
 493	unsigned long long ullend, addr, aligned_start;
 494#if (NEFF == 32)
 495	aligned_start = (unsigned long long)(signed long long)(signed long) start;
 496#else
 497#error "NEFF != 32"
 498#endif
 499	aligned_start &= L1_CACHE_ALIGN_MASK;
 500	addr = aligned_start;
 501#if (NEFF == 32)
 502	ullend = (unsigned long long) (signed long long) (signed long) end;
 503#else
 504#error "NEFF != 32"
 505#endif
 506	while (addr <= ullend) {
 507		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
 508		addr += L1_CACHE_BYTES;
 509	}
 510	return;
 511}
 512
 513/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
 514   anything else in the kernel */
 515#define MAGIC_PAGE0_START 0xffffffffec000000ULL
 516
 517static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
 518{
 519	/* Purge the physical page 'paddr' from the cache.  It's known that any
  520	   cache lines requiring attention have the same page colour as the
 521	   address 'eaddr'.
 522
 523	   This relies on the fact that the D-cache matches on physical tags
 524	   when no virtual tag matches.  So we create an alias for the original
 525	   page and purge through that.  (Alternatively, we could have done
 526	   this by switching ASID to match the original mapping and purged
 527	   through that, but that involves ASID switching cost + probably a
 528	   TLBMISS + refill anyway.)
 529	   */
 530
 531	unsigned long long magic_page_start;
 532	unsigned long long magic_eaddr, magic_eaddr_end;
 533
 534	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
 535
 536	/* As long as the kernel is not pre-emptible, this doesn't need to be
 537	   under cli/sti. */
 538
 539	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
 540
 541	magic_eaddr = magic_page_start;
 542	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
 543	while (magic_eaddr < magic_eaddr_end) {
 544		/* Little point in unrolling this loop - the OCBPs are blocking
 545		   and won't go any quicker (i.e. the loop overhead is parallel
 546		   to part of the OCBP execution.) */
 547		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
 548		magic_eaddr += L1_CACHE_BYTES;
 549	}
 550
 551	sh64_teardown_dtlb_cache_slot();
 552}
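/* Colour selection above: CACHE_OC_SYN_MASK extracts the synonym bits of
   'eaddr', i.e. the cache-index bits that lie above the page offset, so the
   alias created at MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK) indexes the
   same cache sets as the original mapping and the OCBPs hit the stale lines by
   physical tag. */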
 553
 554/****************************************************************************/
 555
 556static void sh64_dcache_purge_phy_page(unsigned long paddr)
 557{
  558	/* Purge a page given its physical start address, by creating a
 559	   temporary 1 page mapping and purging across that.  Even if we know
 560	   the virtual address (& vma or mm) of the page, the method here is
 561	   more elegant because it avoids issues of coping with page faults on
 562	   the purge instructions (i.e. no special-case code required in the
 563	   critical path in the TLB miss handling). */
 564
 565	unsigned long long eaddr_start, eaddr, eaddr_end;
 566	int i;
 567
 568	/* As long as the kernel is not pre-emptible, this doesn't need to be
 569	   under cli/sti. */
 570
 571	eaddr_start = MAGIC_PAGE0_START;
 572	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
 573		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
 574
 575		eaddr = eaddr_start;
 576		eaddr_end = eaddr + PAGE_SIZE;
 577		while (eaddr < eaddr_end) {
 578			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
 579			eaddr += L1_CACHE_BYTES;
 580		}
 581
 582		sh64_teardown_dtlb_cache_slot();
 583		eaddr_start += PAGE_SIZE;
 584	}
 585}
 586
 587static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
 588{
 589	pgd_t *pgd;
 590	pmd_t *pmd;
 591	pte_t *pte;
 592	pte_t entry;
 593	unsigned long paddr;
 594
 595	/* NOTE : all the callers of this have mm->page_table_lock held, so the
 596	   following page table traversal is safe even on SMP/pre-emptible. */
 597
 598	if (!mm) return; /* No way to find physical address of page */
 599	pgd = pgd_offset(mm, eaddr);
 600	if (pgd_bad(*pgd)) return;
 601
 602	pmd = pmd_offset(pgd, eaddr);
 603	if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
 604
 605	pte = pte_offset_kernel(pmd, eaddr);
 606	entry = *pte;
 607	if (pte_none(entry) || !pte_present(entry)) return;
 608
 609	paddr = pte_val(entry) & PAGE_MASK;
 610
 611	sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
 612
 613}
 614/****************************************************************************/
 615
 616static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 617			  unsigned long start, unsigned long end)
 618{
 619	/* There are at least 5 choices for the implementation of this, with
 620	   pros (+), cons(-), comments(*):
 621
 622	   1. ocbp each line in the range through the original user's ASID
 623	      + no lines spuriously evicted
 624	      - tlbmiss handling (must either handle faults on demand => extra
 625		special-case code in tlbmiss critical path), or map the page in
 626		advance (=> flush_tlb_range in advance to avoid multiple hits)
 627	      - ASID switching
 628	      - expensive for large ranges
 629
 630	   2. temporarily map each page in the range to a special effective
 631	      address and ocbp through the temporary mapping; relies on the
 632	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
 633	      never look at the etags)
 634	      + no spurious evictions
 635	      - expensive for large ranges
 636	      * surely cheaper than (1)
 637
 638	   3. walk all the lines in the cache, check the tags, if a match
 639	      occurs create a page mapping to ocbp the line through
 640	      + no spurious evictions
 641	      - tag inspection overhead
 642	      - (especially for small ranges)
 643	      - potential cost of setting up/tearing down page mapping for
 644		every line that matches the range
 645	      * cost partly independent of range size
 646
 647	   4. walk all the lines in the cache, check the tags, if a match
 648	      occurs use 4 * alloco to purge the line (+3 other probably
 649	      innocent victims) by natural eviction
 650	      + no tlb mapping overheads
 651	      - spurious evictions
 652	      - tag inspection overhead
 653
 654	   5. implement like flush_cache_all
 655	      + no tag inspection overhead
 656	      - spurious evictions
 657	      - bad for small ranges
 658
 659	   (1) can be ruled out as more expensive than (2).  (2) appears best
 660	   for small ranges.  The choice between (3), (4) and (5) for large
 661	   ranges and the range size for the large/small boundary need
 662	   benchmarking to determine.
 663
 664	   For now use approach (2) for small ranges and (5) for large ones.
 665
 666	   */
 667
 668	int n_pages;
 669
 670	n_pages = ((end - start) >> PAGE_SHIFT);
 671	if (n_pages >= 64) {
 672#if 1
 673		sh64_dcache_purge_all();
 674#else
 675		unsigned long long set, way;
 676		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 677		for (set = 0; set < cpu_data->dcache.sets; set++) {
 678			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
 679			for (way = 0; way < cpu_data->dcache.ways; way++) {
 680				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
 681				unsigned long long tag0;
 682				unsigned long line_valid;
 683
 684				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
 685				line_valid = tag0 & SH_CACHE_VALID;
 686				if (line_valid) {
 687					unsigned long cache_asid;
 688					unsigned long epn;
 689
 690					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
 691					/* The next line needs some
 692					   explanation.  The virtual tags
 693					   encode bits [31:13] of the virtual
 694					   address, bit [12] of the 'tag' being
 695					   implied by the cache set index. */
 696					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
 697
 698					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
 699						/* TODO : could optimise this
 700						   call by batching multiple
 701						   adjacent sets together. */
 702						sh64_dcache_purge_sets(set, 1);
 703						break; /* Don't waste time inspecting other ways for this set */
 704					}
 705				}
 706			}
 707		}
 708#endif
 709	} else {
 710		/* 'Small' range */
 711		unsigned long aligned_start;
 712		unsigned long eaddr;
 713		unsigned long last_page_start;
 714
 715		aligned_start = start & PAGE_MASK;
 716		/* 'end' is 1 byte beyond the end of the range */
 717		last_page_start = (end - 1) & PAGE_MASK;
 718
 719		eaddr = aligned_start;
 720		while (eaddr <= last_page_start) {
 721			sh64_dcache_purge_user_page(mm, eaddr);
 722			eaddr += PAGE_SIZE;
 723		}
 724	}
 725	return;
 726}
 727
 728static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
 729{
 730	unsigned long long aligned_start;
 731	unsigned long long ull_end;
 732	unsigned long long addr;
 733
 734	ull_end = end;
 735
 736	/* Just wback over the range using the natural addresses.  TLB miss
 737	   handling will be OK (TBC) : the range has just been written to by
 738	   the signal frame setup code, so the PTEs must exist.
 739
 740	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
 741	   it doesn't matter, even if the pid->ASID mapping changes whilst
 742	   we're away.  In that case the cache will have been flushed when the
 743	   mapping was renewed.  So the writebacks below will be nugatory (and
 744	   we'll doubtless have to fault the TLB entry/ies in again with the
 745	   new ASID), but it's a rare case.
 746	   */
 747	aligned_start = start & L1_CACHE_ALIGN_MASK;
 748	addr = aligned_start;
 749	while (addr < ull_end) {
 750		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
 751		addr += L1_CACHE_BYTES;
 752	}
 753}
 754
 755/****************************************************************************/
 756
 757/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
 758#define UNIQUE_EADDR_START 0xe0000000UL
 759#define UNIQUE_EADDR_END   0xe8000000UL
 760
 761static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
 762{
 763	/* Given a physical address paddr, and a user virtual address
 764	   user_eaddr which will eventually be mapped to it, create a one-off
 765	   kernel-private eaddr mapped to the same paddr.  This is used for
 766	   creating special destination pages for copy_user_page and
 767	   clear_user_page */
 768
 769	static unsigned long current_pointer = UNIQUE_EADDR_START;
 770	unsigned long coloured_pointer;
 771
 772	if (current_pointer == UNIQUE_EADDR_END) {
 773		sh64_dcache_purge_all();
 774		current_pointer = UNIQUE_EADDR_START;
 775	}
 776
 777	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
 778	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
 779
 780	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
 781
 782	return coloured_pointer;
 783}
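/* A small worked example of the allocator above (assuming 4 Kbyte pages and
   CACHE_OC_N_SYNBITS == 1, i.e. a single colour bit): each call consumes an
   8 Kbyte window (PAGE_SIZE << 1) in which the colour bit is forced to match
   'user_eaddr'.  The 128 Mbyte region from UNIQUE_EADDR_START to
   UNIQUE_EADDR_END therefore yields 16384 such windows before the pointer
   wraps, at which point the whole D-cache is purged and allocation restarts. */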
 784
 785/****************************************************************************/
 786
 787static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
 788{
 789	void *coloured_to;
 790
 791	/* Discard any existing cache entries of the wrong colour.  These are
 792	   present quite often, if the kernel has recently used the page
 793	   internally, then given it up, then it's been allocated to the user.
 794	   */
 795	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
 796
 797	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
 798	sh64_page_copy(from, coloured_to);
 799
 800	sh64_teardown_dtlb_cache_slot();
 801}
 802
 803static void sh64_clear_user_page_coloured(void *to, unsigned long address)
 804{
 805	void *coloured_to;
 806
 807	/* Discard any existing kernel-originated lines of the wrong colour (as
 808	   above) */
 809	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
 810
 811	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
 812	sh64_page_clear(coloured_to);
 813
 814	sh64_teardown_dtlb_cache_slot();
 815}
 816
 817#endif /* !CONFIG_DCACHE_DISABLED */
 818
 819/****************************************************************************/
 820
 821/*##########################################################################
 822			    EXTERNALLY CALLABLE API.
 823  ##########################################################################*/
 824
 825/* These functions are described in Documentation/cachetlb.txt.
 826   Each one of these functions varies in behaviour depending on whether the
 827   I-cache and/or D-cache are configured out.
 828
 829   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
 830   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
 831   invalidate the cache lines, and 'invalidate' for the I-cache.
 832   */
 833
 834#undef FLUSH_TRACE
 835
 836void flush_cache_all(void)
 837{
 838	/* Invalidate the entire contents of both caches, after writing back to
 839	   memory any dirty data from the D-cache. */
 840	sh64_dcache_purge_all();
 841	sh64_icache_inv_all();
 842}
 843
 844/****************************************************************************/
 845
 846void flush_cache_mm(struct mm_struct *mm)
 847{
 848	/* Invalidate an entire user-address space from both caches, after
 849	   writing back dirty data (e.g. for shared mmap etc). */
 850
 851	/* This could be coded selectively by inspecting all the tags then
 852	   doing 4*alloco on any set containing a match (as for
 853	   flush_cache_range), but fork/exit/execve (where this is called from)
 854	   are expensive anyway. */
 855
 856	/* Have to do a purge here, despite the comments re I-cache below.
 857	   There could be odd-coloured dirty data associated with the mm still
 858	   in the cache - if this gets written out through natural eviction
 859	   after the kernel has reused the page there will be chaos.
 860	   */
 861
 862	sh64_dcache_purge_all();
 863
 864	/* The mm being torn down won't ever be active again, so any Icache
 865	   lines tagged with its ASID won't be visible for the rest of the
 866	   lifetime of this ASID cycle.  Before the ASID gets reused, there
 867	   will be a flush_cache_all.  Hence we don't need to touch the
 868	   I-cache.  This is similar to the lack of action needed in
 869	   flush_tlb_mm - see fault.c. */
 870}
 871
 872/****************************************************************************/
 873
 874void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 875		       unsigned long end)
 876{
 877	struct mm_struct *mm = vma->vm_mm;
 878
 879	/* Invalidate (from both caches) the range [start,end) of virtual
 880	   addresses from the user address space specified by mm, after writing
 881	   back any dirty data.
 882
 883	   Note(1), 'end' is 1 byte beyond the end of the range to flush.
 884
 885	   Note(2), this is called with mm->page_table_lock held.*/
 886
 887	sh64_dcache_purge_user_range(mm, start, end);
 888	sh64_icache_inv_user_page_range(mm, start, end);
 889}
 890
 891/****************************************************************************/
 892
 893void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
 894{
 895	/* Invalidate any entries in either cache for the vma within the user
 896	   address space vma->vm_mm for the page starting at virtual address
 897	   'eaddr'.   This seems to be used primarily in breaking COW.  Note,
 898	   the I-cache must be searched too in case the page in question is
 899	   both writable and being executed from (e.g. stack trampolines.)
 900
 901	   Note(1), this is called with mm->page_table_lock held.
 902	   */
 903
 904	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
 905
 906	if (vma->vm_flags & VM_EXEC) {
 907		sh64_icache_inv_user_page(vma, eaddr);
 908	}
 909}
 910
 911/****************************************************************************/
 912
 913#ifndef CONFIG_DCACHE_DISABLED
 914
 915void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
 916{
 917	/* 'from' and 'to' are kernel virtual addresses (within the superpage
 918	   mapping of the physical RAM).  'address' is the user virtual address
 919	   where the copy 'to' will be mapped after.  This allows a custom
 920	   mapping to be used to ensure that the new copy is placed in the
 921	   right cache sets for the user to see it without having to bounce it
 922	   out via memory.  Note however : the call to flush_page_to_ram in
 923	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
 924	   very important case!
 925
 926	   TBD : can we guarantee that on every call, any cache entries for
 927	   'from' are in the same colour sets as 'address' also?  i.e. is this
 928	   always used just to deal with COW?  (I suspect not). */
 929
 930	/* There are two possibilities here for when the page 'from' was last accessed:
 931	   * by the kernel : this is OK, no purge required.
 932	   * by the/a user (e.g. for break_COW) : need to purge.
 933
 934	   If the potential user mapping at 'address' is the same colour as
 935	   'from' there is no need to purge any cache lines from the 'from'
 936	   page mapped into cache sets of colour 'address'.  (The copy will be
 937	   accessing the page through 'from').
 938	   */
 939
 940	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
 941		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
 942	}
 943
 944	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
 945		/* No synonym problem on destination */
 946		sh64_page_copy(from, to);
 947	} else {
 948		sh64_copy_user_page_coloured(to, from, address);
 949	}
 950
 951	/* Note, don't need to flush 'from' page from the cache again - it's
 952	   done anyway by the generic code */
 953}
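/* Both colour checks above compare synonym bits: (address ^ kaddr) &
   CACHE_OC_SYN_MASK (with kaddr standing for 'from' or 'to') is non-zero
   exactly when the user mapping at 'address' and the kernel superpage mapping
   of the same physical page would index different cache sets, which is the only
   case in which stale aliased lines can exist on the source side or be created
   on the destination side. */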
 954
 955void clear_user_page(void *to, unsigned long address, struct page *page)
 956{
 957	/* 'to' is a kernel virtual address (within the superpage
 958	   mapping of the physical RAM).  'address' is the user virtual address
 959	   where the 'to' page will be mapped after.  This allows a custom
 960	   mapping to be used to ensure that the new copy is placed in the
 961	   right cache sets for the user to see it without having to bounce it
 962	   out via memory.
 963	*/
 964
 965	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
 966		/* No synonym problem on destination */
 967		sh64_page_clear(to);
 968	} else {
 969		sh64_clear_user_page_coloured(to, address);
 970	}
 971}
 972
 973#endif /* !CONFIG_DCACHE_DISABLED */
 974
 975/****************************************************************************/
 976
 977void flush_dcache_page(struct page *page)
 978{
 979	sh64_dcache_purge_phy_page(page_to_phys(page));
 980	wmb();
 981}
 982
 983/****************************************************************************/
 984
 985void flush_icache_range(unsigned long start, unsigned long end)
 986{
  987	/* Flush the range [start,end] of kernel virtual address space from
 988	   the I-cache.  The corresponding range must be purged from the
 989	   D-cache also because the SH-5 doesn't have cache snooping between
 990	   the caches.  The addresses will be visible through the superpage
  991	   mapping, therefore it's guaranteed that there are no cache entries for
 992	   the range in cache sets of the wrong colour.
 993
 994	   Primarily used for cohering the I-cache after a module has
 995	   been loaded.  */
 996
 997	/* We also make sure to purge the same range from the D-cache since
 998	   flush_page_to_ram() won't be doing this for us! */
 999
1000	sh64_dcache_purge_kernel_range(start, end);
1001	wmb();
1002	sh64_icache_inv_kernel_range(start, end);
1003}
1004
1005/****************************************************************************/
1006
1007void flush_icache_user_range(struct vm_area_struct *vma,
1008			struct page *page, unsigned long addr, int len)
1009{
1010	/* Flush the range of user (defined by vma->vm_mm) address space
1011	   starting at 'addr' for 'len' bytes from the cache.  The range does
1012	   not straddle a page boundary, the unique physical page containing
1013	   the range is 'page'.  This seems to be used mainly for invalidating
1014	   an address range following a poke into the program text through the
1015	   ptrace() call from another process (e.g. for BRK instruction
1016	   insertion). */
1017
1018	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
1019	mb();
1020
1021	if (vma->vm_flags & VM_EXEC) {
1022		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
1023	}
1024}
1025
1026/*##########################################################################
1027			ARCH/SH64 PRIVATE CALLABLE API.
1028  ##########################################################################*/
1029
1030void flush_cache_sigtramp(unsigned long start, unsigned long end)
1031{
1032	/* For the address range [start,end), write back the data from the
1033	   D-cache and invalidate the corresponding region of the I-cache for
1034	   the current process.  Used to flush signal trampolines on the stack
1035	   to make them executable. */
1036
1037	sh64_dcache_wback_current_user_range(start, end);
1038	wmb();
1039	sh64_icache_inv_current_user_range(start, end);
1040}
1041