summary | refs | log | tree | commit | diff | stats
path: root/arch/sh/kernel
diff options
context:
space:
mode:
author Stuart Menefy <stuart.menefy@st.com> 2006-11-24 11:42:24 +0900
committer Paul Mundt <lethal@linux-sh.org> 2006-12-06 10:45:38 +0900
commit 9b3a53ab76771e3669e50086c131e1574fe25847 (patch)
tree 07dab1cd3972c7b82ddd5b7ad1e28628d7756dbb /arch/sh/kernel
parent 9daa0c257d6c200b58092e0bfc32b77c4618a8af (diff)
download kernel-crypto-9b3a53ab76771e3669e50086c131e1574fe25847.tar.gz
kernel-crypto-9b3a53ab76771e3669e50086c131e1574fe25847.tar.xz
kernel-crypto-9b3a53ab76771e3669e50086c131e1574fe25847.zip
sh: TLB miss fast-path optimizations.
Handle simple TLB miss faults which can be resolved completely from the page table in assembler.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/kernel')
-rw-r--r-- arch/sh/kernel/cpu/sh3/entry.S 206
-rw-r--r-- arch/sh/kernel/cpu/sh4/probe.c 19
2 files changed, 197 insertions, 28 deletions
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 869d56fb7d6..5de99b49873 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -13,8 +13,10 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
-#include <asm/cpu/mmu_context.h>
#include <asm/unistd.h>
+#include <asm/cpu/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
! NOTE:
! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address
@@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store)
call_dpf:
mov.l 1f, r0
- mov r5, r8
- mov.l @r0, r6
- mov r6, r9
- mov.l 2f, r0
- sts pr, r10
- jsr @r0
- mov r15, r4
- !
- tst r0, r0
- bf/s 0f
- lds r10, pr
- rts
- nop
-0: sti
+ mov.l @r0, r6 ! address
mov.l 3f, r0
- mov r9, r6
- mov r8, r5
+ sti
jmp @r0
- mov r15, r4
+ mov r15, r4 ! regs
.align 2
1: .long MMU_TEA
-2: .long __do_page_fault
3: .long do_page_fault
.align 2
@@ -344,9 +331,176 @@ general_exception:
2: .long ret_from_exception
!
!
+
+/* This code makes some assumptions to improve performance.
+ * Make sure they are still true. */
+#if PTRS_PER_PGD != PTRS_PER_PTE
+#error PDG and PTE sizes don't match
+#endif
+
+/* gas doesn't flag impossible values for mov #immediate as an error */
+#if (_PAGE_PRESENT >> 2) > 0x7f
+#error cannot load PAGE_PRESENT as an immediate
+#endif
+#if _PAGE_DIRTY > 0x7f
+#error cannot load PAGE_DIRTY as an immediate
+#endif
+#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED
+#error cannot derive PAGE_ACCESSED from PAGE_PRESENT
+#endif
+
+#if defined(CONFIG_CPU_SH4)
+#define ldmmupteh(r) mov.l 8f, r
+#else
+#define ldmmupteh(r) mov #MMU_PTEH, r
+#endif
+
.balign 1024,0,1024
tlb_miss:
- mov.l 1f, k2
+#ifdef COUNT_EXCEPTIONS
+ ! Increment the counts
+ mov.l 9f, k1
+ mov.l @k1, k2
+ add #1, k2
+ mov.l k2, @k1
+#endif
+
+ ! k0 scratch
+ ! k1 pgd and pte pointers
+ ! k2 faulting address
+ ! k3 pgd and pte index masks
+ ! k4 shift
+
+ ! Load up the pgd entry (k1)
+
+ ldmmupteh(k0) ! 9 LS (latency=2) MMU_PTEH
+
+ mov.w 4f, k3 ! 8 LS (latency=2) (PTRS_PER_PGD-1) << 2
+ mov #-(PGDIR_SHIFT-2), k4 ! 6 EX
+
+ mov.l @(MMU_TEA-MMU_PTEH,k0), k2 ! 18 LS (latency=2)
+
+ mov.l @(MMU_TTB-MMU_PTEH,k0), k1 ! 18 LS (latency=2)
+
+ mov k2, k0 ! 5 MT (latency=0)
+ shld k4, k0 ! 99 EX
+
+ and k3, k0 ! 78 EX
+
+ mov.l @(k0, k1), k1 ! 21 LS (latency=2)
+ mov #-(PAGE_SHIFT-2), k4 ! 6 EX
+
+ ! Load up the pte entry (k2)
+
+ mov k2, k0 ! 5 MT (latency=0)
+ shld k4, k0 ! 99 EX
+
+ tst k1, k1 ! 86 MT
+
+ bt 20f ! 110 BR
+
+ and k3, k0 ! 78 EX
+ mov.w 5f, k4 ! 8 LS (latency=2) _PAGE_PRESENT
+
+ mov.l @(k0, k1), k2 ! 21 LS (latency=2)
+ add k0, k1 ! 49 EX
+
+#ifdef CONFIG_CPU_HAS_PTEA
+ ! Test the entry for present and _PAGE_ACCESSED
+
+ mov #-28, k3 ! 6 EX
+ mov k2, k0 ! 5 MT (latency=0)
+
+ tst k4, k2 ! 68 MT
+ shld k3, k0 ! 99 EX
+
+ bt 20f ! 110 BR
+
+ ! Set PTEA register
+ ! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
+ !
+ ! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT
+
+ and #0xe, k0 ! 79 EX
+
+ mov k0, k3 ! 5 MT (latency=0)
+ mov k2, k0 ! 5 MT (latency=0)
+
+ and #1, k0 ! 79 EX
+
+ or k0, k3 ! 82 EX
+
+ ldmmupteh(k0) ! 9 LS (latency=2)
+ shll2 k4 ! 101 EX _PAGE_ACCESSED
+
+ tst k4, k2 ! 68 MT
+
+ mov.l k3, @(MMU_PTEA-MMU_PTEH,k0) ! 27 LS
+
+ mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
+
+ ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
+#else
+
+ ! Test the entry for present and _PAGE_ACCESSED
+
+ mov.l 7f, k3 ! 9 LS (latency=2) _PAGE_FLAGS_HARDWARE_MASK
+ tst k4, k2 ! 68 MT
+
+ shll2 k4 ! 101 EX _PAGE_ACCESSED
+ ldmmupteh(k0) ! 9 LS (latency=2)
+
+ bt 20f ! 110 BR
+ tst k4, k2 ! 68 MT
+
+ ! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
+
+#endif
+
+ ! Set up the entry
+
+ and k2, k3 ! 78 EX
+ bt/s 10f ! 108 BR
+
+ mov.l k3, @(MMU_PTEL-MMU_PTEH,k0) ! 27 LS
+
+ ldtlb ! 128 CO
+
+ ! At least one instruction between ldtlb and rte
+ nop ! 119 NOP
+
+ rte ! 126 CO
+
+ nop ! 119 NOP
+
+
+10: or k4, k2 ! 82 EX
+
+ ldtlb ! 128 CO
+
+ ! At least one instruction between ldtlb and rte
+ mov.l k2, @k1 ! 27 LS
+
+ rte ! 126 CO
+
+ ! Note we cannot execute mov here, because it is executed after
+ ! restoring SSR, so would be executed in user space.
+ nop ! 119 NOP
+
+
+ .align 5
+ ! One cache line if possible...
+1: .long swapper_pg_dir
+4: .short (PTRS_PER_PGD-1) << 2
+5: .short _PAGE_PRESENT
+7: .long _PAGE_FLAGS_HARDWARE_MASK
+8: .long MMU_PTEH
+#ifdef COUNT_EXCEPTIONS
+9: .long exception_count_miss
+#endif
+
+ ! Either pgd or pte not present
+20: mov.l 1f, k2
mov.l 4f, k3
bra handle_exception
mov.l @k2, k2
@@ -496,6 +650,15 @@ skip_save:
bf interrupt_exception
shlr2 r8
shlr r8
+
+#ifdef COUNT_EXCEPTIONS
+ mov.l 5f, r9
+ add r8, r9
+ mov.l @r9, r10
+ add #1, r10
+ mov.l r10, @r9
+#endif
+
mov.l 4f, r9
add r8, r9
mov.l @r9, r9
@@ -509,6 +672,9 @@ skip_save:
2: .long 0x000080f0 ! FD=1, IMASK=15
3: .long 0xcfffffff ! RB=0, BL=0
4: .long exception_handling_table
+#ifdef COUNT_EXCEPTIONS
+5: .long exception_count_table
+#endif
interrupt_exception:
mov.l 1f, r9
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index c294de1e14a..afe0f1b1c03 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void)
case 0x205:
cpu_data->type = CPU_SH7750;
cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
- CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
+ CPU_HAS_PERF_COUNTER;
break;
case 0x206:
cpu_data->type = CPU_SH7750S;
cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
- CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
+ CPU_HAS_PERF_COUNTER;
break;
case 0x1100:
cpu_data->type = CPU_SH7751;
- cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
+ cpu_data->flags |= CPU_HAS_FPU;
break;
case 0x2000:
cpu_data->type = CPU_SH73180;
@@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void)
break;
case 0x8000:
cpu_data->type = CPU_ST40RA;
- cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
+ cpu_data->flags |= CPU_HAS_FPU;
break;
case 0x8100:
cpu_data->type = CPU_ST40GX1;
- cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
+ cpu_data->flags |= CPU_HAS_FPU;
break;
case 0x700:
cpu_data->type = CPU_SH4_501;
cpu_data->icache.ways = 2;
cpu_data->dcache.ways = 2;
- cpu_data->flags |= CPU_HAS_PTEA;
break;
case 0x600:
cpu_data->type = CPU_SH4_202;
cpu_data->icache.ways = 2;
cpu_data->dcache.ways = 2;
- cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
+ cpu_data->flags |= CPU_HAS_FPU;
break;
case 0x500 ... 0x501:
switch (prr) {
@@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void)
cpu_data->icache.ways = 2;
cpu_data->dcache.ways = 2;
- cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
+ cpu_data->flags |= CPU_HAS_FPU;
break;
default:
@@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void)
cpu_data->dcache.ways = 1;
#endif
+#ifdef CONFIG_CPU_HAS_PTEA
+ cpu_data->flags |= CPU_HAS_PTEA;
+#endif
+
/*
* On anything that's not a direct-mapped cache, look to the CVR
* for I/D-cache specifics.