From 06ef66081620e94fe35a518f98624b83a140096e Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:10:45 -0700 Subject: [IA64] __ia64_syscall() is no longer used anywhere in the kernel. Remove it. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 12 ------------ arch/ia64/kernel/ia64_ksyms.c | 3 --- 2 files changed, 15 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index bd86fea49a0..5b469db2326 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -470,18 +470,6 @@ ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) -GLOBAL_ENTRY(__ia64_syscall) - .regstk 6,0,0,0 - mov r15=in5 // put syscall number in place - break __BREAK_SYSCALL - movl r2=errno - cmp.eq p6,p7=-1,r10 - ;; -(p6) st4 [r2]=r8 -(p6) mov r8=-1 - br.ret.sptk.many rp -END(__ia64_syscall) - GLOBAL_ENTRY(execve) mov r15=__NR_execve // put syscall number in place break __BREAK_SYSCALL diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 7bbf019c986..01572814abe 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -58,9 +58,6 @@ EXPORT_SYMBOL(__strlen_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); -#include -EXPORT_SYMBOL(__ia64_syscall); - /* from arch/ia64/lib */ extern void __divsi3(void); extern void __udivsi3(void); -- cgit From 9ec1a7ad434fa951ff845dbee3397cf6ad4f76df Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:13:33 -0700 Subject: [IA64] Use dynamic prediction for RSE-clearing branches. This by itself is good for a 1-2 cycle speed up. Effect is bigger when combined with the later patches. 
Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 5b469db2326..c0f28339d58 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1012,14 +1012,14 @@ rse_clear_invalid: mov loc5=0 mov loc6=0 mov loc7=0 -(pRecurse) br.call.sptk.few b0=rse_clear_invalid +(pRecurse) br.call.dptk.few b0=rse_clear_invalid ;; mov loc8=0 mov loc9=0 cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret mov loc10=0 mov loc11=0 -(pReturn) br.ret.sptk.many b0 +(pReturn) br.ret.dptk.many b0 #endif /* !CONFIG_ITANIUM */ # undef pRecurse # undef pReturn -- cgit From 3c79c8b1d92a9ae3edf3cbcd2c43c553ee0f1d83 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:15:13 -0700 Subject: [IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp. Decreases syscall overhead by approximately 6 cycles. 
Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index c0f28339d58..0c84bed1bda 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -705,15 +705,15 @@ ENTRY(ia64_leave_syscall) // start restoring the state saved on the kernel stack (struct pt_regs): ld8 r9=[r2],PT(CR_IPSR)-PT(R9) ld8 r11=[r3],PT(CR_IIP)-PT(R11) - mov f6=f0 // clear f6 + nop.i 0 ;; invala // M0|1 invalidate ALAT rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection - mov f9=f0 // clear f9 + nop.i 0 ld8 r29=[r2],16 // load cr.ipsr ld8 r28=[r3],16 // load cr.iip - mov f8=f0 // clear f8 + mov r22=r0 // clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat @@ -721,15 +721,15 @@ ENTRY(ia64_leave_syscall) ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled - mov f10=f0 // clear f10 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc - mov f11=f0 // clear f11 + mov f6=f0 // clear f6 ;; ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) ld8 r31=[r3],PT(R1)-PT(PR) // load predicates -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + mov f7=f0 // clear f7 ;; ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr ld8.fill r1=[r3],16 // load r1 @@ -737,24 +737,29 @@ ENTRY(ia64_leave_syscall) ;; srlz.d // M0 ensure interruption collection is off ld8.fill r13=[r3],16 - mov f7=f0 // clear f7 + mov f8=f0 // clear f8 ;; ld8.fill r12=[r2] // restore r12 (sp) mov.m ar.ssd=r0 // M2 clear ar.ssd - mov r22=r0 // clear r22 + mov b6=r18 // I0 restore b6 + nop.m 0 + mov f9=f0 // clear f9 + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + ;; ld8.fill 
r15=[r3] // restore r15 (pUStk) st1 [r14]=r17 addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 ;; (pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 mov.m ar.csd=r0 // M2 clear ar.csd - mov b6=r18 // I0 restore b6 + mov f10=f0 // clear f10 ;; mov r14=r0 // clear r14 - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + mov f11=f0 // clear f11 (pKStk) br.cond.dpnt.many skip_rbs_switch + mov.m ar.ccv=r0 // clear ar.ccv (pNonSys) br.cond.dpnt.many dont_preserve_current_frame br.cond.sptk.many rbs_switch -- cgit From 96e017495e6833adcbad84783e3c5eb685836bbf Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:16:07 -0700 Subject: [IA64] On return from syscall, hint b7 with __kernel_syscall_via_epc(). Why is this a good idea? Clearing b7 to 0 is guaranteed to do us no good and writing it with __kernel_syscall_via_epc() yields a 6 cycle improvement _if_ the application performs another EPC-based system- call without overwriting b7, which is not all that uncommon. Well worth the minimal cost of 1 bundle of code. 
Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0c84bed1bda..6359d7ffbb7 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -692,7 +692,7 @@ ENTRY(ia64_leave_syscall) ;; (p6) ld4 r31=[r18] // load current_thread_info()->flags ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - mov b7=r0 // clear b7 + nop.i 0 ;; ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) ld8 r18=[r2],PT(R9)-PT(B6) // load b6 @@ -754,7 +754,14 @@ ENTRY(ia64_leave_syscall) (pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 mov.m ar.csd=r0 // M2 clear ar.csd mov f10=f0 // clear f10 + + nop.m 0 + movl r14=__kernel_syscall_via_epc // X ;; + nop.m 0 + nop.m 0 + mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) + mov r14=r0 // clear r14 mov f11=f0 // clear f11 (pKStk) br.cond.dpnt.many skip_rbs_switch -- cgit From 060561ff79b01eea58e6d72abfb8e7580ff21f2a Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:17:03 -0700 Subject: [IA64] In syscall-entry, use st8 instead of stf8 to clear pt_regs.r8 Using stf8 seemed like a clever idea at the time, but stf8 forces the cache-line to be invalidated in the L1D (if it happens to be there already). This patch eliminates a guaranteed L1D cache-miss and, by itself, is good for a 1-2 cycle improvement for heavy-weight syscalls. 
Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index d9c05d53435..dc78c356ddd 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -1,7 +1,7 @@ /* * arch/ia64/kernel/ivt.S * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co * Stephane Eranian * David Mosberger * Copyright (C) 2000, 2002-2003 Intel Co @@ -918,7 +918,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) tnat.nz p14,p0=in6 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 ;; - stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) + mov r8=1 (p9) tnat.nz p10,p0=r15 adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) @@ -929,9 +929,9 @@ GLOBAL_ENTRY(ia64_syscall_setup) mov r13=r2 // establish `current' movl r1=__gp // establish kernel global pointer ;; + st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) (p14) mov in6=-1 (p8) mov in7=-1 - nop.i 0 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 movl r17=FPSR_DEFAULT -- cgit From 87e522a0f7f8a7a5a1a880517989835c4f09c63e Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:17:44 -0700 Subject: [IA64] Schedule ia64_leave_syscall() to read ar.bsp earlier Reschedule code to read ar.bsp as early as possible. To enable this, don't bother clearing some of the registers when we're returning to kernel stacks. Also, instead of trying to support the pNonSys case (which makes no sense), do a bugcheck instead (with break 0). Finally, remove a clear of r14 which is a left-over from the previous patch. 
Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6359d7ffbb7..d97a07c77a4 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -694,22 +694,22 @@ ENTRY(ia64_leave_syscall) ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" nop.i 0 ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) + mov r16=ar.bsp // M2 get existing backing store pointer ld8 r18=[r2],PT(R9)-PT(B6) // load b6 (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? ;; - mov r16=ar.bsp // M2 get existing backing store pointer + ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? (p6) br.cond.spnt .work_pending_syscall ;; // start restoring the state saved on the kernel stack (struct pt_regs): ld8 r9=[r2],PT(CR_IPSR)-PT(R9) ld8 r11=[r3],PT(CR_IIP)-PT(R11) - nop.i 0 +(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! 
;; invala // M0|1 invalidate ALAT rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection - nop.i 0 + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs ld8 r29=[r2],16 // load cr.ipsr ld8 r28=[r3],16 // load cr.iip @@ -717,11 +717,11 @@ ENTRY(ia64_leave_syscall) ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + nop 0 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc @@ -735,40 +735,35 @@ ENTRY(ia64_leave_syscall) ld8.fill r1=[r3],16 // load r1 (pUStk) mov r17=1 ;; - srlz.d // M0 ensure interruption collection is off +(pUStk) st1 [r14]=r17 ld8.fill r13=[r3],16 mov f8=f0 // clear f8 ;; ld8.fill r12=[r2] // restore r12 (sp) - mov.m ar.ssd=r0 // M2 clear ar.ssd + ld8.fill r15=[r3] // restore r15 mov b6=r18 // I0 restore b6 - nop.m 0 + addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 mov f9=f0 // clear f9 +(pKStk) br.cond.dpnt.many skip_rbs_switch + + srlz.d // M0 ensure interruption collection is off shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + cover // B add current frame into dirty partition and set cr.ifs ;; - ld8.fill r15=[r3] // restore r15 -(pUStk) st1 [r14]=r17 - addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; -(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 - mov.m ar.csd=r0 // M2 clear ar.csd +(pUStk) ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 + mov r19=ar.bsp // M2 get new backing store pointer mov f10=f0 // clear f10 nop.m 0 movl r14=__kernel_syscall_via_epc // X ;; - nop.m 0 - nop.m 0 + mov.m ar.csd=r0 // M2 clear ar.csd + mov.m ar.ccv=r0 // clear ar.ccv mov b7=r14 // I0 clear b7 (hint 
with __kernel_syscall_via_epc) - mov r14=r0 // clear r14 + mov.m ar.ssd=r0 // M2 clear ar.ssd mov f11=f0 // clear f11 -(pKStk) br.cond.dpnt.many skip_rbs_switch - - - mov.m ar.ccv=r0 // clear ar.ccv -(pNonSys) br.cond.dpnt.many dont_preserve_current_frame br.cond.sptk.many rbs_switch END(ia64_leave_syscall) @@ -946,10 +941,10 @@ GLOBAL_ENTRY(ia64_leave_kernel) */ (pNonSys) br.cond.dpnt dont_preserve_current_frame -rbs_switch: cover // add current frame into dirty partition and set cr.ifs ;; mov r19=ar.bsp // get new backing store pointer +rbs_switch: sub r16=r16,r18 // krbs = old bsp - size of dirty partition cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs ;; -- cgit From c03f058fbf685f2ff630095d2c1e98d331b81e82 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:18:22 -0700 Subject: [IA64] In ia64_leave_syscall(), fix comments and whitespace only. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 79 ++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index d97a07c77a4..a7542c3d3b3 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -625,7 +625,7 @@ END(ia64_ret_from_syscall) * r8-r11: restored (syscall return value(s)) * r12: restored (user-level stack pointer) * r13: restored (user-level thread pointer) - * r14: cleared + * r14: set to __kernel_syscall_via_epc * r15: restored (syscall #) * r16-r17: cleared * r18: user-level b6 @@ -646,7 +646,7 @@ END(ia64_ret_from_syscall) * pr: restored (user-level pr) * b0: restored (user-level rp) * b6: restored - * b7: cleared + * b7: set to __kernel_syscall_via_epc * ar.unat: restored (user-level ar.unat) * ar.pfs: restored (user-level ar.pfs) * ar.rsc: restored (user-level ar.rsc) @@ -708,63 +708,63 @@ ENTRY(ia64_leave_syscall) (pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! 
;; invala // M0|1 invalidate ALAT - rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection + cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - ld8 r29=[r2],16 // load cr.ipsr - ld8 r28=[r3],16 // load cr.iip - mov r22=r0 // clear r22 + ld8 r29=[r2],16 // M0|1 load cr.ipsr + ld8 r28=[r3],16 // M0|1 load cr.iip + mov r22=r0 // A clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled nop 0 ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc - mov f6=f0 // clear f6 + ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 + ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc + mov f6=f0 // F clear f6 ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // load predicates - mov f7=f0 // clear f7 + ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) + ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates + mov f7=f0 // F clear f7 ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr - ld8.fill r1=[r3],16 // load r1 -(pUStk) mov r17=1 + ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr + ld8.fill r1=[r3],16 // M0|1 load r1 +(pUStk) mov r17=1 // A ;; -(pUStk) st1 [r14]=r17 - ld8.fill r13=[r3],16 - mov f8=f0 // clear f8 +(pUStk) st1 [r14]=r17 // M2|3 + ld8.fill r13=[r3],16 // M0|1 + mov f8=f0 // F clear f8 ;; - ld8.fill r12=[r2] // restore r12 (sp) - ld8.fill r15=[r3] // restore r15 - mov b6=r18 // I0 restore b6 + ld8.fill r12=[r2] // M0|1 restore r12 (sp) + ld8.fill r15=[r3] // M0|1 restore 
r15 + mov b6=r18 // I0 restore b6 - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 - mov f9=f0 // clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch + addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A + mov f9=f0 // F clear f9 +(pKStk) br.cond.dpnt.many skip_rbs_switch // B - srlz.d // M0 ensure interruption collection is off - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - cover // B add current frame into dirty partition and set cr.ifs + srlz.d // M0 ensure interruption collection is off (for cover) + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + cover // B add current frame into dirty partition & set cr.ifs ;; -(pUStk) ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // clear f10 +(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 + mov r19=ar.bsp // M2 get new backing store pointer + mov f10=f0 // F clear f10 nop.m 0 - movl r14=__kernel_syscall_via_epc // X + movl r14=__kernel_syscall_via_epc // X ;; - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) + mov.m ar.csd=r0 // M2 clear ar.csd + mov.m ar.ccv=r0 // M2 clear ar.ccv + mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // clear f11 - br.cond.sptk.many rbs_switch + mov.m ar.ssd=r0 // M2 clear ar.ssd + mov f11=f0 // F clear f11 + br.cond.sptk.many rbs_switch // B END(ia64_leave_syscall) #ifdef CONFIG_IA32_SUPPORT @@ -940,7 +940,6 @@ GLOBAL_ENTRY(ia64_leave_kernel) * NOTE: alloc, loadrs, and cover can't be predicated. 
*/ (pNonSys) br.cond.dpnt dont_preserve_current_frame - cover // add current frame into dirty partition and set cr.ifs ;; mov r19=ar.bsp // get new backing store pointer -- cgit From f8fa5448fc9b4a7806b1297a0b57808f12fe4d43 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:19:04 -0700 Subject: [IA64] Reschedule break_fault() for better performance. This patch reorganizes break_fault() to optimistically assume that a system-call is being performed from user-space (which is almost always the case). If it turns out that (a) we're not being called due to a system call or (b) we're being called from within the kernel, we fixup the no-longer-valid assumptions in non_syscall() and .break_fixup(), respectively. With this approach, there are 3 major phases: - Phase 1: Read various control & application registers, in particular the current task pointer from AR.K6. - Phase 2: Do all memory loads (load system-call entry, load current_thread_info()->flags, prefetch kernel register-backing store) and switch to kernel register-stack. - Phase 3: Call ia64_syscall_setup() and invoke syscall-handler. Good for 26-30 cycles of improvement on break-based syscall-path. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 173 ++++++++++++++++++++++++++++++------------------- 1 file changed, 106 insertions(+), 67 deletions(-) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index dc78c356ddd..386087edabf 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -687,82 +687,118 @@ ENTRY(break_fault) * to prevent leaking bits from kernel to user level. */ DBG_FAULT(11) - mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. 
- mov r17=cr.iim - mov r18=__IA64_BREAK_SYSCALL - mov r21=ar.fpsr - mov r29=cr.ipsr - mov r19=b6 - mov r25=ar.unat - mov r27=ar.rsc - mov r26=ar.pfs - mov r28=cr.iip - mov r31=pr // prepare to save predicates - mov r20=r1 - ;; + mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) + mov r29=cr.ipsr // M2 (12 cyc) + mov r31=pr // I0 (2 cyc) + + mov r17=cr.iim // M2 (2 cyc) + mov.m r27=ar.rsc // M2 (12 cyc) + mov r18=__IA64_BREAK_SYSCALL // A + + mov.m ar.rsc=0 // M2 + mov.m r21=ar.fpsr // M2 (12 cyc) + mov r19=b6 // I0 (2 cyc) + ;; + mov.m r23=ar.bspstore // M2 (12 cyc) + mov.m r24=ar.rnat // M2 (5 cyc) + mov.i r26=ar.pfs // I0 (2 cyc) + + invala // M0|1 + nop.m 0 // M + mov r20=r1 // A save r1 + + nop.m 0 + movl r30=sys_call_table // X + + mov r28=cr.iip // M2 (2 cyc) + cmp.eq p0,p7=r18,r17 // I0 is this a system call? +(p7) br.cond.spnt non_syscall // B no -> + // + // From this point on, we are definitely on the syscall-path + // and we can use (non-banked) scratch registers. + // +/////////////////////////////////////////////////////////////////////// + mov r1=r16 // A move task-pointer to "addl"-addressable reg + mov r2=r16 // A setup r2 for ia64_syscall_setup + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - cmp.eq p0,p7=r18,r17 // is this a system call? 
(p7 <- false, if so) -(p7) br.cond.spnt non_syscall + adds r15=-1024,r15 // A subtract 1024 from syscall number + mov r3=NR_syscalls - 1 ;; - ld1 r17=[r16] // load current->thread.on_ustack flag - st1 [r16]=r0 // clear current->thread.on_ustack flag - add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT + ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag + ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags + extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr + + shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) + addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS + cmp.leu p6,p7=r15,r3 // A syscall number in range? ;; - invala - /* adjust return address so we skip over the break instruction: */ + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS +(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point + tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? - extr.u r8=r29,41,2 // extract ei field from cr.ipsr - ;; - cmp.eq p6,p7=2,r8 // isr.ei==2? - mov r2=r1 // setup r2 for ia64_syscall_setup - ;; -(p6) mov r8=0 // clear ei to 0 -(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped -(p7) adds r8=1,r8 // increment ei to next slot - ;; - cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? - dep r29=r8,r29,41,2 // insert new ei into cr.ipsr + mov.m ar.bspstore=r22 // M2 switch to kernel RBS + cmp.eq p8,p9=2,r8 // A isr.ei==2? 
;; - // switch from user to kernel RBS: - MINSTATE_START_SAVE_MIN_VIRT - br.call.sptk.many b7=ia64_syscall_setup - ;; - MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - mov r3=NR_syscalls - 1 - ;; -(p15) ssm psr.i // restore psr.i - // p10==true means out registers are more than 8 or r15's Nat is true -(p10) br.cond.spnt.many ia64_ret_from_syscall - ;; - movl r16=sys_call_table +(p8) mov r8=0 // A clear ei to 0 +(p7) movl r30=sys_ni_syscall // X - adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 - movl r2=ia64_ret_from_syscall - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ? - mov rp=r2 // set the real return addr +(p8) adds r28=16,r28 // A switch cr.iip to next bundle +(p9) adds r8=1,r8 // A increment ei to next slot + nop.i 0 ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - add r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit + mov.m r25=ar.unat // M2 (5 cyc) + dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr + adds r15=1024,r15 // A restore original syscall number + // + // If any of the above loads miss in L1D, we'll stall here until + // the data arrives. + // +/////////////////////////////////////////////////////////////////////// + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + mov b6=r30 // I0 setup syscall handler branch reg early + cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? 
+ + and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit + mov r18=ar.bsp // M2 (12 cyc) +(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS + ;; +.back_from_break_fixup: +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack + cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? + br.call.sptk.many b7=ia64_syscall_setup // B +1: + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + nop 0 + bsw.1 // B (6 cyc) regs are saved, switch to bank 1 ;; - cmp.eq p8,p0=r2,r0 - mov b6=r20 + + ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection + movl r3=ia64_ret_from_syscall // X ;; -(p8) br.call.sptk.many b6=b6 // ignore this return addr - br.cond.sptk ia64_trace_syscall + + srlz.i // M0 ensure interruption collection is on + mov rp=r3 // I0 set the real return addr +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + +(p15) ssm psr.i // M2 restore psr.i +(p14) br.call.sptk.many b6=b6 // B invoke syscall-handler (ignore return addr) + br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic // NOT REACHED +/////////////////////////////////////////////////////////////////////// + // On entry, we optimistically assumed that we're coming from user-space. 
+ // For the rare cases where a system-call is done from within the kernel, + // we fix things up at this point: +.break_fixup: + add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure + mov ar.rnat=r24 // M2 restore kernel's AR.RNAT + ;; + mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE + br.cond.sptk .back_from_break_fixup END(break_fault) .org ia64_ivt+0x3000 @@ -837,8 +873,6 @@ END(interrupt) * - r31: saved pr * - b0: original contents (to be saved) * On exit: - * - executing on bank 1 registers - * - psr.ic enabled, interrupts restored * - p10: TRUE if syscall is invoked with more than 8 out * registers or r15's Nat is true * - r1: kernel's gp @@ -846,8 +880,11 @@ END(interrupt) * - r8: -EINVAL if p10 is true * - r12: points to kernel stack * - r13: points to current task + * - r14: preserved (same as on entry) + * - p13: preserved * - p15: TRUE if interrupts need to be re-enabled * - ar.fpsr: set to kernel settings + * - b6: preserved (same as on entry) */ GLOBAL_ENTRY(ia64_syscall_setup) #if PT(B6) != 0 @@ -915,7 +952,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) (p13) mov in5=-1 ;; st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p14,p0=in6 + tnat.nz p13,p0=in6 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 ;; mov r8=1 @@ -930,7 +967,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) movl r1=__gp // establish kernel global pointer ;; st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p14) mov in6=-1 +(p13) mov in6=-1 (p8) mov in7=-1 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 @@ -1002,6 +1039,8 @@ END(dispatch_illegal_op_fault) FAULT(17) ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; SAVE_MIN_WITH_COVER // There is no particular reason for this code to be here, other than that -- cgit From 70929a57cfea8c18de13fcea9ae6771018a98949 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:19:37 -0700 Subject: [IA64] Reschedule 
__kernel_syscall_via_epc(). Avoid some stalls, which is good for about 2 cycles when invoking a light-weight handler. When invoking a heavy-weight handler, this helps by about 7 cycles, with most of the improvement coming from the improved branch-prediction achieved by splitting the BBB bundle into two MIB bundles. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/gate.S | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index facf75acdc8..3cd3f2e971f 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -79,31 +79,34 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) ;; rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be" LOAD_FSYSCALL_TABLE(r14) - + ;; mov r16=IA64_KR(CURRENT) // 12 cycle read latency - tnat.nz p10,p9=r15 + shladd r18=r17,3,r14 mov r19=NR_syscalls-1 ;; - shladd r18=r17,3,r14 - - srlz.d - cmp.ne p8,p0=r0,r0 // p8 <- FALSE + lfetch [r18] // M0|1 + mov r29=psr // read psr (12 cyc load latency) /* Note: if r17 is a NaT, p6 will be set to zero. */ cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)? ;; -(p6) ld8 r18=[r18] mov r21=ar.fpsr - add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + tnat.nz p10,p9=r15 + mov r26=ar.pfs ;; + srlz.d +(p6) ld8 r18=[r18] + nop.i 0 + ;; + nop.m 0 (p6) mov b7=r18 -(p6) tbit.z p8,p0=r18,0 +(p6) tbit.z.unc p8,p0=r18,0 + + nop.m 0 + nop.i 0 (p8) br.dptk.many b7 -(p6) rsm psr.i mov r27=ar.rsc - mov r26=ar.pfs - ;; - mov r29=psr // read psr (12 cyc load latency) +(p6) rsm psr.i /* * brl.cond doesn't work as intended because the linker would convert this branch * into a branch to a PLT. Perhaps there will be a way to avoid this with some @@ -111,6 +114,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) * instead. 
*/ #ifdef CONFIG_ITANIUM + add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; (p6) ld8 r14=[r14] // r14 <- fsys_bubble_down ;; (p6) mov b7=r14 -- cgit From 21bc4f9b34cc1eab3610955207f72c52495ae8ed Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:20:11 -0700 Subject: [IA64] Annotate __kernel_syscall_via_epc() with McKinley dispatch info. Two other very minor changes: use "mov.i" instead of "mov" for reading ar.pfs (for clarity; doesn't affect the code at all). Also, predicate the load of r14 for consistency. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/gate.S | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 3cd3f2e971f..272e64c0e21 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -72,41 +72,41 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) * bundle get executed. The remaining code must be safe even if * they do not get executed. */ - adds r17=-1024,r15 - mov r10=0 // default to successful syscall execution - epc + adds r17=-1024,r15 // A + mov r10=0 // A default to successful syscall execution + epc // B causes split-issue } ;; - rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be" - LOAD_FSYSCALL_TABLE(r14) + rsm psr.be // M2 (5 cyc to srlz.d) + LOAD_FSYSCALL_TABLE(r14) // X ;; - mov r16=IA64_KR(CURRENT) // 12 cycle read latency - shladd r18=r17,3,r14 - mov r19=NR_syscalls-1 + mov r16=IA64_KR(CURRENT) // M2 (12 cyc) + shladd r18=r17,3,r14 // A + mov r19=NR_syscalls-1 // A ;; lfetch [r18] // M0|1 - mov r29=psr // read psr (12 cyc load latency) - /* Note: if r17 is a NaT, p6 will be set to zero. */ - cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)? + mov r29=psr // M2 (12 cyc) + // If r17 is a NaT, p6 will be zero + cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? 
;; - mov r21=ar.fpsr - tnat.nz p10,p9=r15 - mov r26=ar.pfs + mov r21=ar.fpsr // M2 (12 cyc) + tnat.nz p10,p9=r15 // I0 + mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) ;; - srlz.d -(p6) ld8 r18=[r18] + srlz.d // M0 (forces split-issue) ensure PSR.BE==0 +(p6) ld8 r18=[r18] // M0|1 nop.i 0 ;; nop.m 0 -(p6) mov b7=r18 -(p6) tbit.z.unc p8,p0=r18,0 +(p6) mov b7=r18 // I0 +(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) nop.m 0 nop.i 0 -(p8) br.dptk.many b7 +(p8) br.dptk.many b7 // B - mov r27=ar.rsc -(p6) rsm psr.i + mov r27=ar.rsc // M2 (12 cyc) +(p6) rsm psr.i // M2 /* * brl.cond doesn't work as intended because the linker would convert this branch * into a branch to a PLT. Perhaps there will be a way to avoid this with some @@ -114,7 +114,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) * instead. */ #ifdef CONFIG_ITANIUM - add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry +(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry ;; (p6) ld8 r14=[r14] // r14 <- fsys_bubble_down ;; -- cgit From 1ba7be7d691f6df2557d39c5b1a2e14c32e5dd20 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:20:51 -0700 Subject: [IA64] Reschedule fsys_bubble_down(). Improvements come from eliminating srlz.i, not scheduling AR/CR-reads too early (while there are others still pending), scheduling the backing-store switch as well as possible, splitting the BBB bundle into a MIB/MBB pair. Why is it safe to eliminate the srlz.i? Observe that we used to clear bits ~PSR_PRESERVED_BITS in PSR.L. Since PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. 
However, PSR.BE : already is turned off in __kernel_syscall_via_epc() PSR.AC : don't care (kernel normally turns PSR.AC on) PSR.I : already turned off by the time fsys_bubble_down gets invoked PSR.DFL: always 0 (kernel never turns it on) PSR.DFH: don't care --- kernel never touches f32-f127 on its own initiative PSR.DI : always 0 (kernel never turns it on) PSR.SI : always 0 (kernel never turns it on) PSR.DB : don't care --- kernel never enables kernel-level breakpoints PSR.TB : must be 0 already; if it wasn't zero on entry to __kernel_syscall_via_epc, the branch to fsys_bubble_down will trigger a taken branch; the taken-trap-handler then converts the syscall into a break-based system-call. In other words: all the bits we're clearing are either 0 already or are don't cares! Thus, we don't have to write PSR.L at all and we don't have to do a srlz.i either. Good for another ~20 cycle improvement for EPC-based heavy-weight syscalls. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/fsys.S | 73 ++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 0d8650f7fce..57c6556b1e0 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -549,9 +549,6 @@ GLOBAL_ENTRY(fsys_bubble_down) * - r27: ar.rsc * - r29: psr */ -# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \ - | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \ - | IA64_PSR_IC) /* * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have * to synthesize. 
@@ -560,62 +557,58 @@ GLOBAL_ENTRY(fsys_bubble_down) | IA64_PSR_BN | IA64_PSR_I) invala - movl r8=PSR_ONE_BITS + movl r14=ia64_ret_from_syscall - mov r25=ar.unat // save ar.unat (5 cyc) - movl r9=PSR_PRESERVED_BITS - - mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 + nop.m 0 movl r28=__kernel_syscall_via_break ;; - mov r23=ar.bspstore // save ar.bspstore (12 cyc) - mov r31=pr // save pr (2 cyc) - mov r20=r1 // save caller's gp in r20 - ;; + mov r2=r16 // copy current task addr to addl-addressable register - and r9=r9,r29 - mov r19=b6 // save b6 (2 cyc) + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 + mov r31=pr // save pr (2 cyc) ;; - mov psr.l=r9 // slam the door (17 cyc to srlz.i) - or r29=r8,r29 // construct cr.ipsr value to save + st1 [r16]=r0 // clear current->thread.on_ustack flag addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 ;; - // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks - // we may be reading ar.itc after writing to psr.l. 
Avoid that message with - // this directive: - dv_serialize_data - mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) + ld4 r3=[r3] // r2 = current_thread_info()->flags lfetch.fault.excl.nt1 [r22] - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 - - // ensure previous insn group is issued before we stall for srlz.i: + nop.i 0 ;; - srlz.i // ensure new psr.l has been established - ///////////////////////////////////////////////////////////////////////////// - ////////// from this point on, execution is not interruptible anymore - ///////////////////////////////////////////////////////////////////////////// - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack - cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 + mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 + nop.m 0 + nop.i 0 + ;; + mov r23=ar.bspstore // save ar.bspstore (12 cyc) + mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) + nop.i 0 ;; - st1 [r16]=r0 // clear current->thread.on_ustack flag mov ar.bspstore=r22 // switch to kernel RBS - mov b6=r18 // copy syscall entry-point to b6 (7 cyc) - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 + movl r8=PSR_ONE_BITS // X ;; - ld4 r3=[r3] // r2 = current_thread_info()->flags + mov r25=ar.unat // save ar.unat (5 cyc) + mov r19=b6 // save b6 (2 cyc) + mov r20=r1 // save caller's gp in r20 + ;; + or r29=r8,r29 // construct cr.ipsr value to save + mov b6=r18 // copy syscall entry-point to b6 (7 cyc) + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack + mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) - mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 + cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup ;; - ssm psr.i - movl r2=ia64_ret_from_syscall + mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 + mov rp=r14 // set the real return addr + nop.i 0 ;; - mov rp=r2 // set the real return addr + ssm psr.i tbit.z p8,p0=r3,TIF_SYSCALL_TRACE - ;; 
(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 + + nop.m 0 (p8) br.call.sptk.many b6=b6 // ignore this return addr - br.cond.sptk ia64_trace_syscall + br.cond.spnt ia64_trace_syscall END(fsys_bubble_down) .rodata -- cgit From fbf7192ba06e4b55022231e5fdb9db8ce3ad6307 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:21:26 -0700 Subject: [IA64] Annotate fsys_bubble_down() with McKinley dispatch info. This patch changes comments & formatting only. There is no code change. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/fsys.S | 118 ++++++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 44 deletions(-) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 57c6556b1e0..d09a5b8a098 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -531,84 +531,114 @@ GLOBAL_ENTRY(fsys_bubble_down) .altrp b6 .body /* - * We get here for syscalls that don't have a lightweight handler. For those, we - * need to bubble down into the kernel and that requires setting up a minimal - * pt_regs structure, and initializing the CPU state more or less as if an - * interruption had occurred. To make syscall-restarts work, we setup pt_regs - * such that cr_iip points to the second instruction in syscall_via_break. - * Decrementing the IP hence will restart the syscall via break and not - * decrementing IP will return us to the caller, as usual. Note that we preserve - * the value of psr.pp rather than initializing it from dcr.pp. This makes it - * possible to distinguish fsyscall execution from other privileged execution. + * We get here for syscalls that don't have a lightweight + * handler. For those, we need to bubble down into the kernel + * and that requires setting up a minimal pt_regs structure, + * and initializing the CPU state more or less as if an + * interruption had occurred. 
To make syscall-restarts work, + * we setup pt_regs such that cr_iip points to the second + * instruction in syscall_via_break. Decrementing the IP + * hence will restart the syscall via break and not + * decrementing IP will return us to the caller, as usual. + * Note that we preserve the value of psr.pp rather than + * initializing it from dcr.pp. This makes it possible to + * distinguish fsyscall execution from other privileged + * execution. * * On entry: - * - normal fsyscall handler register usage, except that we also have: + * - normal fsyscall handler register usage, except + * that we also have: * - r18: address of syscall entry point * - r21: ar.fpsr * - r26: ar.pfs * - r27: ar.rsc * - r29: psr + * + * We used to clear some PSR bits here but that requires slow + * serialization. Fortuntely, that isn't really necessary. + * The rationale is as follows: we used to clear bits + * ~PSR_PRESERVED_BITS in PSR.L. Since + * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we + * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. + * However, + * + * PSR.BE : already is turned off in __kernel_syscall_via_epc() + * PSR.AC : don't care (kernel normally turns PSR.AC on) + * PSR.I : already turned off by the time fsys_bubble_down gets + * invoked + * PSR.DFL: always 0 (kernel never turns it on) + * PSR.DFH: don't care --- kernel never touches f32-f127 on its own + * initiative + * PSR.DI : always 0 (kernel never turns it on) + * PSR.SI : always 0 (kernel never turns it on) + * PSR.DB : don't care --- kernel never enables kernel-level + * breakpoints + * PSR.TB : must be 0 already; if it wasn't zero on entry to + * __kernel_syscall_via_epc, the branch to fsys_bubble_down + * will trigger a taken branch; the taken-trap-handler then + * converts the syscall into a break-based system-call. */ /* - * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have - * to synthesize. + * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. 
+ * The rest we have to synthesize. */ -# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \ +# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ + | (0x1 << IA64_PSR_RI_BIT) \ | IA64_PSR_BN | IA64_PSR_I) - invala - movl r14=ia64_ret_from_syscall + invala // M0|1 + movl r14=ia64_ret_from_syscall // X nop.m 0 - movl r28=__kernel_syscall_via_break + movl r28=__kernel_syscall_via_break // X create cr.iip ;; - mov r2=r16 // copy current task addr to addl-addressable register - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - mov r31=pr // save pr (2 cyc) + mov r2=r16 // A get task addr to addl-addressable register + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A + mov r31=pr // I0 save pr (2 cyc) ;; - st1 [r16]=r0 // clear current->thread.on_ustack flag - addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A ;; - ld4 r3=[r3] // r2 = current_thread_info()->flags - lfetch.fault.excl.nt1 [r22] + ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store nop.i 0 ;; - mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 + mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 nop.m 0 nop.i 0 ;; - mov r23=ar.bspstore // save ar.bspstore (12 cyc) - mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) + mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore + mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) 
nop.i 0 ;; - mov ar.bspstore=r22 // switch to kernel RBS + mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS movl r8=PSR_ONE_BITS // X ;; - mov r25=ar.unat // save ar.unat (5 cyc) - mov r19=b6 // save b6 (2 cyc) - mov r20=r1 // save caller's gp in r20 + mov r25=ar.unat // M2 (5 cyc) save ar.unat + mov r19=b6 // I0 save b6 (2 cyc) + mov r20=r1 // A save caller's gp in r20 ;; - or r29=r8,r29 // construct cr.ipsr value to save - mov b6=r18 // copy syscall entry-point to b6 (7 cyc) - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack + or r29=r8,r29 // A construct cr.ipsr value to save + mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack - mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) - cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 - br.call.sptk.many b7=ia64_syscall_setup + mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) + cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 + br.call.sptk.many b7=ia64_syscall_setup // B ;; - mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 - mov rp=r14 // set the real return addr + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + mov rp=r14 // I0 set the real return addr nop.i 0 ;; - ssm psr.i - tbit.z p8,p0=r3,TIF_SYSCALL_TRACE -(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 + ssm psr.i // M2 we're on kernel stacks now, reenable irqs + tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // I0 +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT nop.m 0 -(p8) br.call.sptk.many b6=b6 // ignore this return addr - br.cond.spnt ia64_trace_syscall +(p8) br.call.sptk.many b6=b6 // B (ignore return address) + br.cond.spnt ia64_trace_syscall // B END(fsys_bubble_down) .rodata -- cgit From e7e965fa1961a8ce32cbbb1bd436c655ad03973e Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:22:08 -0700 
Subject: [IA64] use srlz.d instead of srlz.i in ia64_leave_kernel() This patch switches the srlz.i in ia64_leave_kernel() to srlz.d. As per the architecture manual, the former is needed only to ensure that the clearing of PSR.IC is seen by the VHPT for subsequent instruction fetches. However, since the remainder of the code (up to and including the RFI instruction) is mapped by a pinned TLB entry, there is no chance of an iTLB miss and we don't care whether or not the VHPT sees PSR.IC cleared. Since srlz.d is substantially cheaper than srlz.i, this should shave a few cycles off the interrupt path (unverified though; I'm not set up to measure this at the moment). Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index a7542c3d3b3..4517d4ab5ef 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -880,7 +880,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ldf.fill f7=[r2],PT(F11)-PT(F7) ldf.fill f8=[r3],32 ;; - srlz.i // ensure interruption collection is off + srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) mov ar.ccv=r15 ;; ldf.fill f11=[r2] -- cgit From 8e3e50168c8537807b7a6f78588cd72e21363262 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 27 Apr 2005 21:22:40 -0700 Subject: [IA64] need r29=psr *after* rsm psr.i Yanmin Zhang pointed out a sequence problem when saving the psr. David Mosberger provided this patch (which gave up a cycle).
Signed-off-by: Tony Luck --- arch/ia64/kernel/gate.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 272e64c0e21..86948ce63e4 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -77,7 +77,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) epc // B causes split-issue } ;; - rsm psr.be // M2 (5 cyc to srlz.d) + rsm psr.be | psr.i // M2 (5 cyc to srlz.d) LOAD_FSYSCALL_TABLE(r14) // X ;; mov r16=IA64_KR(CURRENT) // M2 (12 cyc) @@ -98,15 +98,14 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) nop.i 0 ;; nop.m 0 -(p6) mov b7=r18 // I0 (p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - - nop.m 0 nop.i 0 + ;; +(p8) ssm psr.i +(p6) mov b7=r18 // I0 (p8) br.dptk.many b7 // B mov r27=ar.rsc // M2 (12 cyc) -(p6) rsm psr.i // M2 /* * brl.cond doesn't work as intended because the linker would convert this branch * into a branch to a PLT. Perhaps there will be a way to avoid this with some @@ -123,7 +122,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) #else BRL_COND_FSYS_BUBBLE_DOWN(p6) #endif - + ssm psr.i mov r10=-1 (p10) mov r8=EINVAL (p9) mov r8=ENOSYS -- cgit From ebcc80c1b6629a445f7471cc1ddb48faf8a84e70 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Thu, 5 May 2005 06:40:00 -0700 Subject: [IA64] Merge audit fix for fsyscalls with syscall-optimizations Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/fsys.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index d09a5b8a098..f566ff43a38 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -630,10 +630,10 @@ GLOBAL_ENTRY(fsys_bubble_down) ;; mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 mov rp=r14 // I0 set the real return addr - nop.i 0 + and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A ;; ssm psr.i // M2 we're on kernel stacks now, reenable irqs - tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // I0 + 
cmp.eq p8,p0=r3,r0 // A (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT nop.m 0 -- cgit From 7f9eaedf894dbaa08c157832e9a6c9c03ffed1ed Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Tue, 10 May 2005 12:49:00 -0700 Subject: [IA64] Fix convert_to_non_syscall() so gdb inferior calls work again Fix convert_to_non_syscall() so it arranges for the kernel to be left via ia64_leave_kernel() rather than ia64_leave_syscall(). The latter no longer tolerates being called with pSys=0 and pNonSys=1. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index c253fd5914f..9e730c7bf0c 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -705,12 +705,32 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, break; } + /* + * Note: at the time of this call, the target task is blocked + * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL + * (aka, "pLvSys") we redirect execution from + * .work_pending_syscall_end to .work_processed_kernel. + */ unw_get_pr(&prev_info, &pr); - pr &= ~(1UL << PRED_SYSCALL); + pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL)); pr |= (1UL << PRED_NON_SYSCALL); unw_set_pr(&prev_info, pr); pt->cr_ifs = (1UL << 63) | cfm; + /* + * Clear the memory that is NOT written on syscall-entry to + * ensure we do not leak kernel-state to user when execution + * resumes. + */ + pt->r2 = 0; + pt->r3 = 0; + pt->r14 = 0; + memset(&pt->r16, 0, 16*8); /* clear r16-r31 */ + memset(&pt->f6, 0, 6*16); /* clear f6-f11 */ + pt->b7 = 0; + pt->ar_ccv = 0; + pt->ar_csd = 0; + pt->ar_ssd = 0; } static int -- cgit