diff options
author | Chris Zankel <chris@zankel.net> | 2016-08-10 18:36:44 +0300 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2016-08-15 18:46:38 -0400 |
commit | c978b52410016b0ab5a213f235596340af8d45f7 (patch) | |
tree | b01e9a8ea9a92fe962a545974339677b87dcc1ba /arch/xtensa/cpu/start.S | |
parent | de5e5cea022ab44006ff1edf45a39f0943fb9dff (diff) | |
download | u-boot-c978b52410016b0ab5a213f235596340af8d45f7.tar.gz u-boot-c978b52410016b0ab5a213f235596340af8d45f7.tar.xz u-boot-c978b52410016b0ab5a213f235596340af8d45f7.zip |
xtensa: add support for the xtensa processor architecture [2/2]
The Xtensa processor architecture is a configurable, extensible,
and synthesizable 32-bit RISC processor core provided by Tensilica, Inc.
This is the second part of the basic architecture port, adding the
'arch/xtensa' directory and a readme file.
Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Tom Rini <trini@konsulko.com>
Diffstat (limited to 'arch/xtensa/cpu/start.S')
-rw-r--r-- | arch/xtensa/cpu/start.S | 677 |
1 files changed, 677 insertions, 0 deletions
/*
 * arch/xtensa/cpu/start.S
 * (new file introduced by U-Boot commit c978b52410016b0ab5a213f235596340af8d45f7,
 * "xtensa: add support for the xtensa processor architecture [2/2]")
 *
 * (C) Copyright 2008 - 2013 Tensilica Inc.
 * (C) Copyright 2014 - 2016 Cadence Design Systems Inc.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <config.h>
#include <asm/asmmacro.h>
#include <asm/cacheasm.h>
#include <asm/regs.h>
#include <asm/arch/tie.h>
#include <asm-offsets.h>

/*
 * Offsets into the pt_regs structure.
 * Make sure these always match with the structure defined in ptrace.h!
 */

#define PT_PC		0
#define PT_PS		4
#define PT_DEPC		8
#define PT_EXCCAUSE	12
#define PT_EXCVADDR	16
#define PT_DEBUGCAUSE	20
#define PT_WMASK	24
#define PT_LBEG		28
#define PT_LEND		32
#define PT_LCOUNT	36
#define PT_SAR		40
#define PT_WINDOWBASE	44
#define PT_WINDOWSTART	48
#define PT_SYSCALL	52
#define PT_ICOUNTLEVEL	56
#define PT_RESERVED	60
#define PT_AREG		64
#define PT_SIZE		(64 + 64)

/*
 * Cache attributes are different for full MMU and region protection.
 */

#if XCHAL_HAVE_PTP_MMU
#define CA_WRITEBACK	(0x7)
#else
#define CA_WRITEBACK	(0x4)
#endif

/*
 * Reset vector.
 * Only a trampoline to jump to _start
 * (Note that we have to mark the section writable as the section contains
 * a relocatable literal)
 */

	.section .ResetVector.text, "awx"
	.global _ResetVector
_ResetVector:

	j	1f

	.align 4
2:	.long	_start
1:	l32r	a2, 2b
	jx	a2


/*
 * Processor initialization. We still run in rom space.
 *
 * NOTE: Running in ROM
 *  For Xtensa, we currently don't allow running some code from ROM but
 *  unpack the data immediately to memory. This requires, for example,
 *  that DDR has been set up before running U-Boot. (See also comments
 *  inline for ways to change it)
 */

	.section .reset.text, "ax"
	.global _start
	.align 4
_start:
	/* Keep a0 = 0 for various initializations */

	movi	a0, 0

	/*
	 * For full MMU cores, put page table at unmapped virtual address.
	 * This ensures that accesses outside the static maps result
	 * in miss exceptions rather than random behaviour.
	 */

#if XCHAL_HAVE_PTP_MMU
	wsr	a0, PTEVADDR
#endif

	/* Disable dbreak debug exceptions */

#if XCHAL_HAVE_DEBUG && XCHAL_NUM_DBREAK > 0
	.set	_index, 0
	.rept	XCHAL_NUM_DBREAK
	wsr	a0, DBREAKC + _index
	.set	_index, _index + 1
	.endr
#endif

	/* Reset windowbase and windowstart */

#if XCHAL_HAVE_WINDOWED
	movi	a3, 1
	wsr	a3, windowstart
	wsr	a0, windowbase
	rsync
	movi	a0, 0			/* windowbase might have changed */
#endif

	/*
	 * Vecbase in bitstream may differ from header files
	 * set or check it.
	 */

#if XCHAL_HAVE_VECBASE
	movi	a3, XCHAL_VECBASE_RESET_VADDR	/* VECBASE reset value */
	wsr	a3, VECBASE
#endif

#if XCHAL_HAVE_LOOPS
	/* Disable loops */

	wsr	a0, LCOUNT
#endif

	/* Set PS.WOE = 0, PS.EXCM = 0 (for loop), PS.INTLEVEL = EXCM level */

#if XCHAL_HAVE_XEA1
	movi	a2, 1
#else
	movi	a2, XCHAL_EXCM_LEVEL
#endif
	wsr	a2, PS
	rsync

	/* Unlock and invalidate caches */

	___unlock_dcache_all a2, a3
	___invalidate_dcache_all a2, a3
	___unlock_icache_all a2, a3
	___invalidate_icache_all a2, a3

	isync

	/* Unpack data sections */

	movi	a2, __reloc_table_start
	movi	a3, __reloc_table_end

1:	beq	a2, a3, 3f	# no more entries?
	l32i	a4, a2, 0	# start destination (in RAM)
	l32i	a5, a2, 4	# end destination (in RAM)
	l32i	a6, a2, 8	# start source (in ROM)
	addi	a2, a2, 12	# next entry
	beq	a4, a5, 1b	# skip, empty entry
	beq	a4, a6, 1b	# skip, source and destination are the same

	/* If there's memory protection option with 512MB TLB regions and
	 * cache attributes in TLB entries and caching is not inhibited,
	 * enable data/instruction cache for relocated image.
	 */
#if XCHAL_HAVE_SPANNING_WAY && \
	(!defined(CONFIG_SYS_DCACHE_OFF) || \
	 !defined(CONFIG_SYS_ICACHE_OFF))
	srli	a7, a4, 29
	slli	a7, a7, 29
	addi	a7, a7, XCHAL_SPANNING_WAY
#ifndef CONFIG_SYS_DCACHE_OFF
	rdtlb1	a8, a7
	srli	a8, a8, 4
	slli	a8, a8, 4
	addi	a8, a8, CA_WRITEBACK
	wdtlb	a8, a7
#endif
#ifndef CONFIG_SYS_ICACHE_OFF
	ritlb1	a8, a7
	srli	a8, a8, 4
	slli	a8, a8, 4
	addi	a8, a8, CA_WRITEBACK
	witlb	a8, a7
#endif
	isync
#endif

2:	l32i	a7, a6, 0
	addi	a6, a6, 4
	s32i	a7, a4, 0
	addi	a4, a4, 4
	bltu	a4, a5, 2b
	j	1b

3:	/* All code and initialized data segments have been copied */

	/* Setup PS, PS.WOE = 1, PS.EXCM = 0, PS.INTLEVEL = EXCM level. */

#if __XTENSA_CALL0_ABI__
	movi	a2, XCHAL_EXCM_LEVEL
#else
	movi	a2, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL
#endif
	wsr	a2, PS
	rsync

	/* Writeback */

	___flush_dcache_all a2, a3

#ifdef __XTENSA_WINDOWED_ABI__
	/*
	 * In windowed ABI caller and call target need to be within the same
	 * gigabyte. Put the rest of the code into the text segment and jump
	 * there.
	 */

	movi	a4, .Lboard_init_code
	jx	a4

	.text
	.align	4
.Lboard_init_code:
#endif

	movi	a0, 0
	movi	sp, (CONFIG_SYS_TEXT_ADDR - 16) & 0xfffffff0

#ifdef CONFIG_DEBUG_UART
	movi	a4, debug_uart_init
#ifdef __XTENSA_CALL0_ABI__
	callx0	a4
#else
	callx4	a4
#endif
#endif

	movi	a4, board_init_f_alloc_reserve

#ifdef __XTENSA_CALL0_ABI__
	mov	a2, sp
	callx0	a4
	mov	sp, a2
#else
	mov	a6, sp
	callx4	a4
	movsp	sp, a6
#endif

	movi	a4, board_init_f_init_reserve

#ifdef __XTENSA_CALL0_ABI__
	callx0	a4
#else
	callx4	a4
#endif

	/*
	 * Call board initialization routine (never returns).
	 */

	movi	a4, board_init_f

#ifdef __XTENSA_CALL0_ABI__
	movi	a2, 0
	callx0	a4
#else
	movi	a6, 0
	callx4	a4
#endif
	/* Never Returns */
	ill

/*
 * void relocate_code(addr_sp, gd, addr_moni)
 *
 * This "function" does not return, instead it continues in RAM
 * after relocating the monitor code.
 *
 * a2 = addr_sp
 * a3 = gd
 * a4 = destination address
 */
	.text
	.globl relocate_code
	.align 4
relocate_code:
	abi_entry

#ifdef __XTENSA_CALL0_ABI__
	mov	a1, a2
	mov	a2, a3
	mov	a3, a4
	movi	a0, board_init_r
	callx0	a0
#else
	/* We can't movsp here, because the chain of stack frames may cross
	 * the now reserved memory. We need to toss all window frames except
	 * the current, create new pristine stack frame and start from scratch.
	 */
	rsr	a0, windowbase
	ssl	a0
	movi	a0, 1
	sll	a0, a0
	wsr	a0, windowstart
	rsync

	movi	a0, 0

	/* Reserve 16-byte save area */
	addi	sp, a2, -16
	mov	a6, a3
	mov	a7, a4
	movi	a4, board_init_r
	callx4	a4
#endif
	ill

#if XCHAL_HAVE_EXCEPTIONS

/*
 * Exception vectors.
 *
 *  Various notes:
 *   - We currently don't use the user exception vector (PS.UM is always 0),
 *     but do define such a vector, just in case. They both jump to the
 *     same exception handler, though.
 *   - We currently only save the bare minimum number of registers:
 *     a0...a15, sar, loop-registers, exception register (epc1, excvaddr,
 *     exccause, depc)
 *   - WINDOWSTART is only saved to identify if registers have been spilled
 *     to the wrong stack (exception stack) while executing the exception
 *     handler.
 */

	.section .KernelExceptionVector.text, "ax"
	.global _KernelExceptionVector
_KernelExceptionVector:

	wsr	a2, EXCSAVE1
	movi	a2, ExceptionHandler
	jx	a2

	.section .UserExceptionVector.text, "ax"
	.global _UserExceptionVector
_UserExceptionVector:

	wsr	a2, EXCSAVE1
	movi	a2, ExceptionHandler
	jx	a2

#if !XCHAL_HAVE_XEA1
	.section .DoubleExceptionVector.text, "ax"
	.global _DoubleExceptionVector
_DoubleExceptionVector:

#ifdef __XTENSA_CALL0_ABI__
	wsr	a0, EXCSAVE1
	movi	a0, hang		# report and ask user to reset board
	callx0	a0
#else
	wsr	a4, EXCSAVE1
	movi	a4, hang		# report and ask user to reset board
	callx4	a4
#endif
#endif
	/* Does not return here */


	.text
	.align 4
ExceptionHandler:

	rsr	a2, EXCCAUSE	# find handler

#if XCHAL_HAVE_WINDOWED
	/* Special case for alloca handler */

	bnei	a2, 5, 1f	# jump if not alloca exception

	addi	a1, a1, -16 - 4	# create a small stack frame
	s32i	a3, a1, 0	# and save a3 (a2 still in excsave1)
	movi	a2, fast_alloca_exception
	jx	a2		# jump to fast_alloca_exception
#endif
	/* All other exceptions go here: */

	/* Create ptrace stack and save a0...a3 */

1:	addi	a2, a1, - PT_SIZE - 16
	s32i	a0, a2, PT_AREG + 0 * 4
	s32i	a1, a2, PT_AREG + 1 * 4
	s32i	a3, a2, PT_AREG + 3 * 4
	rsr	a3, EXCSAVE1
	s32i	a3, a2, PT_AREG + 2 * 4
	mov	a1, a2

	/* Save remaining AR registers */

	s32i	a4, a1, PT_AREG + 4 * 4
	s32i	a5, a1, PT_AREG + 5 * 4
	s32i	a6, a1, PT_AREG + 6 * 4
	s32i	a7, a1, PT_AREG + 7 * 4
	s32i	a8, a1, PT_AREG + 8 * 4
	s32i	a9, a1, PT_AREG + 9 * 4
	s32i	a10, a1, PT_AREG + 10 * 4
	s32i	a11, a1, PT_AREG + 11 * 4
	s32i	a12, a1, PT_AREG + 12 * 4
	s32i	a13, a1, PT_AREG + 13 * 4
	s32i	a14, a1, PT_AREG + 14 * 4
	s32i	a15, a1, PT_AREG + 15 * 4

	/* Save SRs */

#if XCHAL_HAVE_WINDOWED
	rsr	a2, WINDOWSTART
	s32i	a2, a1, PT_WINDOWSTART
#endif

	rsr	a2, SAR
	rsr	a3, EPC1
	rsr	a4, EXCVADDR
	s32i	a2, a1, PT_SAR
	s32i	a3, a1, PT_PC
	s32i	a4, a1, PT_EXCVADDR

#if XCHAL_HAVE_LOOPS
	movi	a2, 0
	rsr	a3, LBEG
	xsr	a2, LCOUNT
	s32i	a3, a1, PT_LBEG
	rsr	a3, LEND
	s32i	a2, a1, PT_LCOUNT
	s32i	a3, a1, PT_LEND
#endif

	/* Set up C environment and call registered handler */
	/* Setup stack, PS.WOE = 1, PS.EXCM = 0, PS.INTLEVEL = EXCM level. */

	rsr	a2, EXCCAUSE
#if XCHAL_HAVE_XEA1
	movi	a3, (1<<PS_WOE_BIT) | 1
#elif __XTENSA_CALL0_ABI__
	movi	a3, XCHAL_EXCM_LEVEL
#else
	movi	a3, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL
#endif
	xsr	a3, PS
	rsync
	s32i	a2, a1, PT_EXCCAUSE
	s32i	a3, a1, PT_PS

	movi	a0, exc_table
	addx4	a0, a2, a0
	l32i	a0, a0, 0
#ifdef __XTENSA_CALL0_ABI__
	mov	a2, a1		# Provide stack frame as only argument
	callx0	a0
	l32i	a3, a1, PT_PS
#else
	mov	a6, a1		# Provide stack frame as only argument
	callx4	a0
#endif

	/* Restore PS and go to exception mode (PS.EXCM=1) */

	wsr	a3, PS

	/* Restore SR registers */

#if XCHAL_HAVE_LOOPS
	l32i	a2, a1, PT_LBEG
	l32i	a3, a1, PT_LEND
	l32i	a4, a1, PT_LCOUNT
	wsr	a2, LBEG
	wsr	a3, LEND
	wsr	a4, LCOUNT
#endif

	l32i	a2, a1, PT_SAR
	l32i	a3, a1, PT_PC
	wsr	a2, SAR
	wsr	a3, EPC1

#if XCHAL_HAVE_WINDOWED
	/* Do we need to simulate a MOVSP? */

	l32i	a2, a1, PT_WINDOWSTART
	addi	a3, a2, -1
	and	a2, a2, a3
	beqz	a2, 1f		# Skip if regs were spilled before exc.

	rsr	a2, WINDOWSTART
	addi	a3, a2, -1
	and	a2, a2, a3
	bnez	a2, 1f		# Skip if registers aren't spilled now

	addi	a2, a1, -16
	l32i	a4, a2, 0
	l32i	a5, a2, 4
	s32i	a4, a1, PT_SIZE + 0
	s32i	a5, a1, PT_SIZE + 4
	l32i	a4, a2, 8
	l32i	a5, a2, 12
	s32i	a4, a1, PT_SIZE + 8
	s32i	a5, a1, PT_SIZE + 12
#endif

	/* Restore address registers */

1:	l32i	a15, a1, PT_AREG + 15 * 4
	l32i	a14, a1, PT_AREG + 14 * 4
	l32i	a13, a1, PT_AREG + 13 * 4
	l32i	a12, a1, PT_AREG + 12 * 4
	l32i	a11, a1, PT_AREG + 11 * 4
	l32i	a10, a1, PT_AREG + 10 * 4
	l32i	a9, a1, PT_AREG + 9 * 4
	l32i	a8, a1, PT_AREG + 8 * 4
	l32i	a7, a1, PT_AREG + 7 * 4
	l32i	a6, a1, PT_AREG + 6 * 4
	l32i	a5, a1, PT_AREG + 5 * 4
	l32i	a4, a1, PT_AREG + 4 * 4
	l32i	a3, a1, PT_AREG + 3 * 4
	l32i	a2, a1, PT_AREG + 2 * 4
	l32i	a0, a1, PT_AREG + 0 * 4

	l32i	a1, a1, PT_AREG + 1 * 4	# Remove ptrace stack frame

	rfe

#endif /* XCHAL_HAVE_EXCEPTIONS */

#if XCHAL_HAVE_WINDOWED

/*
 * Window overflow and underflow handlers.
 * The handlers must be 64 bytes apart, first starting with the underflow
 * handlers underflow-4 to underflow-12, then the overflow handlers
 * overflow-4 to overflow-12.
 *
 * Note: We rerun the underflow handlers if we hit an exception, so
 * we try to access any page that would cause a page fault early.
 */

	.section .WindowVectors.text, "ax"

/* 4-Register Window Overflow Vector (Handler) */

	.align 64
.global _WindowOverflow4
_WindowOverflow4:
	s32e	a0, a5, -16
	s32e	a1, a5, -12
	s32e	a2, a5, -8
	s32e	a3, a5, -4
	rfwo


/* 4-Register Window Underflow Vector (Handler) */

	.align 64
.global _WindowUnderflow4
_WindowUnderflow4:
	l32e	a0, a5, -16
	l32e	a1, a5, -12
	l32e	a2, a5, -8
	l32e	a3, a5, -4
	rfwu

/*
 * a0:	a0
 * a1:	new stack pointer = a1 - 16 - 4
 * a2:	available, saved in excsave1
 * a3:	available, saved on stack *a1
 */

/* 15*/	.byte	0xff

fast_alloca_exception:	/* must be at _WindowUnderflow4 + 16 */

/* 16*/	rsr	a2, PS
/* 19*/	rsr	a3, WINDOWBASE
/* 22*/	extui	a2, a2, PS_OWB_SHIFT, PS_OWB_SHIFT
/* 25*/	xor	a2, a2, a3
/* 28*/	rsr	a3, PS
/* 31*/	slli	a2, a2, PS_OWB_SHIFT
/* 34*/	xor	a2, a3, a2
/* 37*/	wsr	a2, PS

/* 40*/	_l32i	a3, a1, 0
/* 43*/	addi	a1, a1, 16 + 4
/* 46*/	rsr	a2, EXCSAVE1

/* 49*/	rotw	-1
/* 52*/	_bbci.l	a4, 31, _WindowUnderflow4	/* 0x: call4 */
/* 55*/	rotw	-1
/* 58*/	_bbci.l	a8, 30, _WindowUnderflow8	/* 10: call8 */
/* 61*/	_j	__WindowUnderflow12		/* 11: call12 */
/* 64*/

/* 8-Register Window Overflow Vector (Handler) */

	.align 64
.global _WindowOverflow8
_WindowOverflow8:
	s32e	a0, a9, -16
	l32e	a0, a1, -12
	s32e	a2, a9, -8
	s32e	a1, a9, -12
	s32e	a3, a9, -4
	s32e	a4, a0, -32
	s32e	a5, a0, -28
	s32e	a6, a0, -24
	s32e	a7, a0, -20
	rfwo

/* 8-Register Window Underflow Vector (Handler) */

	.align 64
.global _WindowUnderflow8
_WindowUnderflow8:
	l32e	a1, a9, -12
	l32e	a0, a9, -16
	l32e	a7, a1, -12
	l32e	a2, a9, -8
	l32e	a4, a7, -32
	l32e	a3, a9, -4
	l32e	a5, a7, -28
	l32e	a6, a7, -24
	l32e	a7, a7, -20
	rfwu

/* 12-Register Window Overflow Vector (Handler) */

	.align 64
.global _WindowOverflow12
_WindowOverflow12:
	s32e	a0, a13, -16
	l32e	a0, a1, -12
	s32e	a1, a13, -12
	s32e	a2, a13, -8
	s32e	a3, a13, -4
	s32e	a4, a0, -48
	s32e	a5, a0, -44
	s32e	a6, a0, -40
	s32e	a7, a0, -36
	s32e	a8, a0, -32
	s32e	a9, a0, -28
	s32e	a10, a0, -24
	s32e	a11, a0, -20
	rfwo

/* 12-Register Window Underflow Vector (Handler) */

	.org _WindowOverflow12 + 64 - 3
__WindowUnderflow12:
	rotw	-1
.global _WindowUnderflow12
_WindowUnderflow12:
	l32e	a1, a13, -12
	l32e	a0, a13, -16
	l32e	a11, a1, -12
	l32e	a2, a13, -8
	l32e	a4, a11, -48
	l32e	a8, a11, -32
	l32e	a3, a13, -4
	l32e	a5, a11, -44
	l32e	a6, a11, -40
	l32e	a7, a11, -36
	l32e	a9, a11, -28
	l32e	a10, a11, -24
	l32e	a11, a11, -20
	rfwu

#endif /* XCHAL_HAVE_WINDOWED */