X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=arch%2Friscv%2Fcpu%2Fstart.S;h=ecf0482635b07fc410cac94dd4c0d4c375a93e61;hb=84dc9d26908798c7e9ee5469965c16653593fde5;hp=7cd7755190ce6a7569a900ff05d9d71c9802df36;hpb=14573fb78fef99266f7691ac9af2ddb388f9491f;p=oweals%2Fu-boot.git diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S index 7cd7755190..ecf0482635 100644 --- a/arch/riscv/cpu/start.S +++ b/arch/riscv/cpu/start.S @@ -14,75 +14,208 @@ #include #include #include +#include #ifdef CONFIG_32BIT -#define LREG lw -#define SREG sw -#define REGBYTES 4 +#define LREG lw +#define SREG sw +#define REGBYTES 4 #define RELOC_TYPE R_RISCV_32 #define SYM_INDEX 0x8 #define SYM_SIZE 0x10 #else -#define LREG ld -#define SREG sd -#define REGBYTES 8 +#define LREG ld +#define SREG sd +#define REGBYTES 8 #define RELOC_TYPE R_RISCV_64 #define SYM_INDEX 0x20 #define SYM_SIZE 0x18 #endif -.section .text +.section .data +secondary_harts_relocation_error: + .ascii "Relocation of secondary harts has failed, error %d\n" + +.section .text .globl _start _start: - j handle_reset +#if CONFIG_IS_ENABLED(RISCV_MMODE) + csrr a0, CSR_MHARTID +#endif -nmi_vector: - j nmi_vector + /* save hart id and dtb pointer */ + mv tp, a0 + mv s1, a1 -trap_vector: - j trap_entry + la t0, trap_entry + csrw MODE_PREFIX(tvec), t0 -.global trap_entry -handle_reset: - li t0, CONFIG_SYS_SDRAM_BASE - SREG a2, 0(t0) - la t0, trap_entry - csrw mtvec, t0 - csrwi mstatus, 0 - csrwi mie, 0 + /* mask all interrupts */ + csrw MODE_PREFIX(ie), zero -/* - * Do CPU critical regs init only at reboot, - * not when booting from ram - */ -#ifdef CONFIG_INIT_CRITICAL - jal cpu_init_crit /* Do CPU critical regs init */ +#ifdef CONFIG_SMP + /* check if hart is within range */ + /* tp: hart id */ + li t0, CONFIG_NR_CPUS + bge tp, t0, hart_out_of_bounds_loop + + /* set xSIE bit to receive IPIs */ +#if CONFIG_IS_ENABLED(RISCV_MMODE) + li t0, MIE_MSIE +#else + li t0, SIE_SSIE +#endif + csrs MODE_PREFIX(ie), t0 #endif /* * Set stackpointer in internal/ex RAM to call board_init_f */ call_board_init_f: - li t0, -16 - li t1, CONFIG_SYS_INIT_SP_ADDR - and sp, t1, t0 /* force 16 byte alignment */ - -#ifdef CONFIG_DEBUG_UART - jal debug_uart_init + li t0, -16 +#if defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK) + li t1, CONFIG_SPL_STACK +#else + li t1, CONFIG_SYS_INIT_SP_ADDR #endif + and sp, t1, t0 /* force 16 byte alignment */ call_board_init_f_0: mv a0, sp jal board_init_f_alloc_reserve + + /* + * Set global data pointer here for all harts, uninitialized at this + * point. + */ + mv gp, a0 + + /* setup stack */ +#ifdef CONFIG_SMP + /* tp: hart id */ + slli t0, tp, CONFIG_STACK_SIZE_SHIFT + sub sp, a0, t0 +#else mv sp, a0 +#endif + +#ifndef CONFIG_XIP + /* + * Pick hart to initialize global data and run U-Boot. The other harts + * wait for initialization to complete. + */ + la t0, hart_lottery + li s2, 1 + amoswap.w s2, t1, 0(t0) + bnez s2, wait_for_gd_init +#else + bnez tp, secondary_hart_loop +#endif + +#ifdef CONFIG_OF_PRIOR_STAGE + la t0, prior_stage_fdt_address + SREG s1, 0(t0) +#endif + jal board_init_f_init_reserve - mv a0, zero /* a0 <-- boot_flags = 0 */ - la t5, board_init_f - jr t5 /* jump to board_init_f() */ + /* save the boot hart id to global_data */ + SREG tp, GD_BOOT_HART(gp) + +#ifndef CONFIG_XIP + la t0, available_harts_lock + fence rw, w + amoswap.w zero, zero, 0(t0) + +wait_for_gd_init: + la t0, available_harts_lock + li t1, 1 +1: amoswap.w t1, t1, 0(t0) + fence r, rw + bnez t1, 1b + + /* register available harts in the available_harts mask */ + li t1, 1 + sll t1, t1, tp + LREG t2, GD_AVAILABLE_HARTS(gp) + or t2, t2, t1 + SREG t2, GD_AVAILABLE_HARTS(gp) + + fence rw, w + amoswap.w zero, zero, 0(t0) + + /* + * Continue on hart lottery winner, others branch to + * secondary_hart_loop. + */ + bnez s2, secondary_hart_loop +#endif + + /* Enable cache */ + jal icache_enable + jal dcache_enable + +#ifdef CONFIG_DEBUG_UART + jal debug_uart_init +#endif + + mv a0, zero /* a0 <-- boot_flags = 0 */ + la t5, board_init_f + jalr t5 /* jump to board_init_f() */ + +#ifdef CONFIG_SPL_BUILD +spl_clear_bss: + la t0, __bss_start + la t1, __bss_end + beq t0, t1, spl_stack_gd_setup + +spl_clear_bss_loop: + SREG zero, 0(t0) + addi t0, t0, REGBYTES + blt t0, t1, spl_clear_bss_loop + +spl_stack_gd_setup: + jal spl_relocate_stack_gd + + /* skip setup if we did not relocate */ + beqz a0, spl_call_board_init_r + mv s0, a0 + + /* setup stack on main hart */ +#ifdef CONFIG_SMP + /* tp: hart id */ + slli t0, tp, CONFIG_STACK_SIZE_SHIFT + sub sp, s0, t0 +#else + mv sp, s0 +#endif + + /* set new stack and global data pointer on secondary harts */ +spl_secondary_hart_stack_gd_setup: + la a0, secondary_hart_relocate + mv a1, s0 + mv a2, s0 + mv a3, zero + jal smp_call_function + + /* hang if relocation of secondary harts has failed */ + beqz a0, 1f + mv a1, a0 + la a0, secondary_harts_relocation_error + jal printf + jal hang + + /* set new global data pointer on main hart */ +1: mv gp, s0 + +spl_call_board_init_r: + mv a0, zero + mv a1, zero + jal board_init_r +#endif /* - * void relocate_code (addr_sp, gd, addr_moni) + * void relocate_code(addr_sp, gd, addr_moni) * * This "function" does not return, instead it continues in RAM * after relocating the monitor code. @@ -90,203 +223,187 @@ call_board_init_f_0: */ .globl relocate_code relocate_code: - mv s2, a0 /* save addr_sp */ - mv s3, a1 /* save addr of gd */ - mv s4, a2 /* save addr of destination */ + mv s2, a0 /* save addr_sp */ + mv s3, a1 /* save addr of gd */ + mv s4, a2 /* save addr of destination */ /* *Set up the stack */ stack_setup: - mv sp, s2 - la t0, _start - sub t6, s4, t0 /* t6 <- relocation offset */ - beq t0, s4, clear_bss /* skip relocation */ +#ifdef CONFIG_SMP + /* tp: hart id */ + slli t0, tp, CONFIG_STACK_SIZE_SHIFT + sub sp, s2, t0 +#else + mv sp, s2 +#endif - mv t1, s4 /* t1 <- scratch for copy_loop */ - la t3, __bss_start - sub t3, t3, t0 /* t3 <- __bss_start_ofs */ - add t2, t0, t3 /* t2 <- source end address */ + la t0, _start + sub t6, s4, t0 /* t6 <- relocation offset */ + beq t0, s4, clear_bss /* skip relocation */ + + mv t1, s4 /* t1 <- scratch for copy_loop */ + la t3, __bss_start + sub t3, t3, t0 /* t3 <- __bss_start_ofs */ + add t2, t0, t3 /* t2 <- source end address */ copy_loop: - LREG t5, 0(t0) - addi t0, t0, REGBYTES - SREG t5, 0(t1) - addi t1, t1, REGBYTES - blt t0, t2, copy_loop + LREG t5, 0(t0) + addi t0, t0, REGBYTES + SREG t5, 0(t1) + addi t1, t1, REGBYTES + blt t0, t2, copy_loop /* * Update dynamic relocations after board_init_f */ fix_rela_dyn: - la t1, __rel_dyn_start - la t2, __rel_dyn_end - beq t1, t2, clear_bss - add t1, t1, t6 /* t1 <- rela_dyn_start in RAM */ - add t2, t2, t6 /* t2 <- rela_dyn_end in RAM */ + la t1, __rel_dyn_start + la t2, __rel_dyn_end + beq t1, t2, clear_bss + add t1, t1, t6 /* t1 <- rela_dyn_start in RAM */ + add t2, t2, t6 /* t2 <- rela_dyn_end in RAM */ /* * skip first reserved entry: address, type, addend */ - bne t1, t2, 7f + j 10f 6: - LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */ - li t3, R_RISCV_RELATIVE /* reloc type R_RISCV_RELATIVE */ - bne t5, t3, 8f /* skip non-RISCV_RELOC entries */ - LREG t3, -(REGBYTES*3)(t1) - LREG t5, -(REGBYTES)(t1) /* t5 <-- addend */ - add t5, t5, t6 /* t5 <-- location to fix up in RAM */ - add t3, t3, t6 /* t3 <-- location to fix up in RAM */ - SREG t5, 0(t3) -7: - addi t1, t1, (REGBYTES*3) - ble t1, t2, 6b + LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */ + li t3, R_RISCV_RELATIVE /* reloc type R_RISCV_RELATIVE */ + bne t5, t3, 8f /* skip non-RISCV_RELOC entries */ + LREG t3, -(REGBYTES*3)(t1) + LREG t5, -(REGBYTES)(t1) /* t5 <-- addend */ + add t5, t5, t6 /* t5 <-- location to fix up in RAM */ + add t3, t3, t6 /* t3 <-- location to fix up in RAM */ + SREG t5, 0(t3) + j 10f 8: - la t4, __dyn_sym_start - add t4, t4, t6 + la t4, __dyn_sym_start + add t4, t4, t6 9: - LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */ - srli t0, t5, SYM_INDEX /* t0 <--- sym table index */ - andi t5, t5, 0xFF /* t5 <--- relocation type */ - li t3, RELOC_TYPE - bne t5, t3, 10f /* skip non-addned entries */ - - LREG t3, -(REGBYTES*3)(t1) - li t5, SYM_SIZE - mul t0, t0, t5 - add s1, t4, t0 - LREG t5, REGBYTES(s1) - add t5, t5, t6 /* t5 <-- location to fix up in RAM */ - add t3, t3, t6 /* t3 <-- location to fix up in RAM */ - SREG t5, 0(t3) + LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */ + srli t0, t5, SYM_INDEX /* t0 <--- sym table index */ + andi t5, t5, 0xFF /* t5 <--- relocation type */ + li t3, RELOC_TYPE + bne t5, t3, 10f /* skip non-addned entries */ + + LREG t3, -(REGBYTES*3)(t1) + li t5, SYM_SIZE + mul t0, t0, t5 + add s5, t4, t0 + LREG t0, -(REGBYTES)(t1) /* t0 <-- addend */ + LREG t5, REGBYTES(s5) + add t5, t5, t0 + add t5, t5, t6 /* t5 <-- location to fix up in RAM */ + add t3, t3, t6 /* t3 <-- location to fix up in RAM */ + SREG t5, 0(t3) 10: - addi t1, t1, (REGBYTES*3) - ble t1, t2, 9b + addi t1, t1, (REGBYTES*3) + ble t1, t2, 6b /* * trap update */ - la t0, trap_entry - add t0, t0, t6 - csrw mtvec, t0 + la t0, trap_entry + add t0, t0, t6 + csrw MODE_PREFIX(tvec), t0 clear_bss: - la t0, __bss_start /* t0 <- rel __bss_start in FLASH */ - add t0, t0, t6 /* t0 <- rel __bss_start in RAM */ - la t1, __bss_end /* t1 <- rel __bss_end in FLASH */ - add t1, t1, t6 /* t1 <- rel __bss_end in RAM */ - li t2, 0x00000000 /* clear */ - beq t0, t1, call_board_init_r + la t0, __bss_start /* t0 <- rel __bss_start in FLASH */ + add t0, t0, t6 /* t0 <- rel __bss_start in RAM */ + la t1, __bss_end /* t1 <- rel __bss_end in FLASH */ + add t1, t1, t6 /* t1 <- rel __bss_end in RAM */ + beq t0, t1, relocate_secondary_harts clbss_l: - SREG t2, 0(t0) /* clear loop... */ - addi t0, t0, REGBYTES - bne t0, t1, clbss_l + SREG zero, 0(t0) /* clear loop... */ + addi t0, t0, REGBYTES + blt t0, t1, clbss_l + +relocate_secondary_harts: +#ifdef CONFIG_SMP + /* send relocation IPI */ + la t0, secondary_hart_relocate + add a0, t0, t6 + + /* store relocation offset */ + mv s5, t6 + + mv a1, s2 + mv a2, s3 + mv a3, zero + jal smp_call_function + + /* hang if relocation of secondary harts has failed */ + beqz a0, 1f + mv a1, a0 + la a0, secondary_harts_relocation_error + jal printf + jal hang + + /* restore relocation offset */ +1: mv t6, s5 +#endif /* * We are done. Do not return, instead branch to second part of board * initialization, now running from RAM. */ call_board_init_r: - la t0, board_init_r - mv t4, t0 /* offset of board_init_r() */ - add t4, t4, t6 /* real address of board_init_r() */ + jal invalidate_icache_all + jal flush_dcache_all + la t0, board_init_r /* offset of board_init_r() */ + add t4, t0, t6 /* real address of board_init_r() */ /* * setup parameters for board_init_r */ - mv a0, s3 /* gd_t */ - mv a1, s4 /* dest_addr */ + mv a0, s3 /* gd_t */ + mv a1, s4 /* dest_addr */ /* * jump to it ... */ - jr t4 /* jump to board_init_r() */ + jr t4 /* jump to board_init_r() */ -/* - * trap entry - */ -trap_entry: - addi sp, sp, -32*REGBYTES - SREG x1, 1*REGBYTES(sp) - SREG x2, 2*REGBYTES(sp) - SREG x3, 3*REGBYTES(sp) - SREG x4, 4*REGBYTES(sp) - SREG x5, 5*REGBYTES(sp) - SREG x6, 6*REGBYTES(sp) - SREG x7, 7*REGBYTES(sp) - SREG x8, 8*REGBYTES(sp) - SREG x9, 9*REGBYTES(sp) - SREG x10, 10*REGBYTES(sp) - SREG x11, 11*REGBYTES(sp) - SREG x12, 12*REGBYTES(sp) - SREG x13, 13*REGBYTES(sp) - SREG x14, 14*REGBYTES(sp) - SREG x15, 15*REGBYTES(sp) - SREG x16, 16*REGBYTES(sp) - SREG x17, 17*REGBYTES(sp) - SREG x18, 18*REGBYTES(sp) - SREG x19, 19*REGBYTES(sp) - SREG x20, 20*REGBYTES(sp) - SREG x21, 21*REGBYTES(sp) - SREG x22, 22*REGBYTES(sp) - SREG x23, 23*REGBYTES(sp) - SREG x24, 24*REGBYTES(sp) - SREG x25, 25*REGBYTES(sp) - SREG x26, 26*REGBYTES(sp) - SREG x27, 27*REGBYTES(sp) - SREG x28, 28*REGBYTES(sp) - SREG x29, 29*REGBYTES(sp) - SREG x30, 30*REGBYTES(sp) - SREG x31, 31*REGBYTES(sp) - csrr a0, mcause - csrr a1, mepc - mv a2, sp - jal handle_trap - csrw mepc, a0 +#ifdef CONFIG_SMP +hart_out_of_bounds_loop: + /* Harts in this loop are out of bounds, increase CONFIG_NR_CPUS. */ + wfi + j hart_out_of_bounds_loop -/* - * Remain in M-mode after mret - */ - li t0, MSTATUS_MPP - csrs mstatus, t0 - LREG x1, 1*REGBYTES(sp) - LREG x2, 2*REGBYTES(sp) - LREG x3, 3*REGBYTES(sp) - LREG x4, 4*REGBYTES(sp) - LREG x5, 5*REGBYTES(sp) - LREG x6, 6*REGBYTES(sp) - LREG x7, 7*REGBYTES(sp) - LREG x8, 8*REGBYTES(sp) - LREG x9, 9*REGBYTES(sp) - LREG x10, 10*REGBYTES(sp) - LREG x11, 11*REGBYTES(sp) - LREG x12, 12*REGBYTES(sp) - LREG x13, 13*REGBYTES(sp) - LREG x14, 14*REGBYTES(sp) - LREG x15, 15*REGBYTES(sp) - LREG x16, 16*REGBYTES(sp) - LREG x17, 17*REGBYTES(sp) - LREG x18, 18*REGBYTES(sp) - LREG x19, 19*REGBYTES(sp) - LREG x20, 20*REGBYTES(sp) - LREG x21, 21*REGBYTES(sp) - LREG x22, 22*REGBYTES(sp) - LREG x23, 23*REGBYTES(sp) - LREG x24, 24*REGBYTES(sp) - LREG x25, 25*REGBYTES(sp) - LREG x26, 26*REGBYTES(sp) - LREG x27, 27*REGBYTES(sp) - LREG x28, 28*REGBYTES(sp) - LREG x29, 29*REGBYTES(sp) - LREG x30, 30*REGBYTES(sp) - LREG x31, 31*REGBYTES(sp) - addi sp, sp, 32*REGBYTES - mret - -#ifdef CONFIG_INIT_CRITICAL -cpu_init_crit: - ret +/* SMP relocation entry */ +secondary_hart_relocate: + /* a1: new sp */ + /* a2: new gd */ + /* tp: hart id */ + + /* setup stack */ + slli t0, tp, CONFIG_STACK_SIZE_SHIFT + sub sp, a1, t0 + + /* update global data pointer */ + mv gp, a2 +#endif + +secondary_hart_loop: + wfi + +#ifdef CONFIG_SMP + csrr t0, MODE_PREFIX(ip) +#if CONFIG_IS_ENABLED(RISCV_MMODE) + andi t0, t0, MIE_MSIE +#else + andi t0, t0, SIE_SSIE +#endif + beqz t0, secondary_hart_loop + + mv a0, tp + jal handle_ipi #endif + + j secondary_hart_loop