X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=arch%2Farm%2Fcpu%2Farmv8%2Fcache_v8.c;h=7c31d98a6f03157af1d72f28fe9c0162cdb7d02f;hb=60c7facfc965af6ff8ea14ee26c9d49cd2d0ec22;hp=b1ea8227cb68841a3a87680d27bf3f57206e2da4;hpb=a7e2c6f6bb0b28d5b7f08bf407742fc3fddf701b;p=oweals%2Fu-boot.git

diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index b1ea8227cb..7c31d98a6f 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -1,75 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * (C) Copyright 2013
  * David Feng
  *
- * SPDX-License-Identifier:	GPL-2.0+
+ * (C) Copyright 2016
+ * Alexander Graf
  */
 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include 
 #include 
 
 DECLARE_GLOBAL_DATA_PTR;
 
-#ifndef CONFIG_SYS_DCACHE_OFF
-inline void set_pgtable_section(u64 *page_table, u64 index, u64 section,
-				u64 memory_type, u64 share)
+#if !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+
+/*
+ * With 4k page granule, a virtual address is split into 4 lookup parts
+ * spanning 9 bits each:
+ *
+ *    _______________________________________________
+ *   |       |       |       |       |       |       |
+ *   |   0   |  Lv0  |  Lv1  |  Lv2  |  Lv3  |  off  |
+ *   |_______|_______|_______|_______|_______|_______|
+ *     63-48   47-39   38-30   29-21   20-12   11-00
+ *
+ *             mask        page size
+ *
+ *    Lv0: FF8000000000       --
+ *    Lv1:   7FC0000000       1G
+ *    Lv2:     3FE00000       2M
+ *    Lv3:       1FF000       4K
+ *    off:          FFF
+ */
+
+u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
 {
-	u64 value;
+	u64 max_addr = 0;
+	u64 ips, va_bits;
+	u64 tcr;
+	int i;
+
+	/* Find the largest address we need to support */
+	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
+		max_addr = max(max_addr, mem_map[i].virt + mem_map[i].size);
+
+	/* Calculate the maximum physical (and thus virtual) address */
+	if (max_addr > (1ULL << 44)) {
+		ips = 5;
+		va_bits = 48;
+	} else if (max_addr > (1ULL << 42)) {
+		ips = 4;
+		va_bits = 44;
+	} else if (max_addr > (1ULL << 40)) {
+		ips = 3;
+		va_bits = 42;
+	} else if (max_addr > (1ULL << 36)) {
+		ips = 2;
+		va_bits = 40;
+	} else if (max_addr > (1ULL << 32)) {
+		ips = 1;
+		va_bits = 36;
+	} else {
+		ips = 0;
+		va_bits = 32;
+	}
+
+	if (el == 1) {
+		tcr = TCR_EL1_RSVD | (ips << 32) | TCR_EPD1_DISABLE;
+	} else if (el == 2) {
+		tcr = TCR_EL2_RSVD | (ips << 16);
+	} else {
+		tcr = TCR_EL3_RSVD | (ips << 16);
+	}
+
+	/* PTWs cacheable, inner/outer WBWA and inner shareable */
+	tcr |= TCR_TG0_4K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
+	tcr |= TCR_T0SZ(va_bits);
+
+	if (pips)
+		*pips = ips;
+	if (pva_bits)
+		*pva_bits = va_bits;
 
-	value = section | PMD_TYPE_SECT | PMD_SECT_AF;
-	value |= PMD_ATTRINDX(memory_type);
-	value |= share;
-	page_table[index] = value;
+	return tcr;
 }
 
-inline void set_pgtable_table(u64 *page_table, u64 index, u64 *table_addr)
+#define MAX_PTE_ENTRIES 512
+
+static int pte_type(u64 *pte)
 {
-	u64 value;
+	return *pte & PTE_TYPE_MASK;
+}
 
-	value = (u64)table_addr | PMD_TYPE_TABLE;
-	page_table[index] = value;
+/* Returns the LSB number for a PTE on level <level> */
+static int level2shift(int level)
+{
+	/* Page offset is 12 bits wide, every level translates 9 bits */
+	return (12 + 9 * (3 - level));
 }
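To make the translation-table arithmetic above concrete, here is a minimal host-side sketch (plain C, independent of U-Boot headers; the sample address is invented) that decomposes a virtual address into the four table indices and the page offset, using the same 12 + 9 * (3 - level) rule as level2shift():

#include <stdio.h>
#include <stdint.h>

/* Same rule as level2shift() in the patch above */
static int level2shift(int level)
{
	/* 12-bit page offset, 9 translation bits per level */
	return 12 + 9 * (3 - level);
}

int main(void)
{
	uint64_t va = 0x0000004321fed000ULL;	/* arbitrary example address */
	int level;

	for (level = 0; level < 4; level++)
		printf("Lv%d index: %llu\n", level,
		       (unsigned long long)((va >> level2shift(level)) & 0x1FF));
	printf("page offset: 0x%llx\n", (unsigned long long)(va & 0xFFF));
	return 0;
}

The same index extraction, (addr >> level2shift(i)) & 0x1FF, is what find_pte() below uses at each step of its table walk.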
 
-/* to activate the MMU we need to set up virtual memory */
-static void mmu_setup(void)
+static u64 *find_pte(u64 addr, int level)
 {
-	bd_t *bd = gd->bd;
-	u64 *page_table = (u64 *)gd->arch.tlb_addr, i, j;
-	int el;
+	int start_level = 0;
+	u64 *pte;
+	u64 idx;
+	u64 va_bits;
+	int i;
+
+	debug("addr=%llx level=%d\n", addr, level);
+
+	get_tcr(0, NULL, &va_bits);
+	if (va_bits < 39)
+		start_level = 1;
+
+	if (level < start_level)
+		return NULL;
+
+	/* Walk through all page table levels to find our PTE */
+	pte = (u64*)gd->arch.tlb_addr;
+	for (i = start_level; i < 4; i++) {
+		idx = (addr >> level2shift(i)) & 0x1FF;
+		pte += idx;
+		debug("idx=%llx PTE %p at level %d: %llx\n", idx, pte, i, *pte);
+
+		/* Found it */
+		if (i == level)
+			return pte;
+		/* PTE is not a table (either invalid or block), can't traverse */
+		if (pte_type(pte) != PTE_TYPE_TABLE)
+			return NULL;
+		/* Off to the next level */
+		pte = (u64*)(*pte & 0x0000fffffffff000ULL);
+	}
+
+	/* Should never reach here */
+	return NULL;
+}
+
+/* Creates and returns a new full table (512 entries) */
+static u64 *create_table(void)
+{
+	u64 *new_table = (u64*)gd->arch.tlb_fillptr;
+	u64 pt_len = MAX_PTE_ENTRIES * sizeof(u64);
+
+	/* Allocate MAX_PTE_ENTRIES pte entries */
+	gd->arch.tlb_fillptr += pt_len;
+
+	if (gd->arch.tlb_fillptr - gd->arch.tlb_addr > gd->arch.tlb_size)
+		panic("Insufficient RAM for page table: 0x%lx > 0x%lx. "
+		      "Please increase the size in get_page_table_size()",
+			gd->arch.tlb_fillptr - gd->arch.tlb_addr,
+			gd->arch.tlb_size);
+
+	/* Mark all entries as invalid */
+	memset(new_table, 0, pt_len);
+
+	return new_table;
+}
+
+static void set_pte_table(u64 *pte, u64 *table)
+{
+	/* Point *pte to the new table */
+	debug("Setting %p to addr=%p\n", pte, table);
+	*pte = PTE_TYPE_TABLE | (ulong)table;
+}
+
+/* Splits a block PTE into a table with subpages spanning the old block */
+static void split_block(u64 *pte, int level)
+{
+	u64 old_pte = *pte;
+	u64 *new_table;
+	u64 i = 0;
+	/* level describes the parent level, we need the child ones */
+	int levelshift = level2shift(level + 1);
+
+	if (pte_type(pte) != PTE_TYPE_BLOCK)
+		panic("PTE %p (%llx) is not a block. Some driver code wants to "
+		      "modify dcache settings for a range not covered in "
+		      "mem_map.", pte, old_pte);
+
+	new_table = create_table();
+	debug("Splitting pte %p (%llx) into %p\n", pte, old_pte, new_table);
 
-	/* Setup an identity-mapping for all spaces */
-	for (i = 0; i < (PGTABLE_SIZE >> 3); i++) {
-		set_pgtable_section(page_table, i, i << SECTION_SHIFT,
-				    MT_DEVICE_NGNRNE, PMD_SECT_NON_SHARE);
+	for (i = 0; i < MAX_PTE_ENTRIES; i++) {
+		new_table[i] = old_pte | (i << levelshift);
+
+		/* Level 3 block PTEs have the table type */
+		if ((level + 1) == 3)
+			new_table[i] |= PTE_TYPE_TABLE;
+
+		debug("Setting new_table[%lld] = %llx\n", i, new_table[i]);
 	}
 
-	/* Setup an identity-mapping for all RAM space */
-	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
-		ulong start = bd->bi_dram[i].start;
-		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
-		for (j = start >> SECTION_SHIFT;
-		     j < end >> SECTION_SHIFT; j++) {
-			set_pgtable_section(page_table, j, j << SECTION_SHIFT,
-					    MT_NORMAL, PMD_SECT_NON_SHARE);
+	/* Set the new table into effect */
+	set_pte_table(pte, new_table);
+}
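To illustrate what split_block() produces, the following host-side sketch (not U-Boot code; the physical address and attribute bit are invented, and the type values are the generic ARMv8 descriptor encodings, bits[1:0] = 0b01 for a block and 0b11 for a table/page) expands one 2 MiB level-2 block descriptor into 512 level-3 page descriptors covering the same range:

#include <stdio.h>
#include <stdint.h>

#define TYPE_BLOCK	0x1ULL	/* ARMv8 descriptor bits[1:0] = 0b01 */
#define TYPE_PAGE	0x3ULL	/* ARMv8 descriptor bits[1:0] = 0b11 */
#define ENTRIES		512

int main(void)
{
	/* 2 MiB block at output address 0x40000000 with the AF bit set */
	uint64_t old_pte = 0x40000000ULL | (1ULL << 10) | TYPE_BLOCK;
	uint64_t new_table[ENTRIES];
	int shift = 12;		/* each level-3 entry covers 4 KiB */
	int i;

	for (i = 0; i < ENTRIES; i++) {
		/* Same output address and attributes, offset by i * 4 KiB */
		new_table[i] = old_pte | ((uint64_t)i << shift);
		/* Level-3 entries carry the page/table type, not the block type */
		new_table[i] |= TYPE_PAGE;
	}

	printf("entry   0: 0x%016llx\n", (unsigned long long)new_table[0]);
	printf("entry 511: 0x%016llx\n", (unsigned long long)new_table[511]);
	return 0;
}

Because every new entry keeps the old attribute bits, the split is invisible to the running code until the caller changes the attributes of one of the 4 KiB sub-entries.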
+
+/* Add one mm_region map entry to the page tables */
+static void add_map(struct mm_region *map)
+{
+	u64 *pte;
+	u64 virt = map->virt;
+	u64 phys = map->phys;
+	u64 size = map->size;
+	u64 attrs = map->attrs | PTE_TYPE_BLOCK | PTE_BLOCK_AF;
+	u64 blocksize;
+	int level;
+	u64 *new_table;
+
+	while (size) {
+		pte = find_pte(virt, 0);
+		if (pte && (pte_type(pte) == PTE_TYPE_FAULT)) {
+			debug("Creating table for virt 0x%llx\n", virt);
+			new_table = create_table();
+			set_pte_table(pte, new_table);
+		}
+
+		for (level = 1; level < 4; level++) {
+			pte = find_pte(virt, level);
+			if (!pte)
+				panic("pte not found\n");
+
+			blocksize = 1ULL << level2shift(level);
+			debug("Checking if pte fits for virt=%llx size=%llx blocksize=%llx\n",
+			      virt, size, blocksize);
+			if (size >= blocksize && !(virt & (blocksize - 1))) {
+				/* Page fits, create block PTE */
+				debug("Setting PTE %p to block virt=%llx\n",
+				      pte, virt);
+				if (level == 3)
+					*pte = phys | attrs | PTE_TYPE_PAGE;
+				else
+					*pte = phys | attrs;
+				virt += blocksize;
+				phys += blocksize;
+				size -= blocksize;
+				break;
+			} else if (pte_type(pte) == PTE_TYPE_FAULT) {
+				/* Page doesn't fit, create subpages */
+				debug("Creating subtable for virt 0x%llx blksize=%llx\n",
+				      virt, blocksize);
+				new_table = create_table();
+				set_pte_table(pte, new_table);
+			} else if (pte_type(pte) == PTE_TYPE_BLOCK) {
+				debug("Split block into subtable for virt 0x%llx blksize=0x%llx\n",
+				      virt, blocksize);
+				split_block(pte, level);
+			}
+		}
 	}
 }
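For context, this is roughly what a board feeds into add_map(): a sketch of a hypothetical mem_map (addresses, sizes and the board itself are invented; struct mm_region and the attribute macros are the ones armv8 boards use from asm/armv8/mmu.h). Note the terminating all-zero entry, which is what the mem_map[i].size || mem_map[i].attrs loop conditions rely on:

#include <asm/armv8/mmu.h>

static struct mm_region example_mem_map[] = {
	{
		/* 2 GiB of DRAM: normal, write-back cacheable memory */
		.virt = 0x80000000UL,
		.phys = 0x80000000UL,
		.size = 0x80000000UL,
		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |
			 PTE_BLOCK_INNER_SHARE,
	}, {
		/* 1 GiB peripheral window: device memory, never executable */
		.virt = 0x00000000UL,
		.phys = 0x00000000UL,
		.size = 0x40000000UL,
		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
			 PTE_BLOCK_NON_SHARE |
			 PTE_BLOCK_PXN | PTE_BLOCK_UXN,
	}, {
		/* Terminating entry */
		0,
	}
};

struct mm_region *mem_map = example_mem_map;

setup_pgtables() below walks exactly this array, so any address range not described here stays unmapped and faults once the MMU is enabled.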
 
-	/* load TTBR0 */
-	el = current_el();
-	if (el == 1) {
-		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_EL1_RSVD | TCR_FLAGS | TCR_EL1_IPS_BITS,
-				  MEMORY_ATTRIBUTES);
-	} else if (el == 2) {
-		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_EL2_RSVD | TCR_FLAGS | TCR_EL2_IPS_BITS,
-				  MEMORY_ATTRIBUTES);
-	} else {
-		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_EL3_RSVD | TCR_FLAGS | TCR_EL3_IPS_BITS,
-				  MEMORY_ATTRIBUTES);
+enum pte_type {
+	PTE_INVAL,
+	PTE_BLOCK,
+	PTE_LEVEL,
+};
+
+/*
+ * This is a recursively called function to count the number of
+ * page tables we need to cover a particular PTE range. If you
+ * call this with level = -1 you basically get the full 48-bit
+ * coverage.
+ */
+static int count_required_pts(u64 addr, int level, u64 maxaddr)
+{
+	int levelshift = level2shift(level);
+	u64 levelsize = 1ULL << levelshift;
+	u64 levelmask = levelsize - 1;
+	u64 levelend = addr + levelsize;
+	int r = 0;
+	int i;
+	enum pte_type pte_type = PTE_INVAL;
+
+	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++) {
+		struct mm_region *map = &mem_map[i];
+		u64 start = map->virt;
+		u64 end = start + map->size;
+
+		/* Check if the PTE would overlap with the map */
+		if (max(addr, start) <= min(levelend, end)) {
+			start = max(addr, start);
+			end = min(levelend, end);
+
+			/* We need a sub-pt for this level */
+			if ((start & levelmask) || (end & levelmask)) {
+				pte_type = PTE_LEVEL;
+				break;
+			}
+
+			/* Lv0 cannot do block PTEs, so do levels here too */
+			if (level <= 0) {
+				pte_type = PTE_LEVEL;
+				break;
+			}
+
+			/* PTE is active, but fits into a block */
+			pte_type = PTE_BLOCK;
+		}
+	}
+
+	/*
+	 * Block PTEs at this level are already covered by the parent page
+	 * table, so we only need to count sub page tables.
+	 */
+	if (pte_type == PTE_LEVEL) {
+		int sublevel = level + 1;
+		u64 sublevelsize = 1ULL << level2shift(sublevel);
+
+		/* Account for the new sub page table ... */
+		r = 1;
+
+		/* ... and for all child page tables that one might have */
+		for (i = 0; i < MAX_PTE_ENTRIES; i++) {
+			r += count_required_pts(addr, sublevel, maxaddr);
+			addr += sublevelsize;
+
+			if (addr >= maxaddr) {
+				/*
+				 * We reached the end of address space, no need
+				 * to look any further.
+				 */
+				break;
+			}
+		}
 	}
+
+	return r;
+}
+
+/* Returns the estimated required size of all page tables */
+__weak u64 get_page_table_size(void)
+{
+	u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
+	u64 size = 0;
+	u64 va_bits;
+	int start_level = 0;
+
+	get_tcr(0, NULL, &va_bits);
+	if (va_bits < 39)
+		start_level = 1;
+
+	/* Account for all page tables we would need to cover our memory map */
+	size = one_pt * count_required_pts(0, start_level - 1, 1ULL << va_bits);
+
+	/*
+	 * We need to duplicate our page table once to have an emergency pt to
+	 * resort to when splitting page tables later on
+	 */
+	size *= 2;
+
+	/*
+	 * We may need to split page tables later on if dcache settings change,
+	 * so reserve up to 4 (random pick) page tables for that.
+	 */
+	size += one_pt * 4;
+
+	return size;
+}
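Since get_page_table_size() is declared __weak, a board can replace the estimate entirely. A minimal sketch of such an override, assuming a board file built with the usual U-Boot headers (the include shown and the 256 KiB figure are assumptions for illustration, not a recommendation):

#include <asm/armv8/mmu.h>

u64 get_page_table_size(void)
{
	/* Fixed pool: room for a few dozen tables plus the emergency copy */
	return 0x40000;
}

An override like this mainly matters when mem_map is adjusted at runtime and a boot-time estimate over the initial map could turn out too small.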
+
+void setup_pgtables(void)
+{
+	int i;
+
+	if (!gd->arch.tlb_fillptr || !gd->arch.tlb_addr)
+		panic("Page table pointer not set up.");
+
+	/*
+	 * Allocate the first level we're on with invalid entries.
+	 * If the starting level is 0 (va_bits >= 39), then this is our
+	 * Lv0 page table, otherwise it's the entry Lv1 page table.
+	 */
+	create_table();
+
+	/* Now add all MMU table entries one after another to the table */
+	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
+		add_map(&mem_map[i]);
+}
+
+static void setup_all_pgtables(void)
+{
+	u64 tlb_addr = gd->arch.tlb_addr;
+	u64 tlb_size = gd->arch.tlb_size;
+
+	/* Reset the fill ptr */
+	gd->arch.tlb_fillptr = tlb_addr;
+
+	/* Create normal system page tables */
+	setup_pgtables();
+
+	/* Create emergency page tables */
+	gd->arch.tlb_size -= (uintptr_t)gd->arch.tlb_fillptr -
+			     (uintptr_t)gd->arch.tlb_addr;
+	gd->arch.tlb_addr = gd->arch.tlb_fillptr;
+	setup_pgtables();
+	gd->arch.tlb_emerg = gd->arch.tlb_addr;
+	gd->arch.tlb_addr = tlb_addr;
+	gd->arch.tlb_size = tlb_size;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+__weak void mmu_setup(void)
+{
+	int el;
+
+	/* Set up page tables only once */
+	if (!gd->arch.tlb_fillptr)
+		setup_all_pgtables();
+
+	el = current_el();
+	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
+			  MEMORY_ATTRIBUTES);
+
 	/* enable the mmu */
 	set_sctlr(get_sctlr() | CR_M);
 }
@@ -80,31 +427,33 @@ static void mmu_setup(void)
 void invalidate_dcache_all(void)
 {
 	__asm_invalidate_dcache_all();
+	__asm_invalidate_l3_dcache();
 }
 
 /*
  * Performs a clean & invalidation of the entire data cache at all levels.
  * This function needs to be inline to avoid using stack.
- * __asm_flush_l3_cache return status of timeout
+ * __asm_flush_l3_dcache return status of timeout
  */
 inline void flush_dcache_all(void)
 {
 	int ret;
 
 	__asm_flush_dcache_all();
-	ret = __asm_flush_l3_cache();
+	ret = __asm_flush_l3_dcache();
 	if (ret)
 		debug("flushing dcache returns 0x%x\n", ret);
 	else
 		debug("flushing dcache successfully.\n");
 }
 
+#ifndef CONFIG_SYS_DISABLE_DCACHE_OPS
 /*
  * Invalidates range in all levels of D-cache/unified cache
  */
 void invalidate_dcache_range(unsigned long start, unsigned long stop)
 {
-	__asm_flush_dcache_range(start, stop);
+	__asm_invalidate_dcache_range(start, stop);
 }
 
 /*
@@ -114,6 +463,15 @@ void flush_dcache_range(unsigned long start, unsigned long stop)
 {
 	__asm_flush_dcache_range(start, stop);
 }
+#else
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+#endif /* CONFIG_SYS_DISABLE_DCACHE_OPS */
 
 void dcache_enable(void)
 {
@@ -154,32 +512,165 @@ u64 *__weak arch_get_page_table(void)
 {
 	return NULL;
 }
+static bool is_aligned(u64 addr, u64 size, u64 align)
+{
+	return !(addr & (align - 1)) && !(size & (align - 1));
+}
+
+/* Use flag to indicate if attrs has more than d-cache attributes */
+static u64 set_one_region(u64 start, u64 size, u64 attrs, bool flag, int level)
+{
+	int levelshift = level2shift(level);
+	u64 levelsize = 1ULL << levelshift;
+	u64 *pte = find_pte(start, level);
+
+	/* Can we just modify the current level block PTE? */
+	if (is_aligned(start, size, levelsize)) {
+		if (flag) {
+			*pte &= ~PMD_ATTRMASK;
+			*pte |= attrs & PMD_ATTRMASK;
+		} else {
+			*pte &= ~PMD_ATTRINDX_MASK;
+			*pte |= attrs & PMD_ATTRINDX_MASK;
+		}
+		debug("Set attrs=%llx pte=%p level=%d\n", attrs, pte, level);
+
+		return levelsize;
+	}
+
+	/* Unaligned or doesn't fit, maybe split block into table */
+	debug("addr=%llx level=%d pte=%p (%llx)\n", start, level, pte, *pte);
+
+	/* Maybe we need to split the block into a table */
+	if (pte_type(pte) == PTE_TYPE_BLOCK)
+		split_block(pte, level);
+
+	/* And then double-check it became a table or already is one */
+	if (pte_type(pte) != PTE_TYPE_TABLE)
+		panic("PTE %p (%llx) for addr=%llx should be a table",
+		      pte, *pte, start);
+
+	/* Roll on to the next page table level */
+	return 0;
+}
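The two functions that follow both use the same pattern: walk the range and let set_one_region() consume the largest naturally aligned granule at each position. A small host-side sketch of that consumption order (the sample range is invented; start and size are assumed to be at least 4 KiB aligned, as the real code requires):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0x7fe00000ULL;	/* 2 MiB below a 1 GiB boundary */
	uint64_t size = 0x40400000ULL;	/* 1 GiB + 4 MiB */
	const uint64_t granule[] = { 1ULL << 30, 1ULL << 21, 1ULL << 12 };
	int level;

	while (size > 0) {
		for (level = 0; level < 3; level++) {
			uint64_t g = granule[level];

			/* Use this granule if it fits and start is aligned to it */
			if (size >= g && !(start & (g - 1))) {
				printf("0x%010llx: 0x%llx-byte granule\n",
				       (unsigned long long)start,
				       (unsigned long long)g);
				start += g;
				size -= g;
				break;
			}
		}
	}
	return 0;
}

The real code differs only in that an unaligned or partially covered block is first split into a table via split_block() before the walk descends one level.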
+
 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
 				     enum dcache_option option)
 {
-	u64 *page_table = arch_get_page_table();
-	u64 upto, end;
+	u64 attrs = PMD_ATTRINDX(option >> 2);
+	u64 real_start = start;
+	u64 real_size = size;
+
+	debug("start=%lx size=%lx\n", (ulong)start, (ulong)size);
+
+	if (!gd->arch.tlb_emerg)
+		panic("Emergency page table not set up.");
+
+	/*
+	 * We cannot modify page tables that we're currently running on,
+	 * so we first need to switch to the "emergency" page tables where
+	 * we can safely modify our primary page tables and then switch back
+	 */
+	__asm_switch_ttbr(gd->arch.tlb_emerg);
+
+	/*
+	 * Loop through the address range until we find a page granule that fits
+	 * our alignment constraints, then set it to the new cache attributes
+	 */
+	while (size > 0) {
+		int level;
+		u64 r;
+
+		for (level = 1; level < 4; level++) {
+			/* Set d-cache attributes only */
+			r = set_one_region(start, size, attrs, false, level);
+			if (r) {
+				/* PTE successfully replaced */
+				size -= r;
+				start += r;
+				break;
+			}
+		}
 
-	if (page_table == NULL)
-		return;
+	}
+
+	/* We're done modifying page tables, switch back to our primary ones */
+	__asm_switch_ttbr(gd->arch.tlb_addr);
+
+	/*
+	 * Make sure there's nothing stale in dcache for a region that might
+	 * have caches off now
+	 */
+	flush_dcache_range(real_start, real_start + real_size);
+}
 
-	end = ALIGN(start + size, (1 << MMU_SECTION_SHIFT)) >>
-	      MMU_SECTION_SHIFT;
-	start = start >> MMU_SECTION_SHIFT;
-	for (upto = start; upto < end; upto++) {
-		page_table[upto] &= ~PMD_ATTRINDX_MASK;
-		page_table[upto] |= PMD_ATTRINDX(option);
+/*
+ * Modify MMU table for a region with updated PXN/UXN/Memory type/valid bits.
+ * The process is break-before-make. The target region will be marked as
+ * invalid while it is being changed.
+ */
+void mmu_change_region_attr(phys_addr_t addr, size_t siz, u64 attrs)
+{
+	int level;
+	u64 r, size, start;
+
+	start = addr;
+	size = siz;
+	/*
+	 * Loop through the address range until we find a page granule that fits
+	 * our alignment constraints, then set it to "invalid".
+	 */
+	while (size > 0) {
+		for (level = 1; level < 4; level++) {
+			/* Set PTE to fault */
+			r = set_one_region(start, size, PTE_TYPE_FAULT, true,
+					   level);
+			if (r) {
+				/* PTE successfully invalidated */
+				size -= r;
+				start += r;
+				break;
+			}
+		}
 	}
-	asm volatile("dsb sy");
+
+	flush_dcache_range(gd->arch.tlb_addr,
+			   gd->arch.tlb_addr + gd->arch.tlb_size);
+	__asm_invalidate_tlb_all();
+
+	/*
+	 * Loop through the address range until we find a page granule that fits
+	 * our alignment constraints, then set it to the new cache attributes
+	 */
+	start = addr;
+	size = siz;
+	while (size > 0) {
+		for (level = 1; level < 4; level++) {
+			/* Set PTE to new attributes */
+			r = set_one_region(start, size, attrs, true, level);
+			if (r) {
+				/* PTE successfully updated */
+				size -= r;
+				start += r;
+				break;
+			}
+		}
+	}
+	flush_dcache_range(gd->arch.tlb_addr,
+			   gd->arch.tlb_addr + gd->arch.tlb_size);
 	__asm_invalidate_tlb_all();
-	asm volatile("dsb sy");
-	asm volatile("isb");
-	start = start << MMU_SECTION_SHIFT;
-	end = end << MMU_SECTION_SHIFT;
-	flush_dcache_range(start, end);
-	asm volatile("dsb sy");
 }
-#else	/* CONFIG_SYS_DCACHE_OFF */
+
+#else /* !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) */
+
+/*
+ * For SPL builds, we may want to not have dcache enabled. Any real U-Boot
+ * running however really wants to have dcache and the MMU active. Check that
+ * everything is sane and give the developer a hint if it isn't.
+ */
+#ifndef CONFIG_SPL_BUILD
+#error Please describe your MMU layout in CONFIG_SYS_MEM_MAP and enable dcache.
+#endif
 
 void invalidate_dcache_all(void)
 {
@@ -207,13 +698,13 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
 {
 }
 
-#endif /* CONFIG_SYS_DCACHE_OFF */
+#endif /* !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) */
 
-#ifndef CONFIG_SYS_ICACHE_OFF
+#if !CONFIG_IS_ENABLED(SYS_ICACHE_OFF)
 
 void icache_enable(void)
 {
-	__asm_invalidate_icache_all();
+	invalidate_icache_all();
 	set_sctlr(get_sctlr() | CR_I);
 }
 
@@ -230,9 +721,10 @@ int icache_status(void)
 void invalidate_icache_all(void)
 {
 	__asm_invalidate_icache_all();
+	__asm_invalidate_l3_icache();
 }
 
-#else	/* CONFIG_SYS_ICACHE_OFF */
+#else /* !CONFIG_IS_ENABLED(SYS_ICACHE_OFF) */
 
 void icache_enable(void)
 {
@@ -251,7 +743,7 @@ void invalidate_icache_all(void)
 {
 }
 
-#endif /* CONFIG_SYS_ICACHE_OFF */
+#endif /* !CONFIG_IS_ENABLED(SYS_ICACHE_OFF) */
 
 /*
  * Enable dCache & iCache, whether cache is actually enabled