Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / arch / arm / mm / cache-l2x0.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * arch/arm/mm/cache-l2x0.c - L210/L220/L310 cache controller support
4  *
5  * Copyright (C) 2007 ARM Limited
6  */
7 #include <linux/cpu.h>
8 #include <linux/err.h>
9 #include <linux/init.h>
10 #include <linux/smp.h>
11 #include <linux/spinlock.h>
12 #include <linux/log2.h>
13 #include <linux/io.h>
14 #include <linux/of.h>
15 #include <linux/of_address.h>
16
17 #include <asm/cacheflush.h>
18 #include <asm/cp15.h>
19 #include <asm/cputype.h>
20 #include <asm/hardware/cache-l2x0.h>
21 #include "cache-tauros3.h"
22 #include "cache-aurora-l2.h"
23
24 struct l2c_init_data {
25         const char *type;
26         unsigned way_size_0;
27         unsigned num_lock;
28         void (*of_parse)(const struct device_node *, u32 *, u32 *);
29         void (*enable)(void __iomem *, unsigned);
30         void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
31         void (*save)(void __iomem *);
32         void (*configure)(void __iomem *);
33         void (*unlock)(void __iomem *, unsigned);
34         struct outer_cache_fns outer_cache;
35 };
36
37 #define CACHE_LINE_SIZE         32
38
39 static void __iomem *l2x0_base;
40 static const struct l2c_init_data *l2x0_data;
41 static DEFINE_RAW_SPINLOCK(l2x0_lock);
42 static u32 l2x0_way_mask;       /* Bitmask of active ways */
43 static u32 l2x0_size;
44 static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
45
46 struct l2x0_regs l2x0_saved_regs;
47
48 static bool l2x0_bresp_disable;
49 static bool l2x0_flz_disable;
50
51 /*
52  * Common code for all cache controllers.
53  */
54 static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
55 {
56         /* wait for cache operation by line or way to complete */
57         while (readl_relaxed(reg) & mask)
58                 cpu_relax();
59 }
60
61 /*
62  * By default, we write directly to secure registers.  Platforms must
63  * override this if they are running non-secure.
64  */
65 static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
66 {
67         if (val == readl_relaxed(base + reg))
68                 return;
69         if (outer_cache.write_sec)
70                 outer_cache.write_sec(val, reg);
71         else
72                 writel_relaxed(val, base + reg);
73 }
74
75 /*
76  * This should only be called when we have a requirement that the
77  * register be written due to a work-around, as platforms running
78  * in non-secure mode may not be able to access this register.
79  */
80 static inline void l2c_set_debug(void __iomem *base, unsigned long val)
81 {
82         l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
83 }
84
85 static void __l2c_op_way(void __iomem *reg)
86 {
87         writel_relaxed(l2x0_way_mask, reg);
88         l2c_wait_mask(reg, l2x0_way_mask);
89 }
90
91 static inline void l2c_unlock(void __iomem *base, unsigned num)
92 {
93         unsigned i;
94
95         for (i = 0; i < num; i++) {
96                 writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
97                                i * L2X0_LOCKDOWN_STRIDE);
98                 writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
99                                i * L2X0_LOCKDOWN_STRIDE);
100         }
101 }
102
103 static void l2c_configure(void __iomem *base)
104 {
105         l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
106 }
107
108 /*
109  * Enable the L2 cache controller.  This function must only be
110  * called when the cache controller is known to be disabled.
111  */
112 static void l2c_enable(void __iomem *base, unsigned num_lock)
113 {
114         unsigned long flags;
115
116         if (outer_cache.configure)
117                 outer_cache.configure(&l2x0_saved_regs);
118         else
119                 l2x0_data->configure(base);
120
121         l2x0_data->unlock(base, num_lock);
122
123         local_irq_save(flags);
124         __l2c_op_way(base + L2X0_INV_WAY);
125         writel_relaxed(0, base + sync_reg_offset);
126         l2c_wait_mask(base + sync_reg_offset, 1);
127         local_irq_restore(flags);
128
129         l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
130 }
131
132 static void l2c_disable(void)
133 {
134         void __iomem *base = l2x0_base;
135
136         l2x0_pmu_suspend();
137
138         outer_cache.flush_all();
139         l2c_write_sec(0, base, L2X0_CTRL);
140         dsb(st);
141 }
142
143 static void l2c_save(void __iomem *base)
144 {
145         l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
146 }
147
148 static void l2c_resume(void)
149 {
150         void __iomem *base = l2x0_base;
151
152         /* Do not touch the controller if already enabled. */
153         if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
154                 l2c_enable(base, l2x0_data->num_lock);
155
156         l2x0_pmu_resume();
157 }
158
159 /*
160  * L2C-210 specific code.
161  *
162  * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
163  * ensure that no background operation is running.  The way operations
164  * are all background tasks.
165  *
166  * While a background operation is in progress, any new operation is
167  * ignored (unspecified whether this causes an error.)  Thankfully, not
168  * used on SMP.
169  *
170  * Never has a different sync register other than L2X0_CACHE_SYNC, but
171  * we use sync_reg_offset here so we can share some of this with L2C-310.
172  */
173 static void __l2c210_cache_sync(void __iomem *base)
174 {
175         writel_relaxed(0, base + sync_reg_offset);
176 }
177
178 static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
179         unsigned long end)
180 {
181         while (start < end) {
182                 writel_relaxed(start, reg);
183                 start += CACHE_LINE_SIZE;
184         }
185 }
186
187 static void l2c210_inv_range(unsigned long start, unsigned long end)
188 {
189         void __iomem *base = l2x0_base;
190
191         if (start & (CACHE_LINE_SIZE - 1)) {
192                 start &= ~(CACHE_LINE_SIZE - 1);
193                 writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
194                 start += CACHE_LINE_SIZE;
195         }
196
197         if (end & (CACHE_LINE_SIZE - 1)) {
198                 end &= ~(CACHE_LINE_SIZE - 1);
199                 writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
200         }
201
202         __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
203         __l2c210_cache_sync(base);
204 }
205
206 static void l2c210_clean_range(unsigned long start, unsigned long end)
207 {
208         void __iomem *base = l2x0_base;
209
210         start &= ~(CACHE_LINE_SIZE - 1);
211         __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
212         __l2c210_cache_sync(base);
213 }
214
215 static void l2c210_flush_range(unsigned long start, unsigned long end)
216 {
217         void __iomem *base = l2x0_base;
218
219         start &= ~(CACHE_LINE_SIZE - 1);
220         __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
221         __l2c210_cache_sync(base);
222 }
223
224 static void l2c210_flush_all(void)
225 {
226         void __iomem *base = l2x0_base;
227
228         BUG_ON(!irqs_disabled());
229
230         __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
231         __l2c210_cache_sync(base);
232 }
233
234 static void l2c210_sync(void)
235 {
236         __l2c210_cache_sync(l2x0_base);
237 }
238
239 static const struct l2c_init_data l2c210_data __initconst = {
240         .type = "L2C-210",
241         .way_size_0 = SZ_8K,
242         .num_lock = 1,
243         .enable = l2c_enable,
244         .save = l2c_save,
245         .configure = l2c_configure,
246         .unlock = l2c_unlock,
247         .outer_cache = {
248                 .inv_range = l2c210_inv_range,
249                 .clean_range = l2c210_clean_range,
250                 .flush_range = l2c210_flush_range,
251                 .flush_all = l2c210_flush_all,
252                 .disable = l2c_disable,
253                 .sync = l2c210_sync,
254                 .resume = l2c_resume,
255         },
256 };
257
258 /*
259  * L2C-220 specific code.
260  *
261  * All operations are background operations: they have to be waited for.
262  * Conflicting requests generate a slave error (which will cause an
263  * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
264  * sync register here.
265  *
266  * However, we can re-use the common l2c_resume call.
267  */
268 static inline void __l2c220_cache_sync(void __iomem *base)
269 {
270         writel_relaxed(0, base + L2X0_CACHE_SYNC);
271         l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
272 }
273
274 static void l2c220_op_way(void __iomem *base, unsigned reg)
275 {
276         unsigned long flags;
277
278         raw_spin_lock_irqsave(&l2x0_lock, flags);
279         __l2c_op_way(base + reg);
280         __l2c220_cache_sync(base);
281         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
282 }
283
284 static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
285         unsigned long end, unsigned long flags)
286 {
287         raw_spinlock_t *lock = &l2x0_lock;
288
289         while (start < end) {
290                 unsigned long blk_end = start + min(end - start, 4096UL);
291
292                 while (start < blk_end) {
293                         l2c_wait_mask(reg, 1);
294                         writel_relaxed(start, reg);
295                         start += CACHE_LINE_SIZE;
296                 }
297
298                 if (blk_end < end) {
299                         raw_spin_unlock_irqrestore(lock, flags);
300                         raw_spin_lock_irqsave(lock, flags);
301                 }
302         }
303
304         return flags;
305 }
306
307 static void l2c220_inv_range(unsigned long start, unsigned long end)
308 {
309         void __iomem *base = l2x0_base;
310         unsigned long flags;
311
312         raw_spin_lock_irqsave(&l2x0_lock, flags);
313         if ((start | end) & (CACHE_LINE_SIZE - 1)) {
314                 if (start & (CACHE_LINE_SIZE - 1)) {
315                         start &= ~(CACHE_LINE_SIZE - 1);
316                         writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
317                         start += CACHE_LINE_SIZE;
318                 }
319
320                 if (end & (CACHE_LINE_SIZE - 1)) {
321                         end &= ~(CACHE_LINE_SIZE - 1);
322                         l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
323                         writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
324                 }
325         }
326
327         flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
328                                    start, end, flags);
329         l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
330         __l2c220_cache_sync(base);
331         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
332 }
333
334 static void l2c220_clean_range(unsigned long start, unsigned long end)
335 {
336         void __iomem *base = l2x0_base;
337         unsigned long flags;
338
339         start &= ~(CACHE_LINE_SIZE - 1);
340         if ((end - start) >= l2x0_size) {
341                 l2c220_op_way(base, L2X0_CLEAN_WAY);
342                 return;
343         }
344
345         raw_spin_lock_irqsave(&l2x0_lock, flags);
346         flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
347                                    start, end, flags);
348         l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
349         __l2c220_cache_sync(base);
350         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
351 }
352
353 static void l2c220_flush_range(unsigned long start, unsigned long end)
354 {
355         void __iomem *base = l2x0_base;
356         unsigned long flags;
357
358         start &= ~(CACHE_LINE_SIZE - 1);
359         if ((end - start) >= l2x0_size) {
360                 l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
361                 return;
362         }
363
364         raw_spin_lock_irqsave(&l2x0_lock, flags);
365         flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
366                                    start, end, flags);
367         l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
368         __l2c220_cache_sync(base);
369         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
370 }
371
372 static void l2c220_flush_all(void)
373 {
374         l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
375 }
376
377 static void l2c220_sync(void)
378 {
379         unsigned long flags;
380
381         raw_spin_lock_irqsave(&l2x0_lock, flags);
382         __l2c220_cache_sync(l2x0_base);
383         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
384 }
385
386 static void l2c220_enable(void __iomem *base, unsigned num_lock)
387 {
388         /*
389          * Always enable non-secure access to the lockdown registers -
390          * we write to them as part of the L2C enable sequence so they
391          * need to be accessible.
392          */
393         l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN;
394
395         l2c_enable(base, num_lock);
396 }
397
398 static void l2c220_unlock(void __iomem *base, unsigned num_lock)
399 {
400         if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN)
401                 l2c_unlock(base, num_lock);
402 }
403
404 static const struct l2c_init_data l2c220_data = {
405         .type = "L2C-220",
406         .way_size_0 = SZ_8K,
407         .num_lock = 1,
408         .enable = l2c220_enable,
409         .save = l2c_save,
410         .configure = l2c_configure,
411         .unlock = l2c220_unlock,
412         .outer_cache = {
413                 .inv_range = l2c220_inv_range,
414                 .clean_range = l2c220_clean_range,
415                 .flush_range = l2c220_flush_range,
416                 .flush_all = l2c220_flush_all,
417                 .disable = l2c_disable,
418                 .sync = l2c220_sync,
419                 .resume = l2c_resume,
420         },
421 };
422
423 /*
424  * L2C-310 specific code.
425  *
426  * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
427  * and the way operations are all background tasks.  However, issuing an
428  * operation while a background operation is in progress results in a
429  * SLVERR response.  We can reuse:
430  *
431  *  __l2c210_cache_sync (using sync_reg_offset)
432  *  l2c210_sync
433  *  l2c210_inv_range (if 588369 is not applicable)
434  *  l2c210_clean_range
435  *  l2c210_flush_range (if 588369 is not applicable)
436  *  l2c210_flush_all (if 727915 is not applicable)
437  *
438  * Errata:
439  * 588369: PL310 R0P0->R1P0, fixed R2P0.
440  *      Affects: all clean+invalidate operations
441  *      clean and invalidate skips the invalidate step, so we need to issue
442  *      separate operations.  We also require the above debug workaround
443  *      enclosing this code fragment on affected parts.  On unaffected parts,
444  *      we must not use this workaround without the debug register writes
445  *      to avoid exposing a problem similar to 727915.
446  *
447  * 727915: PL310 R2P0->R3P0, fixed R3P1.
448  *      Affects: clean+invalidate by way
449  *      clean and invalidate by way runs in the background, and a store can
450  *      hit the line between the clean operation and invalidate operation,
451  *      resulting in the store being lost.
452  *
453  * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
454  *      Affects: 8x64-bit (double fill) line fetches
455  *      double fill line fetches can fail to cause dirty data to be evicted
456  *      from the cache before the new data overwrites the second line.
457  *
458  * 753970: PL310 R3P0, fixed R3P1.
459  *      Affects: sync
460  *      prevents merging writes after the sync operation, until another L2C
461  *      operation is performed (or a number of other conditions.)
462  *
463  * 769419: PL310 R0P0->R3P1, fixed R3P2.
464  *      Affects: store buffer
465  *      store buffer is not automatically drained.
466  */
467 static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
468 {
469         void __iomem *base = l2x0_base;
470
471         if ((start | end) & (CACHE_LINE_SIZE - 1)) {
472                 unsigned long flags;
473
474                 /* Erratum 588369 for both clean+invalidate operations */
475                 raw_spin_lock_irqsave(&l2x0_lock, flags);
476                 l2c_set_debug(base, 0x03);
477
478                 if (start & (CACHE_LINE_SIZE - 1)) {
479                         start &= ~(CACHE_LINE_SIZE - 1);
480                         writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
481                         writel_relaxed(start, base + L2X0_INV_LINE_PA);
482                         start += CACHE_LINE_SIZE;
483                 }
484
485                 if (end & (CACHE_LINE_SIZE - 1)) {
486                         end &= ~(CACHE_LINE_SIZE - 1);
487                         writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
488                         writel_relaxed(end, base + L2X0_INV_LINE_PA);
489                 }
490
491                 l2c_set_debug(base, 0x00);
492                 raw_spin_unlock_irqrestore(&l2x0_lock, flags);
493         }
494
495         __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
496         __l2c210_cache_sync(base);
497 }
498
499 static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
500 {
501         raw_spinlock_t *lock = &l2x0_lock;
502         unsigned long flags;
503         void __iomem *base = l2x0_base;
504
505         raw_spin_lock_irqsave(lock, flags);
506         while (start < end) {
507                 unsigned long blk_end = start + min(end - start, 4096UL);
508
509                 l2c_set_debug(base, 0x03);
510                 while (start < blk_end) {
511                         writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
512                         writel_relaxed(start, base + L2X0_INV_LINE_PA);
513                         start += CACHE_LINE_SIZE;
514                 }
515                 l2c_set_debug(base, 0x00);
516
517                 if (blk_end < end) {
518                         raw_spin_unlock_irqrestore(lock, flags);
519                         raw_spin_lock_irqsave(lock, flags);
520                 }
521         }
522         raw_spin_unlock_irqrestore(lock, flags);
523         __l2c210_cache_sync(base);
524 }
525
526 static void l2c310_flush_all_erratum(void)
527 {
528         void __iomem *base = l2x0_base;
529         unsigned long flags;
530
531         raw_spin_lock_irqsave(&l2x0_lock, flags);
532         l2c_set_debug(base, 0x03);
533         __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
534         l2c_set_debug(base, 0x00);
535         __l2c210_cache_sync(base);
536         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
537 }
538
539 static void __init l2c310_save(void __iomem *base)
540 {
541         unsigned revision;
542
543         l2c_save(base);
544
545         l2x0_saved_regs.tag_latency = readl_relaxed(base +
546                 L310_TAG_LATENCY_CTRL);
547         l2x0_saved_regs.data_latency = readl_relaxed(base +
548                 L310_DATA_LATENCY_CTRL);
549         l2x0_saved_regs.filter_end = readl_relaxed(base +
550                 L310_ADDR_FILTER_END);
551         l2x0_saved_regs.filter_start = readl_relaxed(base +
552                 L310_ADDR_FILTER_START);
553
554         revision = readl_relaxed(base + L2X0_CACHE_ID) &
555                         L2X0_CACHE_ID_RTL_MASK;
556
557         /* From r2p0, there is Prefetch offset/control register */
558         if (revision >= L310_CACHE_ID_RTL_R2P0)
559                 l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
560                                                         L310_PREFETCH_CTRL);
561
562         /* From r3p0, there is Power control register */
563         if (revision >= L310_CACHE_ID_RTL_R3P0)
564                 l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
565                                                         L310_POWER_CTRL);
566 }
567
568 static void l2c310_configure(void __iomem *base)
569 {
570         unsigned revision;
571
572         l2c_configure(base);
573
574         /* restore pl310 setup */
575         l2c_write_sec(l2x0_saved_regs.tag_latency, base,
576                       L310_TAG_LATENCY_CTRL);
577         l2c_write_sec(l2x0_saved_regs.data_latency, base,
578                       L310_DATA_LATENCY_CTRL);
579         l2c_write_sec(l2x0_saved_regs.filter_end, base,
580                       L310_ADDR_FILTER_END);
581         l2c_write_sec(l2x0_saved_regs.filter_start, base,
582                       L310_ADDR_FILTER_START);
583
584         revision = readl_relaxed(base + L2X0_CACHE_ID) &
585                                  L2X0_CACHE_ID_RTL_MASK;
586
587         if (revision >= L310_CACHE_ID_RTL_R2P0)
588                 l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
589                               L310_PREFETCH_CTRL);
590         if (revision >= L310_CACHE_ID_RTL_R3P0)
591                 l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
592                               L310_POWER_CTRL);
593 }
594
595 static int l2c310_starting_cpu(unsigned int cpu)
596 {
597         set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
598         return 0;
599 }
600
601 static int l2c310_dying_cpu(unsigned int cpu)
602 {
603         set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
604         return 0;
605 }
606
607 static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
608 {
609         unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
610         bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
611         u32 aux = l2x0_saved_regs.aux_ctrl;
612
613         if (rev >= L310_CACHE_ID_RTL_R2P0) {
614                 if (cortex_a9 && !l2x0_bresp_disable) {
615                         aux |= L310_AUX_CTRL_EARLY_BRESP;
616                         pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
617                 } else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
618                         pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
619                         aux &= ~L310_AUX_CTRL_EARLY_BRESP;
620                 }
621         }
622
623         if (cortex_a9 && !l2x0_flz_disable) {
624                 u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
625                 u32 acr = get_auxcr();
626
627                 pr_debug("Cortex-A9 ACR=0x%08x\n", acr);
628
629                 if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
630                         pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");
631
632                 if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
633                         pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");
634
635                 if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
636                         aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
637                         pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
638                 }
639         } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
640                 pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
641                 aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
642         }
643
644         /*
645          * Always enable non-secure access to the lockdown registers -
646          * we write to them as part of the L2C enable sequence so they
647          * need to be accessible.
648          */
649         l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN;
650
651         l2c_enable(base, num_lock);
652
653         /* Read back resulting AUX_CTRL value as it could have been altered. */
654         aux = readl_relaxed(base + L2X0_AUX_CTRL);
655
656         if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
657                 u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
658
659                 pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
660                         aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
661                         aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
662                         1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
663         }
664
665         /* r3p0 or later has power control register */
666         if (rev >= L310_CACHE_ID_RTL_R3P0) {
667                 u32 power_ctrl;
668
669                 power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
670                 pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
671                         power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
672                         power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
673         }
674
675         if (aux & L310_AUX_CTRL_FULL_LINE_ZERO)
676                 cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING,
677                                   "arm/l2x0:starting", l2c310_starting_cpu,
678                                   l2c310_dying_cpu);
679 }
680
681 static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
682         struct outer_cache_fns *fns)
683 {
684         unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
685         const char *errata[8];
686         unsigned n = 0;
687
688         if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
689             revision < L310_CACHE_ID_RTL_R2P0 &&
690             /* For bcm compatibility */
691             fns->inv_range == l2c210_inv_range) {
692                 fns->inv_range = l2c310_inv_range_erratum;
693                 fns->flush_range = l2c310_flush_range_erratum;
694                 errata[n++] = "588369";
695         }
696
697         if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
698             revision >= L310_CACHE_ID_RTL_R2P0 &&
699             revision < L310_CACHE_ID_RTL_R3P1) {
700                 fns->flush_all = l2c310_flush_all_erratum;
701                 errata[n++] = "727915";
702         }
703
704         if (revision >= L310_CACHE_ID_RTL_R3P0 &&
705             revision < L310_CACHE_ID_RTL_R3P2) {
706                 u32 val = l2x0_saved_regs.prefetch_ctrl;
707                 if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) {
708                         val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
709                         l2x0_saved_regs.prefetch_ctrl = val;
710                         errata[n++] = "752271";
711                 }
712         }
713
714         if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
715             revision == L310_CACHE_ID_RTL_R3P0) {
716                 sync_reg_offset = L2X0_DUMMY_REG;
717                 errata[n++] = "753970";
718         }
719
720         if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
721                 errata[n++] = "769419";
722
723         if (n) {
724                 unsigned i;
725
726                 pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
727                 for (i = 0; i < n; i++)
728                         pr_cont(" %s", errata[i]);
729                 pr_cont(" enabled\n");
730         }
731 }
732
733 static void l2c310_disable(void)
734 {
735         /*
736          * If full-line-of-zeros is enabled, we must first disable it in the
737          * Cortex-A9 auxiliary control register before disabling the L2 cache.
738          */
739         if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
740                 set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
741
742         l2c_disable();
743 }
744
745 static void l2c310_resume(void)
746 {
747         l2c_resume();
748
749         /* Re-enable full-line-of-zeros for Cortex-A9 */
750         if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
751                 set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
752 }
753
754 static void l2c310_unlock(void __iomem *base, unsigned num_lock)
755 {
756         if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN)
757                 l2c_unlock(base, num_lock);
758 }
759
760 static const struct l2c_init_data l2c310_init_fns __initconst = {
761         .type = "L2C-310",
762         .way_size_0 = SZ_8K,
763         .num_lock = 8,
764         .enable = l2c310_enable,
765         .fixup = l2c310_fixup,
766         .save = l2c310_save,
767         .configure = l2c310_configure,
768         .unlock = l2c310_unlock,
769         .outer_cache = {
770                 .inv_range = l2c210_inv_range,
771                 .clean_range = l2c210_clean_range,
772                 .flush_range = l2c210_flush_range,
773                 .flush_all = l2c210_flush_all,
774                 .disable = l2c310_disable,
775                 .sync = l2c210_sync,
776                 .resume = l2c310_resume,
777         },
778 };
779
780 static int __init __l2c_init(const struct l2c_init_data *data,
781                              u32 aux_val, u32 aux_mask, u32 cache_id, bool nosync)
782 {
783         struct outer_cache_fns fns;
784         unsigned way_size_bits, ways;
785         u32 aux, old_aux;
786
787         /*
788          * Save the pointer globally so that callbacks which do not receive
789          * context from callers can access the structure.
790          */
791         l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
792         if (!l2x0_data)
793                 return -ENOMEM;
794
795         /*
796          * Sanity check the aux values.  aux_mask is the bits we preserve
797          * from reading the hardware register, and aux_val is the bits we
798          * set.
799          */
800         if (aux_val & aux_mask)
801                 pr_alert("L2C: platform provided aux values permit register corruption.\n");
802
803         old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
804         aux &= aux_mask;
805         aux |= aux_val;
806
807         if (old_aux != aux)
808                 pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
809                         old_aux, aux);
810
811         /* Determine the number of ways */
812         switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
813         case L2X0_CACHE_ID_PART_L310:
814                 if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
815                         pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
816                 if (aux & (1 << 16))
817                         ways = 16;
818                 else
819                         ways = 8;
820                 break;
821
822         case L2X0_CACHE_ID_PART_L210:
823         case L2X0_CACHE_ID_PART_L220:
824                 ways = (aux >> 13) & 0xf;
825                 break;
826
827         case AURORA_CACHE_ID:
828                 ways = (aux >> 13) & 0xf;
829                 ways = 2 << ((ways + 1) >> 2);
830                 break;
831
832         default:
833                 /* Assume unknown chips have 8 ways */
834                 ways = 8;
835                 break;
836         }
837
838         l2x0_way_mask = (1 << ways) - 1;
839
840         /*
841          * way_size_0 is the size that a way_size value of zero would be
842          * given the calculation: way_size = way_size_0 << way_size_bits.
843          * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
844          * then way_size_0 would be 8k.
845          *
846          * L2 cache size = number of ways * way size.
847          */
848         way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
849                         L2C_AUX_CTRL_WAY_SIZE_SHIFT;
850         l2x0_size = ways * (data->way_size_0 << way_size_bits);
851
852         fns = data->outer_cache;
853         fns.write_sec = outer_cache.write_sec;
854         fns.configure = outer_cache.configure;
855         if (data->fixup)
856                 data->fixup(l2x0_base, cache_id, &fns);
857         if (nosync) {
858                 pr_info("L2C: disabling outer sync\n");
859                 fns.sync = NULL;
860         }
861
862         /*
863          * Check if l2x0 controller is already enabled.  If we are booting
864          * in non-secure mode accessing the below registers will fault.
865          */
866         if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
867                 l2x0_saved_regs.aux_ctrl = aux;
868
869                 data->enable(l2x0_base, data->num_lock);
870         }
871
872         outer_cache = fns;
873
874         /*
875          * It is strange to save the register state before initialisation,
876          * but hey, this is what the DT implementations decided to do.
877          */
878         if (data->save)
879                 data->save(l2x0_base);
880
881         /* Re-read it in case some bits are reserved. */
882         aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
883
884         pr_info("%s cache controller enabled, %d ways, %d kB\n",
885                 data->type, ways, l2x0_size >> 10);
886         pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
887                 data->type, cache_id, aux);
888
889         l2x0_pmu_register(l2x0_base, cache_id);
890
891         return 0;
892 }
893
894 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
895 {
896         const struct l2c_init_data *data;
897         u32 cache_id;
898
899         l2x0_base = base;
900
901         cache_id = readl_relaxed(base + L2X0_CACHE_ID);
902
903         switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
904         default:
905         case L2X0_CACHE_ID_PART_L210:
906                 data = &l2c210_data;
907                 break;
908
909         case L2X0_CACHE_ID_PART_L220:
910                 data = &l2c220_data;
911                 break;
912
913         case L2X0_CACHE_ID_PART_L310:
914                 data = &l2c310_init_fns;
915                 break;
916         }
917
918         /* Read back current (default) hardware configuration */
919         if (data->save)
920                 data->save(l2x0_base);
921
922         __l2c_init(data, aux_val, aux_mask, cache_id, false);
923 }
924
925 #ifdef CONFIG_OF
/* Non-zero when the Aurora DT node carries "wt-override"; set by aurora_of_parse(). */
static int l2_wt_override;

/*
 * Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree ("cache-id-part" property).
 */
static u32 cache_id_part_number_from_dt;
931
932 /**
933  * l2x0_cache_size_of_parse() - read cache size parameters from DT
934  * @np: the device tree node for the l2 cache
935  * @aux_val: pointer to machine-supplied auxilary register value, to
936  * be augmented by the call (bits to be set to 1)
937  * @aux_mask: pointer to machine-supplied auxilary register mask, to
938  * be augmented by the call (bits to be set to 0)
939  * @associativity: variable to return the calculated associativity in
940  * @max_way_size: the maximum size in bytes for the cache ways
941  */
942 static int __init l2x0_cache_size_of_parse(const struct device_node *np,
943                                             u32 *aux_val, u32 *aux_mask,
944                                             u32 *associativity,
945                                             u32 max_way_size)
946 {
947         u32 mask = 0, val = 0;
948         u32 cache_size = 0, sets = 0;
949         u32 way_size_bits = 1;
950         u32 way_size = 0;
951         u32 block_size = 0;
952         u32 line_size = 0;
953
954         of_property_read_u32(np, "cache-size", &cache_size);
955         of_property_read_u32(np, "cache-sets", &sets);
956         of_property_read_u32(np, "cache-block-size", &block_size);
957         of_property_read_u32(np, "cache-line-size", &line_size);
958
959         if (!cache_size || !sets)
960                 return -ENODEV;
961
962         /* All these l2 caches have the same line = block size actually */
963         if (!line_size) {
964                 if (block_size) {
965                         /* If linesize is not given, it is equal to blocksize */
966                         line_size = block_size;
967                 } else {
968                         /* Fall back to known size */
969                         pr_warn("L2C OF: no cache block/line size given: "
970                                 "falling back to default size %d bytes\n",
971                                 CACHE_LINE_SIZE);
972                         line_size = CACHE_LINE_SIZE;
973                 }
974         }
975
976         if (line_size != CACHE_LINE_SIZE)
977                 pr_warn("L2C OF: DT supplied line size %d bytes does "
978                         "not match hardware line size of %d bytes\n",
979                         line_size,
980                         CACHE_LINE_SIZE);
981
982         /*
983          * Since:
984          * set size = cache size / sets
985          * ways = cache size / (sets * line size)
986          * way size = cache size / (cache size / (sets * line size))
987          * way size = sets * line size
988          * associativity = ways = cache size / way size
989          */
990         way_size = sets * line_size;
991         *associativity = cache_size / way_size;
992
993         if (way_size > max_way_size) {
994                 pr_err("L2C OF: set size %dKB is too large\n", way_size);
995                 return -EINVAL;
996         }
997
998         pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
999                 cache_size, cache_size >> 10);
1000         pr_info("L2C OF: override line size: %d bytes\n", line_size);
1001         pr_info("L2C OF: override way size: %d bytes (%dKB)\n",
1002                 way_size, way_size >> 10);
1003         pr_info("L2C OF: override associativity: %d\n", *associativity);
1004
1005         /*
1006          * Calculates the bits 17:19 to set for way size:
1007          * 512KB -> 6, 256KB -> 5, ... 16KB -> 1
1008          */
1009         way_size_bits = ilog2(way_size >> 10) - 3;
1010         if (way_size_bits < 1 || way_size_bits > 6) {
1011                 pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n",
1012                        way_size);
1013                 return -EINVAL;
1014         }
1015
1016         mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
1017         val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT);
1018
1019         *aux_val &= ~mask;
1020         *aux_val |= val;
1021         *aux_mask &= ~mask;
1022
1023         return 0;
1024 }
1025
1026 static void __init l2x0_of_parse(const struct device_node *np,
1027                                  u32 *aux_val, u32 *aux_mask)
1028 {
1029         u32 data[2] = { 0, 0 };
1030         u32 tag = 0;
1031         u32 dirty = 0;
1032         u32 val = 0, mask = 0;
1033         u32 assoc;
1034         int ret;
1035
1036         of_property_read_u32(np, "arm,tag-latency", &tag);
1037         if (tag) {
1038                 mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
1039                 val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
1040         }
1041
1042         of_property_read_u32_array(np, "arm,data-latency",
1043                                    data, ARRAY_SIZE(data));
1044         if (data[0] && data[1]) {
1045                 mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
1046                         L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
1047                 val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
1048                        ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
1049         }
1050
1051         of_property_read_u32(np, "arm,dirty-latency", &dirty);
1052         if (dirty) {
1053                 mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
1054                 val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
1055         }
1056
1057         if (of_property_read_bool(np, "arm,parity-enable")) {
1058                 mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
1059                 val |= L2C_AUX_CTRL_PARITY_ENABLE;
1060         } else if (of_property_read_bool(np, "arm,parity-disable")) {
1061                 mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
1062         }
1063
1064         if (of_property_read_bool(np, "arm,shared-override")) {
1065                 mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
1066                 val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
1067         }
1068
1069         ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
1070         if (ret)
1071                 return;
1072
1073         if (assoc > 8) {
1074                 pr_err("l2x0 of: cache setting yield too high associativity\n");
1075                 pr_err("l2x0 of: %d calculated, max 8\n", assoc);
1076         } else {
1077                 mask |= L2X0_AUX_CTRL_ASSOC_MASK;
1078                 val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT);
1079         }
1080
1081         *aux_val &= ~mask;
1082         *aux_val |= val;
1083         *aux_mask &= ~mask;
1084 }
1085
1086 static const struct l2c_init_data of_l2c210_data __initconst = {
1087         .type = "L2C-210",
1088         .way_size_0 = SZ_8K,
1089         .num_lock = 1,
1090         .of_parse = l2x0_of_parse,
1091         .enable = l2c_enable,
1092         .save = l2c_save,
1093         .configure = l2c_configure,
1094         .unlock = l2c_unlock,
1095         .outer_cache = {
1096                 .inv_range   = l2c210_inv_range,
1097                 .clean_range = l2c210_clean_range,
1098                 .flush_range = l2c210_flush_range,
1099                 .flush_all   = l2c210_flush_all,
1100                 .disable     = l2c_disable,
1101                 .sync        = l2c210_sync,
1102                 .resume      = l2c_resume,
1103         },
1104 };
1105
1106 static const struct l2c_init_data of_l2c220_data __initconst = {
1107         .type = "L2C-220",
1108         .way_size_0 = SZ_8K,
1109         .num_lock = 1,
1110         .of_parse = l2x0_of_parse,
1111         .enable = l2c220_enable,
1112         .save = l2c_save,
1113         .configure = l2c_configure,
1114         .unlock = l2c220_unlock,
1115         .outer_cache = {
1116                 .inv_range   = l2c220_inv_range,
1117                 .clean_range = l2c220_clean_range,
1118                 .flush_range = l2c220_flush_range,
1119                 .flush_all   = l2c220_flush_all,
1120                 .disable     = l2c_disable,
1121                 .sync        = l2c220_sync,
1122                 .resume      = l2c_resume,
1123         },
1124 };
1125
1126 static void __init l2c310_of_parse(const struct device_node *np,
1127         u32 *aux_val, u32 *aux_mask)
1128 {
1129         u32 data[3] = { 0, 0, 0 };
1130         u32 tag[3] = { 0, 0, 0 };
1131         u32 filter[2] = { 0, 0 };
1132         u32 assoc;
1133         u32 prefetch;
1134         u32 power;
1135         u32 val;
1136         int ret;
1137
1138         of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
1139         if (tag[0] && tag[1] && tag[2])
1140                 l2x0_saved_regs.tag_latency =
1141                         L310_LATENCY_CTRL_RD(tag[0] - 1) |
1142                         L310_LATENCY_CTRL_WR(tag[1] - 1) |
1143                         L310_LATENCY_CTRL_SETUP(tag[2] - 1);
1144
1145         of_property_read_u32_array(np, "arm,data-latency",
1146                                    data, ARRAY_SIZE(data));
1147         if (data[0] && data[1] && data[2])
1148                 l2x0_saved_regs.data_latency =
1149                         L310_LATENCY_CTRL_RD(data[0] - 1) |
1150                         L310_LATENCY_CTRL_WR(data[1] - 1) |
1151                         L310_LATENCY_CTRL_SETUP(data[2] - 1);
1152
1153         of_property_read_u32_array(np, "arm,filter-ranges",
1154                                    filter, ARRAY_SIZE(filter));
1155         if (filter[1]) {
1156                 l2x0_saved_regs.filter_end =
1157                                         ALIGN(filter[0] + filter[1], SZ_1M);
1158                 l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
1159                                         | L310_ADDR_FILTER_EN;
1160         }
1161
1162         ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
1163         if (!ret) {
1164                 switch (assoc) {
1165                 case 16:
1166                         *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
1167                         *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
1168                         *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
1169                         break;
1170                 case 8:
1171                         *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
1172                         *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
1173                         break;
1174                 default:
1175                         pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
1176                                assoc);
1177                         break;
1178                 }
1179         }
1180
1181         if (of_property_read_bool(np, "arm,shared-override")) {
1182                 *aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
1183                 *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
1184         }
1185
1186         if (of_property_read_bool(np, "arm,parity-enable")) {
1187                 *aux_val |= L2C_AUX_CTRL_PARITY_ENABLE;
1188                 *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
1189         } else if (of_property_read_bool(np, "arm,parity-disable")) {
1190                 *aux_val &= ~L2C_AUX_CTRL_PARITY_ENABLE;
1191                 *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
1192         }
1193
1194         if (of_property_read_bool(np, "arm,early-bresp-disable"))
1195                 l2x0_bresp_disable = true;
1196
1197         if (of_property_read_bool(np, "arm,full-line-zero-disable"))
1198                 l2x0_flz_disable = true;
1199
1200         prefetch = l2x0_saved_regs.prefetch_ctrl;
1201
1202         ret = of_property_read_u32(np, "arm,double-linefill", &val);
1203         if (ret == 0) {
1204                 if (val)
1205                         prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
1206                 else
1207                         prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
1208         } else if (ret != -EINVAL) {
1209                 pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
1210         }
1211
1212         ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
1213         if (ret == 0) {
1214                 if (val)
1215                         prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
1216                 else
1217                         prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
1218         } else if (ret != -EINVAL) {
1219                 pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
1220         }
1221
1222         ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
1223         if (ret == 0) {
1224                 if (!val)
1225                         prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
1226                 else
1227                         prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
1228         } else if (ret != -EINVAL) {
1229                 pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
1230         }
1231
1232         ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
1233         if (ret == 0) {
1234                 if (val)
1235                         prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
1236                 else
1237                         prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
1238         } else if (ret != -EINVAL) {
1239                 pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
1240         }
1241
1242         ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
1243         if (ret == 0) {
1244                 prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
1245                 prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
1246         } else if (ret != -EINVAL) {
1247                 pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
1248         }
1249
1250         ret = of_property_read_u32(np, "prefetch-data", &val);
1251         if (ret == 0) {
1252                 if (val)
1253                         prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
1254                 else
1255                         prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
1256         } else if (ret != -EINVAL) {
1257                 pr_err("L2C-310 OF prefetch-data property value is missing\n");
1258         }
1259
1260         ret = of_property_read_u32(np, "prefetch-instr", &val);
1261         if (ret == 0) {
1262                 if (val)
1263                         prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
1264                 else
1265                         prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
1266         } else if (ret != -EINVAL) {
1267                 pr_err("L2C-310 OF prefetch-instr property value is missing\n");
1268         }
1269
1270         l2x0_saved_regs.prefetch_ctrl = prefetch;
1271
1272         power = l2x0_saved_regs.pwr_ctrl |
1273                 L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN;
1274
1275         ret = of_property_read_u32(np, "arm,dynamic-clock-gating", &val);
1276         if (!ret) {
1277                 if (!val)
1278                         power &= ~L310_DYNAMIC_CLK_GATING_EN;
1279         } else if (ret != -EINVAL) {
1280                 pr_err("L2C-310 OF dynamic-clock-gating property value is missing or invalid\n");
1281         }
1282         ret = of_property_read_u32(np, "arm,standby-mode", &val);
1283         if (!ret) {
1284                 if (!val)
1285                         power &= ~L310_STNDBY_MODE_EN;
1286         } else if (ret != -EINVAL) {
1287                 pr_err("L2C-310 OF standby-mode property value is missing or invalid\n");
1288         }
1289
1290         l2x0_saved_regs.pwr_ctrl = power;
1291 }
1292
1293 static const struct l2c_init_data of_l2c310_data __initconst = {
1294         .type = "L2C-310",
1295         .way_size_0 = SZ_8K,
1296         .num_lock = 8,
1297         .of_parse = l2c310_of_parse,
1298         .enable = l2c310_enable,
1299         .fixup = l2c310_fixup,
1300         .save  = l2c310_save,
1301         .configure = l2c310_configure,
1302         .unlock = l2c310_unlock,
1303         .outer_cache = {
1304                 .inv_range   = l2c210_inv_range,
1305                 .clean_range = l2c210_clean_range,
1306                 .flush_range = l2c210_flush_range,
1307                 .flush_all   = l2c210_flush_all,
1308                 .disable     = l2c310_disable,
1309                 .sync        = l2c210_sync,
1310                 .resume      = l2c310_resume,
1311         },
1312 };
1313
1314 /*
1315  * This is a variant of the of_l2c310_data with .sync set to
1316  * NULL. Outer sync operations are not needed when the system is I/O
1317  * coherent, and potentially harmful in certain situations (PCIe/PL310
1318  * deadlock on Armada 375/38x due to hardware I/O coherency). The
1319  * other operations are kept because they are infrequent (therefore do
1320  * not cause the deadlock in practice) and needed for secondary CPU
1321  * boot and other power management activities.
1322  */
1323 static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
1324         .type = "L2C-310 Coherent",
1325         .way_size_0 = SZ_8K,
1326         .num_lock = 8,
1327         .of_parse = l2c310_of_parse,
1328         .enable = l2c310_enable,
1329         .fixup = l2c310_fixup,
1330         .save  = l2c310_save,
1331         .configure = l2c310_configure,
1332         .unlock = l2c310_unlock,
1333         .outer_cache = {
1334                 .inv_range   = l2c210_inv_range,
1335                 .clean_range = l2c210_clean_range,
1336                 .flush_range = l2c210_flush_range,
1337                 .flush_all   = l2c210_flush_all,
1338                 .disable     = l2c310_disable,
1339                 .resume      = l2c310_resume,
1340         },
1341 };
1342
1343 /*
1344  * Note that the end addresses passed to Linux primitives are
1345  * noninclusive, while the hardware cache range operations use
1346  * inclusive start and end addresses.
1347  */
1348 static unsigned long aurora_range_end(unsigned long start, unsigned long end)
1349 {
1350         /*
1351          * Limit the number of cache lines processed at once,
1352          * since cache range operations stall the CPU pipeline
1353          * until completion.
1354          */
1355         if (end > start + MAX_RANGE_SIZE)
1356                 end = start + MAX_RANGE_SIZE;
1357
1358         /*
1359          * Cache range operations can't straddle a page boundary.
1360          */
1361         if (end > PAGE_ALIGN(start+1))
1362                 end = PAGE_ALIGN(start+1);
1363
1364         return end;
1365 }
1366
1367 static void aurora_pa_range(unsigned long start, unsigned long end,
1368                             unsigned long offset)
1369 {
1370         void __iomem *base = l2x0_base;
1371         unsigned long range_end;
1372         unsigned long flags;
1373
1374         /*
1375          * round start and end adresses up to cache line size
1376          */
1377         start &= ~(CACHE_LINE_SIZE - 1);
1378         end = ALIGN(end, CACHE_LINE_SIZE);
1379
1380         /*
1381          * perform operation on all full cache lines between 'start' and 'end'
1382          */
1383         while (start < end) {
1384                 range_end = aurora_range_end(start, end);
1385
1386                 raw_spin_lock_irqsave(&l2x0_lock, flags);
1387                 writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
1388                 writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
1389                 raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1390
1391                 writel_relaxed(0, base + AURORA_SYNC_REG);
1392                 start = range_end;
1393         }
1394 }
1395 static void aurora_inv_range(unsigned long start, unsigned long end)
1396 {
1397         aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
1398 }
1399
1400 static void aurora_clean_range(unsigned long start, unsigned long end)
1401 {
1402         /*
1403          * If L2 is forced to WT, the L2 will always be clean and we
1404          * don't need to do anything here.
1405          */
1406         if (!l2_wt_override)
1407                 aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
1408 }
1409
1410 static void aurora_flush_range(unsigned long start, unsigned long end)
1411 {
1412         if (l2_wt_override)
1413                 aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
1414         else
1415                 aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
1416 }
1417
1418 static void aurora_flush_all(void)
1419 {
1420         void __iomem *base = l2x0_base;
1421         unsigned long flags;
1422
1423         /* clean all ways */
1424         raw_spin_lock_irqsave(&l2x0_lock, flags);
1425         __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
1426         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1427
1428         writel_relaxed(0, base + AURORA_SYNC_REG);
1429 }
1430
1431 static void aurora_cache_sync(void)
1432 {
1433         writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
1434 }
1435
1436 static void aurora_disable(void)
1437 {
1438         void __iomem *base = l2x0_base;
1439         unsigned long flags;
1440
1441         raw_spin_lock_irqsave(&l2x0_lock, flags);
1442         __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
1443         writel_relaxed(0, base + AURORA_SYNC_REG);
1444         l2c_write_sec(0, base, L2X0_CTRL);
1445         dsb(st);
1446         raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1447 }
1448
1449 static void aurora_save(void __iomem *base)
1450 {
1451         l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
1452         l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
1453 }
1454
1455 /*
1456  * For Aurora cache in no outer mode, enable via the CP15 coprocessor
1457  * broadcasting of cache commands to L2.
1458  */
1459 static void __init aurora_enable_no_outer(void __iomem *base,
1460         unsigned num_lock)
1461 {
1462         u32 u;
1463
1464         asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
1465         u |= AURORA_CTRL_FW;            /* Set the FW bit */
1466         asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
1467
1468         isb();
1469
1470         l2c_enable(base, num_lock);
1471 }
1472
1473 static void __init aurora_fixup(void __iomem *base, u32 cache_id,
1474         struct outer_cache_fns *fns)
1475 {
1476         sync_reg_offset = AURORA_SYNC_REG;
1477 }
1478
1479 static void __init aurora_of_parse(const struct device_node *np,
1480                                 u32 *aux_val, u32 *aux_mask)
1481 {
1482         u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
1483         u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
1484
1485         of_property_read_u32(np, "cache-id-part",
1486                         &cache_id_part_number_from_dt);
1487
1488         /* Determine and save the write policy */
1489         l2_wt_override = of_property_read_bool(np, "wt-override");
1490
1491         if (l2_wt_override) {
1492                 val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
1493                 mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
1494         }
1495
1496         *aux_val &= ~mask;
1497         *aux_val |= val;
1498         *aux_mask &= ~mask;
1499 }
1500
1501 static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
1502         .type = "Aurora",
1503         .way_size_0 = SZ_4K,
1504         .num_lock = 4,
1505         .of_parse = aurora_of_parse,
1506         .enable = l2c_enable,
1507         .fixup = aurora_fixup,
1508         .save  = aurora_save,
1509         .configure = l2c_configure,
1510         .unlock = l2c_unlock,
1511         .outer_cache = {
1512                 .inv_range   = aurora_inv_range,
1513                 .clean_range = aurora_clean_range,
1514                 .flush_range = aurora_flush_range,
1515                 .flush_all   = aurora_flush_all,
1516                 .disable     = aurora_disable,
1517                 .sync        = aurora_cache_sync,
1518                 .resume      = l2c_resume,
1519         },
1520 };
1521
1522 static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
1523         .type = "Aurora",
1524         .way_size_0 = SZ_4K,
1525         .num_lock = 4,
1526         .of_parse = aurora_of_parse,
1527         .enable = aurora_enable_no_outer,
1528         .fixup = aurora_fixup,
1529         .save  = aurora_save,
1530         .configure = l2c_configure,
1531         .unlock = l2c_unlock,
1532         .outer_cache = {
1533                 .resume      = l2c_resume,
1534         },
1535 };
1536
1537 /*
1538  * For certain Broadcom SoCs, depending on the address range, different offsets
1539  * need to be added to the address before passing it to L2 for
1540  * invalidation/clean/flush
1541  *
1542  * Section Address Range              Offset        EMI
1543  *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
1544  *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
1545  *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
1546  *
 * When the start and end addresses fall in two different sections, we
 * need to break the L2 operation into two, each within its own section.
 * For example, if we need to invalidate a range that starts at 0xBFFF0000
 * and ends at 0xC0001000, we need to invalidate 1) 0xBFFF0000 - 0xBFFFFFFF
 * and 2) 0xC0000000 - 0xC0001000
1552  *
1553  * Note 1:
1554  * By breaking a single L2 operation into two, we may potentially suffer some
1555  * performance hit, but keep in mind the cross section case is very rare
1556  *
1557  * Note 2:
1558  * We do not need to handle the case when the start address is in
1559  * Section 1 and the end address is in Section 3, since it is not a valid use
1560  * case
1561  *
1562  * Note 3:
1563  * Section 1 in practical terms can no longer be used on rev A2. Because of
1564  * that the code does not need to handle section 1 at all.
1565  *
1566  */
/* First address of section 2 (SYS EMI) and of section 3 (VC EMI) */
#define BCM_SYS_EMI_START_ADDR        0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL

/* Offsets added to an address before it is handed to the L2 */
#define BCM_SYS_EMI_OFFSET            0x40000000UL
#define BCM_VC_EMI_OFFSET             0x80000000UL

/* Return non-zero when @addr lies in section 2, the SYS EMI window. */
static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
	if (addr < BCM_SYS_EMI_START_ADDR)
		return 0;

	return addr < BCM_VC_EMI_SEC3_START_ADDR;
}

/* Translate @addr into the address the L2 expects for its section. */
static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
	unsigned long offset = bcm_addr_is_sys_emi(addr) ?
			       BCM_SYS_EMI_OFFSET : BCM_VC_EMI_OFFSET;

	return addr + offset;
}
1586
1587 static void bcm_inv_range(unsigned long start, unsigned long end)
1588 {
1589         unsigned long new_start, new_end;
1590
1591         BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1592
1593         if (unlikely(end <= start))
1594                 return;
1595
1596         new_start = bcm_l2_phys_addr(start);
1597         new_end = bcm_l2_phys_addr(end);
1598
1599         /* normal case, no cross section between start and end */
1600         if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1601                 l2c210_inv_range(new_start, new_end);
1602                 return;
1603         }
1604
1605         /* They cross sections, so it can only be a cross from section
1606          * 2 to section 3
1607          */
1608         l2c210_inv_range(new_start,
1609                 bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1610         l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1611                 new_end);
1612 }
1613
1614 static void bcm_clean_range(unsigned long start, unsigned long end)
1615 {
1616         unsigned long new_start, new_end;
1617
1618         BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1619
1620         if (unlikely(end <= start))
1621                 return;
1622
1623         new_start = bcm_l2_phys_addr(start);
1624         new_end = bcm_l2_phys_addr(end);
1625
1626         /* normal case, no cross section between start and end */
1627         if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1628                 l2c210_clean_range(new_start, new_end);
1629                 return;
1630         }
1631
1632         /* They cross sections, so it can only be a cross from section
1633          * 2 to section 3
1634          */
1635         l2c210_clean_range(new_start,
1636                 bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1637         l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1638                 new_end);
1639 }
1640
1641 static void bcm_flush_range(unsigned long start, unsigned long end)
1642 {
1643         unsigned long new_start, new_end;
1644
1645         BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1646
1647         if (unlikely(end <= start))
1648                 return;
1649
1650         if ((end - start) >= l2x0_size) {
1651                 outer_cache.flush_all();
1652                 return;
1653         }
1654
1655         new_start = bcm_l2_phys_addr(start);
1656         new_end = bcm_l2_phys_addr(end);
1657
1658         /* normal case, no cross section between start and end */
1659         if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1660                 l2c210_flush_range(new_start, new_end);
1661                 return;
1662         }
1663
1664         /* They cross sections, so it can only be a cross from section
1665          * 2 to section 3
1666          */
1667         l2c210_flush_range(new_start,
1668                 bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1669         l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1670                 new_end);
1671 }
1672
1673 /* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
1674 static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
1675         .type = "BCM-L2C-310",
1676         .way_size_0 = SZ_8K,
1677         .num_lock = 8,
1678         .of_parse = l2c310_of_parse,
1679         .enable = l2c310_enable,
1680         .save  = l2c310_save,
1681         .configure = l2c310_configure,
1682         .unlock = l2c310_unlock,
1683         .outer_cache = {
1684                 .inv_range   = bcm_inv_range,
1685                 .clean_range = bcm_clean_range,
1686                 .flush_range = bcm_flush_range,
1687                 .flush_all   = l2c210_flush_all,
1688                 .disable     = l2c310_disable,
1689                 .sync        = l2c210_sync,
1690                 .resume      = l2c310_resume,
1691         },
1692 };
1693
1694 static void __init tauros3_save(void __iomem *base)
1695 {
1696         l2c_save(base);
1697
1698         l2x0_saved_regs.aux2_ctrl =
1699                 readl_relaxed(base + TAUROS3_AUX2_CTRL);
1700         l2x0_saved_regs.prefetch_ctrl =
1701                 readl_relaxed(base + L310_PREFETCH_CTRL);
1702 }
1703
1704 static void tauros3_configure(void __iomem *base)
1705 {
1706         l2c_configure(base);
1707         writel_relaxed(l2x0_saved_regs.aux2_ctrl,
1708                        base + TAUROS3_AUX2_CTRL);
1709         writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
1710                        base + L310_PREFETCH_CTRL);
1711 }
1712
1713 static const struct l2c_init_data of_tauros3_data __initconst = {
1714         .type = "Tauros3",
1715         .way_size_0 = SZ_8K,
1716         .num_lock = 8,
1717         .enable = l2c_enable,
1718         .save  = tauros3_save,
1719         .configure = tauros3_configure,
1720         .unlock = l2c_unlock,
1721         /* Tauros3 broadcasts L1 cache operations to L2 */
1722         .outer_cache = {
1723                 .resume      = l2c_resume,
1724         },
1725 };
1726
1727 #define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
1728 static const struct of_device_id l2x0_ids[] __initconst = {
1729         L2C_ID("arm,l210-cache", of_l2c210_data),
1730         L2C_ID("arm,l220-cache", of_l2c220_data),
1731         L2C_ID("arm,pl310-cache", of_l2c310_data),
1732         L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1733         L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
1734         L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
1735         L2C_ID("marvell,tauros3-cache", of_tauros3_data),
1736         /* Deprecated IDs */
1737         L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1738         {}
1739 };
1740
1741 int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
1742 {
1743         const struct l2c_init_data *data;
1744         struct device_node *np;
1745         struct resource res;
1746         u32 cache_id, old_aux;
1747         u32 cache_level = 2;
1748         bool nosync = false;
1749
1750         np = of_find_matching_node(NULL, l2x0_ids);
1751         if (!np)
1752                 return -ENODEV;
1753
1754         if (of_address_to_resource(np, 0, &res))
1755                 return -ENODEV;
1756
1757         l2x0_base = ioremap(res.start, resource_size(&res));
1758         if (!l2x0_base)
1759                 return -ENOMEM;
1760
1761         l2x0_saved_regs.phy_base = res.start;
1762
1763         data = of_match_node(l2x0_ids, np)->data;
1764
1765         if (of_device_is_compatible(np, "arm,pl310-cache") &&
1766             of_property_read_bool(np, "arm,io-coherent"))
1767                 data = &of_l2c310_coherent_data;
1768
1769         old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
1770         if (old_aux != ((old_aux & aux_mask) | aux_val)) {
1771                 pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
1772                         old_aux, (old_aux & aux_mask) | aux_val);
1773         } else if (aux_mask != ~0U && aux_val != 0) {
1774                 pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  Please remove them.\n");
1775         }
1776
1777         /* All L2 caches are unified, so this property should be specified */
1778         if (!of_property_read_bool(np, "cache-unified"))
1779                 pr_err("L2C: device tree omits to specify unified cache\n");
1780
1781         if (of_property_read_u32(np, "cache-level", &cache_level))
1782                 pr_err("L2C: device tree omits to specify cache-level\n");
1783
1784         if (cache_level != 2)
1785                 pr_err("L2C: device tree specifies invalid cache level\n");
1786
1787         nosync = of_property_read_bool(np, "arm,outer-sync-disable");
1788
1789         /* Read back current (default) hardware configuration */
1790         if (data->save)
1791                 data->save(l2x0_base);
1792
1793         /* L2 configuration can only be changed if the cache is disabled */
1794         if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
1795                 if (data->of_parse)
1796                         data->of_parse(np, &aux_val, &aux_mask);
1797
1798         if (cache_id_part_number_from_dt)
1799                 cache_id = cache_id_part_number_from_dt;
1800         else
1801                 cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
1802
1803         return __l2c_init(data, aux_val, aux_mask, cache_id, nosync);
1804 }
1805 #endif