librecmc/linux-libre.git (Linux-libre 4.19.8-gnu): drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
63 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
64
65 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
66 #define TLB_SPIN_COUNT                  10
67
68 /* Maximum number of context banks per SMMU */
69 #define ARM_SMMU_MAX_CBS                128
70
71 /* SMMU global address space */
72 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
73 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
74
75 /*
76  * SMMU global address space with conditional offset to access secure
77  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
78  * nsGFSYNR0: 0x450)
79  */
80 #define ARM_SMMU_GR0_NS(smmu)                                           \
81         ((smmu)->base +                                                 \
82                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
83                         ? 0x400 : 0))
84
85 /*
86  * Some 64-bit registers only make sense to write atomically, but in such
87  * cases all the data relevant to AArch32 formats lies within the lower word,
88  * therefore this actually makes more sense than it might first appear.
89  */
90 #ifdef CONFIG_64BIT
91 #define smmu_write_atomic_lq            writeq_relaxed
92 #else
93 #define smmu_write_atomic_lq            writel_relaxed
94 #endif
95
96 /* Translation context bank */
97 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
98
99 #define MSI_IOVA_BASE                   0x8000000
100 #define MSI_IOVA_LENGTH                 0x100000
101
102 static int force_stage;
103 module_param(force_stage, int, S_IRUGO);
104 MODULE_PARM_DESC(force_stage,
105         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
106 static bool disable_bypass;
107 module_param(disable_bypass, bool, S_IRUGO);
108 MODULE_PARM_DESC(disable_bypass,
109         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
110
111 enum arm_smmu_arch_version {
112         ARM_SMMU_V1,
113         ARM_SMMU_V1_64K,
114         ARM_SMMU_V2,
115 };
116
117 enum arm_smmu_implementation {
118         GENERIC_SMMU,
119         ARM_MMU500,
120         CAVIUM_SMMUV2,
121 };
122
123 struct arm_smmu_s2cr {
124         struct iommu_group              *group;
125         int                             count;
126         enum arm_smmu_s2cr_type         type;
127         enum arm_smmu_s2cr_privcfg      privcfg;
128         u8                              cbndx;
129 };
130
131 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
132         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
133 }
134
135 struct arm_smmu_smr {
136         u16                             mask;
137         u16                             id;
138         bool                            valid;
139 };
140
141 struct arm_smmu_cb {
142         u64                             ttbr[2];
143         u32                             tcr[2];
144         u32                             mair[2];
145         struct arm_smmu_cfg             *cfg;
146 };
147
148 struct arm_smmu_master_cfg {
149         struct arm_smmu_device          *smmu;
150         s16                             smendx[];
151 };
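/*
 * The per-master configuration hangs off dev->iommu_fwspec->iommu_priv.
 * The helpers below recover the owning SMMU and translate each of the
 * master's firmware-supplied stream IDs into its stream map entry index
 * (INVALID_SMENDX until an entry has been allocated for it).
 */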
152 #define INVALID_SMENDX                  -1
153 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
154 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
155 #define fwspec_smendx(fw, i) \
156         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
157 #define for_each_cfg_sme(fw, i, idx) \
158         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
159
160 struct arm_smmu_device {
161         struct device                   *dev;
162
163         void __iomem                    *base;
164         void __iomem                    *cb_base;
165         unsigned long                   pgshift;
166
167 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
168 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
169 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
170 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
171 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
172 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
173 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
174 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
177 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
179 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
180         u32                             features;
181
182 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
183         u32                             options;
184         enum arm_smmu_arch_version      version;
185         enum arm_smmu_implementation    model;
186
187         u32                             num_context_banks;
188         u32                             num_s2_context_banks;
189         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
190         struct arm_smmu_cb              *cbs;
191         atomic_t                        irptndx;
192
193         u32                             num_mapping_groups;
194         u16                             streamid_mask;
195         u16                             smr_mask_mask;
196         struct arm_smmu_smr             *smrs;
197         struct arm_smmu_s2cr            *s2crs;
198         struct mutex                    stream_map_mutex;
199
200         unsigned long                   va_size;
201         unsigned long                   ipa_size;
202         unsigned long                   pa_size;
203         unsigned long                   pgsize_bitmap;
204
205         u32                             num_global_irqs;
206         u32                             num_context_irqs;
207         unsigned int                    *irqs;
208
209         u32                             cavium_id_base; /* Specific to Cavium */
210
211         spinlock_t                      global_sync_lock;
212
213         /* IOMMU core code handle */
214         struct iommu_device             iommu;
215 };
216
217 enum arm_smmu_context_fmt {
218         ARM_SMMU_CTX_FMT_NONE,
219         ARM_SMMU_CTX_FMT_AARCH64,
220         ARM_SMMU_CTX_FMT_AARCH32_L,
221         ARM_SMMU_CTX_FMT_AARCH32_S,
222 };
223
224 struct arm_smmu_cfg {
225         u8                              cbndx;
226         u8                              irptndx;
227         union {
228                 u16                     asid;
229                 u16                     vmid;
230         };
231         u32                             cbar;
232         enum arm_smmu_context_fmt       fmt;
233 };
234 #define INVALID_IRPTNDX                 0xff
235
236 enum arm_smmu_domain_stage {
237         ARM_SMMU_DOMAIN_S1 = 0,
238         ARM_SMMU_DOMAIN_S2,
239         ARM_SMMU_DOMAIN_NESTED,
240         ARM_SMMU_DOMAIN_BYPASS,
241 };
242
243 struct arm_smmu_domain {
244         struct arm_smmu_device          *smmu;
245         struct io_pgtable_ops           *pgtbl_ops;
246         const struct iommu_gather_ops   *tlb_ops;
247         struct arm_smmu_cfg             cfg;
248         enum arm_smmu_domain_stage      stage;
249         struct mutex                    init_mutex; /* Protects smmu pointer */
250         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
251         struct iommu_domain             domain;
252 };
253
254 struct arm_smmu_option_prop {
255         u32 opt;
256         const char *prop;
257 };
258
259 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
260
261 static bool using_legacy_binding, using_generic_binding;
262
263 static struct arm_smmu_option_prop arm_smmu_options[] = {
264         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
265         { 0, NULL},
266 };
267
268 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
269 {
270         return container_of(dom, struct arm_smmu_domain, domain);
271 }
272
273 static void parse_driver_options(struct arm_smmu_device *smmu)
274 {
275         int i = 0;
276
277         do {
278                 if (of_property_read_bool(smmu->dev->of_node,
279                                                 arm_smmu_options[i].prop)) {
280                         smmu->options |= arm_smmu_options[i].opt;
281                         dev_notice(smmu->dev, "option %s\n",
282                                 arm_smmu_options[i].prop);
283                 }
284         } while (arm_smmu_options[++i].opt);
285 }
286
287 static struct device_node *dev_get_dev_node(struct device *dev)
288 {
289         if (dev_is_pci(dev)) {
290                 struct pci_bus *bus = to_pci_dev(dev)->bus;
291
292                 while (!pci_is_root_bus(bus))
293                         bus = bus->parent;
294                 return of_node_get(bus->bridge->parent->of_node);
295         }
296
297         return of_node_get(dev->of_node);
298 }
299
300 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
301 {
302         *((__be32 *)data) = cpu_to_be32(alias);
303         return 0; /* Continue walking */
304 }
305
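/*
 * driver_for_each_device() callback for the legacy "mmu-masters" binding:
 * walk one SMMU's "mmu-masters" phandle list looking for the master node
 * saved in it->node. On a match, hand the SMMU's struct device back
 * through @data and return 1 to stop the search.
 */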
306 static int __find_legacy_master_phandle(struct device *dev, void *data)
307 {
308         struct of_phandle_iterator *it = *(void **)data;
309         struct device_node *np = it->node;
310         int err;
311
312         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
313                             "#stream-id-cells", 0)
314                 if (it->node == np) {
315                         *(void **)data = dev;
316                         return 1;
317                 }
318         it->node = np;
319         return err == -ENOENT ? 0 : err;
320 }
321
322 static struct platform_driver arm_smmu_driver;
323 static struct iommu_ops arm_smmu_ops;
324
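/*
 * Handle the deprecated "mmu-masters"/"#stream-id-cells" binding: locate
 * the SMMU that lists this master, then seed the device's iommu_fwspec
 * with the stream IDs from that entry (for PCI, a single ID derived from
 * the Requester ID).
 */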
325 static int arm_smmu_register_legacy_master(struct device *dev,
326                                            struct arm_smmu_device **smmu)
327 {
328         struct device *smmu_dev;
329         struct device_node *np;
330         struct of_phandle_iterator it;
331         void *data = &it;
332         u32 *sids;
333         __be32 pci_sid;
334         int err;
335
336         np = dev_get_dev_node(dev);
337         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
338                 of_node_put(np);
339                 return -ENODEV;
340         }
341
342         it.node = np;
343         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
344                                      __find_legacy_master_phandle);
345         smmu_dev = data;
346         of_node_put(np);
347         if (err == 0)
348                 return -ENODEV;
349         if (err < 0)
350                 return err;
351
352         if (dev_is_pci(dev)) {
353                 /* "mmu-masters" assumes Stream ID == Requester ID */
354                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
355                                        &pci_sid);
356                 it.cur = &pci_sid;
357                 it.cur_count = 1;
358         }
359
360         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
361                                 &arm_smmu_ops);
362         if (err)
363                 return err;
364
365         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
366         if (!sids)
367                 return -ENOMEM;
368
369         *smmu = dev_get_drvdata(smmu_dev);
370         of_phandle_iterator_args(&it, sids, it.cur_count);
371         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
372         kfree(sids);
373         return err;
374 }
375
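/*
 * Atomically claim a free bit in @map between @start and @end, retrying if
 * another caller grabs the same bit first. Returns the claimed index, or
 * -ENOSPC once the range is exhausted.
 */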
376 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
377 {
378         int idx;
379
380         do {
381                 idx = find_next_zero_bit(map, end, start);
382                 if (idx == end)
383                         return -ENOSPC;
384         } while (test_and_set_bit(idx, map));
385
386         return idx;
387 }
388
389 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
390 {
391         clear_bit(idx, map);
392 }
393
394 /* Wait for any pending TLB invalidations to complete */
395 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
396                                 void __iomem *sync, void __iomem *status)
397 {
398         unsigned int spin_cnt, delay;
399
400         writel_relaxed(0, sync);
401         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
402                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
403                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
404                                 return;
405                         cpu_relax();
406                 }
407                 udelay(delay);
408         }
409         dev_err_ratelimited(smmu->dev,
410                             "TLB sync timed out -- SMMU may be deadlocked\n");
411 }
412
413 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
414 {
415         void __iomem *base = ARM_SMMU_GR0(smmu);
416         unsigned long flags;
417
418         spin_lock_irqsave(&smmu->global_sync_lock, flags);
419         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
420                             base + ARM_SMMU_GR0_sTLBGSTATUS);
421         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
422 }
423
424 static void arm_smmu_tlb_sync_context(void *cookie)
425 {
426         struct arm_smmu_domain *smmu_domain = cookie;
427         struct arm_smmu_device *smmu = smmu_domain->smmu;
428         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
429         unsigned long flags;
430
431         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
432         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
433                             base + ARM_SMMU_CB_TLBSTATUS);
434         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
435 }
436
437 static void arm_smmu_tlb_sync_vmid(void *cookie)
438 {
439         struct arm_smmu_domain *smmu_domain = cookie;
440
441         arm_smmu_tlb_sync_global(smmu_domain->smmu);
442 }
443
444 static void arm_smmu_tlb_inv_context_s1(void *cookie)
445 {
446         struct arm_smmu_domain *smmu_domain = cookie;
447         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
448         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
449
450         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
451         arm_smmu_tlb_sync_context(cookie);
452 }
453
454 static void arm_smmu_tlb_inv_context_s2(void *cookie)
455 {
456         struct arm_smmu_domain *smmu_domain = cookie;
457         struct arm_smmu_device *smmu = smmu_domain->smmu;
458         void __iomem *base = ARM_SMMU_GR0(smmu);
459
460         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
461         arm_smmu_tlb_sync_global(smmu);
462 }
463
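/*
 * Issue (but do not wait for) TLB invalidations covering [iova, iova + size)
 * at the given granule: per-VA/ASID operations for stage 1, per-IPA
 * operations for stage 2, using the leaf-only variants when the caller says
 * only leaf entries have changed.
 */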
464 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
465                                           size_t granule, bool leaf, void *cookie)
466 {
467         struct arm_smmu_domain *smmu_domain = cookie;
468         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
469         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
470         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
471
472         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
473                 wmb();
474
475         if (stage1) {
476                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
477
478                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
479                         iova &= ~0xfffUL;
480                         iova |= cfg->asid;
481                         do {
482                                 writel_relaxed(iova, reg);
483                                 iova += granule;
484                         } while (size -= granule);
485                 } else {
486                         iova >>= 12;
487                         iova |= (u64)cfg->asid << 48;
488                         do {
489                                 writeq_relaxed(iova, reg);
490                                 iova += granule >> 12;
491                         } while (size -= granule);
492                 }
493         } else {
494                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
495                               ARM_SMMU_CB_S2_TLBIIPAS2;
496                 iova >>= 12;
497                 do {
498                         smmu_write_atomic_lq(iova, reg);
499                         iova += granule >> 12;
500                 } while (size -= granule);
501         }
502 }
503
504 /*
505  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
506  * almost negligible, but the benefit of getting the first one in as far ahead
507  * of the sync as possible is significant, hence we don't just make this a
508  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
509  */
510 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
511                                          size_t granule, bool leaf, void *cookie)
512 {
513         struct arm_smmu_domain *smmu_domain = cookie;
514         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
515
516         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
517                 wmb();
518
519         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
520 }
521
522 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
523         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
524         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
525         .tlb_sync       = arm_smmu_tlb_sync_context,
526 };
527
528 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
529         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
530         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
531         .tlb_sync       = arm_smmu_tlb_sync_context,
532 };
533
534 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
535         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
536         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
537         .tlb_sync       = arm_smmu_tlb_sync_vmid,
538 };
539
540 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
541 {
542         u32 fsr, fsynr;
543         unsigned long iova;
544         struct iommu_domain *domain = dev;
545         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
546         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
547         struct arm_smmu_device *smmu = smmu_domain->smmu;
548         void __iomem *cb_base;
549
550         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
551         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
552
553         if (!(fsr & FSR_FAULT))
554                 return IRQ_NONE;
555
556         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
557         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
558
559         dev_err_ratelimited(smmu->dev,
560         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
561                             fsr, iova, fsynr, cfg->cbndx);
562
563         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
564         return IRQ_HANDLED;
565 }
566
567 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
568 {
569         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
570         struct arm_smmu_device *smmu = dev;
571         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
572
573         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
574         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
575         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
576         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
577
578         if (!gfsr)
579                 return IRQ_NONE;
580
581         dev_err_ratelimited(smmu->dev,
582                 "Unexpected global fault, this could be serious\n");
583         dev_err_ratelimited(smmu->dev,
584                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
585                 gfsr, gfsynr0, gfsynr1, gfsynr2);
586
587         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
588         return IRQ_HANDLED;
589 }
590
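/*
 * Capture the io-pgtable configuration (TTBCR, TTBRs and MAIRs/PRRR+NMRR)
 * into the software shadow for this context bank; the hardware registers
 * are programmed later by arm_smmu_write_context_bank().
 */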
591 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
592                                        struct io_pgtable_cfg *pgtbl_cfg)
593 {
594         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
595         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
596         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
597
598         cb->cfg = cfg;
599
600         /* TTBCR */
601         if (stage1) {
602                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
603                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
604                 } else {
605                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
606                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
607                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
608                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
609                                 cb->tcr[1] |= TTBCR2_AS;
610                 }
611         } else {
612                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
613         }
614
615         /* TTBRs */
616         if (stage1) {
617                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
618                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
619                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
620                 } else {
621                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
622                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
623                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
624                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
625                 }
626         } else {
627                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
628         }
629
630         /* MAIRs (stage-1 only) */
631         if (stage1) {
632                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
633                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
634                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
635                 } else {
636                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
637                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
638                 }
639         }
640 }
641
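/*
 * Program context bank @idx from its software shadow: CBA2R/CBAR, the
 * translation table registers, and finally SCTLR to enable translation.
 * Unassigned banks simply have their SCTLR cleared.
 */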
642 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
643 {
644         u32 reg;
645         bool stage1;
646         struct arm_smmu_cb *cb = &smmu->cbs[idx];
647         struct arm_smmu_cfg *cfg = cb->cfg;
648         void __iomem *cb_base, *gr1_base;
649
650         cb_base = ARM_SMMU_CB(smmu, idx);
651
652         /* Unassigned context banks only need disabling */
653         if (!cfg) {
654                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
655                 return;
656         }
657
658         gr1_base = ARM_SMMU_GR1(smmu);
659         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
660
661         /* CBA2R */
662         if (smmu->version > ARM_SMMU_V1) {
663                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
664                         reg = CBA2R_RW64_64BIT;
665                 else
666                         reg = CBA2R_RW64_32BIT;
667                 /* 16-bit VMIDs live in CBA2R */
668                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
669                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
670
671                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
672         }
673
674         /* CBAR */
675         reg = cfg->cbar;
676         if (smmu->version < ARM_SMMU_V2)
677                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
678
679         /*
680          * Use the weakest shareability/memory types, so they are
681          * overridden by the ttbcr/pte.
682          */
683         if (stage1) {
684                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
685                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
686         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
687                 /* 8-bit VMIDs live in CBAR */
688                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
689         }
690         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
691
692         /*
693          * TTBCR
694          * We must write this before the TTBRs, since it determines the
695          * access behaviour of some fields (in particular, ASID[15:8]).
696          */
697         if (stage1 && smmu->version > ARM_SMMU_V1)
698                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
699         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
700
701         /* TTBRs */
702         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
703                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
704                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
705                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
706         } else {
707                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
708                 if (stage1)
709                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
710         }
711
712         /* MAIRs (stage-1 only) */
713         if (stage1) {
714                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
715                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
716         }
717
718         /* SCTLR */
719         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
720         if (stage1)
721                 reg |= SCTLR_S1_ASIDPNE;
722         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
723                 reg |= SCTLR_E;
724
725         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
726 }
727
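/*
 * Finalise a domain against a particular SMMU: resolve the translation
 * stage and context format, allocate a context bank and ASID/VMID, build
 * the io-pgtable and wire up the context fault interrupt.
 */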
728 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
729                                         struct arm_smmu_device *smmu)
730 {
731         int irq, start, ret = 0;
732         unsigned long ias, oas;
733         struct io_pgtable_ops *pgtbl_ops;
734         struct io_pgtable_cfg pgtbl_cfg;
735         enum io_pgtable_fmt fmt;
736         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
737         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
738
739         mutex_lock(&smmu_domain->init_mutex);
740         if (smmu_domain->smmu)
741                 goto out_unlock;
742
743         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
744                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
745                 smmu_domain->smmu = smmu;
746                 goto out_unlock;
747         }
748
749         /*
750          * Mapping the requested stage onto what we support is surprisingly
751          * complicated, mainly because the spec allows S1+S2 SMMUs without
752          * support for nested translation. That means we end up with the
753          * following table:
754          *
755          * Requested        Supported        Actual
756          *     S1               N              S1
757          *     S1             S1+S2            S1
758          *     S1               S2             S2
759          *     S1               S1             S1
760          *     N                N              N
761          *     N              S1+S2            S2
762          *     N                S2             S2
763          *     N                S1             S1
764          *
765          * Note that you can't actually request stage-2 mappings.
766          */
767         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
768                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
769         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
770                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
771
772         /*
773          * Choosing a suitable context format is even more fiddly. Until we
774          * grow some way for the caller to express a preference, and/or move
775          * the decision into the io-pgtable code where it arguably belongs,
776          * just aim for the closest thing to the rest of the system, and hope
777          * that the hardware isn't esoteric enough that we can't assume AArch64
778          * support to be a superset of AArch32 support...
779          */
780         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
781                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
782         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
783             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
784             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
785             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
786                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
787         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
788             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
789                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
790                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
791                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
792
793         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
794                 ret = -EINVAL;
795                 goto out_unlock;
796         }
797
798         switch (smmu_domain->stage) {
799         case ARM_SMMU_DOMAIN_S1:
800                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
801                 start = smmu->num_s2_context_banks;
802                 ias = smmu->va_size;
803                 oas = smmu->ipa_size;
804                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
805                         fmt = ARM_64_LPAE_S1;
806                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
807                         fmt = ARM_32_LPAE_S1;
808                         ias = min(ias, 32UL);
809                         oas = min(oas, 40UL);
810                 } else {
811                         fmt = ARM_V7S;
812                         ias = min(ias, 32UL);
813                         oas = min(oas, 32UL);
814                 }
815                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
816                 break;
817         case ARM_SMMU_DOMAIN_NESTED:
818                 /*
819                  * We will likely want to change this if/when KVM gets
820                  * involved.
821                  */
822         case ARM_SMMU_DOMAIN_S2:
823                 cfg->cbar = CBAR_TYPE_S2_TRANS;
824                 start = 0;
825                 ias = smmu->ipa_size;
826                 oas = smmu->pa_size;
827                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
828                         fmt = ARM_64_LPAE_S2;
829                 } else {
830                         fmt = ARM_32_LPAE_S2;
831                         ias = min(ias, 40UL);
832                         oas = min(oas, 40UL);
833                 }
834                 if (smmu->version == ARM_SMMU_V2)
835                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
836                 else
837                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
838                 break;
839         default:
840                 ret = -EINVAL;
841                 goto out_unlock;
842         }
843         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
844                                       smmu->num_context_banks);
845         if (ret < 0)
846                 goto out_unlock;
847
848         cfg->cbndx = ret;
849         if (smmu->version < ARM_SMMU_V2) {
850                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
851                 cfg->irptndx %= smmu->num_context_irqs;
852         } else {
853                 cfg->irptndx = cfg->cbndx;
854         }
855
856         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
857                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
858         else
859                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
860
861         pgtbl_cfg = (struct io_pgtable_cfg) {
862                 .pgsize_bitmap  = smmu->pgsize_bitmap,
863                 .ias            = ias,
864                 .oas            = oas,
865                 .tlb            = smmu_domain->tlb_ops,
866                 .iommu_dev      = smmu->dev,
867         };
868
869         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
870                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
871
872         smmu_domain->smmu = smmu;
873         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
874         if (!pgtbl_ops) {
875                 ret = -ENOMEM;
876                 goto out_clear_smmu;
877         }
878
879         /* Update the domain's page sizes to reflect the page table format */
880         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
881         domain->geometry.aperture_end = (1UL << ias) - 1;
882         domain->geometry.force_aperture = true;
883
884         /* Initialise the context bank with our page table cfg */
885         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
886         arm_smmu_write_context_bank(smmu, cfg->cbndx);
887
888         /*
889          * Request context fault interrupt. Do this last to avoid the
890          * handler seeing a half-initialised domain state.
891          */
892         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
893         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
894                                IRQF_SHARED, "arm-smmu-context-fault", domain);
895         if (ret < 0) {
896                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
897                         cfg->irptndx, irq);
898                 cfg->irptndx = INVALID_IRPTNDX;
899         }
900
901         mutex_unlock(&smmu_domain->init_mutex);
902
903         /* Publish page table ops for map/unmap */
904         smmu_domain->pgtbl_ops = pgtbl_ops;
905         return 0;
906
907 out_clear_smmu:
908         smmu_domain->smmu = NULL;
909 out_unlock:
910         mutex_unlock(&smmu_domain->init_mutex);
911         return ret;
912 }
913
914 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
915 {
916         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
917         struct arm_smmu_device *smmu = smmu_domain->smmu;
918         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
919         int irq;
920
921         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
922                 return;
923
924         /*
925          * Disable the context bank and free the page tables before
926          * releasing the context bank index.
927          */
928         smmu->cbs[cfg->cbndx].cfg = NULL;
929         arm_smmu_write_context_bank(smmu, cfg->cbndx);
930
931         if (cfg->irptndx != INVALID_IRPTNDX) {
932                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
933                 devm_free_irq(smmu->dev, irq, domain);
934         }
935
936         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
937         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
938 }
939
940 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
941 {
942         struct arm_smmu_domain *smmu_domain;
943
944         if (type != IOMMU_DOMAIN_UNMANAGED &&
945             type != IOMMU_DOMAIN_DMA &&
946             type != IOMMU_DOMAIN_IDENTITY)
947                 return NULL;
948         /*
949          * Allocate the domain and initialise some of its data structures.
950          * We can't really do anything meaningful until we've added a
951          * master.
952          */
953         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
954         if (!smmu_domain)
955                 return NULL;
956
957         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
958             iommu_get_dma_cookie(&smmu_domain->domain))) {
959                 kfree(smmu_domain);
960                 return NULL;
961         }
962
963         mutex_init(&smmu_domain->init_mutex);
964         spin_lock_init(&smmu_domain->cb_lock);
965
966         return &smmu_domain->domain;
967 }
968
969 static void arm_smmu_domain_free(struct iommu_domain *domain)
970 {
971         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
972
973         /*
974          * Free the domain resources. We assume that all devices have
975          * already been detached.
976          */
977         iommu_put_dma_cookie(domain);
978         arm_smmu_destroy_domain_context(domain);
979         kfree(smmu_domain);
980 }
981
982 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
983 {
984         struct arm_smmu_smr *smr = smmu->smrs + idx;
985         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
986
987         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
988                 reg |= SMR_VALID;
989         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
990 }
991
992 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
993 {
994         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
995         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
996                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
997                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
998
999         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1000             smmu->smrs[idx].valid)
1001                 reg |= S2CR_EXIDVALID;
1002         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1003 }
1004
1005 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1006 {
1007         arm_smmu_write_s2cr(smmu, idx);
1008         if (smmu->smrs)
1009                 arm_smmu_write_smr(smmu, idx);
1010 }
1011
1012 /*
1013  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1014  * should be called after sCR0 is written.
1015  */
1016 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1017 {
1018         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1019         u32 smr;
1020
1021         if (!smmu->smrs)
1022                 return;
1023
1024         /*
1025          * SMR.ID bits may not be preserved if the corresponding MASK
1026          * bits are set, so check each one separately. We can reject
1027          * masters later if they try to claim IDs outside these masks.
1028          */
1029         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1030         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1031         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1032         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1033
1034         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1035         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1036         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1037         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1038 }
1039
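/*
 * Find a stream map entry for the given ID/mask. With stream indexing (no
 * SMRs), the ID is simply used as the index. Otherwise, return the index of
 * an existing entry that entirely covers the new one, or the first free
 * index, or -EINVAL on a partial (and therefore unsafe) overlap, or -ENOSPC
 * if the stream match table is full.
 */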
1040 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1041 {
1042         struct arm_smmu_smr *smrs = smmu->smrs;
1043         int i, free_idx = -ENOSPC;
1044
1045         /* Stream indexing is blissfully easy */
1046         if (!smrs)
1047                 return id;
1048
1049         /* Validating SMRs is... less so */
1050         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1051                 if (!smrs[i].valid) {
1052                         /*
1053                          * Note the first free entry we come across, which
1054                          * we'll claim in the end if nothing else matches.
1055                          */
1056                         if (free_idx < 0)
1057                                 free_idx = i;
1058                         continue;
1059                 }
1060                 /*
1061                  * If the new entry is _entirely_ matched by an existing entry,
1062                  * then reuse that, with the guarantee that there also cannot
1063                  * be any subsequent conflicting entries. In normal use we'd
1064                  * expect simply identical entries for this case, but there's
1065                  * no harm in accommodating the generalisation.
1066                  */
1067                 if ((mask & smrs[i].mask) == mask &&
1068                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1069                         return i;
1070                 /*
1071                  * If the new entry has any other overlap with an existing one,
1072                  * though, then there always exists at least one stream ID
1073                  * which would cause a conflict, and we can't allow that risk.
1074                  */
1075                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1076                         return -EINVAL;
1077         }
1078
1079         return free_idx;
1080 }
1081
1082 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1083 {
1084         if (--smmu->s2crs[idx].count)
1085                 return false;
1086
1087         smmu->s2crs[idx] = s2cr_init_val;
1088         if (smmu->smrs)
1089                 smmu->smrs[idx].valid = false;
1090
1091         return true;
1092 }
1093
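/*
 * Allocate (or share) a stream map entry for each of the master's stream
 * IDs, record the indices in cfg->smendx[], and program the SMRs/S2CRs once
 * the IOMMU group is resolved. The s2cr count field tracks how many masters
 * share each entry.
 */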
1094 static int arm_smmu_master_alloc_smes(struct device *dev)
1095 {
1096         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1097         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1098         struct arm_smmu_device *smmu = cfg->smmu;
1099         struct arm_smmu_smr *smrs = smmu->smrs;
1100         struct iommu_group *group;
1101         int i, idx, ret;
1102
1103         mutex_lock(&smmu->stream_map_mutex);
1104         /* Figure out a viable stream map entry allocation */
1105         for_each_cfg_sme(fwspec, i, idx) {
1106                 u16 sid = fwspec->ids[i];
1107                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1108
1109                 if (idx != INVALID_SMENDX) {
1110                         ret = -EEXIST;
1111                         goto out_err;
1112                 }
1113
1114                 ret = arm_smmu_find_sme(smmu, sid, mask);
1115                 if (ret < 0)
1116                         goto out_err;
1117
1118                 idx = ret;
1119                 if (smrs && smmu->s2crs[idx].count == 0) {
1120                         smrs[idx].id = sid;
1121                         smrs[idx].mask = mask;
1122                         smrs[idx].valid = true;
1123                 }
1124                 smmu->s2crs[idx].count++;
1125                 cfg->smendx[i] = (s16)idx;
1126         }
1127
1128         group = iommu_group_get_for_dev(dev);
1129         if (!group)
1130                 group = ERR_PTR(-ENOMEM);
1131         if (IS_ERR(group)) {
1132                 ret = PTR_ERR(group);
1133                 goto out_err;
1134         }
1135         iommu_group_put(group);
1136
1137         /* It worked! Now, poke the actual hardware */
1138         for_each_cfg_sme(fwspec, i, idx) {
1139                 arm_smmu_write_sme(smmu, idx);
1140                 smmu->s2crs[idx].group = group;
1141         }
1142
1143         mutex_unlock(&smmu->stream_map_mutex);
1144         return 0;
1145
1146 out_err:
1147         while (i--) {
1148                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1149                 cfg->smendx[i] = INVALID_SMENDX;
1150         }
1151         mutex_unlock(&smmu->stream_map_mutex);
1152         return ret;
1153 }
1154
1155 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1156 {
1157         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1158         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1159         int i, idx;
1160
1161         mutex_lock(&smmu->stream_map_mutex);
1162         for_each_cfg_sme(fwspec, i, idx) {
1163                 if (arm_smmu_free_sme(smmu, idx))
1164                         arm_smmu_write_sme(smmu, idx);
1165                 cfg->smendx[i] = INVALID_SMENDX;
1166         }
1167         mutex_unlock(&smmu->stream_map_mutex);
1168 }
1169
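/*
 * Point every stream map entry used by this master at the domain's context
 * bank (or straight to bypass for an identity domain), rewriting S2CRs only
 * where something actually changes.
 */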
1170 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1171                                       struct iommu_fwspec *fwspec)
1172 {
1173         struct arm_smmu_device *smmu = smmu_domain->smmu;
1174         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1175         u8 cbndx = smmu_domain->cfg.cbndx;
1176         enum arm_smmu_s2cr_type type;
1177         int i, idx;
1178
1179         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1180                 type = S2CR_TYPE_BYPASS;
1181         else
1182                 type = S2CR_TYPE_TRANS;
1183
1184         for_each_cfg_sme(fwspec, i, idx) {
1185                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1186                         continue;
1187
1188                 s2cr[idx].type = type;
1189                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1190                 s2cr[idx].cbndx = cbndx;
1191                 arm_smmu_write_s2cr(smmu, idx);
1192         }
1193         return 0;
1194 }
1195
1196 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1197 {
1198         int ret;
1199         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1200         struct arm_smmu_device *smmu;
1201         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1202
1203         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1204                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1205                 return -ENXIO;
1206         }
1207
1208         /*
1209          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1210          * domains between of_xlate() and add_device() - we have no way to cope
1211          * with that, so until ARM gets converted to rely on groups and default
1212          * domains, just say no (but more politely than by dereferencing NULL).
1213          * This should be at least a WARN_ON once that's sorted.
1214          */
1215         if (!fwspec->iommu_priv)
1216                 return -ENODEV;
1217
1218         smmu = fwspec_smmu(fwspec);
1219         /* Ensure that the domain is finalised */
1220         ret = arm_smmu_init_domain_context(domain, smmu);
1221         if (ret < 0)
1222                 return ret;
1223
1224         /*
1225          * Sanity check the domain. We don't support domains across
1226          * different SMMUs.
1227          */
1228         if (smmu_domain->smmu != smmu) {
1229                 dev_err(dev,
1230                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1231                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1232                 return -EINVAL;
1233         }
1234
1235         /* Looks ok, so add the device to the domain */
1236         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1237 }
1238
1239 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1240                         phys_addr_t paddr, size_t size, int prot)
1241 {
1242         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1243
1244         if (!ops)
1245                 return -ENODEV;
1246
1247         return ops->map(ops, iova, paddr, size, prot);
1248 }
1249
1250 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1251                              size_t size)
1252 {
1253         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1254
1255         if (!ops)
1256                 return 0;
1257
1258         return ops->unmap(ops, iova, size);
1259 }
1260
1261 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1262 {
1263         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1264
1265         if (smmu_domain->tlb_ops)
1266                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1267 }
1268
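/*
 * Resolve an IOVA using the hardware ATS1PR address translation operation,
 * polling ATSR for completion and falling back to a software table walk if
 * the operation times out.
 */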
1269 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1270                                               dma_addr_t iova)
1271 {
1272         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1273         struct arm_smmu_device *smmu = smmu_domain->smmu;
1274         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1275         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1276         struct device *dev = smmu->dev;
1277         void __iomem *cb_base;
1278         u32 tmp;
1279         u64 phys;
1280         unsigned long va, flags;
1281
1282         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1283
1284         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1285         /* ATS1 registers can only be written atomically */
1286         va = iova & ~0xfffUL;
1287         if (smmu->version == ARM_SMMU_V2)
1288                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1289         else /* Register is only 32-bit in v1 */
1290                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1291
1292         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1293                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1294                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1295                 dev_err(dev,
1296                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1297                         &iova);
1298                 return ops->iova_to_phys(ops, iova);
1299         }
1300
1301         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1302         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1303         if (phys & CB_PAR_F) {
1304                 dev_err(dev, "translation fault!\n");
1305                 dev_err(dev, "PAR = 0x%llx\n", phys);
1306                 return 0;
1307         }
1308
1309         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1310 }
1311
1312 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1313                                         dma_addr_t iova)
1314 {
1315         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1316         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1317
1318         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1319                 return iova;
1320
1321         if (!ops)
1322                 return 0;
1323
1324         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1325                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1326                 return arm_smmu_iova_to_phys_hard(domain, iova);
1327
1328         return ops->iova_to_phys(ops, iova);
1329 }
1330
1331 static bool arm_smmu_capable(enum iommu_cap cap)
1332 {
1333         switch (cap) {
1334         case IOMMU_CAP_CACHE_COHERENCY:
1335                 /*
1336                  * Return true here as the SMMU can always send out coherent
1337                  * requests.
1338                  */
1339                 return true;
1340         case IOMMU_CAP_NOEXEC:
1341                 return true;
1342         default:
1343                 return false;
1344         }
1345 }
1346
1347 static int arm_smmu_match_node(struct device *dev, void *data)
1348 {
1349         return dev->fwnode == data;
1350 }
1351
1352 static
1353 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1354 {
1355         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1356                                                 fwnode, arm_smmu_match_node);
1357         put_device(dev);
1358         return dev ? dev_get_drvdata(dev) : NULL;
1359 }
1360
1361 static int arm_smmu_add_device(struct device *dev)
1362 {
1363         struct arm_smmu_device *smmu;
1364         struct arm_smmu_master_cfg *cfg;
1365         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1366         int i, ret;
1367
1368         if (using_legacy_binding) {
1369                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1370
1371                 /*
1372                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1373                  * will allocate/initialise a new one. Thus we need to update fwspec for
1374                  * later use.
1375                  */
1376                 fwspec = dev->iommu_fwspec;
1377                 if (ret)
1378                         goto out_free;
1379         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1380                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1381         } else {
1382                 return -ENODEV;
1383         }
1384
1385         ret = -EINVAL;
1386         for (i = 0; i < fwspec->num_ids; i++) {
1387                 u16 sid = fwspec->ids[i];
1388                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1389
1390                 if (sid & ~smmu->streamid_mask) {
1391                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1392                                 sid, smmu->streamid_mask);
1393                         goto out_free;
1394                 }
1395                 if (mask & ~smmu->smr_mask_mask) {
1396                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1397                                 mask, smmu->smr_mask_mask);
1398                         goto out_free;
1399                 }
1400         }
1401
1402         ret = -ENOMEM;
1403         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1404                       GFP_KERNEL);
1405         if (!cfg)
1406                 goto out_free;
1407
1408         cfg->smmu = smmu;
1409         fwspec->iommu_priv = cfg;
1410         while (i--)
1411                 cfg->smendx[i] = INVALID_SMENDX;
1412
1413         ret = arm_smmu_master_alloc_smes(dev);
1414         if (ret)
1415                 goto out_cfg_free;
1416
1417         iommu_device_link(&smmu->iommu, dev);
1418
1419         return 0;
1420
1421 out_cfg_free:
1422         kfree(cfg);
1423 out_free:
1424         iommu_fwspec_free(dev);
1425         return ret;
1426 }
1427
1428 static void arm_smmu_remove_device(struct device *dev)
1429 {
1430         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1431         struct arm_smmu_master_cfg *cfg;
1432         struct arm_smmu_device *smmu;
1433
1434
1435         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1436                 return;
1437
1438         cfg  = fwspec->iommu_priv;
1439         smmu = cfg->smmu;
1440
1441         iommu_device_unlink(&smmu->iommu, dev);
1442         arm_smmu_master_free_smes(fwspec);
1443         iommu_group_remove_device(dev);
1444         kfree(fwspec->iommu_priv);
1445         iommu_fwspec_free(dev);
1446 }
1447
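/*
 * Devices sharing a stream mapping entry must end up in the same IOMMU
 * group: reuse whichever group is already attached to one of this device's
 * SMEs (bailing out if they disagree), and only fall back to allocating a
 * fresh PCI or generic group when none exists yet.
 */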
1448 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1449 {
1450         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1451         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1452         struct iommu_group *group = NULL;
1453         int i, idx;
1454
1455         for_each_cfg_sme(fwspec, i, idx) {
1456                 if (group && smmu->s2crs[idx].group &&
1457                     group != smmu->s2crs[idx].group)
1458                         return ERR_PTR(-EINVAL);
1459
1460                 group = smmu->s2crs[idx].group;
1461         }
1462
1463         if (group)
1464                 return iommu_group_ref_get(group);
1465
1466         if (dev_is_pci(dev))
1467                 group = pci_device_group(dev);
1468         else
1469                 group = generic_device_group(dev);
1470
1471         return group;
1472 }
1473
1474 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1475                                     enum iommu_attr attr, void *data)
1476 {
1477         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1478
1479         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1480                 return -EINVAL;
1481
1482         switch (attr) {
1483         case DOMAIN_ATTR_NESTING:
1484                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1485                 return 0;
1486         default:
1487                 return -ENODEV;
1488         }
1489 }
1490
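/*
 * DOMAIN_ATTR_NESTING selects between stage-1 and nested translation, and
 * can only be changed while the domain is not yet attached to an SMMU
 * (i.e. before smmu_domain->smmu is set and the context bank is configured).
 */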
1491 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1492                                     enum iommu_attr attr, void *data)
1493 {
1494         int ret = 0;
1495         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1496
1497         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1498                 return -EINVAL;
1499
1500         mutex_lock(&smmu_domain->init_mutex);
1501
1502         switch (attr) {
1503         case DOMAIN_ATTR_NESTING:
1504                 if (smmu_domain->smmu) {
1505                         ret = -EPERM;
1506                         goto out_unlock;
1507                 }
1508
1509                 if (*(int *)data)
1510                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1511                 else
1512                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1513
1514                 break;
1515         default:
1516                 ret = -ENODEV;
1517         }
1518
1519 out_unlock:
1520         mutex_unlock(&smmu_domain->init_mutex);
1521         return ret;
1522 }
1523
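/*
 * Decode a DT "iommus" specifier into a single fwspec ID: the first cell is
 * the stream ID, and an optional second cell (or a "stream-match-mask"
 * property on the SMMU node) supplies an SMR mask packed above
 * SMR_MASK_SHIFT.  For example, with made-up values:
 *
 *	iommus = <&smmu 0x400 0x7f80>;
 *
 * would yield fwid = (0x7f80 << SMR_MASK_SHIFT) | 0x400.
 */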
1524 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1525 {
1526         u32 mask, fwid = 0;
1527
1528         if (args->args_count > 0)
1529                 fwid |= (u16)args->args[0];
1530
1531         if (args->args_count > 1)
1532                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1533         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1534                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1535
1536         return iommu_fwspec_add_ids(dev, &fwid, 1);
1537 }
1538
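/*
 * Reserve a software-managed MSI window ([MSI_IOVA_BASE, +MSI_IOVA_LENGTH))
 * so that MSI doorbells can be mapped without colliding with regular DMA
 * allocations, then let iommu_dma_get_resv_regions() add any further
 * regions the platform describes.
 */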
1539 static void arm_smmu_get_resv_regions(struct device *dev,
1540                                       struct list_head *head)
1541 {
1542         struct iommu_resv_region *region;
1543         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1544
1545         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1546                                          prot, IOMMU_RESV_SW_MSI);
1547         if (!region)
1548                 return;
1549
1550         list_add_tail(&region->list, head);
1551
1552         iommu_dma_get_resv_regions(dev, head);
1553 }
1554
1555 static void arm_smmu_put_resv_regions(struct device *dev,
1556                                       struct list_head *head)
1557 {
1558         struct iommu_resv_region *entry, *next;
1559
1560         list_for_each_entry_safe(entry, next, head, list)
1561                 kfree(entry);
1562 }
1563
1564 static struct iommu_ops arm_smmu_ops = {
1565         .capable                = arm_smmu_capable,
1566         .domain_alloc           = arm_smmu_domain_alloc,
1567         .domain_free            = arm_smmu_domain_free,
1568         .attach_dev             = arm_smmu_attach_dev,
1569         .map                    = arm_smmu_map,
1570         .unmap                  = arm_smmu_unmap,
1571         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1572         .iotlb_sync             = arm_smmu_iotlb_sync,
1573         .iova_to_phys           = arm_smmu_iova_to_phys,
1574         .add_device             = arm_smmu_add_device,
1575         .remove_device          = arm_smmu_remove_device,
1576         .device_group           = arm_smmu_device_group,
1577         .domain_get_attr        = arm_smmu_domain_get_attr,
1578         .domain_set_attr        = arm_smmu_domain_set_attr,
1579         .of_xlate               = arm_smmu_of_xlate,
1580         .get_resv_regions       = arm_smmu_get_resv_regions,
1581         .put_resv_regions       = arm_smmu_put_resv_regions,
1582         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1583 };
1584
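/*
 * Bring the SMMU into a known state: clear the recorded global faults,
 * reset every stream mapping group and context bank, apply the MMU-500
 * errata workarounds, invalidate the TLBs and finally program sCR0 with
 * our chosen global configuration.
 */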
1585 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1586 {
1587         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1588         int i;
1589         u32 reg, major;
1590
1591         /* clear global FSR */
1592         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1593         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1594
1595         /*
1596          * Reset stream mapping groups: Initial values mark all SMRn as
1597          * invalid and all S2CRn as bypass unless overridden.
1598          */
1599         for (i = 0; i < smmu->num_mapping_groups; ++i)
1600                 arm_smmu_write_sme(smmu, i);
1601
1602         if (smmu->model == ARM_MMU500) {
1603                 /*
1604                  * Before clearing ARM_MMU500_ACTLR_CPRE, we need to
1605                  * clear the CACHE_LOCK bit of ACR first; note that the
1606                  * CACHE_LOCK bit is only present in MMU-500 r2 onwards.
1607                  */
1608                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1609                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1610                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1611                 if (major >= 2)
1612                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1613                 /*
1614                  * Allow unmatched Stream IDs to allocate bypass
1615                  * TLB entries for reduced latency.
1616                  */
1617                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1618                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1619         }
1620
1621         /* Make sure all context banks are disabled and clear CB_FSR  */
1622         for (i = 0; i < smmu->num_context_banks; ++i) {
1623                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1624
1625                 arm_smmu_write_context_bank(smmu, i);
1626                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1627                 /*
1628                  * Disable MMU-500's not-particularly-beneficial next-page
1629                  * prefetcher for the sake of errata #841119 and #826419.
1630                  */
1631                 if (smmu->model == ARM_MMU500) {
1632                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1633                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1634                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1635                 }
1636         }
1637
1638         /* Invalidate the TLB, just in case */
1639         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1640         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1641
1642         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1643
1644         /* Enable fault reporting */
1645         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1646
1647         /* Disable TLB broadcasting. */
1648         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1649
1650         /* Enable client access, handling unmatched streams as appropriate */
1651         reg &= ~sCR0_CLIENTPD;
1652         if (disable_bypass)
1653                 reg |= sCR0_USFCFG;
1654         else
1655                 reg &= ~sCR0_USFCFG;
1656
1657         /* Disable forced broadcasting */
1658         reg &= ~sCR0_FB;
1659
1660         /* Don't upgrade barriers */
1661         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1662
1663         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1664                 reg |= sCR0_VMID16EN;
1665
1666         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1667                 reg |= sCR0_EXIDENABLE;
1668
1669         /* Push the button */
1670         arm_smmu_tlb_sync_global(smmu);
1671         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1672 }
1673
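/*
 * Translate the address-size encodings used by the ID registers into an
 * address width in bits; encodings beyond 5 all fold to 48 bits here.
 */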
1674 static int arm_smmu_id_size_to_bits(int size)
1675 {
1676         switch (size) {
1677         case 0:
1678                 return 32;
1679         case 1:
1680                 return 36;
1681         case 2:
1682                 return 40;
1683         case 3:
1684                 return 42;
1685         case 4:
1686                 return 44;
1687         case 5:
1688         default:
1689                 return 48;
1690         }
1691 }
1692
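/*
 * Read the ID registers to discover what this particular implementation
 * supports (translation stages, stream matching resources, page and address
 * sizes) and size the driver's bookkeeping structures accordingly.
 */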
1693 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1694 {
1695         unsigned long size;
1696         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1697         u32 id;
1698         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1699         int i;
1700
1701         dev_notice(smmu->dev, "probing hardware configuration...\n");
1702         dev_notice(smmu->dev, "SMMUv%d with:\n",
1703                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1704
1705         /* ID0 */
1706         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1707
1708         /* Restrict available stages based on module parameter */
1709         if (force_stage == 1)
1710                 id &= ~(ID0_S2TS | ID0_NTS);
1711         else if (force_stage == 2)
1712                 id &= ~(ID0_S1TS | ID0_NTS);
1713
1714         if (id & ID0_S1TS) {
1715                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1716                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1717         }
1718
1719         if (id & ID0_S2TS) {
1720                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1721                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1722         }
1723
1724         if (id & ID0_NTS) {
1725                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1726                 dev_notice(smmu->dev, "\tnested translation\n");
1727         }
1728
1729         if (!(smmu->features &
1730                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1731                 dev_err(smmu->dev, "\tno translation support!\n");
1732                 return -ENODEV;
1733         }
1734
1735         if ((id & ID0_S1TS) &&
1736                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1737                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1738                 dev_notice(smmu->dev, "\taddress translation ops\n");
1739         }
1740
1741         /*
1742          * In order for DMA API calls to work properly, we must defer to what
1743          * the FW says about coherency, regardless of what the hardware claims.
1744          * Fortunately, this also opens up a workaround for systems where the
1745          * ID register value has ended up configured incorrectly.
1746          */
1747         cttw_reg = !!(id & ID0_CTTW);
1748         if (cttw_fw || cttw_reg)
1749                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1750                            cttw_fw ? "" : "non-");
1751         if (cttw_fw != cttw_reg)
1752                 dev_notice(smmu->dev,
1753                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1754
1755         /* Max. number of entries we have for stream matching/indexing */
1756         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1757                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1758                 size = 1 << 16;
1759         } else {
1760                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1761         }
1762         smmu->streamid_mask = size - 1;
1763         if (id & ID0_SMS) {
1764                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1765                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1766                 if (size == 0) {
1767                         dev_err(smmu->dev,
1768                                 "stream-matching supported, but no SMRs present!\n");
1769                         return -ENODEV;
1770                 }
1771
1772                 /* Zero-initialised to mark as invalid */
1773                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1774                                           GFP_KERNEL);
1775                 if (!smmu->smrs)
1776                         return -ENOMEM;
1777
1778                 dev_notice(smmu->dev,
1779                            "\tstream matching with %lu register groups\n", size);
1780         }
1781         /* s2cr->type == 0 means translation, so initialise explicitly */
1782         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1783                                          GFP_KERNEL);
1784         if (!smmu->s2crs)
1785                 return -ENOMEM;
1786         for (i = 0; i < size; i++)
1787                 smmu->s2crs[i] = s2cr_init_val;
1788
1789         smmu->num_mapping_groups = size;
1790         mutex_init(&smmu->stream_map_mutex);
1791         spin_lock_init(&smmu->global_sync_lock);
1792
1793         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1794                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1795                 if (!(id & ID0_PTFS_NO_AARCH32S))
1796                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1797         }
1798
1799         /* ID1 */
1800         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1801         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1802
1803         /* Check for size mismatch of SMMU address space from mapped region */
1804         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1805         size <<= smmu->pgshift;
1806         if (smmu->cb_base != gr0_base + size)
1807                 dev_warn(smmu->dev,
1808                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1809                         size * 2, (smmu->cb_base - gr0_base) * 2);
1810
1811         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1812         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1813         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1814                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1815                 return -ENODEV;
1816         }
1817         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1818                    smmu->num_context_banks, smmu->num_s2_context_banks);
1819         /*
1820          * Cavium CN88xx erratum #27704.
1821          * Ensure ASID and VMID allocation is unique across all SMMUs in
1822          * the system.
1823          */
1824         if (smmu->model == CAVIUM_SMMUV2) {
1825                 smmu->cavium_id_base =
1826                         atomic_add_return(smmu->num_context_banks,
1827                                           &cavium_smmu_context_count);
1828                 smmu->cavium_id_base -= smmu->num_context_banks;
1829                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1830         }
1831         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1832                                  sizeof(*smmu->cbs), GFP_KERNEL);
1833         if (!smmu->cbs)
1834                 return -ENOMEM;
1835
1836         /* ID2 */
1837         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1838         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1839         smmu->ipa_size = size;
1840
1841         /* The output mask is also applied for bypass */
1842         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1843         smmu->pa_size = size;
1844
1845         if (id & ID2_VMID16)
1846                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1847
1848         /*
1849          * What the page table walker can address actually depends on which
1850          * descriptor format is in use, but since a) we don't know that yet,
1851          * and b) it can vary per context bank, this will have to do...
1852          */
1853         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1854                 dev_warn(smmu->dev,
1855                          "failed to set DMA mask for table walker\n");
1856
1857         if (smmu->version < ARM_SMMU_V2) {
1858                 smmu->va_size = smmu->ipa_size;
1859                 if (smmu->version == ARM_SMMU_V1_64K)
1860                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1861         } else {
1862                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1863                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1864                 if (id & ID2_PTFS_4K)
1865                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1866                 if (id & ID2_PTFS_16K)
1867                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1868                 if (id & ID2_PTFS_64K)
1869                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1870         }
1871
1872         /* Now we've corralled the various formats, what'll it do? */
1873         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1874                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1875         if (smmu->features &
1876             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1877                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1878         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1879                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1880         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1881                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1882
1883         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1884                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1885         else
1886                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1887         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1888                    smmu->pgsize_bitmap);
1889
1890
1891         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1892                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1893                            smmu->va_size, smmu->ipa_size);
1894
1895         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1896                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1897                            smmu->ipa_size, smmu->pa_size);
1898
1899         return 0;
1900 }
1901
1902 struct arm_smmu_match_data {
1903         enum arm_smmu_arch_version version;
1904         enum arm_smmu_implementation model;
1905 };
1906
1907 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1908 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1909
1910 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1911 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1912 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1913 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1914 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1915
1916 static const struct of_device_id arm_smmu_of_match[] = {
1917         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1918         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1919         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1920         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1921         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1922         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1923         { },
1924 };
1925 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1926
1927 #ifdef CONFIG_ACPI
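/* Map an IORT SMMU model code onto the driver's version/implementation. */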
1928 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1929 {
1930         int ret = 0;
1931
1932         switch (model) {
1933         case ACPI_IORT_SMMU_V1:
1934         case ACPI_IORT_SMMU_CORELINK_MMU400:
1935                 smmu->version = ARM_SMMU_V1;
1936                 smmu->model = GENERIC_SMMU;
1937                 break;
1938         case ACPI_IORT_SMMU_CORELINK_MMU401:
1939                 smmu->version = ARM_SMMU_V1_64K;
1940                 smmu->model = GENERIC_SMMU;
1941                 break;
1942         case ACPI_IORT_SMMU_V2:
1943                 smmu->version = ARM_SMMU_V2;
1944                 smmu->model = GENERIC_SMMU;
1945                 break;
1946         case ACPI_IORT_SMMU_CORELINK_MMU500:
1947                 smmu->version = ARM_SMMU_V2;
1948                 smmu->model = ARM_MMU500;
1949                 break;
1950         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1951                 smmu->version = ARM_SMMU_V2;
1952                 smmu->model = CAVIUM_SMMUV2;
1953                 break;
1954         default:
1955                 ret = -ENODEV;
1956         }
1957
1958         return ret;
1959 }
1960
1961 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1962                                       struct arm_smmu_device *smmu)
1963 {
1964         struct device *dev = smmu->dev;
1965         struct acpi_iort_node *node =
1966                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1967         struct acpi_iort_smmu *iort_smmu;
1968         int ret;
1969
1970         /* Retrieve SMMU1/2 specific data */
1971         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1972
1973         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1974         if (ret < 0)
1975                 return ret;
1976
1977         /* Ignore the configuration access interrupt */
1978         smmu->num_global_irqs = 1;
1979
1980         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1981                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1982
1983         return 0;
1984 }
1985 #else
1986 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1987                                              struct arm_smmu_device *smmu)
1988 {
1989         return -ENODEV;
1990 }
1991 #endif
1992
1993 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1994                                     struct arm_smmu_device *smmu)
1995 {
1996         const struct arm_smmu_match_data *data;
1997         struct device *dev = &pdev->dev;
1998         bool legacy_binding;
1999
2000         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2001                                  &smmu->num_global_irqs)) {
2002                 dev_err(dev, "missing #global-interrupts property\n");
2003                 return -ENODEV;
2004         }
2005
2006         data = of_device_get_match_data(dev);
2007         smmu->version = data->version;
2008         smmu->model = data->model;
2009
2010         parse_driver_options(smmu);
2011
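        /*
         * The legacy "mmu-masters" binding and the generic "iommus" binding
         * are mutually exclusive system-wide: whichever style is seen first
         * wins, and an SMMU described the other way afterwards refuses to
         * probe.
         */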
2012         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2013         if (legacy_binding && !using_generic_binding) {
2014                 if (!using_legacy_binding)
2015                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2016                 using_legacy_binding = true;
2017         } else if (!legacy_binding && !using_legacy_binding) {
2018                 using_generic_binding = true;
2019         } else {
2020                 dev_err(dev, "not probing due to mismatched DT properties\n");
2021                 return -ENODEV;
2022         }
2023
2024         if (of_dma_is_coherent(dev->of_node))
2025                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2026
2027         return 0;
2028 }
2029
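/*
 * Install our ops on every bus type we might find masters on, unless some
 * other IOMMU driver has already claimed that bus; for PCI, ACS is also
 * requested so that devices can be properly isolated into groups.
 */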
2030 static void arm_smmu_bus_init(void)
2031 {
2032         /* Oh, for a proper bus abstraction */
2033         if (!iommu_present(&platform_bus_type))
2034                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2035 #ifdef CONFIG_ARM_AMBA
2036         if (!iommu_present(&amba_bustype))
2037                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2038 #endif
2039 #ifdef CONFIG_PCI
2040         if (!iommu_present(&pci_bus_type)) {
2041                 pci_request_acs();
2042                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2043         }
2044 #endif
2045 }
2046
2047 static int arm_smmu_device_probe(struct platform_device *pdev)
2048 {
2049         struct resource *res;
2050         resource_size_t ioaddr;
2051         struct arm_smmu_device *smmu;
2052         struct device *dev = &pdev->dev;
2053         int num_irqs, i, err;
2054
2055         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2056         if (!smmu) {
2057                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2058                 return -ENOMEM;
2059         }
2060         smmu->dev = dev;
2061
2062         if (dev->of_node)
2063                 err = arm_smmu_device_dt_probe(pdev, smmu);
2064         else
2065                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2066
2067         if (err)
2068                 return err;
2069
2070         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2071         ioaddr = res->start;
2072         smmu->base = devm_ioremap_resource(dev, res);
2073         if (IS_ERR(smmu->base))
2074                 return PTR_ERR(smmu->base);
2075         smmu->cb_base = smmu->base + resource_size(res) / 2;
2076
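        /*
         * Count the interrupt resources: after the firmware-declared global
         * fault interrupts, every remaining line is assumed to be a
         * context-bank interrupt.
         */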
2077         num_irqs = 0;
2078         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2079                 num_irqs++;
2080                 if (num_irqs > smmu->num_global_irqs)
2081                         smmu->num_context_irqs++;
2082         }
2083
2084         if (!smmu->num_context_irqs) {
2085                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2086                         num_irqs, smmu->num_global_irqs + 1);
2087                 return -ENODEV;
2088         }
2089
2090         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2091                                   GFP_KERNEL);
2092         if (!smmu->irqs) {
2093                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2094                 return -ENOMEM;
2095         }
2096
2097         for (i = 0; i < num_irqs; ++i) {
2098                 int irq = platform_get_irq(pdev, i);
2099
2100                 if (irq < 0) {
2101                         dev_err(dev, "failed to get irq index %d\n", i);
2102                         return -ENODEV;
2103                 }
2104                 smmu->irqs[i] = irq;
2105         }
2106
2107         err = arm_smmu_device_cfg_probe(smmu);
2108         if (err)
2109                 return err;
2110
2111         if (smmu->version == ARM_SMMU_V2) {
2112                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2113                         dev_err(dev,
2114                               "found only %d context irq(s) but %d required\n",
2115                               smmu->num_context_irqs, smmu->num_context_banks);
2116                         return -ENODEV;
2117                 }
2118
2119                 /* Ignore superfluous interrupts */
2120                 smmu->num_context_irqs = smmu->num_context_banks;
2121         }
2122
2123         for (i = 0; i < smmu->num_global_irqs; ++i) {
2124                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2125                                        arm_smmu_global_fault,
2126                                        IRQF_SHARED,
2127                                        "arm-smmu global fault",
2128                                        smmu);
2129                 if (err) {
2130                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2131                                 i, smmu->irqs[i]);
2132                         return err;
2133                 }
2134         }
2135
2136         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2137                                      "smmu.%pa", &ioaddr);
2138         if (err) {
2139                 dev_err(dev, "Failed to register iommu in sysfs\n");
2140                 return err;
2141         }
2142
2143         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2144         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2145
2146         err = iommu_device_register(&smmu->iommu);
2147         if (err) {
2148                 dev_err(dev, "Failed to register iommu\n");
2149                 return err;
2150         }
2151
2152         platform_set_drvdata(pdev, smmu);
2153         arm_smmu_device_reset(smmu);
2154         arm_smmu_test_smr_masks(smmu);
2155
2156         /*
2157          * For ACPI and generic DT bindings, an SMMU will be probed before
2158          * any device which might need it, so we want the bus ops in place
2159          * ready to handle default domain setup as soon as any SMMU exists.
2160          */
2161         if (!using_legacy_binding)
2162                 arm_smmu_bus_init();
2163
2164         return 0;
2165 }
2166
2167 /*
2168  * With the legacy DT binding in play, though, we have no guarantees about
2169  * probe order, but then we're also not doing default domains, so we can
2170  * delay setting bus ops until we're sure every possible SMMU is ready,
2171  * and that way ensure that no add_device() calls get missed.
2172  */
2173 static int arm_smmu_legacy_bus_init(void)
2174 {
2175         if (using_legacy_binding)
2176                 arm_smmu_bus_init();
2177         return 0;
2178 }
2179 device_initcall_sync(arm_smmu_legacy_bus_init);
2180
2181 static int arm_smmu_device_remove(struct platform_device *pdev)
2182 {
2183         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2184
2185         if (!smmu)
2186                 return -ENODEV;
2187
2188         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2189                 dev_err(&pdev->dev, "removing device with active domains!\n");
2190
2191         /* Turn the thing off */
2192         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2193         return 0;
2194 }
2195
2196 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2197 {
2198         arm_smmu_device_remove(pdev);
2199 }
2200
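/*
 * Hardware state may have been lost over suspend, so simply re-run the full
 * reset sequence on resume.
 */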
2201 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2202 {
2203         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2204
2205         arm_smmu_device_reset(smmu);
2206         return 0;
2207 }
2208
2209 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2210
2211 static struct platform_driver arm_smmu_driver = {
2212         .driver = {
2213                 .name           = "arm-smmu",
2214                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2215                 .pm             = &arm_smmu_pm_ops,
2216         },
2217         .probe  = arm_smmu_device_probe,
2218         .remove = arm_smmu_device_remove,
2219         .shutdown = arm_smmu_device_shutdown,
2220 };
2221 module_platform_driver(arm_smmu_driver);
2222
2223 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2224 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2225 MODULE_LICENSE("GPL v2");