drivers/nvme/nvme.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 NXP Semiconductors
4  * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
5  */
6
7 #include <common.h>
8 #include <blk.h>
9 #include <cpu_func.h>
10 #include <dm.h>
11 #include <errno.h>
12 #include <malloc.h>
13 #include <memalign.h>
14 #include <pci.h>
15 #include <time.h>
16 #include <dm/device-internal.h>
17 #include <linux/compat.h>
18 #include "nvme.h"
19
20 #define NVME_Q_DEPTH            2
21 #define NVME_AQ_DEPTH           2
22 #define NVME_SQ_SIZE(depth)     ((depth) * sizeof(struct nvme_command))
23 #define NVME_CQ_SIZE(depth)     ((depth) * sizeof(struct nvme_completion))
24 #define ADMIN_TIMEOUT           60
25 #define IO_TIMEOUT              30
26 #define MAX_PRP_POOL            512
27
28 enum nvme_queue_id {
29         NVME_ADMIN_Q,
30         NVME_IO_Q,
31         NVME_Q_NUM,
32 };
33
34 /*
35  * An NVM Express queue. Each device has at least two (one for admin
36  * commands and one for I/O commands).
37  */
38 struct nvme_queue {
39         struct nvme_dev *dev;
40         struct nvme_command *sq_cmds;
41         struct nvme_completion *cqes;
42         wait_queue_head_t sq_full;
43         u32 __iomem *q_db;
44         u16 q_depth;
45         s16 cq_vector;
46         u16 sq_head;
47         u16 sq_tail;
48         u16 cq_head;
49         u16 qid;
50         u8 cq_phase;
51         u8 cqe_seen;
52         unsigned long cmdid_data[];
53 };
54
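/**
 * nvme_wait_ready() - wait for the controller to reach the requested state
 *
 * Polls CSTS.RDY until it matches the state implied by @enabled, bounded by
 * the worst-case timeout the controller advertises in CAP.TO (500 ms units).
 *
 * @dev:        NVMe controller
 * @enabled:    true to wait for RDY to be set, false to wait for it to clear
 * @return 0 on success, -ETIME if the state was not reached in time
 */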
55 static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
56 {
57         u32 bit = enabled ? NVME_CSTS_RDY : 0;
58         int timeout;
59         ulong start;
60
61         /* Timeout field in the CAP register is in 500 millisecond units */
62         timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;
63
64         start = get_timer(0);
65         while (get_timer(start) < timeout) {
66                 if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
67                         return 0;
68         }
69
70         return -ETIME;
71 }
72
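/**
 * nvme_setup_prps() - work out the PRP2 entry (or PRP list) for a transfer
 *
 * PRP1 is always the buffer address itself; this helper decides what goes
 * into PRP2: zero when the transfer fits in the first page, the address of
 * the second page when it fits in two, or the address of a PRP list built
 * in dev->prp_pool for anything larger. The pool is grown on demand and the
 * last entry of each pool page chains to the next page of the list.
 *
 * @dev:        NVMe controller
 * @prp2:       returns the value for the command's PRP2 field
 * @total_len:  transfer length in bytes
 * @dma_addr:   address of the data buffer
 * @return 0 on success, -ENOMEM if the PRP pool cannot be enlarged
 */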
73 static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
74                            int total_len, u64 dma_addr)
75 {
76         u32 page_size = dev->page_size;
77         int offset = dma_addr & (page_size - 1);
78         u64 *prp_pool;
79         int length = total_len;
80         int i, nprps;
81         u32 prps_per_page = (page_size >> 3) - 1;
82         u32 num_pages;
83
84         length -= (page_size - offset);
85
86         if (length <= 0) {
87                 *prp2 = 0;
88                 return 0;
89         }
90
91         if (length)
92                 dma_addr += (page_size - offset);
93
94         if (length <= page_size) {
95                 *prp2 = dma_addr;
96                 return 0;
97         }
98
99         nprps = DIV_ROUND_UP(length, page_size);
100         num_pages = DIV_ROUND_UP(nprps, prps_per_page);
101
102         if (nprps > dev->prp_entry_num) {
103                 free(dev->prp_pool);
104                 /*
105                  * Always increase in increments of pages.  It doesn't waste
106                  * much memory and reduces the number of allocations.
107                  */
108                 dev->prp_pool = memalign(page_size, num_pages * page_size);
109                 if (!dev->prp_pool) {
110                         printf("Error: malloc prp_pool fail\n");
111                         return -ENOMEM;
112                 }
113                 dev->prp_entry_num = prps_per_page * num_pages;
114         }
115
116         prp_pool = dev->prp_pool;
117         i = 0;
118         while (nprps) {
119                 if (i == ((page_size >> 3) - 1)) {
120                         *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
121                                         page_size);
122                         i = 0;
123                         prp_pool = (u64 *)((ulong)prp_pool + page_size);
124                 }
125                 *(prp_pool + i++) = cpu_to_le64(dma_addr);
126                 dma_addr += page_size;
127                 nprps--;
128         }
129         *prp2 = (ulong)dev->prp_pool;
130
131         flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
132                            dev->prp_entry_num * sizeof(u64));
133
134         return 0;
135 }
136
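/*
 * Hand out a command identifier. A simple incrementing counter is
 * sufficient here because this driver only ever submits one command at a
 * time and waits for its completion.
 */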
137 static __le16 nvme_get_cmd_id(void)
138 {
139         static unsigned short cmdid;
140
141         return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
142 }
143
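/*
 * Read the status field of a completion queue entry, invalidating the
 * cache first so the CPU sees what the controller wrote by DMA.
 */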
144 static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
145 {
146         u64 start = (ulong)&nvmeq->cqes[index];
147         u64 stop = start + sizeof(struct nvme_completion);
148
149         invalidate_dcache_range(start, stop);
150
151         return le16_to_cpu(readw(&(nvmeq->cqes[index].status)));
152 }
153
154 /**
155  * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
156  *
157  * @nvmeq:      The queue to use
158  * @cmd:        The command to send
159  */
160 static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
161 {
162         u16 tail = nvmeq->sq_tail;
163
164         memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
165         flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
166                            (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));
167
168         if (++tail == nvmeq->q_depth)
169                 tail = 0;
170         writel(tail, nvmeq->q_db);
171         nvmeq->sq_tail = tail;
172 }
173
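/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * The command is copied to the submission queue and the completion queue is
 * then polled until the phase bit of the current entry flips or the timeout
 * expires. The completion queue head doorbell is updated afterwards.
 *
 * @nvmeq:      submission/completion queue pair to use
 * @cmd:        command to send (its command ID is filled in here)
 * @result:     if non-NULL, receives dword 0 of the completion entry
 * @timeout:    poll timeout (ADMIN_TIMEOUT or IO_TIMEOUT)
 * @return 0 on success, -ETIMEDOUT if no completion arrived in time,
 *         -EIO if the controller reported an error status
 */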
174 static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
175                                 struct nvme_command *cmd,
176                                 u32 *result, unsigned timeout)
177 {
178         u16 head = nvmeq->cq_head;
179         u16 phase = nvmeq->cq_phase;
180         u16 status;
181         ulong start_time;
182         ulong timeout_us = timeout * 100000;
183
184         cmd->common.command_id = nvme_get_cmd_id();
185         nvme_submit_cmd(nvmeq, cmd);
186
187         start_time = timer_get_us();
188
189         for (;;) {
190                 status = nvme_read_completion_status(nvmeq, head);
191                 if ((status & 0x01) == phase)
192                         break;
193                 if (timeout_us > 0 && (timer_get_us() - start_time)
194                     >= timeout_us)
195                         return -ETIMEDOUT;
196         }
197
198         status >>= 1;
199         if (status) {
200                 printf("ERROR: status = %x, phase = %d, head = %d\n",
201                        status, phase, head);
202                 status = 0;
203                 if (++head == nvmeq->q_depth) {
204                         head = 0;
205                         phase = !phase;
206                 }
207                 writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
208                 nvmeq->cq_head = head;
209                 nvmeq->cq_phase = phase;
210
211                 return -EIO;
212         }
213
214         if (result)
215                 *result = le32_to_cpu(readl(&(nvmeq->cqes[head].result)));
216
217         if (++head == nvmeq->q_depth) {
218                 head = 0;
219                 phase = !phase;
220         }
221         writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
222         nvmeq->cq_head = head;
223         nvmeq->cq_phase = phase;
224
225         return status;
226 }
227
228 static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
229                                  u32 *result)
230 {
231         return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
232                                     result, ADMIN_TIMEOUT);
233 }
234
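/*
 * Allocate a queue pair (submission ring, completion ring and bookkeeping)
 * and register it in dev->queues[]. The queue is only made known to the
 * hardware later, by nvme_configure_admin_queue() or nvme_create_queue().
 */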
235 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
236                                            int qid, int depth)
237 {
238         struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
239         if (!nvmeq)
240                 return NULL;
241         memset(nvmeq, 0, sizeof(*nvmeq));
242
243         nvmeq->cqes = (void *)memalign(4096, NVME_CQ_SIZE(depth));
244         if (!nvmeq->cqes)
245                 goto free_nvmeq;
246         memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));
247
248         nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
249         if (!nvmeq->sq_cmds)
250                 goto free_queue;
251         memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));
252
253         nvmeq->dev = dev;
254
255         nvmeq->cq_head = 0;
256         nvmeq->cq_phase = 1;
257         nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
258         nvmeq->q_depth = depth;
259         nvmeq->qid = qid;
260         dev->queue_count++;
261         dev->queues[qid] = nvmeq;
262
263         return nvmeq;
264
265  free_queue:
266         free((void *)nvmeq->cqes);
267  free_nvmeq:
268         free(nvmeq);
269
270         return NULL;
271 }
272
273 static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
274 {
275         struct nvme_command c;
276
277         memset(&c, 0, sizeof(c));
278         c.delete_queue.opcode = opcode;
279         c.delete_queue.qid = cpu_to_le16(id);
280
281         return nvme_submit_admin_cmd(dev, &c, NULL);
282 }
283
284 static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
285 {
286         return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
287 }
288
289 static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
290 {
291         return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
292 }
293
294 static int nvme_enable_ctrl(struct nvme_dev *dev)
295 {
296         dev->ctrl_config &= ~NVME_CC_SHN_MASK;
297         dev->ctrl_config |= NVME_CC_ENABLE;
298         writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);
299
300         return nvme_wait_ready(dev, true);
301 }
302
303 static int nvme_disable_ctrl(struct nvme_dev *dev)
304 {
305         dev->ctrl_config &= ~NVME_CC_SHN_MASK;
306         dev->ctrl_config &= ~NVME_CC_ENABLE;
307         writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);
308
309         return nvme_wait_ready(dev, false);
310 }
311
312 static void nvme_free_queue(struct nvme_queue *nvmeq)
313 {
314         free((void *)nvmeq->cqes);
315         free(nvmeq->sq_cmds);
316         free(nvmeq);
317 }
318
319 static void nvme_free_queues(struct nvme_dev *dev, int lowest)
320 {
321         int i;
322
323         for (i = dev->queue_count - 1; i >= lowest; i--) {
324                 struct nvme_queue *nvmeq = dev->queues[i];
325                 dev->queue_count--;
326                 dev->queues[i] = NULL;
327                 nvme_free_queue(nvmeq);
328         }
329 }
330
331 static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
332 {
333         struct nvme_dev *dev = nvmeq->dev;
334
335         nvmeq->sq_tail = 0;
336         nvmeq->cq_head = 0;
337         nvmeq->cq_phase = 1;
338         nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
339         memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
340         flush_dcache_range((ulong)nvmeq->cqes,
341                            (ulong)nvmeq->cqes + NVME_CQ_SIZE(nvmeq->q_depth));
342         dev->online_queues++;
343 }
344
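/*
 * Bring up the admin queue: pick a memory page size both the host and the
 * controller support, disable the controller, program AQA/ASQ/ACQ with the
 * admin queue attributes and ring addresses, then re-enable the controller.
 */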
345 static int nvme_configure_admin_queue(struct nvme_dev *dev)
346 {
347         int result;
348         u32 aqa;
349         u64 cap = dev->cap;
350         struct nvme_queue *nvmeq;
351         /* most architectures use 4KB as the page size */
352         unsigned page_shift = 12;
353         unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
354         unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
355
356         if (page_shift < dev_page_min) {
357                 debug("Device minimum page size (%u) too large for host (%u)\n",
358                       1 << dev_page_min, 1 << page_shift);
359                 return -ENODEV;
360         }
361
362         if (page_shift > dev_page_max) {
363                 debug("Device maximum page size (%u) smaller than host (%u)\n",
364                       1 << dev_page_max, 1 << page_shift);
365                 page_shift = dev_page_max;
366         }
367
368         result = nvme_disable_ctrl(dev);
369         if (result < 0)
370                 return result;
371
372         nvmeq = dev->queues[NVME_ADMIN_Q];
373         if (!nvmeq) {
374                 nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
375                 if (!nvmeq)
376                         return -ENOMEM;
377         }
378
379         aqa = nvmeq->q_depth - 1;
380         aqa |= aqa << 16;
382
383         dev->page_size = 1 << page_shift;
384
385         dev->ctrl_config = NVME_CC_CSS_NVM;
386         dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
387         dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
388         dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
389
390         writel(aqa, &dev->bar->aqa);
391         nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
392         nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
393
394         result = nvme_enable_ctrl(dev);
395         if (result)
396                 goto free_nvmeq;
397
398         nvmeq->cq_vector = 0;
399
400         nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);
401
402         return result;
403
404  free_nvmeq:
405         nvme_free_queues(dev, 0);
406
407         return result;
408 }
409
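/*
 * The two helpers below issue the admin commands that create an I/O
 * completion queue and an I/O submission queue for a queue pair previously
 * allocated with nvme_alloc_queue().
 */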
410 static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
411                             struct nvme_queue *nvmeq)
412 {
413         struct nvme_command c;
414         int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
415
416         memset(&c, 0, sizeof(c));
417         c.create_cq.opcode = nvme_admin_create_cq;
418         c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
419         c.create_cq.cqid = cpu_to_le16(qid);
420         c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
421         c.create_cq.cq_flags = cpu_to_le16(flags);
422         c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
423
424         return nvme_submit_admin_cmd(dev, &c, NULL);
425 }
426
427 static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
428                             struct nvme_queue *nvmeq)
429 {
430         struct nvme_command c;
431         int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
432
433         memset(&c, 0, sizeof(c));
434         c.create_sq.opcode = nvme_admin_create_sq;
435         c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
436         c.create_sq.sqid = cpu_to_le16(qid);
437         c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
438         c.create_sq.sq_flags = cpu_to_le16(flags);
439         c.create_sq.cqid = cpu_to_le16(qid);
440
441         return nvme_submit_admin_cmd(dev, &c, NULL);
442 }
443
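/**
 * nvme_identify() - issue an Identify admin command
 *
 * Typical use, as in nvme_get_info_from_identify() below:
 *
 *	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
 *	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
 *
 * @dev:        NVMe controller
 * @nsid:       namespace ID, for the Identify Namespace variant
 * @cns:        selects the data structure to return (controller/namespace)
 * @dma_addr:   buffer that receives the identify data
 * @return 0 on success, negative value on error
 */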
444 int nvme_identify(struct nvme_dev *dev, unsigned nsid,
445                   unsigned cns, dma_addr_t dma_addr)
446 {
447         struct nvme_command c;
448         u32 page_size = dev->page_size;
449         int offset = dma_addr & (page_size - 1);
450         int length = sizeof(struct nvme_id_ctrl);
451         int ret;
452
453         memset(&c, 0, sizeof(c));
454         c.identify.opcode = nvme_admin_identify;
455         c.identify.nsid = cpu_to_le32(nsid);
456         c.identify.prp1 = cpu_to_le64(dma_addr);
457
458         length -= (page_size - offset);
459         if (length <= 0) {
460                 c.identify.prp2 = 0;
461         } else {
462                 dma_addr += (page_size - offset);
463                 c.identify.prp2 = cpu_to_le64(dma_addr);
464         }
465
466         c.identify.cns = cpu_to_le32(cns);
467
468         ret = nvme_submit_admin_cmd(dev, &c, NULL);
469         if (!ret)
470                 invalidate_dcache_range(dma_addr,
471                                         dma_addr + sizeof(struct nvme_id_ctrl));
472
473         return ret;
474 }
475
476 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
477                       dma_addr_t dma_addr, u32 *result)
478 {
479         struct nvme_command c;
480
481         memset(&c, 0, sizeof(c));
482         c.features.opcode = nvme_admin_get_features;
483         c.features.nsid = cpu_to_le32(nsid);
484         c.features.prp1 = cpu_to_le64(dma_addr);
485         c.features.fid = cpu_to_le32(fid);
486
487         /*
488          * TODO: add cache invalidate operation when the size of
489          * the DMA buffer is known
490          */
491
492         return nvme_submit_admin_cmd(dev, &c, result);
493 }
494
495 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
496                       dma_addr_t dma_addr, u32 *result)
497 {
498         struct nvme_command c;
499
500         memset(&c, 0, sizeof(c));
501         c.features.opcode = nvme_admin_set_features;
502         c.features.prp1 = cpu_to_le64(dma_addr);
503         c.features.fid = cpu_to_le32(fid);
504         c.features.dword11 = cpu_to_le32(dword11);
505
506         /*
507          * TODO: add cache flush operation when the size of
508          * the DMA buffer is known
509          */
510
511         return nvme_submit_admin_cmd(dev, &c, result);
512 }
513
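/*
 * Register an I/O queue pair with the controller. The completion queue has
 * to be created before the submission queue that points at it; on failure
 * the queues are deleted again.
 */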
514 static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
515 {
516         struct nvme_dev *dev = nvmeq->dev;
517         int result;
518
519         nvmeq->cq_vector = qid - 1;
520         result = nvme_alloc_cq(dev, qid, nvmeq);
521         if (result < 0)
522                 goto release_cq;
523
524         result = nvme_alloc_sq(dev, qid, nvmeq);
525         if (result < 0)
526                 goto release_sq;
527
528         nvme_init_queue(nvmeq, qid);
529
530         return result;
531
532  release_sq:
533         nvme_delete_sq(dev, qid);
534  release_cq:
535         nvme_delete_cq(dev, qid);
536
537         return result;
538 }
539
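/*
 * Request @count I/O queue pairs via the Number of Queues feature. The
 * controller reports how many submission and completion queues it actually
 * allocated; the smaller of the two values (plus one) is returned.
 */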
540 static int nvme_set_queue_count(struct nvme_dev *dev, int count)
541 {
542         int status;
543         u32 result;
544         u32 q_count = (count - 1) | ((count - 1) << 16);
545
546         status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
547                         q_count, 0, &result);
548
549         if (status < 0)
550                 return status;
551         if (status > 1)
552                 return 0;
553
554         return min(result & 0xffff, result >> 16) + 1;
555 }
556
557 static void nvme_create_io_queues(struct nvme_dev *dev)
558 {
559         unsigned int i;
560
561         for (i = dev->queue_count; i <= dev->max_qid; i++)
562                 if (!nvme_alloc_queue(dev, i, dev->q_depth))
563                         break;
564
565         for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
566                 if (nvme_create_queue(dev->queues[i], i))
567                         break;
568 }
569
570 static int nvme_setup_io_queues(struct nvme_dev *dev)
571 {
572         int nr_io_queues;
573         int result;
574
575         nr_io_queues = 1;
576         result = nvme_set_queue_count(dev, nr_io_queues);
577         if (result <= 0)
578                 return result;
579
580         dev->max_qid = nr_io_queues;
581
582         /* Free previously allocated queues */
583         nvme_free_queues(dev, nr_io_queues + 1);
584         nvme_create_io_queues(dev);
585
586         return 0;
587 }
588
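/*
 * Read the Identify Controller data and cache the fields the driver needs:
 * the namespace count, the volatile write cache flag, the serial, model and
 * firmware revision strings, and the maximum transfer size (MDTS).
 */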
589 static int nvme_get_info_from_identify(struct nvme_dev *dev)
590 {
591         struct nvme_id_ctrl *ctrl;
592         int ret;
593         int shift = NVME_CAP_MPSMIN(dev->cap) + 12;
594
595         ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
596         if (!ctrl)
597                 return -ENOMEM;
598
599         ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
600         if (ret) {
601                 free(ctrl);
602                 return -EIO;
603         }
604
605         dev->nn = le32_to_cpu(ctrl->nn);
606         dev->vwc = ctrl->vwc;
607         memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
608         memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
609         memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
610         if (ctrl->mdts)
611                 dev->max_transfer_shift = (ctrl->mdts + shift);
612         else {
613                 /*
614                  * Maximum Data Transfer Size (MDTS) field indicates the maximum
615                  * data transfer size between the host and the controller. The
616                  * host should not submit a command that exceeds this transfer
617                  * size. The value is in units of the minimum memory page size
618                  * and is reported as a power of two (2^n).
619                  *
620                  * The spec also says: a value of 0h indicates no restrictions
621                  * on transfer size. But in nvme_blk_read/write() below we have
622                  * the following algorithm for maximum number of logical blocks
623                  * per transfer:
624                  *
625                  * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
626                  *
627                  * In order for lbas not to overflow, the maximum number is 15
628                  * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
629                  * Let's use 20 which provides 1MB size.
630                  */
631                 dev->max_transfer_shift = 20;
632         }
633
634         free(ctrl);
635         return 0;
636 }
637
638 int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
639 {
640         struct nvme_ns *ns = dev_get_priv(udev);
641
642         if (ns_id)
643                 *ns_id = ns->ns_id;
644         if (eui64)
645                 memcpy(eui64, ns->eui64, sizeof(ns->eui64));
646
647         return 0;
648 }
649
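/*
 * Probe every device in the NVMe uclass so that their namespaces are
 * enumerated as block devices, e.g. when the "nvme scan" command runs.
 */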
650 int nvme_scan_namespace(void)
651 {
652         struct uclass *uc;
653         struct udevice *dev;
654         int ret;
655
656         ret = uclass_get(UCLASS_NVME, &uc);
657         if (ret)
658                 return ret;
659
660         uclass_foreach_dev(dev, uc) {
661                 ret = device_probe(dev);
662                 if (ret)
663                         return ret;
664         }
665
666         return 0;
667 }
668
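/*
 * Probe one block device child: identify its namespace and fill in the
 * blk_desc (capacity, block size, vendor/product/revision) from the
 * Identify Namespace data and the parent controller's information.
 */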
669 static int nvme_blk_probe(struct udevice *udev)
670 {
671         struct nvme_dev *ndev = dev_get_priv(udev->parent);
672         struct blk_desc *desc = dev_get_uclass_platdata(udev);
673         struct nvme_ns *ns = dev_get_priv(udev);
674         u8 flbas;
675         struct pci_child_platdata *pplat;
676         struct nvme_id_ns *id;
677
678         id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
679         if (!id)
680                 return -ENOMEM;
681
682         memset(ns, 0, sizeof(*ns));
683         ns->dev = ndev;
684         /* extract the namespace id from the block device name */
685         ns->ns_id = trailing_strtol(udev->name) + 1;
686         if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
687                 free(id);
688                 return -EIO;
689         }
690
691         memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
692         flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
693         ns->flbas = flbas;
694         ns->lba_shift = id->lbaf[flbas].ds;
695         ns->mode_select_num_blocks = le64_to_cpu(id->nsze);
696         ns->mode_select_block_len = 1 << ns->lba_shift;
697         list_add(&ns->list, &ndev->namespaces);
698
699         desc->lba = ns->mode_select_num_blocks;
700         desc->log2blksz = ns->lba_shift;
701         desc->blksz = 1 << ns->lba_shift;
702         desc->bdev = udev;
703         pplat = dev_get_parent_platdata(udev->parent);
704         sprintf(desc->vendor, "0x%.4x", pplat->vendor);
705         memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
706         memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
707
708         free(id);
709         return 0;
710 }
711
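/**
 * nvme_blk_rw() - common implementation of block read and write
 *
 * The request is split into chunks of at most
 * 1 << (dev->max_transfer_shift - ns->lba_shift) logical blocks, and a new
 * PRP setup is done for each chunk.
 *
 * @udev:       block device
 * @blknr:      first logical block of the transfer
 * @blkcnt:     number of logical blocks
 * @buffer:     data buffer
 * @read:       true for a read, false for a write
 * @return the number of blocks actually transferred
 */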
712 static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
713                          lbaint_t blkcnt, void *buffer, bool read)
714 {
715         struct nvme_ns *ns = dev_get_priv(udev);
716         struct nvme_dev *dev = ns->dev;
717         struct nvme_command c;
718         struct blk_desc *desc = dev_get_uclass_platdata(udev);
719         int status;
720         u64 prp2;
721         u64 total_len = blkcnt << desc->log2blksz;
722         u64 temp_len = total_len;
723
724         u64 slba = blknr;
725         u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
726         u64 total_lbas = blkcnt;
727
728         flush_dcache_range((unsigned long)buffer,
729                            (unsigned long)buffer + total_len);
730
731         c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
732         c.rw.flags = 0;
733         c.rw.nsid = cpu_to_le32(ns->ns_id);
734         c.rw.control = 0;
735         c.rw.dsmgmt = 0;
736         c.rw.reftag = 0;
737         c.rw.apptag = 0;
738         c.rw.appmask = 0;
739         c.rw.metadata = 0;
740
741         while (total_lbas) {
742                 if (total_lbas < lbas) {
743                         lbas = (u16)total_lbas;
744                         total_lbas = 0;
745                 } else {
746                         total_lbas -= lbas;
747                 }
748
749                 if (nvme_setup_prps(dev, &prp2,
750                                     lbas << ns->lba_shift, (ulong)buffer))
751                         return -EIO;
752                 c.rw.slba = cpu_to_le64(slba);
753                 slba += lbas;
754                 c.rw.length = cpu_to_le16(lbas - 1);
755                 c.rw.prp1 = cpu_to_le64((ulong)buffer);
756                 c.rw.prp2 = cpu_to_le64(prp2);
757                 status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
758                                 &c, NULL, IO_TIMEOUT);
759                 if (status)
760                         break;
761                 temp_len -= (u32)lbas << ns->lba_shift;
762                 buffer += lbas << ns->lba_shift;
763         }
764
765         if (read)
766                 invalidate_dcache_range((unsigned long)buffer,
767                                         (unsigned long)buffer + total_len);
768
769         return (total_len - temp_len) >> desc->log2blksz;
770 }
771
772 static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
773                            lbaint_t blkcnt, void *buffer)
774 {
775         return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
776 }
777
778 static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
779                             lbaint_t blkcnt, const void *buffer)
780 {
781         return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
782 }
783
784 static const struct blk_ops nvme_blk_ops = {
785         .read   = nvme_blk_read,
786         .write  = nvme_blk_write,
787 };
788
789 U_BOOT_DRIVER(nvme_blk) = {
790         .name   = "nvme-blk",
791         .id     = UCLASS_BLK,
792         .probe  = nvme_blk_probe,
793         .ops    = &nvme_blk_ops,
794         .priv_auto_alloc_size = sizeof(struct nvme_ns),
795 };
796
797 static int nvme_bind(struct udevice *udev)
798 {
799         static int ndev_num;
800         char name[20];
801
802         sprintf(name, "nvme#%d", ndev_num++);
803
804         return device_set_name(udev, name);
805 }
806
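/*
 * Probe the controller itself: map BAR0, read CAP to size the queues and
 * the doorbell stride, set up the admin queue, allocate the PRP pool once
 * the page size is known, create the I/O queue and read the controller's
 * identify data.
 */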
807 static int nvme_probe(struct udevice *udev)
808 {
809         int ret;
810         struct nvme_dev *ndev = dev_get_priv(udev);
811
812         ndev->instance = trailing_strtol(udev->name);
813
814         INIT_LIST_HEAD(&ndev->namespaces);
815         ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
816                         PCI_REGION_MEM);
817         if (readl(&ndev->bar->csts) == -1) {
818                 ret = -ENODEV;
819                 printf("Error: %s: NVMe controller not responding\n", udev->name);
820                 goto free_nvme;
821         }
822
823         ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
824         if (!ndev->queues) {
825                 ret = -ENOMEM;
826                 printf("Error: %s: Out of memory!\n", udev->name);
827                 goto free_nvme;
828         }
829         memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));
830
831         ndev->cap = nvme_readq(&ndev->bar->cap);
832         ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
833         ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
834         ndev->dbs = ((void __iomem *)ndev->bar) + 4096;
835
836         ret = nvme_configure_admin_queue(ndev);
837         if (ret)
838                 goto free_queue;
839
840         /* Allocate after the page size is known */
841         ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
842         if (!ndev->prp_pool) {
843                 ret = -ENOMEM;
844                 printf("Error: %s: Out of memory!\n", udev->name);
845                 goto free_nvme;
846         }
847         ndev->prp_entry_num = MAX_PRP_POOL >> 3;
848
849         ret = nvme_setup_io_queues(ndev);
850         if (ret)
851                 goto free_queue;
852
853         nvme_get_info_from_identify(ndev);
854
855         return 0;
856
857 free_queue:
858         free((void *)ndev->queues);
859 free_nvme:
860         return ret;
861 }
862
863 U_BOOT_DRIVER(nvme) = {
864         .name   = "nvme",
865         .id     = UCLASS_NVME,
866         .bind   = nvme_bind,
867         .probe  = nvme_probe,
868         .priv_auto_alloc_size = sizeof(struct nvme_dev),
869 };
870
871 struct pci_device_id nvme_supported[] = {
872         { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
873         {}
874 };
875
876 U_BOOT_PCI_DEVICE(nvme, nvme_supported);