/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 */
#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>
enum nvme_nvm_admin_opcode {
        nvme_nvm_admin_identity         = 0xe2,
        nvme_nvm_admin_get_bb_tbl       = 0xf2,
        nvme_nvm_admin_set_bb_tbl       = 0xf1,
};
enum nvme_nvm_log_page {
        NVME_NVM_LOG_REPORT_CHUNK       = 0xca,
};
struct nvme_nvm_ph_rw {
        __u8                    opcode;
        __u8                    flags;
        __u16                   command_id;
        __le32                  nsid;
        __u64                   rsvd2;
        __le64                  metadata;
        __le64                  prp1;
        __le64                  prp2;
        __le64                  spba;
        __le16                  length;
        __le16                  control;
        __le32                  dsmgmt;
        __le64                  resv;
};

struct nvme_nvm_erase_blk {
        __u8                    opcode;
        __u8                    flags;
        __u16                   command_id;
        __le32                  nsid;
        __u64                   rsvd[2];
        __le64                  prp1;
        __le64                  prp2;
        __le64                  spba;
        __le16                  length;
        __le16                  control;
        __le32                  dsmgmt;
        __le64                  resv;
};

struct nvme_nvm_identity {
        __u8                    opcode;
        __u8                    flags;
        __u16                   command_id;
        __le32                  nsid;
        __u64                   rsvd[2];
        __le64                  prp1;
        __le64                  prp2;
        __u32                   rsvd11[6];
};

struct nvme_nvm_getbbtbl {
        __u8                    opcode;
        __u8                    flags;
        __u16                   command_id;
        __le32                  nsid;
        __u64                   rsvd[2];
        __le64                  prp1;
        __le64                  prp2;
        __le64                  spba;
        __u32                   rsvd4[4];
};

struct nvme_nvm_setbbtbl {
        __u8                    opcode;
        __u8                    flags;
        __u16                   command_id;
        __le32                  nsid;
        __le64                  rsvd[2];
        __le64                  prp1;
        __le64                  prp2;
        __le64                  spba;
        __le16                  nlb;
        __u8                    value;
        __u8                    rsvd3;
        __u32                   rsvd4[3];
};

struct nvme_nvm_command {
        union {
                struct nvme_common_command common;
                struct nvme_nvm_ph_rw ph_rw;
                struct nvme_nvm_erase_blk erase;
                struct nvme_nvm_identity identity;
                struct nvme_nvm_getbbtbl get_bb;
                struct nvme_nvm_setbbtbl set_bb;
        };
};
struct nvme_nvm_id12_grp {
        __u8                    mtype;
        __u8                    fmtype;
        __le16                  res16;
        __u8                    num_ch;
        __u8                    num_lun;
        __u8                    num_pln;
        __u8                    rsvd1;
        __le16                  num_chk;
        __le16                  num_pg;
        __le16                  fpg_sz;
        __le16                  csecs;
        __le16                  sos;
        __le16                  rsvd2;
        __le32                  trdt;
        __le32                  trdm;
        __le32                  tprt;
        __le32                  tprm;
        __le32                  tbet;
        __le32                  tbem;
        __le32                  mpos;
        __le32                  mccap;
        __le16                  cpar;
        __u8                    reserved[906];
} __packed;

struct nvme_nvm_id12_addrf {
        __u8                    ch_offset;
        __u8                    ch_len;
        __u8                    lun_offset;
        __u8                    lun_len;
        __u8                    pln_offset;
        __u8                    pln_len;
        __u8                    blk_offset;
        __u8                    blk_len;
        __u8                    pg_offset;
        __u8                    pg_len;
        __u8                    sec_offset;
        __u8                    sec_len;
        __u8                    res[4];
} __packed;
struct nvme_nvm_id12 {
        __u8                    ver_id;
        __u8                    vmnt;
        __u8                    cgrps;
        __u8                    res;
        __le32                  cap;
        __le32                  dom;
        struct nvme_nvm_id12_addrf ppaf;
        __u8                    resv[228];
        struct nvme_nvm_id12_grp grp;
        __u8                    resv2[2880];
} __packed;
struct nvme_nvm_bb_tbl {
        __u8    tblid[4];
        __le16  verid;
        __le16  revid;
        __le32  rvsd1;
        __le32  tblks;
        __le32  tfact;
        __le32  tgrown;
        __le32  tdresv;
        __le32  thresv;
        __le32  rsvd2[8];
        __u8    blk[0];
};
struct nvme_nvm_id20_addrf {
        __u8                    grp_len;
        __u8                    pu_len;
        __u8                    chk_len;
        __u8                    lba_len;
        __u8                    resv[4];
};
struct nvme_nvm_id20 {
        __u8                    mjr;
        __u8                    mnr;
        __u8                    resv[6];

        struct nvme_nvm_id20_addrf lbaf;

        __le32                  mccap;
        __u8                    resv2[12];

        __u8                    wit;
        __u8                    resv3[31];

        /* Geometry */
        __le16                  num_grp;
        __le16                  num_pu;
        __le32                  num_chk;
        __le32                  clba;
        __u8                    resv4[52];

        /* Write data requirements */
        __le32                  ws_min;
        __le32                  ws_opt;
        __le32                  mw_cunits;
        __le32                  maxoc;
        __le32                  maxocpu;
        __u8                    resv5[44];

        /* Performance related metrics */
        __le32                  trdt;
        __le32                  trdm;
        __le32                  twrt;
        __le32                  twrm;
        __le32                  tcrst;
        __le32                  tcrsm;
        __u8                    resv6[40];

        /* Reserved area */
        __u8                    resv7[2816];

        /* Vendor specific */
        __u8                    vs[1024];
};
struct nvme_nvm_chk_meta {
        __u8    state;
        __u8    type;
        __u8    wi;
        __u8    rsvd[5];
        __le64  slba;
        __le64  cnlb;
        __le64  wp;
};
/*
 * Check we didn't inadvertently grow the command struct
 */
static inline void _nvme_nvm_check_size(void)
{
        BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
                                                sizeof(struct nvm_chk_meta));
}
static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
                                 struct nvme_nvm_id12_addrf *src)
{
        dst->ch_len = src->ch_len;
        dst->lun_len = src->lun_len;
        dst->blk_len = src->blk_len;
        dst->pg_len = src->pg_len;
        dst->pln_len = src->pln_len;
        dst->sec_len = src->sec_len;

        dst->ch_offset = src->ch_offset;
        dst->lun_offset = src->lun_offset;
        dst->blk_offset = src->blk_offset;
        dst->pg_offset = src->pg_offset;
        dst->pln_offset = src->pln_offset;
        dst->sec_offset = src->sec_offset;

        dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
        dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
        dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
        dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
        dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
        dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
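
/*
 * Worked example for the mask math above (hypothetical 1.2 geometry, not
 * taken from any specific device): with ch_len = 2 and ch_offset = 24,
 *
 *      ch_mask = ((1ULL << 2) - 1) << 24 = 0x03000000
 *
 * so the channel of a device-format PPA is ((ppa & 0x03000000) >> 24).
 * Every field of the 1.2 address is recovered the same way from the
 * (offset, length) pair reported in the identify data.
 */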
static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
                             struct nvm_geo *geo)
{
        struct nvme_nvm_id12_grp *src;
        int sec_per_pg, sec_per_pl, pg_per_blk;

        if (id->cgrps != 1)
                return -EINVAL;

        src = &id->grp;

        if (src->mtype != 0) {
                pr_err("nvm: memory type not supported\n");
                return -EINVAL;
        }

        /* 1.2 spec. only reports a single version id - unfold */
        geo->major_ver_id = id->ver_id;
        geo->minor_ver_id = 2;

        /* Set compacted version for upper layers */
        geo->version = NVM_OCSSD_SPEC_12;

        geo->num_ch = src->num_ch;
        geo->num_lun = src->num_lun;
        geo->all_luns = geo->num_ch * geo->num_lun;

        geo->num_chk = le16_to_cpu(src->num_chk);

        geo->csecs = le16_to_cpu(src->csecs);
        geo->sos = le16_to_cpu(src->sos);

        pg_per_blk = le16_to_cpu(src->num_pg);
        sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
        sec_per_pl = sec_per_pg * src->num_pln;
        geo->clba = sec_per_pl * pg_per_blk;

        geo->all_chunks = geo->all_luns * geo->num_chk;
        geo->total_secs = geo->clba * geo->all_chunks;

        geo->ws_min = sec_per_pg;
        geo->ws_opt = sec_per_pg;
        geo->mw_cunits = geo->ws_opt << 3;      /* default to MLC safe values */

        /* Do not impose values for maximum number of open blocks as it is
         * unspecified in 1.2. Users of 1.2 must be aware of this and specify
         * these values through a quirk if restrictions apply.
         */
        geo->maxoc = geo->all_luns * geo->num_chk;
        geo->maxocpu = geo->num_chk;

        geo->mccap = le32_to_cpu(src->mccap);

        geo->trdt = le32_to_cpu(src->trdt);
        geo->trdm = le32_to_cpu(src->trdm);
        geo->tprt = le32_to_cpu(src->tprt);
        geo->tprm = le32_to_cpu(src->tprm);
        geo->tbet = le32_to_cpu(src->tbet);
        geo->tbem = le32_to_cpu(src->tbem);

        /* 1.2 compatibility */
        geo->vmnt = id->vmnt;
        geo->cap = le32_to_cpu(id->cap);
        geo->dom = le32_to_cpu(id->dom);

        geo->mtype = src->mtype;
        geo->fmtype = src->fmtype;

        geo->cpar = le16_to_cpu(src->cpar);
        geo->mpos = le32_to_cpu(src->mpos);

        geo->pln_mode = NVM_PLANE_SINGLE;

        if (geo->mpos & 0x020202) {
                geo->pln_mode = NVM_PLANE_DOUBLE;
                geo->ws_opt <<= 1;
        } else if (geo->mpos & 0x040404) {
                geo->pln_mode = NVM_PLANE_QUAD;
                geo->ws_opt <<= 2;
        }

        geo->num_pln = src->num_pln;
        geo->num_pg = le16_to_cpu(src->num_pg);
        geo->fpg_sz = le16_to_cpu(src->fpg_sz);

        nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);

        return 0;
}
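
/*
 * Example of the geometry folding above (hypothetical 1.2 device, for
 * illustration only): fpg_sz = 16384, csecs = 4096, num_pln = 4 and
 * num_pg = 512 give
 *
 *      sec_per_pg = 16384 / 4096 = 4
 *      sec_per_pl = 4 * 4        = 16
 *      clba       = 16 * 512     = 8192 sectors per chunk
 *
 * i.e. a 1.2 block spanning all planes is exposed to upper layers as a
 * single 2.0-style chunk of clba sectors.
 */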
static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
                                 struct nvme_nvm_id20_addrf *src)
{
        dst->ch_len = src->grp_len;
        dst->lun_len = src->pu_len;
        dst->chk_len = src->chk_len;
        dst->sec_len = src->lba_len;

        dst->sec_offset = 0;
        dst->chk_offset = dst->sec_len;
        dst->lun_offset = dst->chk_offset + dst->chk_len;
        dst->ch_offset = dst->lun_offset + dst->lun_len;

        dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
        dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
        dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
        dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
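
/*
 * The 2.0 format packs fields contiguously from bit 0: sector, then chunk,
 * then parallel unit, then group. For example (hypothetical lengths):
 * lba_len = 12, chk_len = 16, pu_len = 3 and grp_len = 3 give
 *
 *      sector: bits 11..0, chunk: bits 27..12,
 *      pu:     bits 30..28, group: bits 33..31
 *
 * which is why the device only reports field lengths and the offsets are
 * derived here.
 */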
static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
                             struct nvm_geo *geo)
{
        geo->major_ver_id = id->mjr;
        geo->minor_ver_id = id->mnr;

        /* Set compacted version for upper layers */
        geo->version = NVM_OCSSD_SPEC_20;

        geo->num_ch = le16_to_cpu(id->num_grp);
        geo->num_lun = le16_to_cpu(id->num_pu);
        geo->all_luns = geo->num_ch * geo->num_lun;

        geo->num_chk = le32_to_cpu(id->num_chk);
        geo->clba = le32_to_cpu(id->clba);

        geo->all_chunks = geo->all_luns * geo->num_chk;
        geo->total_secs = geo->clba * geo->all_chunks;

        geo->ws_min = le32_to_cpu(id->ws_min);
        geo->ws_opt = le32_to_cpu(id->ws_opt);
        geo->mw_cunits = le32_to_cpu(id->mw_cunits);
        geo->maxoc = le32_to_cpu(id->maxoc);
        geo->maxocpu = le32_to_cpu(id->maxocpu);

        geo->trdt = le32_to_cpu(id->trdt);
        geo->trdm = le32_to_cpu(id->trdm);
        geo->tprt = le32_to_cpu(id->twrt);
        geo->tprm = le32_to_cpu(id->twrm);
        geo->tbet = le32_to_cpu(id->tcrst);
        geo->tbem = le32_to_cpu(id->tcrsm);

        nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);

        return 0;
}
static int nvme_nvm_identity(struct nvm_dev *nvmdev)
{
        struct nvme_ns *ns = nvmdev->q->queuedata;
        struct nvme_nvm_id12 *id;
        struct nvme_nvm_command c = {};
        int ret;

        c.identity.opcode = nvme_nvm_admin_identity;
        c.identity.nsid = cpu_to_le32(ns->head->ns_id);

        id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
        if (!id)
                return -ENOMEM;

        ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
                                id, sizeof(struct nvme_nvm_id12));
        if (ret) {
                ret = -EIO;
                goto out;
        }

        /*
         * The 1.2 and 2.0 specifications share the first byte in their geometry
         * command to make it possible to know what version a device implements.
         */
        switch (id->ver_id) {
        case 1:
                ret = nvme_nvm_setup_12(id, &nvmdev->geo);
                break;
        case 2:
                ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
                                                        &nvmdev->geo);
                break;
        default:
                dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
                                                        id->ver_id);
                ret = -EINVAL;
        }

out:
        kfree(id);
        return ret;
}
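
/*
 * Layout note: the identify buffer is always fetched through the 1.2
 * structure, but byte 0 (id12 ver_id, id20 mjr) overlays in both revisions,
 * so inspecting it first and then casting to struct nvme_nvm_id20 is safe.
 */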
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
                                                                u8 *blks)
{
        struct request_queue *q = nvmdev->q;
        struct nvm_geo *geo = &nvmdev->geo;
        struct nvme_ns *ns = q->queuedata;
        struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_nvm_command c = {};
        struct nvme_nvm_bb_tbl *bb_tbl;
        int nr_blks = geo->num_chk * geo->num_pln;
        int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
        int ret = 0;

        c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
        c.get_bb.nsid = cpu_to_le32(ns->head->ns_id);
        c.get_bb.spba = cpu_to_le64(ppa.ppa);

        bb_tbl = kzalloc(tblsz, GFP_KERNEL);
        if (!bb_tbl)
                return -ENOMEM;

        ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
                                                                bb_tbl, tblsz);
        if (ret) {
                dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
                ret = -EIO;
                goto out;
        }

        if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
                bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
                dev_err(ctrl->device, "bbt format mismatch\n");
                ret = -EINVAL;
                goto out;
        }

        if (le16_to_cpu(bb_tbl->verid) != 1) {
                dev_err(ctrl->device, "bbt version not supported\n");
                ret = -EINVAL;
                goto out;
        }

        if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
                ret = -EINVAL;
                dev_err(ctrl->device,
                        "bbt unexpected blocks returned (%u!=%u)\n",
                                le32_to_cpu(bb_tbl->tblks), nr_blks);
                goto out;
        }

        memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
out:
        kfree(bb_tbl);
        return ret;
}
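
/*
 * The returned table carries one status byte per block per plane. A
 * hypothetical geometry with num_chk = 1020 and num_pln = 2 therefore
 * yields nr_blks = 2040 status bytes after the 64-byte header, fetched in
 * one admin command of tblsz = 64 + 2040 bytes.
 */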
static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
                                                        int nr_ppas, int type)
{
        struct nvme_ns *ns = nvmdev->q->queuedata;
        struct nvme_nvm_command c = {};
        int ret = 0;

        c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
        c.set_bb.nsid = cpu_to_le32(ns->head->ns_id);
        c.set_bb.spba = cpu_to_le64(ppas->ppa);
        c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
        c.set_bb.value = type;

        ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
                                                                NULL, 0);
        if (ret)
                dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
                                                                        ret);
        return ret;
}
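
/*
 * nlb is zero-based: marking a single block bad is nr_ppas = 1, encoded as
 * nlb = 0. A minimal caller sketch (hypothetical values):
 *
 *      struct ppa_addr ppa = { .ppa = 0 };
 *      nvme_nvm_set_bb_tbl(nvmdev, &ppa, 1, NVM_BLK_T_GRWN_BAD);
 */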
/*
 * Expect the lba in device format
 */
static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
                                 struct nvm_chk_meta *meta,
                                 sector_t slba, int nchks)
{
        struct nvm_geo *geo = &ndev->geo;
        struct nvme_ns *ns = ndev->q->queuedata;
        struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
        struct ppa_addr ppa;
        size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
        size_t log_pos, offset, len;
        unsigned int max_len;
        int ret = 0, i;

        /*
         * limit requests to maximum 256K to avoid issuing arbitrary large
         * requests when the device does not specify a maximum transfer size.
         */
        max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);

        /* Normalize lba address space to obtain log offset */
        ppa.ppa = slba;
        ppa = dev_to_generic_addr(ndev, ppa);

        log_pos = ppa.m.chk;
        log_pos += ppa.m.pu * geo->num_chk;
        log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;

        offset = log_pos * sizeof(struct nvme_nvm_chk_meta);

        while (left) {
                len = min_t(unsigned int, left, max_len);

                ret = nvme_get_log(ctrl, ns->head->ns_id,
                                NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
                                offset);
                if (ret) {
                        dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
                        break;
                }

                for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
                        meta->state = dev_meta->state;
                        meta->type = dev_meta->type;
                        meta->wi = dev_meta->wi;
                        meta->slba = le64_to_cpu(dev_meta->slba);
                        meta->cnlb = le64_to_cpu(dev_meta->cnlb);
                        meta->wp = le64_to_cpu(dev_meta->wp);

                        meta++;
                        dev_meta++;
                }

                offset += len;
                left -= len;
        }

        return ret;
}
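
/*
 * Worked example for the log offset above (hypothetical geometry with
 * num_lun = 4 and num_chk = 1000): the chunk at grp 1, pu 2, chk 10 sits
 *
 *      log_pos = 10 + 2 * 1000 + 1 * 4 * 1000 = 6010
 *
 * entries into the report-chunk log, i.e. at byte offset 6010 * 32.
 */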
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
                                    struct nvme_nvm_command *c)
{
        c->ph_rw.opcode = rqd->opcode;
        c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
        c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
        c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
        c->ph_rw.control = cpu_to_le16(rqd->flags);
        c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
}
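
/*
 * Like nlb elsewhere in NVMe, length is zero-based: a vector I/O of
 * rqd->nr_ppas = 4 sectors is encoded as length = 3, with spba holding
 * either the single PPA itself or the DMA address of the PPA list.
 */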
static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
        struct nvm_rq *rqd = rq->end_io_data;

        rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
        rqd->error = nvme_req(rq)->status;
        nvm_end_io(rqd);

        kfree(nvme_req(rq)->cmd);
        blk_mq_free_request(rq);
}
static struct request *nvme_nvm_alloc_request(struct request_queue *q,
                                              struct nvm_rq *rqd,
                                              struct nvme_nvm_command *cmd)
{
        struct nvme_ns *ns = q->queuedata;
        struct request *rq;

        nvme_nvm_rqtocmd(rqd, ns, cmd);

        rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
        if (IS_ERR(rq))
                return rq;

        rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

        if (rqd->bio)
                blk_init_request_from_bio(rq, rqd->bio);
        else
                rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);

        return rq;
}
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
        struct request_queue *q = dev->q;
        struct nvme_nvm_command *cmd;
        struct request *rq;

        cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
        if (!cmd)
                return -ENOMEM;

        rq = nvme_nvm_alloc_request(q, rqd, cmd);
        if (IS_ERR(rq)) {
                kfree(cmd);
                return PTR_ERR(rq);
        }

        rq->end_io_data = rqd;

        blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

        return 0;
}
static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
{
        struct request_queue *q = dev->q;
        struct request *rq;
        struct nvme_nvm_command cmd;
        int ret = 0;

        memset(&cmd, 0, sizeof(struct nvme_nvm_command));

        rq = nvme_nvm_alloc_request(q, rqd, &cmd);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /* I/Os can fail and the error is signaled through rqd. Callers must
         * handle the error accordingly.
         */
        blk_execute_rq(q, NULL, rq, 0);
        if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
                ret = -EINTR;

        rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
        rqd->error = nvme_req(rq)->status;

        blk_mq_free_request(rq);

        return ret;
}
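
/*
 * A minimal sketch of how a sync submission is consumed (assuming a caller
 * that has already built rqd and its PPA list):
 *
 *      ret = nvme_nvm_submit_io_sync(dev, &rqd);
 *      if (ret)                // transport-level failure
 *              return ret;
 *      if (rqd.error)          // media/command status from the device
 *              handle_media_error(&rqd);
 *
 * handle_media_error() is a hypothetical caller-side helper, not part of
 * this driver.
 */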
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
        struct nvme_ns *ns = nvmdev->q->queuedata;

        return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
        struct dma_pool *dma_pool = pool;

        dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
                                    gfp_t mem_flags, dma_addr_t *dma_handler)
{
        return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *addr,
                                  dma_addr_t dma_handler)
{
        dma_pool_free(pool, addr, dma_handler);
}
static struct nvm_dev_ops nvme_nvm_dev_ops = {
        .identity               = nvme_nvm_identity,

        .get_bb_tbl             = nvme_nvm_get_bb_tbl,
        .set_bb_tbl             = nvme_nvm_set_bb_tbl,

        .get_chk_meta           = nvme_nvm_get_chk_meta,

        .submit_io              = nvme_nvm_submit_io,
        .submit_io_sync         = nvme_nvm_submit_io_sync,

        .create_dma_pool        = nvme_nvm_create_dma_pool,
        .destroy_dma_pool       = nvme_nvm_destroy_dma_pool,
        .dev_dma_alloc          = nvme_nvm_dev_dma_alloc,
        .dev_dma_free           = nvme_nvm_dev_dma_free,
};
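
/*
 * These callbacks are driven by the lightnvm core: identity() during
 * nvm_register(), the bb_tbl/chk_meta hooks when a target scans media
 * state, and submit_io()/submit_io_sync() on the target's data path.
 */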
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
                                struct nvme_ns *ns,
                                struct nvme_nvm_command *vcmd,
                                void __user *ubuf, unsigned int bufflen,
                                void __user *meta_buf, unsigned int meta_len,
                                void __user *ppa_buf, unsigned int ppa_len,
                                u32 *result, u64 *status, unsigned int timeout)
{
        bool write = nvme_is_write((struct nvme_command *)vcmd);
        struct nvm_dev *dev = ns->ndev;
        struct gendisk *disk = ns->disk;
        struct request *rq;
        struct bio *bio = NULL;
        __le64 *ppa_list = NULL;
        dma_addr_t ppa_dma;
        __le64 *metadata = NULL;
        dma_addr_t metadata_dma;
        DECLARE_COMPLETION_ONSTACK(wait);
        int ret = 0;

        rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
                        NVME_QID_ANY);
        if (IS_ERR(rq)) {
                ret = -ENOMEM;
                goto err_cmd;
        }

        rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;

        if (ppa_buf && ppa_len) {
                ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
                if (!ppa_list) {
                        ret = -ENOMEM;
                        goto err_rq;
                }
                if (copy_from_user(ppa_list, (void __user *)ppa_buf,
                                                sizeof(u64) * (ppa_len + 1))) {
                        ret = -EFAULT;
                        goto err_ppa;
                }
                vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
        } else {
                vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
        }

        if (ubuf && bufflen) {
                ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
                if (ret)
                        goto err_ppa;
                bio = rq->bio;

                if (meta_buf && meta_len) {
                        metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
                                                                &metadata_dma);
                        if (!metadata) {
                                ret = -ENOMEM;
                                goto err_map;
                        }

                        if (write) {
                                if (copy_from_user(metadata,
                                                (void __user *)meta_buf,
                                                meta_len)) {
                                        ret = -EFAULT;
                                        goto err_meta;
                                }
                        }
                        vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
                }

                bio->bi_disk = disk;
        }

        blk_execute_rq(q, NULL, rq, 0);

        if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
                ret = -EINTR;
        else if (nvme_req(rq)->status & 0x7ff)
                ret = -EIO;
        if (result)
                *result = nvme_req(rq)->status & 0x7ff;
        if (status)
                *status = le64_to_cpu(nvme_req(rq)->result.u64);

        if (metadata && !ret && !write) {
                if (copy_to_user(meta_buf, (void *)metadata, meta_len))
                        ret = -EFAULT;
        }
err_meta:
        if (meta_buf && meta_len)
                dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
        if (bio)
                blk_rq_unmap_user(bio);
err_ppa:
        if (ppa_buf && ppa_len)
                dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
err_rq:
        blk_mq_free_request(rq);
err_cmd:
        return ret;
}
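
/*
 * Note the zero-based convention carried over from the command encoding:
 * ppa_len counts PPAs minus one, which is why the copy above moves
 * sizeof(u64) * (ppa_len + 1) bytes of the user's PPA list into the DMA
 * buffer.
 */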
static int nvme_nvm_submit_vio(struct nvme_ns *ns,
                                        struct nvm_user_vio __user *uvio)
{
        struct nvm_user_vio vio;
        struct nvme_nvm_command c;
        unsigned int length;
        int ret;

        if (copy_from_user(&vio, uvio, sizeof(vio)))
                return -EFAULT;
        if (vio.flags)
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.ph_rw.opcode = vio.opcode;
        c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
        c.ph_rw.control = cpu_to_le16(vio.control);
        c.ph_rw.length = cpu_to_le16(vio.nppas);

        length = (vio.nppas + 1) << ns->lba_shift;

        ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
                        (void __user *)(uintptr_t)vio.addr, length,
                        (void __user *)(uintptr_t)vio.metadata,
                                                        vio.metadata_len,
                        (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
                        &vio.result, &vio.status, 0);

        if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
                return -EFAULT;

        return ret;
}
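
/*
 * Example of the length math (hypothetical request): nppas = 7 with a
 * 4096-byte sector (lba_shift = 12) maps the user buffer as
 * (7 + 1) << 12 = 32768 bytes, one sector per PPA in the vector.
 */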
static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
                                        struct nvm_passthru_vio __user *uvcmd)
{
        struct nvm_passthru_vio vcmd;
        struct nvme_nvm_command c;
        struct request_queue *q;
        unsigned int timeout = 0;
        int ret;

        if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
                return -EFAULT;
        if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
                return -EACCES;
        if (vcmd.flags)
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = vcmd.opcode;
        c.common.nsid = cpu_to_le32(ns->head->ns_id);
        c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
        /* cdw11-12 */
        c.ph_rw.length = cpu_to_le16(vcmd.nppas);
        c.ph_rw.control = cpu_to_le16(vcmd.control);
        c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
        c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
        c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);

        if (vcmd.timeout_ms)
                timeout = msecs_to_jiffies(vcmd.timeout_ms);

        q = admin ? ns->ctrl->admin_q : ns->queue;

        ret = nvme_nvm_submit_user_cmd(q, ns,
                        (struct nvme_nvm_command *)&c,
                        (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
                        (void __user *)(uintptr_t)vcmd.metadata,
                                                        vcmd.metadata_len,
                        (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
                        &vcmd.result, &vcmd.status, timeout);

        if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
                return -EFAULT;

        return ret;
}
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case NVME_NVM_IOCTL_ADMIN_VIO:
                return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
        case NVME_NVM_IOCTL_IO_VIO:
                return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
        case NVME_NVM_IOCTL_SUBMIT_VIO:
                return nvme_nvm_submit_vio(ns, (void __user *)arg);
        default:
                return -ENOTTY;
        }
}
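
/*
 * Userspace reaches these handlers through ioctls on the namespace block
 * device, e.g. (hypothetical sketch, error handling omitted):
 *
 *      struct nvm_user_vio vio = {
 *              .opcode = NVM_OP_PREAD,         // 0x92, vector read
 *              .nppas = 0,                     // one PPA, zero-based
 *              .ppa_list = (__u64)(uintptr_t)&ppa,
 *              .addr = (__u64)(uintptr_t)buf,
 *      };
 *      ioctl(fd, NVME_NVM_IOCTL_SUBMIT_VIO, &vio);
 */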
void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
{
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;

        if (geo->version == NVM_OCSSD_SPEC_12)
                return;

        geo->csecs = 1 << ns->lba_shift;
        geo->sos = ns->ms;
}
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
        struct request_queue *q = ns->queue;
        struct nvm_dev *dev;

        _nvme_nvm_check_size();

        dev = nvm_alloc_dev(node);
        if (!dev)
                return -ENOMEM;

        dev->q = q;
        memcpy(dev->name, disk_name, DISK_NAME_LEN);
        dev->ops = &nvme_nvm_dev_ops;
        dev->private_data = ns;
        ns->ndev = dev;

        return nvm_register(dev);
}
void nvme_nvm_unregister(struct nvme_ns *ns)
{
        nvm_unregister(ns->ndev);
}
static ssize_t nvm_dev_attr_show(struct device *dev,
                                 struct device_attribute *dattr, char *page)
{
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;
        struct attribute *attr;

        if (!ndev)
                return 0;

        attr = &dattr->attr;

        if (strcmp(attr->name, "version") == 0) {
                if (geo->major_ver_id == 1)
                        return scnprintf(page, PAGE_SIZE, "%u\n",
                                                geo->major_ver_id);
                else
                        return scnprintf(page, PAGE_SIZE, "%u.%u\n",
                                                geo->major_ver_id,
                                                geo->minor_ver_id);
        } else if (strcmp(attr->name, "capabilities") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
        } else if (strcmp(attr->name, "read_typ") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
        } else if (strcmp(attr->name, "read_max") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
        } else {
                return scnprintf(page,
                                 PAGE_SIZE,
                                 "Unhandled attr(%s) in `%s`\n",
                                 attr->name, __func__);
        }
}
static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
{
        return scnprintf(page, PAGE_SIZE,
                "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                                ppaf->ch_offset, ppaf->ch_len,
                                ppaf->lun_offset, ppaf->lun_len,
                                ppaf->pln_offset, ppaf->pln_len,
                                ppaf->blk_offset, ppaf->blk_len,
                                ppaf->pg_offset, ppaf->pg_len,
                                ppaf->sec_offset, ppaf->sec_len);
}
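
/*
 * The string is twelve hex byte pairs, one (offset, length) pair per field,
 * ordered ch, lun, pln, blk, pg, sec. A hypothetical format with the
 * channel at bits 25..24 would therefore start "0x1802...": offset 0x18,
 * length 0x02.
 */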
static ssize_t nvm_dev_attr_show_12(struct device *dev,
                struct device_attribute *dattr, char *page)
{
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;
        struct attribute *attr;

        if (!ndev)
                return 0;

        attr = &dattr->attr;

        if (strcmp(attr->name, "vendor_opcode") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
        } else if (strcmp(attr->name, "device_mode") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
        /* kept for compatibility */
        } else if (strcmp(attr->name, "media_manager") == 0) {
                return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
        } else if (strcmp(attr->name, "ppa_format") == 0) {
                return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
        } else if (strcmp(attr->name, "media_type") == 0) {     /* u8 */
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
        } else if (strcmp(attr->name, "flash_media_type") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
        } else if (strcmp(attr->name, "num_channels") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
        } else if (strcmp(attr->name, "num_luns") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
        } else if (strcmp(attr->name, "num_planes") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
        } else if (strcmp(attr->name, "num_blocks") == 0) {     /* u16 */
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
        } else if (strcmp(attr->name, "num_pages") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
        } else if (strcmp(attr->name, "page_size") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
        } else if (strcmp(attr->name, "hw_sector_size") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
        } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
        } else if (strcmp(attr->name, "prog_typ") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
        } else if (strcmp(attr->name, "prog_max") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
        } else if (strcmp(attr->name, "erase_typ") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
        } else if (strcmp(attr->name, "erase_max") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
        } else if (strcmp(attr->name, "multiplane_modes") == 0) {
                return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
        } else if (strcmp(attr->name, "media_capabilities") == 0) {
                return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
        } else if (strcmp(attr->name, "max_phys_secs") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
        } else {
                return scnprintf(page, PAGE_SIZE,
                        "Unhandled attr(%s) in `%s`\n",
                        attr->name, __func__);
        }
}
static ssize_t nvm_dev_attr_show_20(struct device *dev,
                struct device_attribute *dattr, char *page)
{
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;
        struct attribute *attr;

        if (!ndev)
                return 0;

        attr = &dattr->attr;

        if (strcmp(attr->name, "groups") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
        } else if (strcmp(attr->name, "punits") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
        } else if (strcmp(attr->name, "chunks") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
        } else if (strcmp(attr->name, "clba") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
        } else if (strcmp(attr->name, "ws_min") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
        } else if (strcmp(attr->name, "ws_opt") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
        } else if (strcmp(attr->name, "maxoc") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
        } else if (strcmp(attr->name, "maxocpu") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
        } else if (strcmp(attr->name, "mw_cunits") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
        } else if (strcmp(attr->name, "write_typ") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
        } else if (strcmp(attr->name, "write_max") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
        } else if (strcmp(attr->name, "reset_typ") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
        } else if (strcmp(attr->name, "reset_max") == 0) {
                return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
        } else {
                return scnprintf(page, PAGE_SIZE,
                        "Unhandled attr(%s) in `%s`\n",
                        attr->name, __func__);
        }
}
#define NVM_DEV_ATTR_RO(_name)                                  \
        DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
#define NVM_DEV_ATTR_12_RO(_name)                               \
        DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
#define NVM_DEV_ATTR_20_RO(_name)                               \
        DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
/* general attributes */
static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(capabilities);

static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);
/* 1.2 values */
static NVM_DEV_ATTR_12_RO(vendor_opcode);
static NVM_DEV_ATTR_12_RO(device_mode);
static NVM_DEV_ATTR_12_RO(ppa_format);
static NVM_DEV_ATTR_12_RO(media_manager);
static NVM_DEV_ATTR_12_RO(media_type);
static NVM_DEV_ATTR_12_RO(flash_media_type);
static NVM_DEV_ATTR_12_RO(num_channels);
static NVM_DEV_ATTR_12_RO(num_luns);
static NVM_DEV_ATTR_12_RO(num_planes);
static NVM_DEV_ATTR_12_RO(num_blocks);
static NVM_DEV_ATTR_12_RO(num_pages);
static NVM_DEV_ATTR_12_RO(page_size);
static NVM_DEV_ATTR_12_RO(hw_sector_size);
static NVM_DEV_ATTR_12_RO(oob_sector_size);
static NVM_DEV_ATTR_12_RO(prog_typ);
static NVM_DEV_ATTR_12_RO(prog_max);
static NVM_DEV_ATTR_12_RO(erase_typ);
static NVM_DEV_ATTR_12_RO(erase_max);
static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);
static struct attribute *nvm_dev_attrs_12[] = {
        &dev_attr_version.attr,
        &dev_attr_capabilities.attr,

        &dev_attr_vendor_opcode.attr,
        &dev_attr_device_mode.attr,
        &dev_attr_media_manager.attr,
        &dev_attr_ppa_format.attr,
        &dev_attr_media_type.attr,
        &dev_attr_flash_media_type.attr,
        &dev_attr_num_channels.attr,
        &dev_attr_num_luns.attr,
        &dev_attr_num_planes.attr,
        &dev_attr_num_blocks.attr,
        &dev_attr_num_pages.attr,
        &dev_attr_page_size.attr,
        &dev_attr_hw_sector_size.attr,
        &dev_attr_oob_sector_size.attr,
        &dev_attr_read_typ.attr,
        &dev_attr_read_max.attr,
        &dev_attr_prog_typ.attr,
        &dev_attr_prog_max.attr,
        &dev_attr_erase_typ.attr,
        &dev_attr_erase_max.attr,
        &dev_attr_multiplane_modes.attr,
        &dev_attr_media_capabilities.attr,
        &dev_attr_max_phys_secs.attr,

        NULL,
};

static const struct attribute_group nvm_dev_attr_group_12 = {
        .name           = "lightnvm",
        .attrs          = nvm_dev_attrs_12,
};
/* 2.0 values */
static NVM_DEV_ATTR_20_RO(groups);
static NVM_DEV_ATTR_20_RO(punits);
static NVM_DEV_ATTR_20_RO(chunks);
static NVM_DEV_ATTR_20_RO(clba);
static NVM_DEV_ATTR_20_RO(ws_min);
static NVM_DEV_ATTR_20_RO(ws_opt);
static NVM_DEV_ATTR_20_RO(maxoc);
static NVM_DEV_ATTR_20_RO(maxocpu);
static NVM_DEV_ATTR_20_RO(mw_cunits);
static NVM_DEV_ATTR_20_RO(write_typ);
static NVM_DEV_ATTR_20_RO(write_max);
static NVM_DEV_ATTR_20_RO(reset_typ);
static NVM_DEV_ATTR_20_RO(reset_max);
static struct attribute *nvm_dev_attrs_20[] = {
        &dev_attr_version.attr,
        &dev_attr_capabilities.attr,

        &dev_attr_groups.attr,
        &dev_attr_punits.attr,
        &dev_attr_chunks.attr,
        &dev_attr_clba.attr,
        &dev_attr_ws_min.attr,
        &dev_attr_ws_opt.attr,
        &dev_attr_maxoc.attr,
        &dev_attr_maxocpu.attr,
        &dev_attr_mw_cunits.attr,

        &dev_attr_read_typ.attr,
        &dev_attr_read_max.attr,
        &dev_attr_write_typ.attr,
        &dev_attr_write_max.attr,
        &dev_attr_reset_typ.attr,
        &dev_attr_reset_max.attr,

        NULL,
};

static const struct attribute_group nvm_dev_attr_group_20 = {
        .name           = "lightnvm",
        .attrs          = nvm_dev_attrs_20,
};
int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;

        if (!ndev)
                return -EINVAL;

        switch (geo->major_ver_id) {
        case 1:
                return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvm_dev_attr_group_12);
        case 2:
                return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvm_dev_attr_group_20);
        }

        return -EINVAL;
}
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
{
        struct nvm_dev *ndev = ns->ndev;
        struct nvm_geo *geo = &ndev->geo;

        switch (geo->major_ver_id) {
        case 1:
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvm_dev_attr_group_12);
                break;
        case 2:
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvm_dev_attr_group_20);
                break;
        }
}
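
/*
 * With the group name "lightnvm", the geometry is exposed under
 * /sys/block/<disk>/lightnvm/; e.g. on a 2.0 device, reading the "groups"
 * attribute prints geo->num_ch followed by a newline.
 */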