/*
 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>

#include <asm/io.h>
#include <asm/irq.h>
#include <asm/byteorder.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "cxio_hal.h"
#include "iwch.h"
#include "iwch_provider.h"
#include "iwch_cm.h"
#include <rdma/cxgb3-abi.h>
#include "common.h"
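
/*
 * Each user context tracks a list of iwch_mm_entry records describing
 * the queue and doorbell memory that userspace is entitled to mmap();
 * entries are added at CQ/QP create time and consumed by iwch_mmap()
 * below.  Context teardown frees any entries that were never mapped.
 */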
static void iwch_dealloc_ucontext(struct ib_ucontext *context)
{
	struct iwch_dev *rhp = to_iwch_dev(context->device);
	struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
	struct iwch_mm_entry *mm, *tmp;

	pr_debug("%s context %p\n", __func__, context);
	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
		kfree(mm);
	cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
}
static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
			       struct ib_udata *udata)
{
	struct ib_device *ibdev = ucontext->device;
	struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
	struct iwch_dev *rhp = to_iwch_dev(ibdev);

	pr_debug("%s ibdev %p\n", __func__, ibdev);
	cxio_init_ucontext(&rhp->rdev, &context->uctx);
	INIT_LIST_HEAD(&context->mmaps);
	spin_lock_init(&context->mmap_lock);
	return 0;
}
static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
	struct iwch_cq *chp;

	pr_debug("%s ib_cq %p\n", __func__, ib_cq);
	chp = to_iwch_cq(ib_cq);

	xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
}
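
/*
 * CQ creation: the requested depth is padded on T3A devices and
 * rounded up to a power of two, the hardware CQ is allocated through
 * cxio_create_cq(), and the handle is published in the rhp->cqs
 * xarray so event handlers can look it up by cqid.  For user CQs,
 * the response carries an mmap key that userspace later hands back
 * via iwch_mmap() to map the CQE ring.
 */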
static int iwch_create_cq(struct ib_cq *ibcq,
			  const struct ib_cq_init_attr *attr,
			  struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	struct iwch_dev *rhp = to_iwch_dev(ibcq->device);
	struct iwch_cq *chp = to_iwch_cq(ibcq);
	struct iwch_create_cq_resp uresp;
	struct iwch_create_cq_req ureq;
	static int warned;
	size_t resplen;

	pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
	if (attr->flags)
		return -EINVAL;

	if (udata) {
		if (!t3a_device(rhp)) {
			if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
				return -EFAULT;

			chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
		}
	}

	if (t3a_device(rhp)) {

		/*
		 * T3A: Add some fluff to handle extra CQEs inserted
		 * for various errors.
		 * Additional CQE possibilities:
		 *      TERMINATE,
		 *      incoming RDMA WRITE Failures
		 *      incoming RDMA READ REQUEST FAILUREs
		 * NOTE: We cannot ensure the CQ won't overflow.
		 */
		entries += 16;
	}
	entries = roundup_pow_of_two(entries);
	chp->cq.size_log2 = ilog2(entries);

	if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata))
		return -ENOMEM;

	chp->rhp = rhp;
	chp->ibcq.cqe = 1 << chp->cq.size_log2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) {
		cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
		return -ENOMEM;
	}

	if (udata) {
		struct iwch_mm_entry *mm;
		struct iwch_ucontext *ucontext = rdma_udata_to_drv_context(
			udata, struct iwch_ucontext, ibucontext);

		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
		if (!mm) {
			iwch_destroy_cq(&chp->ibcq, udata);
			return -ENOMEM;
		}
		uresp.cqid = chp->cq.cqid;
		uresp.size_log2 = chp->cq.size_log2;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		if (udata->outlen < sizeof(uresp)) {
			if (!warned++)
				pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
			mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
					     sizeof(struct t3_cqe));
			resplen = sizeof(struct iwch_create_cq_resp_v0);
		} else {
			mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
					     sizeof(struct t3_cqe));
			uresp.memsize = mm->len;
			uresp.reserved = 0;
			resplen = sizeof(uresp);
		}
		if (ib_copy_to_udata(udata, &uresp, resplen)) {
			kfree(mm);
			iwch_destroy_cq(&chp->ibcq, udata);
			return -EFAULT;
		}
		insert_mmap(ucontext, mm);
	}
	pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n",
		 chp->cq.cqid, chp, (1 << chp->cq.size_log2),
		 &chp->cq.dma_addr);
	return 0;
}
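
/*
 * Re-arm the CQ for completion notification: IB_CQ_SOLICITED selects
 * the CQ_ARM_SE opcode (notify on solicited events only), anything
 * else arms for the next event (CQ_ARM_AN).  For user CQs the current
 * read pointer is first fetched from the shared userspace location.
 */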
static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct iwch_dev *rhp;
	struct iwch_cq *chp;
	enum t3_cq_opcode cq_op;
	int err;
	unsigned long flag;
	u32 rptr;

	chp = to_iwch_cq(ibcq);
	rhp = chp->rhp;
	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
		cq_op = CQ_ARM_SE;
	else
		cq_op = CQ_ARM_AN;
	if (chp->user_rptr_addr) {
		if (get_user(rptr, chp->user_rptr_addr))
			return -EFAULT;
		spin_lock_irqsave(&chp->lock, flag);
		chp->cq.rptr = rptr;
	} else
		spin_lock_irqsave(&chp->lock, flag);
	pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
	err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
	spin_unlock_irqrestore(&chp->lock, flag);
	if (err < 0)
		pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
	if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
		err = 0;
	return err;
}
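
/*
 * iwch_mmap() translates the pgoff "key" handed out in the create_cq/
 * create_qp responses back into physical memory.  Doorbell pages are
 * mapped uncached and must be write-only; queue memory is mapped as
 * ordinary contiguous DMA memory.  A minimal userspace sketch of the
 * doorbell mapping (hypothetical; the names cmd_fd and resp.db_key
 * are illustrative, real libcxgb3 code differs):
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			cmd_fd, resp.db_key);
 *
 * where resp.db_key came back from the create_qp command.
 */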
static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	int len = vma->vm_end - vma->vm_start;
	u32 key = vma->vm_pgoff << PAGE_SHIFT;
	struct cxio_rdev *rdev_p;
	int ret = 0;
	struct iwch_mm_entry *mm;
	struct iwch_ucontext *ucontext;
	u64 addr;

	pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
		 key, len);

	if (vma->vm_start & (PAGE_SIZE-1)) {
		return -EINVAL;
	}

	rdev_p = &(to_iwch_dev(context->device)->rdev);
	ucontext = to_iwch_ucontext(context);

	mm = remove_mmap(ucontext, key, len);
	if (!mm)
		return -EINVAL;
	addr = mm->addr;
	kfree(mm);

	if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
	    (addr < (rdev_p->rnic_info.udbell_physbase +
		     rdev_p->rnic_info.udbell_len))) {

		/*
		 * Map T3 DB register.
		 */
		if (vma->vm_flags & VM_READ) {
			return -EPERM;
		}

		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_flags &= ~VM_MAYREAD;
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 addr >> PAGE_SHIFT,
					 len, vma->vm_page_prot);
	} else {

		/*
		 * Map WQ or CQ contig dma memory...
		 */
		ret = remap_pfn_range(vma, vma->vm_start,
				      addr >> PAGE_SHIFT,
				      len, vma->vm_page_prot);
	}

	return ret;
}
static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_pd *php;

	php = to_iwch_pd(pd);
	rhp = php->rhp;
	pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
	cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
}
static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
	struct iwch_pd *php = to_iwch_pd(pd);
	struct ib_device *ibdev = pd->device;
	u32 pdid;
	struct iwch_dev *rhp;

	pr_debug("%s ibdev %p\n", __func__, ibdev);
	rhp = (struct iwch_dev *) ibdev;
	pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
	if (!pdid)
		return -EINVAL;

	php->pdid = pdid;
	php->rhp = rhp;
	if (udata) {
		struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};

		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
			iwch_deallocate_pd(&php->ibpd, udata);
			return -EFAULT;
		}
	}
	pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
	return 0;
}
static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_mr *mhp;
	u32 mmid;

	pr_debug("%s ib_mr %p\n", __func__, ib_mr);

	mhp = to_iwch_mr(ib_mr);
	kfree(mhp->pages);
	rhp = mhp->rhp;
	mmid = mhp->attr.stag >> 8;
	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
		       mhp->attr.pbl_addr);
	iwch_free_pbl(mhp);
	xa_erase_irq(&rhp->mrs, mmid);
	if (mhp->kva)
		kfree((void *) (unsigned long) mhp->kva);
	ib_umem_release(mhp->umem);
	pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
	kfree(mhp);
	return 0;
}
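
/*
 * The "DMA MR" is a single region spanning the full 32-bit address
 * space T3 can express, described by a page list of 64MB (1 << 26)
 * chunks.  On platforms where physical addresses exceed 32 bits this
 * cannot be made safe, so the request is refused outright.
 */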
static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
{
	const u64 total_size = 0xffffffff;
	const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
	struct iwch_pd *php = to_iwch_pd(pd);
	struct iwch_dev *rhp = php->rhp;
	struct iwch_mr *mhp;
	__be64 *page_list;
	int shift = 26, npages, ret, i;

	pr_debug("%s ib_pd %p\n", __func__, pd);

	/*
	 * T3 only supports 32 bits of size.
	 */
	if (sizeof(phys_addr_t) > 4) {
		pr_warn_once("Cannot support dma_mrs on this platform\n");
		return ERR_PTR(-ENOTSUPP);
	}

	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
	if (!mhp)
		return ERR_PTR(-ENOMEM);

	mhp->rhp = rhp;

	npages = (total_size + (1ULL << shift) - 1) >> shift;
	if (!npages) {
		ret = -EINVAL;
		goto err;
	}

	page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
	if (!page_list) {
		ret = -ENOMEM;
		goto err;
	}

	for (i = 0; i < npages; i++)
		page_list[i] = cpu_to_be64((u64)i << shift);

	pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
		 __func__, mask, shift, total_size, npages);

	ret = iwch_alloc_pbl(mhp, npages);
	if (ret) {
		kfree(page_list);
		goto err;
	}

	ret = iwch_write_pbl(mhp, page_list, npages, 0);
	kfree(page_list);
	if (ret)
		goto err_pbl;

	mhp->attr.pdid = php->pdid;
	mhp->attr.zbva = 0;
	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
	mhp->attr.va_fbo = 0;
	mhp->attr.page_size = shift - 12;
	mhp->attr.len = (u32) total_size;
	mhp->attr.pbl_size = npages;
	ret = iwch_register_mem(rhp, php, mhp, shift);
	if (ret)
		goto err_pbl;

	return &mhp->ibmr;

err_pbl:
	iwch_free_pbl(mhp);
err:
	kfree(mhp);
	return ERR_PTR(ret);
}
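
/*
 * User MR registration: pin the user range with ib_umem_get(), stream
 * the page DMA addresses into the adapter's PBL one page-sized batch
 * at a time via iwch_write_pbl(), then write the TPT entry with
 * iwch_register_mem().  Non-T3A devices also return the PBL offset to
 * userspace.
 */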
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				      u64 virt, int acc, struct ib_udata *udata)
{
	__be64 *pages;
	int shift, n, i;
	int err = 0;
	struct iwch_dev *rhp;
	struct iwch_pd *php;
	struct iwch_mr *mhp;
	struct iwch_reg_user_mr_resp uresp;
	struct sg_dma_page_iter sg_iter;

	pr_debug("%s ib_pd %p\n", __func__, pd);

	php = to_iwch_pd(pd);
	rhp = php->rhp;
	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
	if (!mhp)
		return ERR_PTR(-ENOMEM);

	mhp->rhp = rhp;

	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
	if (IS_ERR(mhp->umem)) {
		err = PTR_ERR(mhp->umem);
		kfree(mhp);
		return ERR_PTR(err);
	}

	shift = PAGE_SHIFT;
	n = ib_umem_num_pages(mhp->umem);

	err = iwch_alloc_pbl(mhp, n);
	if (err)
		goto err;

	pages = (__be64 *) __get_free_page(GFP_KERNEL);
	if (!pages) {
		err = -ENOMEM;
		goto err_pbl;
	}

	i = n = 0;
	for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
		if (i == PAGE_SIZE / sizeof(*pages)) {
			err = iwch_write_pbl(mhp, pages, i, n);
			if (err)
				goto pbl_done;
			n += i;
			i = 0;
		}
	}

	if (i)
		err = iwch_write_pbl(mhp, pages, i, n);

pbl_done:
	free_page((unsigned long) pages);
	if (err)
		goto err_pbl;

	mhp->attr.pdid = php->pdid;
	mhp->attr.zbva = 0;
	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
	mhp->attr.va_fbo = virt;
	mhp->attr.page_size = shift - 12;
	mhp->attr.len = (u32) length;

	err = iwch_register_mem(rhp, php, mhp, shift);
	if (err)
		goto err_pbl;

	if (udata && !t3a_device(rhp)) {
		uresp.pbl_addr = (mhp->attr.pbl_addr -
				  rhp->rdev.rnic_info.pbl_base) >> 3;
		pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
			 uresp.pbl_addr);

		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
			iwch_dereg_mr(&mhp->ibmr, udata);
			return ERR_PTR(-EFAULT);
		}
	}

	return &mhp->ibmr;

err_pbl:
	iwch_free_pbl(mhp);
err:
	ib_umem_release(mhp->umem);
	kfree(mhp);
	return ERR_PTR(err);
}
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
				   struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_pd *php;
	struct iwch_mw *mhp;
	u32 mmid;
	u32 stag = 0;
	int ret;

	if (type != IB_MW_TYPE_1)
		return ERR_PTR(-EINVAL);

	php = to_iwch_pd(pd);
	rhp = php->rhp;
	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
	if (!mhp)
		return ERR_PTR(-ENOMEM);
	ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
	if (ret) {
		kfree(mhp);
		return ERR_PTR(ret);
	}
	mhp->rhp = rhp;
	mhp->attr.pdid = php->pdid;
	mhp->attr.type = TPT_MW;
	mhp->attr.stag = stag;
	mmid = (stag) >> 8;
	mhp->ibmw.rkey = stag;
	if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
		cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
		kfree(mhp);
		return ERR_PTR(-ENOMEM);
	}
	pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
	return &(mhp->ibmw);
}
static int iwch_dealloc_mw(struct ib_mw *mw)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	u32 mmid;

	mhp = to_iwch_mw(mw);
	rhp = mhp->rhp;
	mmid = (mw->rkey) >> 8;
	cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
	xa_erase_irq(&rhp->mrs, mmid);
	kfree(mhp);
	pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
	return 0;
}
static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				   u32 max_num_sg, struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_pd *php;
	struct iwch_mr *mhp;
	u32 mmid;
	u32 stag = 0;
	int ret = -ENOMEM;

	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > T3_MAX_FASTREG_DEPTH)
		return ERR_PTR(-EINVAL);

	php = to_iwch_pd(pd);
	rhp = php->rhp;
	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
	if (!mhp)
		goto err;

	mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
	if (!mhp->pages)
		goto pl_err;

	mhp->rhp = rhp;
	ret = iwch_alloc_pbl(mhp, max_num_sg);
	if (ret)
		goto err1;
	mhp->attr.pbl_size = max_num_sg;
	ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
				 mhp->attr.pbl_size, mhp->attr.pbl_addr);
	if (ret)
		goto err2;
	mhp->attr.pdid = php->pdid;
	mhp->attr.type = TPT_NON_SHARED_MR;
	mhp->attr.stag = stag;
	mhp->attr.state = 1;
	mmid = (stag) >> 8;
	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
	ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL);
	if (ret)
		goto err3;

	pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
	return &(mhp->ibmr);
err3:
	cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
		       mhp->attr.pbl_addr);
err2:
	iwch_free_pbl(mhp);
err1:
	kfree(mhp->pages);
pl_err:
	kfree(mhp);
err:
	return ERR_PTR(ret);
}
static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct iwch_mr *mhp = to_iwch_mr(ibmr);

	if (unlikely(mhp->npages == mhp->attr.pbl_size))
		return -ENOMEM;

	mhp->pages[mhp->npages++] = addr;

	return 0;
}
static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			  int sg_nents, unsigned int *sg_offset)
{
	struct iwch_mr *mhp = to_iwch_mr(ibmr);

	mhp->npages = 0;

	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
}
static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_qp *qhp;
	struct iwch_qp_attributes attrs;
	struct iwch_ucontext *ucontext;

	qhp = to_iwch_qp(ib_qp);
	rhp = qhp->rhp;

	attrs.next_state = IWCH_QP_STATE_ERROR;
	iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
	wait_event(qhp->wait, !qhp->ep);

	xa_erase_irq(&rhp->qps, qhp->wq.qpid);

	atomic_dec(&qhp->refcnt);
	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));

	ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
					     ibucontext);
	cxio_destroy_qp(&rhp->rdev, &qhp->wq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);

	pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
		 ib_qp, qhp->wq.qpid, qhp);
	kfree(qhp);
	return 0;
}
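
/*
 * QP sizing rules enforced below: the RQ depth must be (entries + 1)
 * rounded up to a power of two, clamped to [16, T3_MAX_RQ_SIZE]; the
 * SQ and combined WQ depths are likewise rounded to powers of two
 * because the queue accounting macros (Q_FREECNT() and friends)
 * assume it.  Kernel QPs reserve extra SQ space for fastreg WRs.
 */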
static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
				    struct ib_qp_init_attr *attrs,
				    struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_qp *qhp;
	struct iwch_pd *php;
	struct iwch_cq *schp;
	struct iwch_cq *rchp;
	struct iwch_create_qp_resp uresp;
	int wqsize, sqsize, rqsize;
	struct iwch_ucontext *ucontext;

	pr_debug("%s ib_pd %p\n", __func__, pd);
	if (attrs->qp_type != IB_QPT_RC)
		return ERR_PTR(-EINVAL);
	php = to_iwch_pd(pd);
	rhp = php->rhp;
	schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
	rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
	if (!schp || !rchp)
		return ERR_PTR(-EINVAL);

	/* The RQT size must be # of entries + 1 rounded up to a power of two */
	rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
	if (rqsize == attrs->cap.max_recv_wr)
		rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);

	/* T3 doesn't support RQT depth < 16 */
	if (rqsize < 16)
		rqsize = 16;

	if (rqsize > T3_MAX_RQ_SIZE)
		return ERR_PTR(-EINVAL);

	if (attrs->cap.max_inline_data > T3_MAX_INLINE)
		return ERR_PTR(-EINVAL);

	/*
	 * NOTE: The SQ and total WQ sizes don't need to be
	 * a power of two.  However, all the code assumes
	 * they are. EG: Q_FREECNT() and friends.
	 */
	sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
	wqsize = roundup_pow_of_two(rqsize + sqsize);

	/*
	 * Kernel users need more wq space for fastreg WRs which can take
	 * 2 WR slots.
	 */
	ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
					     ibucontext);
	if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
		wqsize = roundup_pow_of_two(rqsize +
				roundup_pow_of_two(attrs->cap.max_send_wr * 2));
	pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
		 wqsize, sqsize, rqsize);
	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
	if (!qhp)
		return ERR_PTR(-ENOMEM);
	qhp->wq.size_log2 = ilog2(wqsize);
	qhp->wq.rq_size_log2 = ilog2(rqsize);
	qhp->wq.sq_size_log2 = ilog2(sqsize);
	if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
			   ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
		kfree(qhp);
		return ERR_PTR(-ENOMEM);
	}

	attrs->cap.max_recv_wr = rqsize - 1;
	attrs->cap.max_send_wr = sqsize;
	attrs->cap.max_inline_data = T3_MAX_INLINE;

	qhp->rhp = rhp;
	qhp->attr.pd = php->pdid;
	qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
	qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
	qhp->attr.state = IWCH_QP_STATE_IDLE;
	qhp->attr.next_state = IWCH_QP_STATE_IDLE;

	/*
	 * XXX - These don't get passed in from the openib user
	 * at create time.  The CM sets them via a QP modify.
	 * Need to fix...
	 */
	qhp->attr.enable_rdma_read = 1;
	qhp->attr.enable_rdma_write = 1;
	qhp->attr.enable_bind = 1;
	qhp->attr.max_ord = 1;
	qhp->attr.max_ird = 1;

	spin_lock_init(&qhp->lock);
	init_waitqueue_head(&qhp->wait);
	atomic_set(&qhp->refcnt, 1);

	if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) {
		cxio_destroy_qp(&rhp->rdev, &qhp->wq,
				ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
		kfree(qhp);
		return ERR_PTR(-ENOMEM);
	}

	if (udata) {
		struct iwch_mm_entry *mm1, *mm2;

		mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL);
		if (!mm1) {
			iwch_destroy_qp(&qhp->ibqp, udata);
			return ERR_PTR(-ENOMEM);
		}

		mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
		if (!mm2) {
			kfree(mm1);
			iwch_destroy_qp(&qhp->ibqp, udata);
			return ERR_PTR(-ENOMEM);
		}

		uresp.qpid = qhp->wq.qpid;
		uresp.size_log2 = qhp->wq.size_log2;
		uresp.sq_size_log2 = qhp->wq.sq_size_log2;
		uresp.rq_size_log2 = qhp->wq.rq_size_log2;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.db_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
			kfree(mm1);
			kfree(mm2);
			iwch_destroy_qp(&qhp->ibqp, udata);
			return ERR_PTR(-EFAULT);
		}
		mm1->key = uresp.key;
		mm1->addr = virt_to_phys(qhp->wq.queue);
		mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr));
		insert_mmap(ucontext, mm1);
		mm2->key = uresp.db_key;
		mm2->addr = qhp->wq.udb & PAGE_MASK;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	qhp->ibqp.qp_num = qhp->wq.qpid;
	pr_debug(
		"%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n",
		__func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
		qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2,
		qhp->wq.rq_addr);
	return &qhp->ibqp;
}
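
/*
 * Translate a verbs modify-QP request into the driver's own attribute
 * mask.  Only state transitions and access-flag changes are honored;
 * the RTR state has no equivalent in iWARP and is silently dropped.
 */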
static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			     int attr_mask, struct ib_udata *udata)
{
	struct iwch_dev *rhp;
	struct iwch_qp *qhp;
	enum iwch_qp_attr_mask mask = 0;
	struct iwch_qp_attributes attrs = {};

	pr_debug("%s ib_qp %p\n", __func__, ibqp);

	/* iwarp does not support the RTR state */
	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
		attr_mask &= ~IB_QP_STATE;

	/* Make sure we still have something left to do */
	if (!attr_mask)
		return 0;

	qhp = to_iwch_qp(ibqp);
	rhp = qhp->rhp;

	attrs.next_state = iwch_convert_state(attr->qp_state);
	attrs.enable_rdma_read = (attr->qp_access_flags &
			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
	attrs.enable_rdma_write = (attr->qp_access_flags &
				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;

	mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
			(IWCH_QP_ATTR_ENABLE_RDMA_READ |
			 IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
			 IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;

	return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
}
void iwch_qp_add_ref(struct ib_qp *qp)
{
	pr_debug("%s ib_qp %p\n", __func__, qp);
	atomic_inc(&(to_iwch_qp(qp)->refcnt));
}

void iwch_qp_rem_ref(struct ib_qp *qp)
{
	pr_debug("%s ib_qp %p\n", __func__, qp);
	if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
		wake_up(&(to_iwch_qp(qp)->wait));
}

static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
{
	pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
	return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
}
static int iwch_query_pkey(struct ib_device *ibdev,
			   u8 port, u16 index, u16 *pkey)
{
	pr_debug("%s ibdev %p\n", __func__, ibdev);
	*pkey = 0;
	return 0;
}
static int iwch_query_gid(struct ib_device *ibdev, u8 port,
			  int index, union ib_gid *gid)
{
	struct iwch_dev *dev;

	pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
		 __func__, ibdev, port, index, gid);
	dev = to_iwch_dev(ibdev);
	BUG_ON(port == 0 || port > 2);
	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
	memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
	return 0;
}
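
/*
 * Pack the dotted firmware version reported by ethtool into the
 * 64-bit fw_ver field as 16-bit major/minor/micro components.  The
 * leading character of the string is skipped.  Assuming a version
 * string of the form "T 4.7.0", this would yield
 * (4ULL << 32) | (7 << 16) | 0.
 */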
static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
{
	struct ethtool_drvinfo info;
	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
	char *cp, *next;
	unsigned fw_maj, fw_min, fw_mic;

	lldev->ethtool_ops->get_drvinfo(lldev, &info);

	next = info.fw_version + 1;
	cp = strsep(&next, ".");
	sscanf(cp, "%i", &fw_maj);
	cp = strsep(&next, ".");
	sscanf(cp, "%i", &fw_min);
	cp = strsep(&next, ".");
	sscanf(cp, "%i", &fw_mic);

	return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
	       (fw_mic & 0xffff);
}
static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
			     struct ib_udata *uhw)
{
	struct iwch_dev *dev;

	pr_debug("%s ibdev %p\n", __func__, ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	dev = to_iwch_dev(ibdev);
	memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
	props->hw_ver = dev->rdev.t3cdev_p->type;
	props->fw_ver = fw_vers_string_to_u64(dev);
	props->device_cap_flags = dev->device_cap_flags;
	props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
	props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
	props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
	props->max_mr_size = dev->attr.max_mr_size;
	props->max_qp = dev->attr.max_qps;
	props->max_qp_wr = dev->attr.max_wrs;
	props->max_send_sge = dev->attr.max_sge_per_wr;
	props->max_recv_sge = dev->attr.max_sge_per_wr;
	props->max_sge_rd = 1;
	props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
	props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
	props->max_cq = dev->attr.max_cqs;
	props->max_cqe = dev->attr.max_cqes_per_cq;
	props->max_mr = dev->attr.max_mem_regs;
	props->max_pd = dev->attr.max_pds;
	props->local_ca_ack_delay = 0;
	props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;

	return 0;
}
static int iwch_query_port(struct ib_device *ibdev,
			   u8 port, struct ib_port_attr *props)
{
	struct iwch_dev *dev;
	struct net_device *netdev;
	struct in_device *inetdev;

	pr_debug("%s ibdev %p\n", __func__, ibdev);

	dev = to_iwch_dev(ibdev);
	netdev = dev->rdev.port_info.lldevs[port-1];

	/* props being zeroed by the caller, avoid zeroing it here */
	props->max_mtu = IB_MTU_4096;
	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);

	if (!netif_carrier_ok(netdev))
		props->state = IB_PORT_DOWN;
	else {
		inetdev = in_dev_get(netdev);
		if (inetdev) {
			if (inetdev->ifa_list)
				props->state = IB_PORT_ACTIVE;
			else
				props->state = IB_PORT_INIT;
			in_dev_put(inetdev);
		} else
			props->state = IB_PORT_INIT;
	}

	props->port_cap_flags =
		IB_PORT_CM_SUP |
		IB_PORT_SNMP_TUNNEL_SUP |
		IB_PORT_REINIT_SUP |
		IB_PORT_DEVICE_MGMT_SUP |
		IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	props->active_width = 2;
	props->active_speed = IB_SPEED_DDR;
	props->max_msg_sz = -1;

	return 0;
}
static ssize_t hw_rev_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct iwch_dev *iwch_dev =
			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);

	pr_debug("%s dev 0x%p\n", __func__, dev);
	return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t hca_type_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct iwch_dev *iwch_dev =
			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
	struct ethtool_drvinfo info;
	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;

	pr_debug("%s dev 0x%p\n", __func__, dev);
	lldev->ethtool_ops->get_drvinfo(lldev, &info);
	return sprintf(buf, "%s\n", info.driver);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t board_id_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct iwch_dev *iwch_dev =
			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);

	pr_debug("%s dev 0x%p\n", __func__, dev);
	return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
		       iwch_dev->rdev.rnic_info.pdev->device);
}
static DEVICE_ATTR_RO(board_id);
enum counters {
	IPINRECEIVES,
	IPINHDRERRORS,
	IPINADDRERRORS,
	IPINUNKNOWNPROTOS,
	IPINDISCARDS,
	IPINDELIVERS,
	IPOUTREQUESTS,
	IPOUTDISCARDS,
	IPOUTNOROUTES,
	IPREASMTIMEOUT,
	IPREASMREQDS,
	IPREASMOKS,
	IPREASMFAILS,
	TCPACTIVEOPENS,
	TCPPASSIVEOPENS,
	TCPATTEMPTFAILS,
	TCPESTABRESETS,
	TCPCURRESTAB,
	TCPINSEGS,
	TCPOUTSEGS,
	TCPRETRANSSEGS,
	TCPINERRS,
	TCPOUTRSTS,
	TCPRTOMIN,
	TCPRTOMAX,
	NR_COUNTERS
};

static const char * const names[] = {
	[IPINRECEIVES] = "ipInReceives",
	[IPINHDRERRORS] = "ipInHdrErrors",
	[IPINADDRERRORS] = "ipInAddrErrors",
	[IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
	[IPINDISCARDS] = "ipInDiscards",
	[IPINDELIVERS] = "ipInDelivers",
	[IPOUTREQUESTS] = "ipOutRequests",
	[IPOUTDISCARDS] = "ipOutDiscards",
	[IPOUTNOROUTES] = "ipOutNoRoutes",
	[IPREASMTIMEOUT] = "ipReasmTimeout",
	[IPREASMREQDS] = "ipReasmReqds",
	[IPREASMOKS] = "ipReasmOKs",
	[IPREASMFAILS] = "ipReasmFails",
	[TCPACTIVEOPENS] = "tcpActiveOpens",
	[TCPPASSIVEOPENS] = "tcpPassiveOpens",
	[TCPATTEMPTFAILS] = "tcpAttemptFails",
	[TCPESTABRESETS] = "tcpEstabResets",
	[TCPCURRESTAB] = "tcpCurrEstab",
	[TCPINSEGS] = "tcpInSegs",
	[TCPOUTSEGS] = "tcpOutSegs",
	[TCPRETRANSSEGS] = "tcpRetransSegs",
	[TCPINERRS] = "tcpInErrs",
	[TCPOUTRSTS] = "tcpOutRsts",
	[TCPRTOMIN] = "tcpRtoMin",
	[TCPRTOMAX] = "tcpRtoMax",
};
static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
					      u8 port_num)
{
	BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);

	/* Our driver only supports device level stats */
	if (port_num != 0)
		return NULL;

	return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
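
/*
 * Fetch the TP MIB block from the lower-level driver via the
 * RDMA_GET_MIB control call and copy it into the rdma_hw_stats
 * array.  Several counters are maintained by hardware as split
 * 32-bit hi/lo halves and are reassembled into 64-bit values here.
 */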
static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			u8 port, int index)
{
	struct iwch_dev *dev;
	struct tp_mib_stats m;
	int ret;

	if (port != 0 || !stats)
		return -ENOSYS;

	pr_debug("%s ibdev %p\n", __func__, ibdev);
	dev = to_iwch_dev(ibdev);
	ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
	if (ret)
		return -ENOSYS;

	stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
	stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
	stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
	stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
	stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
	stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
	stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
	stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
	stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
	stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
	stats->value[IPREASMREQDS] = m.ipReasmReqds;
	stats->value[IPREASMOKS] = m.ipReasmOKs;
	stats->value[IPREASMFAILS] = m.ipReasmFails;
	stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
	stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
	stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
	stats->value[TCPESTABRESETS] = m.tcpEstabResets;
	stats->value[TCPCURRESTAB] = m.tcpCurrEstab;
	stats->value[TCPINSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
	stats->value[TCPOUTSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
	stats->value[TCPRETRANSSEGS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo;
	stats->value[TCPINERRS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
	stats->value[TCPOUTRSTS] = m.tcpOutRsts;
	stats->value[TCPRTOMIN] = m.tcpRtoMin;
	stats->value[TCPRTOMAX] = m.tcpRtoMax;

	return stats->num_counters;
}
static struct attribute *iwch_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL
};

static const struct attribute_group iwch_attr_group = {
	.attrs = iwch_class_attributes,
};
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}
static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
{
	struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
	struct ethtool_drvinfo info;
	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;

	pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
	lldev->ethtool_ops->get_drvinfo(lldev, &info);
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
static const struct ib_device_ops iwch_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_CXGB3,
	.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION,
	.uverbs_no_driver_id_binding = 1,

	.alloc_hw_stats = iwch_alloc_stats,
	.alloc_mr = iwch_alloc_mr,
	.alloc_mw = iwch_alloc_mw,
	.alloc_pd = iwch_allocate_pd,
	.alloc_ucontext = iwch_alloc_ucontext,
	.create_cq = iwch_create_cq,
	.create_qp = iwch_create_qp,
	.dealloc_mw = iwch_dealloc_mw,
	.dealloc_pd = iwch_deallocate_pd,
	.dealloc_ucontext = iwch_dealloc_ucontext,
	.dereg_mr = iwch_dereg_mr,
	.destroy_cq = iwch_destroy_cq,
	.destroy_qp = iwch_destroy_qp,
	.get_dev_fw_str = get_dev_fw_ver_str,
	.get_dma_mr = iwch_get_dma_mr,
	.get_hw_stats = iwch_get_mib,
	.get_port_immutable = iwch_port_immutable,
	.iw_accept = iwch_accept_cr,
	.iw_add_ref = iwch_qp_add_ref,
	.iw_connect = iwch_connect,
	.iw_create_listen = iwch_create_listen,
	.iw_destroy_listen = iwch_destroy_listen,
	.iw_get_qp = iwch_get_qp,
	.iw_reject = iwch_reject_cr,
	.iw_rem_ref = iwch_qp_rem_ref,
	.map_mr_sg = iwch_map_mr_sg,
	.mmap = iwch_mmap,
	.modify_qp = iwch_ib_modify_qp,
	.poll_cq = iwch_poll_cq,
	.post_recv = iwch_post_receive,
	.post_send = iwch_post_send,
	.query_device = iwch_query_device,
	.query_gid = iwch_query_gid,
	.query_pkey = iwch_query_pkey,
	.query_port = iwch_query_port,
	.reg_user_mr = iwch_reg_user_mr,
	.req_notify_cq = iwch_arm_cq,
	INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
};
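
/*
 * Called once per adapter at RNIC discovery: derives the node GUID
 * from the MAC address, advertises the supported uverbs commands,
 * attaches the sysfs attribute group and the ops table, and registers
 * with the RDMA core under the "cxgb3_%d" naming scheme.
 */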
int iwch_register_device(struct iwch_dev *dev)
{
	pr_debug("%s iwch_dev %p\n", __func__, dev);
	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
	dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
				IB_DEVICE_MEM_WINDOW |
				IB_DEVICE_MEM_MGT_EXTENSIONS;

	/* cxgb3 supports STag 0. */
	dev->ibdev.local_dma_lkey = 0;

	dev->ibdev.uverbs_cmd_mask =
	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
	dev->ibdev.node_type = RDMA_NODE_RNIC;
	BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
	dev->ibdev.num_comp_vectors = 1;
	dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;

	memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name,
	       sizeof(dev->ibdev.iw_ifname));

	rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
	ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
	return ib_register_device(&dev->ibdev, "cxgb3_%d");
}
void iwch_unregister_device(struct iwch_dev *dev)
{
	pr_debug("%s iwch_dev %p\n", __func__, dev);
	ib_unregister_device(&dev->ibdev);
}