2 * Copyright (c) 2016 Hisilicon Limited.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <rdma/ib_addr.h>
37 #include <rdma/ib_umem.h>
38 #include <rdma/uverbs_ioctl.h>
39 #include "hns_roce_common.h"
40 #include "hns_roce_device.h"
41 #include "hns_roce_hem.h"
42 #include <rdma/hns-abi.h>
/*
 * Twice HNS_ROCE_MAX_PORTS. Presumably the number of special QPs (two per
 * port) -- TODO confirm against the users of this macro.
 */
44 #define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
/*
 * hns_roce_qp_event - deliver an asynchronous event to a QP.
 * @hr_dev: device the event was raised on
 * @qpn: number of the QP the event refers to
 * @event_type: event code, cast to enum hns_roce_event for the callback
 *
 * The QP is looked up and its refcount is raised while the qp_table_xa lock
 * is held; the QP's ->event callback is then invoked and the reference is
 * dropped again.
 *
 * NOTE(review): several short lines (braces, conditions, returns) are not
 * visible in this listing; the comments below cover the visible statements.
 */
46 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
48 struct device *dev = hr_dev->dev;
49 struct hns_roce_qp *qp;
	/* Look up the QP and pin it while the xarray lock is held. */
51 xa_lock(&hr_dev->qp_table_xa);
52 qp = __hns_roce_qp_lookup(hr_dev, qpn);
54 atomic_inc(&qp->refcount);
55 xa_unlock(&hr_dev->qp_table_xa);
	/* Presumably reached only when the lookup found no QP -- confirm. */
58 dev_warn(dev, "Async event for bogus QP %08x\n", qpn);
62 qp->event(qp, (enum hns_roce_event)event_type);
	/* Drop the reference taken above; the last-reference action is not visible here. */
64 if (atomic_dec_and_test(&qp->refcount))
/*
 * hns_roce_ib_qp_event - translate a driver QP event into an ib_event.
 * @hr_qp: QP the event refers to
 * @type: driver event code
 *
 * Nothing is reported unless the QP has an event_handler installed. The
 * driver event codes are mapped to IB_EVENT_* values and passed to the
 * handler together with ibqp->qp_context.
 */
68 static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
69 enum hns_roce_event type)
71 struct ib_event event;
72 struct ib_qp *ibqp = &hr_qp->ibqp;
74 if (ibqp->event_handler) {
75 event.device = ibqp->device;
76 event.element.qp = ibqp;
	/* Driver event code -> IB event code (the switch statement line is not visible). */
78 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
79 event.event = IB_EVENT_PATH_MIG;
81 case HNS_ROCE_EVENT_TYPE_COMM_EST:
82 event.event = IB_EVENT_COMM_EST;
84 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
85 event.event = IB_EVENT_SQ_DRAINED;
87 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
88 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
90 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
91 event.event = IB_EVENT_QP_FATAL;
93 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
94 event.event = IB_EVENT_PATH_MIG_ERR;
96 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
97 event.event = IB_EVENT_QP_REQ_ERR;
99 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
100 event.event = IB_EVENT_QP_ACCESS_ERR;
	/* Presumably the default case: unknown codes are only logged at debug level. */
103 dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
107 ibqp->event_handler(&event, ibqp->qp_context);
/*
 * hns_roce_reserve_range_qp - allocate QP numbers from the QP table bitmap.
 * @hr_dev: device owning the QP table
 * @cnt: forwarded to hns_roce_bitmap_alloc_range()
 * @align: forwarded to hns_roce_bitmap_alloc_range()
 * @base: presumably receives the first allocated QP number -- the trailing
 *        argument line of the call is not visible, confirm
 *
 * Returns whatever hns_roce_bitmap_alloc_range() returns.
 */
111 static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
112 int align, unsigned long *base)
114 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
116 return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
/*
 * to_hns_roce_state - map an ib_qp_state to the driver's hns_roce_qp_state.
 * @state: IB verbs QP state
 *
 * The case labels are not visible in this listing. The visible returns are
 * RST, INIT, RTR, RTS, SQD and ERR, followed by HNS_ROCE_QP_NUM_STATE,
 * which is presumably the fallback for unrecognised states -- confirm.
 */
122 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
126 return HNS_ROCE_QP_STATE_RST;
128 return HNS_ROCE_QP_STATE_INIT;
130 return HNS_ROCE_QP_STATE_RTR;
132 return HNS_ROCE_QP_STATE_RTS;
134 return HNS_ROCE_QP_STATE_SQD;
136 return HNS_ROCE_QP_STATE_ERR;
138 return HNS_ROCE_QP_NUM_STATE;
/*
 * hns_roce_gsi_qp_alloc - initialise QP bookkeeping and publish the QP.
 * @hr_dev: device owning the QP xarray
 * @qpn: QP number (its use is on lines not visible in this listing)
 * @hr_qp: QP to register
 *
 * Sets the refcount to 1, initialises the 'free' completion and stores the
 * QP in qp_table_xa at index (hr_qp->qpn & (num_qps - 1)). A failed store
 * is logged.
 */
142 static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
143 struct hns_roce_qp *hr_qp)
145 struct xarray *xa = &hr_dev->qp_table_xa;
	/* Start with one reference; hns_roce_qp_free() drops it and waits on 'free'. */
152 atomic_set(&hr_qp->refcount, 1);
153 init_completion(&hr_qp->free);
155 ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1),
158 dev_err(hr_dev->dev, "QPC xa_store failed\n");
/*
 * hns_roce_qp_alloc - acquire context-table entries for a QP and register it.
 * @hr_dev: device owning the QP tables
 * @qpn: QP number, passed on to hns_roce_gsi_qp_alloc()
 * @hr_qp: QP being set up; hr_qp->qpn selects the table entries
 *
 * Takes QPC and IRRL table entries, plus TRRL and SCC context entries when
 * the corresponding entry size capability is non-zero, then registers the
 * QP through hns_roce_gsi_qp_alloc(). The tail of the function releases the
 * entries in reverse order; presumably it is reached through error labels
 * that are not visible in this listing.
 */
163 static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
164 struct hns_roce_qp *hr_qp)
166 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
167 struct device *dev = hr_dev->dev;
175 /* Alloc memory for QPC */
176 ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
178 dev_err(dev, "QPC table get failed\n");
182 /* Alloc memory for IRRL */
183 ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
185 dev_err(dev, "IRRL table get failed\n");
189 if (hr_dev->caps.trrl_entry_sz) {
190 /* Alloc memory for TRRL */
191 ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
194 dev_err(dev, "TRRL table get failed\n");
199 if (hr_dev->caps.sccc_entry_sz) {
200 /* Alloc memory for SCC CTX */
201 ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
204 dev_err(dev, "SCC CTX table get failed\n");
	/* All table entries are held: publish the QP. */
209 ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
	/* Unwind: release table entries in the reverse order of acquisition. */
216 if (hr_dev->caps.sccc_entry_sz)
217 hns_roce_table_put(hr_dev, &qp_table->sccc_table,
221 if (hr_dev->caps.trrl_entry_sz)
222 hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
225 hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
228 hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
/*
 * hns_roce_qp_remove - remove a QP from the device's QP xarray.
 * @hr_dev: device owning qp_table_xa
 * @hr_qp: QP to remove
 *
 * The entry at index (hr_qp->qpn & (num_qps - 1)) is erased while the
 * xarray lock is held with interrupts saved and disabled.
 */
234 void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
236 struct xarray *xa = &hr_dev->qp_table_xa;
239 xa_lock_irqsave(xa, flags);
240 __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
241 xa_unlock_irqrestore(xa, flags);
/*
 * hns_roce_qp_free - wait for outstanding references and release QP tables.
 * @hr_dev: device owning the QP tables
 * @hr_qp: QP being torn down
 *
 * Drops one reference. If that was the last one the 'free' completion is
 * signalled here; in either case the function then waits on that
 * completion. For QPs that are not of type IB_QPT_GSI the TRRL entry (when
 * trrl_entry_sz is non-zero) and the IRRL entry are released afterwards.
 */
244 void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
246 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
248 if (atomic_dec_and_test(&hr_qp->refcount))
249 complete(&hr_qp->free);
	/* Blocks until the 'free' completion has been signalled. */
250 wait_for_completion(&hr_qp->free);
252 if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
253 if (hr_dev->caps.trrl_entry_sz)
254 hns_roce_table_put(hr_dev, &qp_table->trrl_table,
256 hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
/*
 * hns_roce_release_range_qp - return QP numbers to the QP table bitmap.
 * @hr_dev: device owning the QP table
 * @base_qpn: first QP number of the range
 *
 * The remaining parameter line is not visible in this listing; the visible
 * call passes a 'cnt' value as the range length.
 *
 * A base below caps.reserved_qps is tested first; presumably such ranges
 * are left untouched (the statement under the test is not visible). The
 * range is freed with the BITMAP_RR flag.
 */
260 void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
263 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
265 if (base_qpn < hr_dev->caps.reserved_qps)
268 hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
/*
 * hns_roce_set_rq_size - validate and compute receive queue geometry.
 * @hr_dev: device whose capabilities bound the request
 * @cap: requested capabilities; max_recv_wr and max_recv_sge are overwritten
 *       with the values actually configured
 * @is_user: true when the QP is created on behalf of user space
 * @has_rq: whether the QP has its own receive queue
 * @hr_qp: QP whose rq fields are filled in
 *
 * NOTE(review): the return statements and some conditions are not visible
 * in this listing.
 */
271 static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
272 struct ib_qp_cap *cap, bool is_user, int has_rq,
273 struct hns_roce_qp *hr_qp)
275 struct device *dev = hr_dev->dev;
278 /* Check the validity of QP support capacity */
279 if (cap->max_recv_wr > hr_dev->caps.max_wqes ||
280 cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
281 dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n",
282 cap->max_recv_wr, cap->max_recv_sge);
286 /* If srq exist, set zero for relative number of rq */
288 hr_qp->rq.wqe_cnt = 0;
289 hr_qp->rq.max_gs = 0;
290 cap->max_recv_wr = 0;
291 cap->max_recv_sge = 0;
	/* A user-space QP asking for zero recv WRs or zero recv SGEs is reported as an error. */
293 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
294 dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
	/* Honour the device's minimum WQE count when it defines one. */
298 if (hr_dev->caps.min_wqes)
299 max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
301 max_cnt = cap->max_recv_wr;
	/* The queue depth is rounded up to a power of two and re-checked against max_wqes. */
303 hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
305 if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
306 dev_err(dev, "while setting rq size, rq.wqe_cnt too large\n");
	/* At least one SGE per WQE, rounded up to a power of two. */
310 max_cnt = max(1U, cap->max_recv_sge);
311 hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
	/* The WQE stride is derived from max_rq_desc_sz; the second form is truncated in this listing. */
312 if (hr_dev->caps.max_rq_sg <= 2)
313 hr_qp->rq.wqe_shift =
314 ilog2(hr_dev->caps.max_rq_desc_sz);
316 hr_qp->rq.wqe_shift =
317 ilog2(hr_dev->caps.max_rq_desc_sz
	/* Report the configured values back to the caller. */
321 cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
322 cap->max_recv_sge = hr_qp->rq.max_gs;
/*
 * check_sq_size_with_integrity - sanity-check user-supplied SQ parameters.
 * @hr_dev: device whose capabilities bound the request
 * @cap: requested capabilities; max_send_sge is checked against max_sq_sg
 * @ucmd: user command; log_sq_stride must lie between
 *        HNS_ROCE_IB_MIN_SQ_STRIDE and log2 of max_sq_desc_sz rounded up to
 *        a power of two
 *
 * Failures are logged; the return statements are not visible in this
 * listing.
 */
327 static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
328 struct ib_qp_cap *cap,
329 struct hns_roce_ib_create_qp *ucmd)
331 u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
332 u8 max_sq_stride = ilog2(roundup_sq_stride);
334 /* Sanity check SQ size before proceeding */
335 if (ucmd->log_sq_stride > max_sq_stride ||
336 ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
337 ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n");
341 if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
342 ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n",
/*
 * hns_roce_set_user_sq_size - compute SQ geometry and buffer layout for a
 * user-space QP.
 * @hr_dev: device whose capabilities bound the request
 * @cap: requested capabilities (max_send_sge is read)
 * @hr_qp: QP whose sq/sge/rq offsets and buff_size are filled in
 * @ucmd: user command supplying log_sq_bb_count and log_sq_stride
 *
 * The resulting buffer holds the SQ first, optionally followed by the
 * extended SGE area, and the RQ last.
 *
 * NOTE(review): return statements and several braces/else lines are not
 * visible in this listing.
 */
350 static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
351 struct ib_qp_cap *cap,
352 struct hns_roce_qp *hr_qp,
353 struct hns_roce_ib_create_qp *ucmd)
	/* wqe_cnt = 1 << log_sq_bb_count, with the shift checked for overflow and the result capped by max_wqes. */
360 if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) ||
361 hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes)
364 ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
366 ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n");
370 hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
	/* At least one SGE; rounded up to a power of two only when max_sq_sg <= 2. */
372 max_cnt = max(1U, cap->max_send_sge);
373 if (hr_dev->caps.max_sq_sg <= 2)
374 hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
376 hr_qp->sq.max_gs = max_cnt;
	/* SGEs beyond the first two of each WQE are counted into the extended SGE area. */
378 if (hr_qp->sq.max_gs > 2)
379 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
380 (hr_qp->sq.max_gs - 2));
	/* PCI revision 0x20 additionally limits the extended SGE count to max_extend_sg. */
382 if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
383 if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
385 "The extended sge cnt error! sge_cnt=%d\n",
	/* A shift of 4 gives 16-byte extended SGE entries. */
391 hr_qp->sge.sge_shift = 4;
392 ex_sge_num = hr_qp->sge.sge_cnt;
394 /* Get buf size, SQ and RQ are aligned to page_size */
395 if (hr_dev->caps.max_sq_sg <= 2) {
396 hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
397 hr_qp->rq.wqe_shift), PAGE_SIZE) +
398 HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
399 hr_qp->sq.wqe_shift), PAGE_SIZE);
	/* Layout without extended SGEs: SQ at offset 0, RQ after the aligned SQ. */
401 hr_qp->sq.offset = 0;
402 hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
403 hr_qp->sq.wqe_shift), PAGE_SIZE);
	/* Otherwise align to the MTT buffer page size instead of PAGE_SIZE. */
405 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
	/* When extended SGEs are used, reserve at least one page worth of them. */
406 hr_qp->sge.sge_cnt = ex_sge_num ?
407 max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
408 hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
409 hr_qp->rq.wqe_shift), page_size) +
410 HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
411 hr_qp->sge.sge_shift), page_size) +
412 HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
413 hr_qp->sq.wqe_shift), page_size);
415 hr_qp->sq.offset = 0;
	/* Extended SGE area follows the SQ; RQ follows the SGE area. */
417 hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
418 (hr_qp->sq.wqe_cnt <<
419 hr_qp->sq.wqe_shift),
421 hr_qp->rq.offset = hr_qp->sge.offset +
422 HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
423 hr_qp->sge.sge_shift),
	/* Presumably the branch without extended SGEs: RQ directly follows the SQ. */
426 hr_qp->rq.offset = HNS_ROCE_ALOGN_UP(
427 (hr_qp->sq.wqe_cnt <<
428 hr_qp->sq.wqe_shift),
/*
 * split_wqe_buf_region - describe the QP buffer as SQ / extended SGE / RQ
 * regions.
 * @hr_dev: device supplying the per-region hop numbers
 * @hr_qp: QP whose sq/sge/rq offsets and buff_size define the layout
 * @regions: output array of region descriptors
 * @region_max: capacity of @regions
 * @page_shift: log2 of the page size used to express offsets and counts
 *
 * Callers in this file store the return value in hr_qp->region_cnt, so it
 * is presumably the number of regions filled in -- the return statements
 * are not visible in this listing.
 *
 * NOTE(review): "®ions" below looks like a mis-decoded "&regions"; the
 * text is left as found and needs fixing at the source of this listing.
 */
436 static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
437 struct hns_roce_qp *hr_qp,
438 struct hns_roce_buf_region *regions,
439 int region_max, int page_shift)
441 int page_size = 1 << page_shift;
447 if (hr_qp->buff_size < 1 || region_max < 1)
	/* The extended SGE region exists only when sge_cnt is non-zero. */
450 if (hr_qp->sge.sge_cnt > 0)
451 is_extend_sge = true;
453 is_extend_sge = false;
	/* SQ region: runs up to the SGE area if present, otherwise up to the RQ. */
457 buf_size = hr_qp->sge.offset - hr_qp->sq.offset;
459 buf_size = hr_qp->rq.offset - hr_qp->sq.offset;
461 if (buf_size > 0 && region_cnt < region_max) {
462 buf_cnt = DIV_ROUND_UP(buf_size, page_size);
463 hns_roce_init_buf_region(®ions[region_cnt],
464 hr_dev->caps.wqe_sq_hop_num,
465 hr_qp->sq.offset / page_size,
	/* Extended SGE region: from sge.offset up to rq.offset. */
472 buf_size = hr_qp->rq.offset - hr_qp->sge.offset;
473 if (buf_size > 0 && region_cnt < region_max) {
474 buf_cnt = DIV_ROUND_UP(buf_size, page_size);
475 hns_roce_init_buf_region(®ions[region_cnt],
476 hr_dev->caps.wqe_sge_hop_num,
477 hr_qp->sge.offset / page_size,
	/* RQ region: from rq.offset to the end of the buffer. */
484 buf_size = hr_qp->buff_size - hr_qp->rq.offset;
486 buf_cnt = DIV_ROUND_UP(buf_size, page_size);
487 hns_roce_init_buf_region(®ions[region_cnt],
488 hr_dev->caps.wqe_rq_hop_num,
489 hr_qp->rq.offset / page_size,
/*
 * calc_wqe_bt_page_shift - pick a base-address-table page shift for the WQE
 * buffer regions.
 * @hr_dev: device supplying caps.mtt_ba_pg_sz as the starting point
 * @regions: buffer regions handed to hns_roce_hem_list_calc_root_ba()
 *
 * Starts from PAGE_SHIFT + mtt_ba_pg_sz and loops while the value returned
 * by hns_roce_hem_list_calc_root_ba() exceeds the number of BA entries one
 * BT page can hold. The statement that changes bt_pg_shift inside the loop
 * is not visible in this listing. The result is returned relative to
 * PAGE_SHIFT.
 */
497 static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
498 struct hns_roce_buf_region *regions,
505 bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz;
507 /* all root ba entries must be in one bt page */
	/* ba_num: number of BA_BYTE_LEN-sized entries in one BT page of the current size. */
509 ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN;
510 ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt,
516 } while (ret > ba_num);
518 return bt_pg_shift - PAGE_SHIFT;
/*
 * set_extend_sge_param - size the extended SGE area of a kernel QP.
 * @hr_dev: device supplying max_sq_sg, max_extend_sg and the PCI revision
 * @hr_qp: QP whose sge.sge_cnt and sge.sge_shift are set
 *
 * The return statements are not visible in this listing.
 */
521 static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
522 struct hns_roce_qp *hr_qp)
524 struct device *dev = hr_dev->dev;
	/* SGEs beyond the first two per WQE go to the extended area; entries are 1 << 4 bytes. */
526 if (hr_qp->sq.max_gs > 2) {
527 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
528 (hr_qp->sq.max_gs - 2));
529 hr_qp->sge.sge_shift = 4;
532 /* ud sqwqe's sge use extend sge */
533 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
534 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
536 hr_qp->sge.sge_shift = 4;
	/* PCI revision 0x20 limits the extended SGE count to max_extend_sg. */
539 if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
540 if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
541 dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
/*
 * hns_roce_set_kernel_sq_size - compute SQ geometry and buffer layout for a
 * kernel QP.
 * @hr_dev: device whose capabilities bound the request
 * @cap: requested capabilities; max_send_wr, max_send_sge and
 *       max_inline_data are overwritten with the configured values
 * @hr_qp: QP whose sq/sge/rq offsets and buff_size are filled in
 *
 * Buffer layout: SQ at offset 0, then the extended SGE area (if any), then
 * the RQ, each aligned to the MTT buffer page size.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
550 static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
551 struct ib_qp_cap *cap,
552 struct hns_roce_qp *hr_qp)
554 struct device *dev = hr_dev->dev;
	/* Check the request against the device limits. */
560 if (cap->max_send_wr > hr_dev->caps.max_wqes ||
561 cap->max_send_sge > hr_dev->caps.max_sq_sg ||
562 cap->max_inline_data > hr_dev->caps.max_sq_inline) {
563 dev_err(dev, "SQ WR or sge or inline data error!\n");
567 hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
	/* Honour the device's minimum WQE count when it defines one. */
569 if (hr_dev->caps.min_wqes)
570 max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
572 max_cnt = cap->max_send_wr;
574 hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
575 if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
576 dev_err(dev, "while setting kernel sq size, sq.wqe_cnt too large\n");
580 /* Get data_seg numbers */
581 max_cnt = max(1U, cap->max_send_sge);
582 if (hr_dev->caps.max_sq_sg <= 2)
583 hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
585 hr_qp->sq.max_gs = max_cnt;
587 ret = set_extend_sge_param(hr_dev, hr_qp);
589 dev_err(dev, "set extend sge parameters fail\n");
593 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
594 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
595 hr_qp->sq.offset = 0;
596 size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
	/* Extended SGE area: at least one page worth of entries, placed after the SQ. */
599 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
600 hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
601 (u32)hr_qp->sge.sge_cnt);
602 hr_qp->sge.offset = size;
603 size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
604 hr_qp->sge.sge_shift, page_size);
	/* The RQ comes last; the running total becomes the buffer size. */
607 hr_qp->rq.offset = size;
608 size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
610 hr_qp->buff_size = size;
612 /* Get wr and sge number which send */
613 cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
614 cap->max_send_sge = hr_qp->sq.max_gs;
616 /* We don't support inline sends for kernel QPs (yet) */
617 cap->max_inline_data = 0;
/*
 * hns_roce_qp_has_sq - tell whether a QP described by @attr has a send queue.
 * @attr: QP creation attributes
 *
 * Tests for an XRC target QP or a zero max_send_wr. The return statements
 * are not visible in this listing; presumably 0 is returned in that case
 * and 1 otherwise -- confirm.
 */
622 static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
624 if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
/*
 * hns_roce_qp_has_rq - tell whether a QP described by @attr has its own
 * receive queue.
 * @attr: QP creation attributes
 *
 * Tests for an XRC initiator or target QP, an attached SRQ, or a zero
 * max_recv_wr. The return statements are not visible in this listing;
 * presumably 0 is returned in that case and 1 otherwise -- confirm.
 */
630 static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
632 if (attr->qp_type == IB_QPT_XRC_INI ||
633 attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
634 !attr->cap.max_recv_wr)
/*
 * alloc_rq_inline_buf - allocate bookkeeping for receive-inline WQEs.
 * @hr_qp: QP whose rq.wqe_cnt sizes the list and whose rq_inl_buf is set
 * @init_attr: creation attributes; cap.max_recv_sge sizes each sg_list
 *
 * Allocates one hns_roce_rinl_wqe per RQ WQE and a single array holding
 * max_recv_sge hns_roce_rinl_sge entries for each of them; every WQE's
 * sg_list points into that array. free_rq_inline_buf() releases both
 * allocations. The error handling and return lines are not visible in this
 * listing.
 */
640 static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
641 struct ib_qp_init_attr *init_attr)
643 u32 max_recv_sge = init_attr->cap.max_recv_sge;
644 struct hns_roce_rinl_wqe *wqe_list;
645 u32 wqe_cnt = hr_qp->rq.wqe_cnt;
648 /* allocate recv inline buf */
649 wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe),
655 /* Allocate a contiguous buffer for all inline sge we need */
656 wqe_list[0].sg_list = kcalloc(wqe_cnt, (max_recv_sge *
657 sizeof(struct hns_roce_rinl_sge)),
659 if (!wqe_list[0].sg_list)
662 /* Assign buffers of sg_list to each inline wqe */
663 for (i = 1; i < wqe_cnt; i++)
664 wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
666 hr_qp->rq_inl_buf.wqe_list = wqe_list;
667 hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt;
/*
 * free_rq_inline_buf - release the receive-inline bookkeeping of a QP.
 * @hr_qp: QP whose rq_inl_buf is freed
 *
 * The shared sg_list array is reached through wqe_list[0], so it is freed
 * before the wqe_list itself.
 */
678 static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
680 kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
681 kfree(hr_qp->rq_inl_buf.wqe_list);
/*
 * hns_roce_create_qp_common - set up a QP: queue geometry, buffers,
 * doorbells, address translation and QP number registration.
 * @hr_dev: device the QP is created on
 * @init_attr: creation attributes from the verbs layer
 * @udata: user-space command/response channel; also passed as the is_user
 *         flag of hns_roce_set_rq_size() and tested in the unwind code to
 *         tell kernel QPs apart
 * @sqpn: QP number argument supplied by the caller; its use is on lines
 *        not visible in this listing
 * @hr_qp: QP object to initialise
 *
 * NOTE(review): this listing is missing many short lines (braces, else
 * branches, goto labels, returns and one parameter line). The section
 * comments below describe the visible statements; branch structure that is
 * inferred rather than visible is marked "presumably".
 */
684 static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
686 struct ib_qp_init_attr *init_attr,
687 struct ib_udata *udata, unsigned long sqpn,
688 struct hns_roce_qp *hr_qp)
690 dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
691 struct device *dev = hr_dev->dev;
692 struct hns_roce_ib_create_qp ucmd;
693 struct hns_roce_ib_create_qp_resp resp = {};
694 struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
695 udata, struct hns_roce_ucontext, ibucontext);
696 struct hns_roce_buf_region *r;
697 unsigned long qpn = 0;
	/* Basic state: locks, initial QP state, type and signalling mode. */
703 mutex_init(&hr_qp->mutex);
704 spin_lock_init(&hr_qp->sq.lock);
705 spin_lock_init(&hr_qp->rq.lock);
707 hr_qp->state = IB_QPS_RESET;
709 hr_qp->ibqp.qp_type = init_attr->qp_type;
711 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
712 hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
714 hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
	/* RQ geometry; the udata pointer doubles as the is_user flag. */
716 ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
717 hns_roce_qp_has_rq(init_attr), hr_qp);
719 dev_err(dev, "hns_roce_set_rq_size failed\n");
	/* Receive-inline bookkeeping, only when the device supports it and the QP has an RQ. */
723 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
724 hns_roce_qp_has_rq(init_attr)) {
725 ret = alloc_rq_inline_buf(hr_qp, init_attr);
727 dev_err(dev, "allocate receive inline buffer failed\n");
732 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
	/* Presumably the user-space branch: read the command, size the SQ, pin the user buffer. */
734 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
735 dev_err(dev, "ib_copy_from_udata error for create qp\n");
737 goto err_alloc_rq_inline_buf;
740 ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
743 dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
744 goto err_alloc_rq_inline_buf;
747 hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
748 hr_qp->buff_size, 0, 0);
749 if (IS_ERR(hr_qp->umem)) {
750 dev_err(dev, "ib_umem_get error for create qp\n");
751 ret = PTR_ERR(hr_qp->umem);
752 goto err_alloc_rq_inline_buf;
	/* Split the buffer into regions and collect the page addresses of each one. */
754 hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
755 hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
757 ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
760 dev_err(dev, "alloc buf_list error for create qp\n");
764 for (i = 0; i < hr_qp->region_cnt; i++) {
765 r = &hr_qp->regions[i];
766 buf_count = hns_roce_get_umem_bufs(hr_dev,
767 buf_list[i], r->count, r->offset,
768 hr_qp->umem, page_shift);
769 if (buf_count != r->count) {
771 "get umem buf err, expect %d,ret %d.\n",
772 r->count, buf_count);
	/* SQ record doorbell: mapped only when the device supports it and the user ABI is large enough. */
778 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
779 (udata->inlen >= sizeof(ucmd)) &&
780 (udata->outlen >= sizeof(resp)) &&
781 hns_roce_qp_has_sq(init_attr)) {
782 ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr,
785 dev_err(dev, "sq record doorbell map failed!\n");
789 /* indicate kernel supports sq record db */
790 resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
	/* RQ record doorbell, under the analogous conditions. */
794 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
795 (udata->outlen >= sizeof(resp)) &&
796 hns_roce_qp_has_rq(init_attr)) {
797 ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr,
800 dev_err(dev, "rq record doorbell map failed!\n");
804 /* indicate kernel supports rq record db */
805 resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
	/* Presumably the kernel branch: two create flags are reported as errors here. */
809 if (init_attr->create_flags &
810 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
811 dev_err(dev, "init_attr->create_flags error!\n");
813 goto err_alloc_rq_inline_buf;
816 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
817 dev_err(dev, "init_attr->create_flags error!\n");
819 goto err_alloc_rq_inline_buf;
823 ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
826 dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
827 goto err_alloc_rq_inline_buf;
830 /* QP doorbell register address */
831 hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset +
832 DB_REG_OFFSET * hr_dev->priv_uar.index;
833 hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset +
834 DB_REG_OFFSET * hr_dev->priv_uar.index;
	/* Kernel-side RQ record doorbell, cleared after allocation. */
836 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
837 hns_roce_qp_has_rq(init_attr)) {
838 ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
840 dev_err(dev, "rq record doorbell alloc failed!\n");
841 goto err_alloc_rq_inline_buf;
843 *hr_qp->rdb.db_record = 0;
847 /* Allocate QP buf */
848 if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
849 (1 << page_shift) * 2,
850 &hr_qp->hr_buf, page_shift)) {
851 dev_err(dev, "hns_roce_buf_alloc error!\n");
	/* Same region split as the user path, with page addresses taken from kernel memory. */
855 hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
856 hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
858 ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
861 dev_err(dev, "alloc buf_list error for create qp!\n");
865 for (i = 0; i < hr_qp->region_cnt; i++) {
866 r = &hr_qp->regions[i];
867 buf_count = hns_roce_get_kmem_bufs(hr_dev,
868 buf_list[i], r->count, r->offset,
870 if (buf_count != r->count) {
872 "get kmem buf err, expect %d,ret %d.\n",
873 r->count, buf_count);
	/* Work-request ID arrays, one u64 per WQE. */
879 hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
881 if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) {
886 if (hr_qp->rq.wqe_cnt) {
887 hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
889 if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) {
	/* Allocate a QP number; presumably only when the caller supplied none in sqpn -- confirm. */
900 ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn);
902 dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n");
	/* Attach the collected page lists to the QP's MTR. */
907 hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
909 hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
911 ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list,
912 hr_qp->regions, hr_qp->region_cnt);
914 dev_err(dev, "mtr attach error for create qp\n");
918 if (init_attr->qp_type == IB_QPT_GSI &&
919 hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
920 /* In v1 engine, GSI QP context in RoCE engine's register */
921 ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
923 dev_err(dev, "hns_roce_qp_alloc failed!\n");
927 ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp);
929 dev_err(dev, "hns_roce_qp_alloc failed!\n");
	/* Doorbell QPN is 1 in one case and the real QPN otherwise; the condition is not visible. */
935 hr_qp->doorbell_qpn = 1;
937 hr_qp->doorbell_qpn = (u32)hr_qp->qpn;
	/* Return the response, truncated to the size the caller provided. */
940 ret = ib_copy_to_udata(udata, &resp,
941 min(udata->outlen, sizeof(resp)));
946 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
947 ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
	/* Install the event callback and release the temporary page lists. */
952 hr_qp->event = hns_roce_ib_qp_event;
953 hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
	/* Unwind path: presumably entered through goto labels that are not visible here. */
958 if (init_attr->qp_type == IB_QPT_GSI &&
959 hr_dev->hw_rev == HNS_ROCE_HW_VER1)
960 hns_roce_qp_remove(hr_dev, hr_qp);
962 hns_roce_qp_free(hr_dev, hr_qp);
966 hns_roce_release_range_qp(hr_dev, qpn, 1);
969 hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
	/* The doorbell unmap conditions mirror the ones used when mapping above. */
973 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
974 (udata->outlen >= sizeof(resp)) &&
975 hns_roce_qp_has_rq(init_attr))
976 hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
978 if (hr_qp->rq.wqe_cnt)
979 kfree(hr_qp->rq.wrid);
984 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
985 (udata->inlen >= sizeof(ucmd)) &&
986 (udata->outlen >= sizeof(resp)) &&
987 hns_roce_qp_has_sq(init_attr))
988 hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
992 kfree(hr_qp->sq.wrid);
995 hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
999 hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
1000 ib_umem_release(hr_qp->umem);
	/* The kernel-allocated RQ record doorbell is freed only for kernel QPs (!udata). */
1003 if (!udata && hns_roce_qp_has_rq(init_attr) &&
1004 (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
1005 hns_roce_free_db(hr_dev, &hr_qp->rdb);
1007 err_alloc_rq_inline_buf:
1008 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
1009 hns_roce_qp_has_rq(init_attr))
1010 free_rq_inline_buf(hr_qp);
/*
 * hns_roce_create_qp - verbs entry point for creating a QP.
 * @pd: protection domain the QP belongs to
 * @init_attr: creation attributes; qp_type selects the path taken
 * @udata: user-space command/response channel
 *
 * Allocates the driver QP object and hands the rest of the setup to
 * hns_roce_create_qp_common(). Returns the embedded ib_qp on success or an
 * ERR_PTR() on failure. The case labels of the switch are not visible in
 * this listing; the visible error messages identify an RC path, a GSI path
 * and a fallback for unsupported types.
 */
1016 struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
1017 struct ib_qp_init_attr *init_attr,
1018 struct ib_udata *udata)
1020 struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1021 struct ib_device *ibdev = &hr_dev->ib_dev;
1022 struct hns_roce_sqp *hr_sqp;
1023 struct hns_roce_qp *hr_qp;
1026 switch (init_attr->qp_type) {
	/* Regular QP: the QP number is chosen by the common code (sqpn argument is 0). */
1028 hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
1030 return ERR_PTR(-ENOMEM);
1032 ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
1035 ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n",
1038 return ERR_PTR(ret);
1041 hr_qp->ibqp.qp_num = hr_qp->qpn;
1046 /* Userspace is not allowed to create special QPs: */
1048 ibdev_err(ibdev, "not support usr space GSI\n");
1049 return ERR_PTR(-EINVAL);
	/* GSI QP: lives inside a hns_roce_sqp and is bound to a port. */
1052 hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
1054 return ERR_PTR(-ENOMEM);
1056 hr_qp = &hr_sqp->hr_qp;
1057 hr_qp->port = init_attr->port_num - 1;
1058 hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
1060 /* when hw version is v1, the sqpn is allocated */
1061 if (hr_dev->caps.max_sq_sg <= 2)
1062 hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
1063 hr_dev->iboe.phy_port[hr_qp->port];
1065 hr_qp->ibqp.qp_num = 1;
	/* The fixed QP number chosen above is passed on as sqpn. */
1067 ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
1068 hr_qp->ibqp.qp_num, hr_qp);
1070 ibdev_err(ibdev, "Create GSI QP failed!\n");
1072 return ERR_PTR(ret);
1078 ibdev_err(ibdev, "not support QP type %d\n",
1079 init_attr->qp_type);
1080 return ERR_PTR(-EINVAL);
1084 return &hr_qp->ibqp;
/*
 * to_hr_qp_type - map an IB QP type to the hardware service type.
 * @qp_type: IB_QPT_* value
 *
 * RC maps to SERV_TYPE_RC, UC to SERV_TYPE_UC, and both UD and GSI map to
 * SERV_TYPE_UD. The remaining assignment of -1 is presumably the else
 * branch for every other type (the else line is not visible).
 */
1087 int to_hr_qp_type(int qp_type)
1091 if (qp_type == IB_QPT_RC)
1092 transport_type = SERV_TYPE_RC;
1093 else if (qp_type == IB_QPT_UC)
1094 transport_type = SERV_TYPE_UC;
1095 else if (qp_type == IB_QPT_UD)
1096 transport_type = SERV_TYPE_UD;
1097 else if (qp_type == IB_QPT_GSI)
1098 transport_type = SERV_TYPE_UD;
1100 transport_type = -1;
1102 return transport_type;
/*
 * check_mtu_validate - validate attr->path_mtu for a modify-QP request.
 * @hr_dev: device supplying caps.max_mtu and the per-port net devices
 * @hr_qp: QP being modified; its port is used when IB_QP_PORT is not set
 * @attr: requested attributes
 * @attr_mask: mask of valid fields in @attr
 *
 * The path MTU is reported as invalid when it is below IB_MTU_256, above
 * the active MTU derived from the port's net device, or above caps.max_mtu
 * (that last check applies only when caps.max_mtu >= IB_MTU_2048). The
 * return statements are not visible in this listing.
 */
1105 static int check_mtu_validate(struct hns_roce_dev *hr_dev,
1106 struct hns_roce_qp *hr_qp,
1107 struct ib_qp_attr *attr, int attr_mask)
1109 enum ib_mtu active_mtu;
	/* Zero-based port index: from the request if it carries one, else from the QP. */
1112 p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1113 active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
1115 if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
1116 attr->path_mtu > hr_dev->caps.max_mtu) ||
1117 attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
1118 ibdev_err(&hr_dev->ib_dev,
1119 "attr path_mtu(%d)invalid while modify qp",
/*
 * hns_roce_check_qp_attr - range-check the attributes of a modify-QP request.
 * @ibqp: QP being modified
 * @attr: requested attributes
 *
 * The parameter line carrying the attribute mask is not visible in this
 * listing; the body reads it as attr_mask.
 *
 * Checks, each only when its mask bit is set: port number, pkey index,
 * max_rd_atomic, max_dest_rd_atomic and path MTU. Failures are logged; the
 * return statements other than the MTU check are not visible.
 */
1127 static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1130 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1131 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
	/* Port numbers are 1-based and bounded by caps.num_ports. */
1134 if ((attr_mask & IB_QP_PORT) &&
1135 (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
1136 ibdev_err(&hr_dev->ib_dev,
1137 "attr port_num invalid.attr->port_num=%d\n",
	/* The pkey index is checked against the table length of the selected port. */
1142 if (attr_mask & IB_QP_PKEY_INDEX) {
1143 p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1144 if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
1145 ibdev_err(&hr_dev->ib_dev,
1146 "attr pkey_index invalid.attr->pkey_index=%d\n",
1152 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1153 attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
1154 ibdev_err(&hr_dev->ib_dev,
1155 "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
1156 attr->max_rd_atomic);
1160 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1161 attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
1162 ibdev_err(&hr_dev->ib_dev,
1163 "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
1164 attr->max_dest_rd_atomic);
1168 if (attr_mask & IB_QP_PATH_MTU)
1169 return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
/*
 * hns_roce_modify_qp - verbs entry point for changing QP attributes/state.
 * @ibqp: QP to modify
 * @attr: requested attributes
 * @attr_mask: mask of valid fields in @attr
 * @udata: user-space channel (not referenced on the visible lines)
 *
 * Runs under hr_qp->mutex. After validating the state transition and the
 * attributes, the request is handed to the hardware-specific ->modify_qp
 * operation.
 *
 * NOTE(review): goto/label/return lines and some else branches are not
 * visible in this listing.
 */
1174 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1175 int attr_mask, struct ib_udata *udata)
1177 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1178 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
1179 enum ib_qp_state cur_state, new_state;
1182 mutex_lock(&hr_qp->mutex);
	/* Without explicit values, the current state is the QP's and the new state is the current one. */
1184 cur_state = attr_mask & IB_QP_CUR_STATE ?
1185 attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
1186 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
	/* User QP moving to ERR: take the queue heads from the record doorbells when enabled. */
1188 if (ibqp->uobject &&
1189 (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
1190 if (hr_qp->sdb_en == 1) {
1191 hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
1193 if (hr_qp->rdb_en == 1)
1194 hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
	/* Presumably the else branch for sdb_en != 1 -- confirm. */
1196 ibdev_warn(&hr_dev->ib_dev,
1197 "flush cqe is not supported in userspace!\n");
1202 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1204 ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
1208 ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
	/* RESET -> RESET is special-cased; an error is logged when caps.min_wqes is set. */
1212 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1213 if (hr_dev->caps.min_wqes) {
1215 ibdev_err(&hr_dev->ib_dev,
1216 "cur_state=%d new_state=%d\n", cur_state,
1225 ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
1229 mutex_unlock(&hr_qp->mutex);
/*
 * hns_roce_lock_cqs - take the locks of a QP's send and receive CQ.
 * @send_cq: send completion queue
 * @recv_cq: receive completion queue
 *
 * When both arguments are the same CQ the lock is taken once and the
 * second acquisition is expressed with __acquire() only. Otherwise the CQ
 * with the lower cqn is locked first (with spin_lock_irq()) and the other
 * one second via spin_lock_nested(). hns_roce_unlock_cqs() releases them.
 */
1234 void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
1235 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1237 if (send_cq == recv_cq) {
1238 spin_lock_irq(&send_cq->lock);
1239 __acquire(&recv_cq->lock);
1240 } else if (send_cq->cqn < recv_cq->cqn) {
1241 spin_lock_irq(&send_cq->lock);
1242 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1244 spin_lock_irq(&recv_cq->lock);
1245 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
/*
 * hns_roce_unlock_cqs - release the locks taken by hns_roce_lock_cqs().
 * @send_cq: send completion queue
 * @recv_cq: receive completion queue
 *
 * Locks are released in the reverse of the order hns_roce_lock_cqs() takes
 * them: the inner lock with spin_unlock(), then the outer one with
 * spin_unlock_irq(). For identical CQs only one real unlock is performed.
 */
1249 void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
1250 struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
1251 __releases(&recv_cq->lock)
1253 if (send_cq == recv_cq) {
1254 __release(&recv_cq->lock);
1255 spin_unlock_irq(&send_cq->lock);
1256 } else if (send_cq->cqn < recv_cq->cqn) {
1257 spin_unlock(&recv_cq->lock);
1258 spin_unlock_irq(&send_cq->lock);
1260 spin_unlock(&send_cq->lock);
1261 spin_unlock_irq(&recv_cq->lock);
/*
 * get_wqe - return the address at byte @offset inside the QP's hr_buf, as
 * computed by hns_roce_buf_offset().
 */
1265 static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
1268 return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
/*
 * get_recv_wqe - return the address of receive WQE @n: rq.offset plus
 * n << rq.wqe_shift bytes into the QP buffer.
 */
1271 void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
1273 return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
/*
 * get_send_wqe - return the address of send WQE @n: sq.offset plus
 * n << sq.wqe_shift bytes into the QP buffer.
 */
1276 void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
1278 return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
/*
 * get_send_extend_sge - return the address of extended SGE entry @n:
 * sge.offset plus n << sge.sge_shift bytes into the QP buffer.
 */
1281 void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
1283 return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
1284 (n << hr_qp->sge.sge_shift));
/*
 * hns_roce_wq_overflow - check whether posting @nreq more requests would
 * overflow a work queue.
 * @hr_wq: work queue to check
 * @nreq: number of requests about to be posted
 * @ib_cq: CQ associated with the queue; its lock guards the re-check
 *
 * The occupancy (head - tail) is first tested without locking. If that
 * test suggests the queue may be full, the occupancy is read again while
 * holding the CQ lock and the final verdict is based on that value. The
 * statement under the fast-path test is not visible; presumably it returns
 * false.
 *
 * Returns true when cur + nreq >= max_post after the locked re-read.
 */
1287 bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
1288 struct ib_cq *ib_cq)
1290 struct hns_roce_cq *hr_cq;
1293 cur = hr_wq->head - hr_wq->tail;
1294 if (likely(cur + nreq < hr_wq->max_post))
1297 hr_cq = to_hr_cq(ib_cq);
1298 spin_lock(&hr_cq->lock);
1299 cur = hr_wq->head - hr_wq->tail;
1300 spin_unlock(&hr_cq->lock);
1302 return cur + nreq >= hr_wq->max_post;
/*
 * hns_roce_init_qp_table - initialise the device's QP table.
 * @hr_dev: device to initialise
 *
 * Initialises the SCC mutex and the QP xarray, then sets up the QP number
 * bitmap with caps.num_qps entries, a mask of num_qps - 1 and
 * caps.reserved_qps entries reserved at the bottom. A bitmap failure is
 * logged; the return statements are not visible in this listing.
 */
1305 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
1307 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
1308 int reserved_from_top = 0;
1309 int reserved_from_bot;
1312 mutex_init(&qp_table->scc_mutex);
1313 xa_init(&hr_dev->qp_table_xa);
1315 reserved_from_bot = hr_dev->caps.reserved_qps;
1317 ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
1318 hr_dev->caps.num_qps - 1, reserved_from_bot,
1321 dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
/*
 * hns_roce_cleanup_qp_table - release the QP number bitmap set up by
 * hns_roce_init_qp_table().
 * @hr_dev: device whose QP table bitmap is cleaned up
 */
1329 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
1331 hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap);