// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting Infiniband API
 *
 * Work requests (WR) of type ib_post_send or ib_post_recv respectively
 * are submitted to either RC SQ or RC RQ
 * (reliably connected send/receive queue)
 * and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Through a send or receive completion queue (CQ) respectively,
 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 * Since the CQ callback is called from IRQ context, we split work by using
 * bottom halves implemented by tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 */
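
/* Overview sketch of the send-side flow implemented below; the receive
 * side mirrors it with the recv CQ and recv tasklet:
 *
 *	smc_wr_tx_get_free_slot()   reserve a slot and assign a wr_id
 *	(caller assembles message in wr_tx_bufs[idx])
 *	smc_wr_tx_send()            ib_post_send() on the link's RC QP
 *	smc_wr_tx_cq_handler()      send CQ callback (IRQ context)
 *	smc_wr_tx_tasklet_fn()      bottom half, polls CQEs
 *	smc_wr_tx_process_cqe()     releases the slot, runs pend handler
 */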

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
};

/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;
	int i;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		wake_up(&link->wr_reg_wait);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt)
		return;
	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
	/* clear the full struct smc_wr_tx_pend including .priv */
	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
		return;
	if (wc->status) {
		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
			/* clear full struct smc_wr_tx_pend including .priv */
			memset(&link->wr_tx_pends[i], 0,
			       sizeof(link->wr_tx_pends[i]));
			memset(&link->wr_tx_bufs[i], 0,
			       sizeof(link->wr_tx_bufs[i]));
			clear_bit(i, link->wr_tx_mask);
		}
		/* terminate connections of this link group abnormally */
		smc_lgr_terminate(smc_get_lgr(link));
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
	wake_up(&link->wr_tx_wait);
}

static void smc_wr_tx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}

/*---------------------------- request submission ---------------------------*/

static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}

/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_rdma_buf:	Out value returns pointer to rdma work request.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_rdma_wr **wr_rdma_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *wr_pend;
	u32 idx = link->wr_tx_cnt;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	if (in_softirq()) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_timeout(
			link->wr_tx_wait,
			link->state == SMC_LNK_INACTIVE ||
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate connections */
			smc_lgr_terminate(smc_get_lgr(link));
			return -EPIPE;
		}
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	if (wr_rdma_buf)
		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}

int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		u32 idx = pend->idx;

		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[idx], 0,
		       sizeof(link->wr_tx_pends[idx]));
		memset(&link->wr_tx_bufs[idx], 0,
		       sizeof(link->wr_tx_bufs[idx]));
		test_and_clear_bit(idx, link->wr_tx_mask);
		return 1;
	}

	return 0;
}

/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
	if (rc) {
		smc_wr_tx_put_slot(link, priv);
		smc_lgr_terminate(smc_get_lgr(link));
	}
	return rc;
}
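
/* Usage sketch: the expected call sequence for the send path above. The
 * function below is illustrative only, not part of the SMC code; the real
 * callers are the LLC and CDC layers, which follow this shape.
 */
static int __maybe_unused smc_wr_example_send(struct smc_link *link)
{
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	/* reserve a free send slot; outside softirq this may sleep */
	rc = smc_wr_tx_get_free_slot(link, NULL, &wr_buf, NULL, &pend);
	if (rc)
		return rc;
	/* assemble the message directly in the DMA-mapped send buffer */
	memset(wr_buf, 0, SMC_WR_TX_SIZE);
	/* post it; on failure the slot is freed and the link group ends */
	return smc_wr_tx_send(link, pend);
}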

/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (!rc) {
		/* timeout - terminate connections */
		smc_lgr_terminate(smc_get_lgr(link));
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}
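
/* Usage sketch: registering a memory region synchronously. The helper is
 * illustrative only; mr is assumed to come from ib_alloc_mr() and to have
 * been mapped with ib_map_mr_sg() beforehand.
 */
static int __maybe_unused smc_wr_example_reg_mr(struct smc_link *link,
						struct ib_mr *mr)
{
	/* posts an IB_WR_REG_MR WR and sleeps until the send CQ reports
	 * CONFIRMED or FAILED, or until SMC_WR_REG_MR_WAIT_TIME expires
	 */
	return smc_wr_reg_send(link, mr);
}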

void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
			     smc_wr_tx_filter filter,
			     smc_wr_tx_dismisser dismisser,
			     unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_tx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_tx->type != wr_tx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			dismisser(tx_pend);
	}
}

/****************************** receive queue ********************************/

int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}
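
/* Usage sketch: how a consumer hooks into receive demultiplexing. The
 * handler and type value below are illustrative only; the in-tree users
 * are the LLC and CDC layers, which register their handlers once at init
 * time, before any receive WRs are posted.
 */
static void smc_wr_example_rx_handler(struct ib_wc *wc, void *buf)
{
	/* buf points at the received message, starting with its
	 * struct smc_wr_rx_hdr
	 */
}

static struct smc_wr_rx_handler smc_wr_example_rx __maybe_unused = {
	.handler	= smc_wr_example_rx_handler,
	.type		= 0xff,		/* hypothetical message type */
};

/* registered once during module init:
 *	rc = smc_wr_rx_register_handler(&smc_wr_example_rx);
 */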

/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified any more afterwards so we don't need to lock it.
 */
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
	/* the rx wr_id counts up monotonically; its remainder modulo
	 * wr_rx_cnt selects the receive buffer this CQE completed on
	 */
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}

static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		if (wc[i].status == IB_WC_SUCCESS) {
			link->wr_rx_tstamp = jiffies;
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				/* terminate connections of this link group
				 * abnormally
				 */
				smc_lgr_terminate(smc_get_lgr(link));
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

static void smc_wr_rx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}

/***************************** init, exit, misc ******************************/

void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}

static void smc_wr_init_sge(struct smc_link *lnk)
{
	u32 i;

	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr =
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
	}
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		lnk->wr_rx_sges[i].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
		lnk->wr_rx_ibs[i].num_sge = 1;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}

void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	kfree(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_tx_rdma_sges);
	lnk->wr_tx_rdma_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_tx_rdmas);
	lnk->wr_tx_rdmas = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

int smc_wr_alloc_link_mem(struct smc_link *link)
{
	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_rdmas[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_rdmas)
		goto no_mem_wr_rx_ibs;
	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
					sizeof(link->wr_tx_rdma_sges[0]),
					GFP_KERNEL);
	if (!link->wr_tx_rdma_sges)
		goto no_mem_wr_tx_rdmas;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_tx_rdma_sges;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
				   sizeof(*link->wr_tx_mask),
				   GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	return 0;

no_mem_wr_tx_mask:
	kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_tx_rdma_sges:
	kfree(link->wr_tx_rdma_sges);
no_mem_wr_tx_rdmas:
	kfree(link->wr_tx_rdmas);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
		     (unsigned long)smcibdev);
	tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
		     (unsigned long)smcibdev);
}

int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
	init_waitqueue_head(&lnk->wr_tx_wait);
	init_waitqueue_head(&lnk->wr_reg_wait);
	return rc;

dma_unmap:
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}