/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

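/*
 * Global CM state.  Everything here (the listen service and remote
 * id/qpn/sidr RB trees, the local ID idr, and the timewait list) is
 * protected by cm.lock, while device_list additionally has its own
 * rwlock for readers.  Note that a cm_id_private's lock must not be
 * acquired while cm.lock is held (see struct cm_id_private below).
 */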
static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
        /* Serializes CM port state changes */
        spinlock_t state_lock;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};
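
/*
 * Note: these indexes are intended to line up with the CM MAD
 * attribute IDs (hence CM_ATTR_ID_OFFSET, which matches the REQ
 * attribute ID 0x0010), so a message's counter can be found from its
 * attribute ID by subtracting CM_ATTR_ID_OFFSET.
 */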

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}
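
/*
 * For reference, CM_COUNTER_ATTR(req, CM_REQ_COUNTER) below expands to:
 *
 *      struct cm_counter_attribute cm_req_counter_attr = {
 *              .attr = { .name = "req", .mode = 0444 },
 *              .index = CM_REQ_COUNTER
 *      };
 */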

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct list_head cm_priv_prim_list;
        struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head prim_list;
        struct list_head altr_list;
        /* Indicate whether the send port's MAD agent is registered and the av is set */
        int prim_send_port_not_ready;
        int altr_send_port_not_ready;

        struct list_head work_list;
        atomic_t work_count;
};
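
/*
 * Lifetime note (from the code below): cm_id_private is reference
 * counted.  cm_deref_id() fires the 'comp' completion when the last
 * reference is dropped, and cm_destroy_id() waits on that completion
 * before freeing the structure.
 */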

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

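/*
 * Allocate a MAD send buffer for this cm_id, using the primary av when
 * its port is ready and falling back to the alternate av otherwise.
 * cm.state_lock is held across the whole function so the selected port
 * cannot be torn down while its MAD agent is still in use.
 */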
static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
        struct cm_av *av;
        unsigned long flags, flags2;
        int ret = 0;

        /* Don't let the port be released until the agent is done with it. */
        spin_lock_irqsave(&cm.state_lock, flags2);
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_id_priv->prim_send_port_not_ready)
                av = &cm_id_priv->av;
        else if (!cm_id_priv->altr_send_port_not_ready &&
                 (cm_id_priv->alt_av.port))
                av = &cm_id_priv->alt_av;
        else {
                pr_info("%s: not valid CM id\n", __func__);
                ret = -ENODEV;
                spin_unlock_irqrestore(&cm.lock, flags);
                goto out;
        }
        spin_unlock_irqrestore(&cm.lock, flags);
        /* Make sure the selected port hasn't released its MAD agent yet. */
        mad_agent = av->port->mad_agent;
        if (!mad_agent) {
                pr_info("%s: not a valid MAD agent\n", __func__);
                ret = -ENODEV;
                goto out;
        }
        ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
                goto out;
        }

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                ret = PTR_ERR(m);
                goto out;
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;

out:
        spin_unlock_irqrestore(&cm.state_lock, flags2);
        return ret;
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }
        m->ah = ah;
        *msg = m;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        ib_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                    struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
                           grh, &av->ah_attr);
}

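/*
 * Resolve a path record into an address vector: find the CM device and
 * port whose GID cache contains path->sgid, look up the pkey index,
 * initialize the ah_attr, and link the cm_id onto the port's primary
 * or alternate list so device removal can find ids using this port.
 */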
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
        int ret;
        u8 p;
        struct net_device *ndev = ib_get_ndev_from_path(path);

        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
                                        ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
        }
        read_unlock_irqrestore(&cm.device_lock, flags);

        if (ndev)
                dev_put(ndev);

        if (!port)
                return -EINVAL;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;
        ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;

        spin_lock_irqsave(&cm.lock, flags);
        if (&cm_id_priv->av == av)
                list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
        else if (&cm_id_priv->alt_av == av)
                list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
        else
                ret = -EINVAL;

        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}

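/*
 * Local communication IDs are allocated cyclically from an idr and
 * XOR'd with a random operand chosen at module init, so the IDs that
 * appear on the wire are hard to predict while lookups remain a simple
 * XOR plus idr_find() (see cm_free_id() and cm_get_id() below).
 */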
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->prim_list);
        INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}
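
/*
 * Example: iba_time = 14 encodes 4.096us * 2^14 ~= 67ms; the
 * approximation above yields 1 << (14 - 8) = 64ms.
 */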

/*
 * calculate: 4.096 * 2^ack_timeout = 4.096 * 2^ack_delay + 2 * 4.096 * 2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}
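
/*
 * Example: ca_ack_delay = 15 and packet_life_time = 14 give
 * ack_timeout = 15; since the other value (15) is within 50%
 * (15 >= 14), it is rounded up to 16, i.e. the timeout doubles once.
 */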

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irqrestore(&cm.lock, flags);

        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                spin_lock_irq(&cm.lock);
                if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
                        rb_erase(&cm_id_priv->sidr_id_node,
                                 &cm.remote_sidr_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        spin_lock_irq(&cm.lock);
        if (!list_empty(&cm_id_priv->altr_list) &&
            (!cm_id_priv->altr_send_port_not_ready))
                list_del(&cm_id_priv->altr_list);
        if (!list_empty(&cm_id_priv->prim_list) &&
            (!cm_id_priv->prim_send_port_not_ready))
                list_del(&cm_id_priv->prim_list);
        spin_unlock_irq(&cm.lock);

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
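
/*
 * Illustrative use of the listen API (a sketch, not code from this
 * file; the handler, context, and service ID below are placeholders):
 *
 *      struct ib_cm_id *id;
 *      int ret;
 *
 *      id = ib_create_cm_id(device, my_handler, my_context);
 *      if (IS_ERR(id))
 *              return PTR_ERR(id);
 *      ret = ib_cm_listen(id, cpu_to_be64(0x1000), 0);
 *      if (ret)
 *              ib_destroy_cm_id(id);
 *
 * Passing 0 as service_mask requests an exact service ID match, per
 * the kernel-doc above.
 */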

/**
 * Create a new listening ib_cm_id and listen on the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 * be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id->cm_handler != cm_handler || cm_id->context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        ib_destroy_cm_id(cm_id);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
                          enum cm_msg_sequence msg_seq)
{
        u64 hi_tid, low_tid;

        hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
        low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
                          (msg_seq << 30));
        return cpu_to_be64(hi_tid | low_tid);
}
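
/*
 * TID layout produced above: the high 32 bits carry the MAD agent's
 * hi_tid and the low 32 bits carry the local communication ID, with
 * the message sequence folded into bits 31:30 of the low word.
 */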

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
                              __be16 attr_id, __be64 tid)
{
        hdr->base_version  = IB_MGMT_BASE_VERSION;
        hdr->mgmt_class    = IB_MGMT_CLASS_CM;
        hdr->class_version = IB_CM_CLASS_VERSION;
        hdr->method        = IB_MGMT_METHOD_SEND;
        hdr->attr_id       = attr_id;
        hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_req_param *param)
{
        struct ib_sa_path_rec *pri_path = param->primary_path;
        struct ib_sa_path_rec *alt_path = param->alternate_path;

        cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

        req_msg->local_comm_id = cm_id_priv->id.local_id;
        req_msg->service_id = param->service_id;
        req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
                                       param->remote_cm_response_timeout);
        cm_req_set_qp_type(req_msg, param->qp_type);
        cm_req_set_flow_ctrl(req_msg, param->flow_control);
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
        cm_req_set_local_resp_timeout(req_msg,
                                      param->local_cm_response_timeout);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
        cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

        if (param->qp_type != IB_QPT_XRC_INI) {
                cm_req_set_resp_res(req_msg, param->responder_resources);
                cm_req_set_retry_count(req_msg, param->retry_count);
                cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
                cm_req_set_srq(req_msg, param->srq);
        }

        if (pri_path->hop_limit <= 1) {
                req_msg->primary_local_lid = pri_path->slid;
                req_msg->primary_remote_lid = pri_path->dlid;
        } else {
                /* Work-around until there's a way to obtain remote LID info */
                req_msg->primary_local_lid = IB_LID_PERMISSIVE;
                req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
        }
        req_msg->primary_local_gid = pri_path->sgid;
        req_msg->primary_remote_gid = pri_path->dgid;
        cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
        cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
        req_msg->primary_traffic_class = pri_path->traffic_class;
        req_msg->primary_hop_limit = pri_path->hop_limit;
        cm_req_set_primary_sl(req_msg, pri_path->sl);
        cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
        cm_req_set_primary_local_ack_timeout(req_msg,
                cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                               pri_path->packet_life_time));

        if (alt_path) {
                if (alt_path->hop_limit <= 1) {
                        req_msg->alt_local_lid = alt_path->slid;
                        req_msg->alt_remote_lid = alt_path->dlid;
                } else {
                        req_msg->alt_local_lid = IB_LID_PERMISSIVE;
                        req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
                }
                req_msg->alt_local_gid = alt_path->sgid;
                req_msg->alt_remote_gid = alt_path->dgid;
                cm_req_set_alt_flow_label(req_msg,
                                          alt_path->flow_label);
                cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
                req_msg->alt_traffic_class = alt_path->traffic_class;
                req_msg->alt_hop_limit = alt_path->hop_limit;
                cm_req_set_alt_sl(req_msg, alt_path->sl);
                cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
                cm_req_set_alt_local_ack_timeout(req_msg,
                        cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                                       alt_path->packet_life_time));
        }

        if (param->private_data && param->private_data_len)
                memcpy(req_msg->private_data, param->private_data,
                       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
        /* peer-to-peer not supported */
        if (param->peer_to_peer)
                return -EINVAL;

        if (!param->primary_path)
                return -EINVAL;

        if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
            param->qp_type != IB_QPT_XRC_INI)
                return -EINVAL;

        if (param->private_data &&
            param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
                return -EINVAL;

        if (param->alternate_path &&
            (param->alternate_path->pkey != param->primary_path->pkey ||
             param->alternate_path->mtu != param->primary_path->mtu))
                return -EINVAL;

        return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
                   struct ib_cm_req_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct cm_req_msg *req_msg;
        unsigned long flags;
        int ret;

        ret = cm_validate_req_param(param);
        if (ret)
                return ret;

        /* Verify that we're not in timewait. */
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_IDLE) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = -EINVAL;
                goto out;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
                goto out;
        }

        ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
                                 cm_id_priv);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
                                         &cm_id_priv->alt_av, cm_id_priv);
                if (ret)
                        goto error1;
        }
        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
        cm_id_priv->timeout_ms = cm_convert_to_ms(
                                    param->primary_path->packet_life_time) * 2 +
                                 cm_convert_to_ms(
                                    param->remote_cm_response_timeout);
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->retry_count = param->retry_count;
        cm_id_priv->path_mtu = param->primary_path->mtu;
        cm_id_priv->pkey = param->primary_path->pkey;
        cm_id_priv->qp_type = param->qp_type;

        ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
        if (ret)
                goto error1;

        req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
        cm_format_req(req_msg, cm_id_priv, param);
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
        cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

        cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
        cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                goto error2;
        }
        BUG_ON(cm_id->state != IB_CM_IDLE);
        cm_id->state = IB_CM_REQ_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return 0;

error2: cm_free_msg(cm_id_priv->msg);
error1: kfree(cm_id_priv->timewait_info);
out:    return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
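
/*
 * Illustrative REQ send (a sketch, not code from this file; path_rec,
 * my_qp, my_psn, and the numeric timeouts/counts are placeholders):
 *
 *      struct ib_cm_req_param param = {
 *              .primary_path               = &path_rec,
 *              .service_id                 = cpu_to_be64(0x1000),
 *              .qp_num                     = my_qp->qp_num,
 *              .qp_type                    = IB_QPT_RC,
 *              .starting_psn               = my_psn,
 *              .responder_resources        = 4,
 *              .initiator_depth            = 4,
 *              .remote_cm_response_timeout = 20,
 *              .local_cm_response_timeout  = 20,
 *              .retry_count                = 7,
 *              .rnr_retry_count            = 7,
 *              .max_cm_retries             = 15,
 *      };
 *      int ret = ib_send_cm_req(cm_id, &param);
 *
 * cm_validate_req_param() above spells out the hard requirements: a
 * primary path, no peer-to-peer, and an RC, UC, or XRC_INI QP type.
 */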
1311
1312 static int cm_issue_rej(struct cm_port *port,
1313                         struct ib_mad_recv_wc *mad_recv_wc,
1314                         enum ib_cm_rej_reason reason,
1315                         enum cm_msg_response msg_rejected,
1316                         void *ari, u8 ari_length)
1317 {
1318         struct ib_mad_send_buf *msg = NULL;
1319         struct cm_rej_msg *rej_msg, *rcv_msg;
1320         int ret;
1321
1322         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1323         if (ret)
1324                 return ret;
1325
1326         /* We just need common CM header information.  Cast to any message. */
1327         rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1328         rej_msg = (struct cm_rej_msg *) msg->mad;
1329
1330         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1331         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1332         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1333         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1334         rej_msg->reason = cpu_to_be16(reason);
1335
1336         if (ari && ari_length) {
1337                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1338                 memcpy(rej_msg->ari, ari, ari_length);
1339         }
1340
1341         ret = ib_post_send_mad(msg, NULL);
1342         if (ret)
1343                 cm_free_msg(msg);
1344
1345         return ret;
1346 }
1347
1348 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1349                                     __be32 local_qpn, __be32 remote_qpn)
1350 {
1351         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1352                 ((local_ca_guid == remote_ca_guid) &&
1353                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1354 }
1355
1356 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1357                                             struct ib_sa_path_rec *primary_path,
1358                                             struct ib_sa_path_rec *alt_path)
1359 {
1360         memset(primary_path, 0, sizeof *primary_path);
1361         primary_path->dgid = req_msg->primary_local_gid;
1362         primary_path->sgid = req_msg->primary_remote_gid;
1363         primary_path->dlid = req_msg->primary_local_lid;
1364         primary_path->slid = req_msg->primary_remote_lid;
1365         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1366         primary_path->hop_limit = req_msg->primary_hop_limit;
1367         primary_path->traffic_class = req_msg->primary_traffic_class;
1368         primary_path->reversible = 1;
1369         primary_path->pkey = req_msg->pkey;
1370         primary_path->sl = cm_req_get_primary_sl(req_msg);
1371         primary_path->mtu_selector = IB_SA_EQ;
1372         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1373         primary_path->rate_selector = IB_SA_EQ;
1374         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1375         primary_path->packet_life_time_selector = IB_SA_EQ;
1376         primary_path->packet_life_time =
1377                 cm_req_get_primary_local_ack_timeout(req_msg);
1378         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1379         primary_path->service_id = req_msg->service_id;
1380
1381         if (req_msg->alt_local_lid) {
1382                 memset(alt_path, 0, sizeof *alt_path);
1383                 alt_path->dgid = req_msg->alt_local_gid;
1384                 alt_path->sgid = req_msg->alt_remote_gid;
1385                 alt_path->dlid = req_msg->alt_local_lid;
1386                 alt_path->slid = req_msg->alt_remote_lid;
1387                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1388                 alt_path->hop_limit = req_msg->alt_hop_limit;
1389                 alt_path->traffic_class = req_msg->alt_traffic_class;
1390                 alt_path->reversible = 1;
1391                 alt_path->pkey = req_msg->pkey;
1392                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1393                 alt_path->mtu_selector = IB_SA_EQ;
1394                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1395                 alt_path->rate_selector = IB_SA_EQ;
1396                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1397                 alt_path->packet_life_time_selector = IB_SA_EQ;
1398                 alt_path->packet_life_time =
1399                         cm_req_get_alt_local_ack_timeout(req_msg);
1400                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1401                 alt_path->service_id = req_msg->service_id;
1402         }
1403 }
1404
1405 static u16 cm_get_bth_pkey(struct cm_work *work)
1406 {
1407         struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1408         u8 port_num = work->port->port_num;
1409         u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1410         u16 pkey;
1411         int ret;
1412
1413         ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1414         if (ret) {
1415                 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1416                                      port_num, pkey_index, ret);
1417                 return 0;
1418         }
1419
1420         return pkey;
1421 }
1422
1423 static void cm_format_req_event(struct cm_work *work,
1424                                 struct cm_id_private *cm_id_priv,
1425                                 struct ib_cm_id *listen_id)
1426 {
1427         struct cm_req_msg *req_msg;
1428         struct ib_cm_req_event_param *param;
1429
1430         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1431         param = &work->cm_event.param.req_rcvd;
1432         param->listen_id = listen_id;
1433         param->bth_pkey = cm_get_bth_pkey(work);
1434         param->port = cm_id_priv->av.port->port_num;
1435         param->primary_path = &work->path[0];
1436         if (req_msg->alt_local_lid)
1437                 param->alternate_path = &work->path[1];
1438         else
1439                 param->alternate_path = NULL;
1440         param->remote_ca_guid = req_msg->local_ca_guid;
1441         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1442         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1443         param->qp_type = cm_req_get_qp_type(req_msg);
1444         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
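	/*
	 * What the peer calls its initiator depth is, seen from this
	 * side, the responder resources it needs (and vice versa);
	 * hence the crossed assignments below, including the swapped
	 * local/remote CM response timeouts.
	 */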
1445         param->responder_resources = cm_req_get_init_depth(req_msg);
1446         param->initiator_depth = cm_req_get_resp_res(req_msg);
1447         param->local_cm_response_timeout =
1448                                         cm_req_get_remote_resp_timeout(req_msg);
1449         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1450         param->remote_cm_response_timeout =
1451                                         cm_req_get_local_resp_timeout(req_msg);
1452         param->retry_count = cm_req_get_retry_count(req_msg);
1453         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1454         param->srq = cm_req_get_srq(req_msg);
1455         work->cm_event.private_data = &req_msg->private_data;
1456 }
1457
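/*
 * work_count is biased at -1: the handler that increments it to zero
 * delivers its event directly and then drains whatever concurrent
 * handlers queued on work_list, stopping once the count drops back
 * below zero.  A non-zero return from the ULP's cm_handler destroys
 * the id.
 */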
1458 static void cm_process_work(struct cm_id_private *cm_id_priv,
1459                             struct cm_work *work)
1460 {
1461         int ret;
1462
1463         /* We will typically only have the current event to report. */
1464         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1465         cm_free_work(work);
1466
1467         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1468                 spin_lock_irq(&cm_id_priv->lock);
1469                 work = cm_dequeue_work(cm_id_priv);
1470                 spin_unlock_irq(&cm_id_priv->lock);
1471                 BUG_ON(!work);
1472                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1473                                                 &work->cm_event);
1474                 cm_free_work(work);
1475         }
1476         cm_deref_id(cm_id_priv);
1477         if (ret)
1478                 cm_destroy_id(&cm_id_priv->id, ret);
1479 }
1480
1481 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1482                           struct cm_id_private *cm_id_priv,
1483                           enum cm_msg_response msg_mraed, u8 service_timeout,
1484                           const void *private_data, u8 private_data_len)
1485 {
1486         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1487         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1488         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1489         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1490         cm_mra_set_service_timeout(mra_msg, service_timeout);
1491
1492         if (private_data && private_data_len)
1493                 memcpy(mra_msg->private_data, private_data, private_data_len);
1494 }
1495
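/*
 * A REJ sent straight from IB_CM_REQ_RCVD is the one case where the
 * peer has never been told our comm ID (nothing has been sent back
 * yet), so local_comm_id is reported as zero; every other state can
 * echo the real ID.
 */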
1496 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1497                           struct cm_id_private *cm_id_priv,
1498                           enum ib_cm_rej_reason reason,
1499                           void *ari,
1500                           u8 ari_length,
1501                           const void *private_data,
1502                           u8 private_data_len)
1503 {
1504         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1505         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1506
1507         switch (cm_id_priv->id.state) {
1508         case IB_CM_REQ_RCVD:
1509                 rej_msg->local_comm_id = 0;
1510                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1511                 break;
1512         case IB_CM_MRA_REQ_SENT:
1513                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1514                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1515                 break;
1516         case IB_CM_REP_RCVD:
1517         case IB_CM_MRA_REP_SENT:
1518                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1519                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1520                 break;
1521         default:
1522                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1523                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1524                 break;
1525         }
1526
1527         rej_msg->reason = cpu_to_be16(reason);
1528         if (ari && ari_length) {
1529                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1530                 memcpy(rej_msg->ari, ari, ari_length);
1531         }
1532
1533         if (private_data && private_data_len)
1534                 memcpy(rej_msg->private_data, private_data, private_data_len);
1535 }
1536
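/*
 * Answer a retransmitted REQ from the current state: resend the MRA
 * if we already asked the peer to wait, reject it as stale if the old
 * connection is sitting in timewait, and otherwise drop it silently.
 */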
1537 static void cm_dup_req_handler(struct cm_work *work,
1538                                struct cm_id_private *cm_id_priv)
1539 {
1540         struct ib_mad_send_buf *msg = NULL;
1541         int ret;
1542
1543         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1544                         counter[CM_REQ_COUNTER]);
1545
1546         /* Quick state check to discard duplicate REQs. */
1547         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1548                 return;
1549
1550         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1551         if (ret)
1552                 return;
1553
1554         spin_lock_irq(&cm_id_priv->lock);
1555         switch (cm_id_priv->id.state) {
1556         case IB_CM_MRA_REQ_SENT:
1557                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1558                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1559                               cm_id_priv->private_data,
1560                               cm_id_priv->private_data_len);
1561                 break;
1562         case IB_CM_TIMEWAIT:
1563                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1564                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1565                 break;
1566         default:
1567                 goto unlock;
1568         }
1569         spin_unlock_irq(&cm_id_priv->lock);
1570
1571         ret = ib_post_send_mad(msg, NULL);
1572         if (ret)
1573                 goto free;
1574         return;
1575
1576 unlock: spin_unlock_irq(&cm_id_priv->lock);
1577 free:   cm_free_msg(msg);
1578 }
1579
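/*
 * Match an incoming REQ against the local tables: a hit in the remote
 * ID table means a duplicate REQ, a hit in the remote QPN table means
 * the peer's QP still looks connected (a stale connection), and only
 * then is the service ID resolved against the listeners.
 */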
1580 static struct cm_id_private *cm_match_req(struct cm_work *work,
1581                                           struct cm_id_private *cm_id_priv)
1582 {
1583         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1584         struct cm_timewait_info *timewait_info;
1585         struct cm_req_msg *req_msg;
1586
1587         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1588
1589         /* Check for possible duplicate REQ. */
1590         spin_lock_irq(&cm.lock);
1591         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1592         if (timewait_info) {
1593                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1594                                            timewait_info->work.remote_id);
1595                 spin_unlock_irq(&cm.lock);
1596                 if (cur_cm_id_priv) {
1597                         cm_dup_req_handler(work, cur_cm_id_priv);
1598                         cm_deref_id(cur_cm_id_priv);
1599                 }
1600                 return NULL;
1601         }
1602
1603         /* Check for stale connections. */
1604         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1605         if (timewait_info) {
1606                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1607                 spin_unlock_irq(&cm.lock);
1608                 cm_issue_rej(work->port, work->mad_recv_wc,
1609                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1610                              NULL, 0);
1611                 return NULL;
1612         }
1613
1614         /* Find matching listen request. */
1615         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1616                                            req_msg->service_id);
1617         if (!listen_cm_id_priv) {
1618                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1619                 spin_unlock_irq(&cm.lock);
1620                 cm_issue_rej(work->port, work->mad_recv_wc,
1621                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1622                              NULL, 0);
1623                 goto out;
1624         }
1625         atomic_inc(&listen_cm_id_priv->refcount);
1626         atomic_inc(&cm_id_priv->refcount);
1627         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1628         atomic_inc(&cm_id_priv->work_count);
1629         spin_unlock_irq(&cm.lock);
1630 out:
1631         return listen_cm_id_priv;
1632 }
1633
1634 /*
1635  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1636  * we need to override the LID/SL data in the REQ with the LID information
1637  * in the work completion.
1638  */
1639 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1640 {
1641         if (!cm_req_get_primary_subnet_local(req_msg)) {
1642                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1643                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1644                         cm_req_set_primary_sl(req_msg, wc->sl);
1645                 }
1646
1647                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1648                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1649         }
1650
1651         if (!cm_req_get_alt_subnet_local(req_msg)) {
1652                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1653                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1654                         cm_req_set_alt_sl(req_msg, wc->sl);
1655                 }
1656
1657                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1658                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1659         }
1660 }
1661
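/*
 * Passive-side REQ processing: create a new cm_id inheriting the
 * matching listener's handler and context, record the timewait data,
 * resolve address vectors for the primary (and optional alternate)
 * path, copy the connection parameters out of the REQ, and hand the
 * event to the ULP.
 */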
1662 static int cm_req_handler(struct cm_work *work)
1663 {
1664         struct ib_cm_id *cm_id;
1665         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1666         struct cm_req_msg *req_msg;
1667         int ret;
1668
1669         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1670
1671         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1672         if (IS_ERR(cm_id))
1673                 return PTR_ERR(cm_id);
1674
1675         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1676         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1677         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1678                                 work->mad_recv_wc->recv_buf.grh,
1679                                 &cm_id_priv->av);
1680         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1681                                                             id.local_id);
1682         if (IS_ERR(cm_id_priv->timewait_info)) {
1683                 ret = PTR_ERR(cm_id_priv->timewait_info);
1684                 goto destroy;
1685         }
1686         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1687         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1688         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1689
1690         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1691         if (!listen_cm_id_priv) {
1692                 ret = -EINVAL;
1693                 kfree(cm_id_priv->timewait_info);
1694                 goto destroy;
1695         }
1696
1697         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1698         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1699         cm_id_priv->id.service_id = req_msg->service_id;
1700         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1701
1702         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1703         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1704
1705         memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1706         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1707                                  cm_id_priv);
1708         if (ret) {
1709                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1710                                   work->port->port_num, 0, &work->path[0].sgid,
1711                                   NULL);
1712                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1713                                &work->path[0].sgid, sizeof work->path[0].sgid,
1714                                NULL, 0);
1715                 goto rejected;
1716         }
1717         if (req_msg->alt_local_lid) {
1718                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
1719                                          cm_id_priv);
1720                 if (ret) {
1721                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1722                                        &work->path[0].sgid,
1723                                        sizeof work->path[0].sgid, NULL, 0);
1724                         goto rejected;
1725                 }
1726         }
1727         cm_id_priv->tid = req_msg->hdr.tid;
1728         cm_id_priv->timeout_ms = cm_convert_to_ms(
1729                                         cm_req_get_local_resp_timeout(req_msg));
1730         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1731         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1732         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1733         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1734         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1735         cm_id_priv->pkey = req_msg->pkey;
1736         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1737         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1738         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1739         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1740
1741         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1742         cm_process_work(cm_id_priv, work);
1743         cm_deref_id(listen_cm_id_priv);
1744         return 0;
1745
1746 rejected:
1747         atomic_dec(&cm_id_priv->refcount);
1748         cm_deref_id(listen_cm_id_priv);
1749 destroy:
1750         ib_destroy_cm_id(cm_id);
1751         return ret;
1752 }
1753
1754 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1755                           struct cm_id_private *cm_id_priv,
1756                           struct ib_cm_rep_param *param)
1757 {
1758         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1759         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1760         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1761         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1762         rep_msg->resp_resources = param->responder_resources;
1763         cm_rep_set_target_ack_delay(rep_msg,
1764                                     cm_id_priv->av.port->cm_dev->ack_delay);
1765         cm_rep_set_failover(rep_msg, param->failover_accepted);
1766         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1767         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1768
1769         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1770                 rep_msg->initiator_depth = param->initiator_depth;
1771                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1772                 cm_rep_set_srq(rep_msg, param->srq);
1773                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1774         } else {
1775                 cm_rep_set_srq(rep_msg, 1);
1776                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1777         }
1778
1779         if (param->private_data && param->private_data_len)
1780                 memcpy(rep_msg->private_data, param->private_data,
1781                        param->private_data_len);
1782 }
1783
1784 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1785                    struct ib_cm_rep_param *param)
1786 {
1787         struct cm_id_private *cm_id_priv;
1788         struct ib_mad_send_buf *msg;
1789         struct cm_rep_msg *rep_msg;
1790         unsigned long flags;
1791         int ret;
1792
1793         if (param->private_data &&
1794             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1795                 return -EINVAL;
1796
1797         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1798         spin_lock_irqsave(&cm_id_priv->lock, flags);
1799         if (cm_id->state != IB_CM_REQ_RCVD &&
1800             cm_id->state != IB_CM_MRA_REQ_SENT) {
1801                 ret = -EINVAL;
1802                 goto out;
1803         }
1804
1805         ret = cm_alloc_msg(cm_id_priv, &msg);
1806         if (ret)
1807                 goto out;
1808
1809         rep_msg = (struct cm_rep_msg *) msg->mad;
1810         cm_format_rep(rep_msg, cm_id_priv, param);
1811         msg->timeout_ms = cm_id_priv->timeout_ms;
1812         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1813
1814         ret = ib_post_send_mad(msg, NULL);
1815         if (ret) {
1816                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1817                 cm_free_msg(msg);
1818                 return ret;
1819         }
1820
1821         cm_id->state = IB_CM_REP_SENT;
1822         cm_id_priv->msg = msg;
1823         cm_id_priv->initiator_depth = param->initiator_depth;
1824         cm_id_priv->responder_resources = param->responder_resources;
1825         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1826         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1827
1828 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1829         return ret;
1830 }
1831 EXPORT_SYMBOL(ib_send_cm_rep);
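/*
 * Typical passive-side use, as a sketch only: a ULP that has accepted
 * an IB_CM_REQ_RECEIVED event replies with something like the below,
 * where qp and psn stand in for the ULP's own QP and starting PSN:
 *
 *	struct ib_cm_rep_param rep = {
 *		.qp_num			= qp->qp_num,
 *		.starting_psn		= psn,
 *		.responder_resources	= 4,
 *		.initiator_depth	= 4,
 *		.rnr_retry_count	= 7,
 *		.flow_control		= 1,
 *	};
 *	ret = ib_send_cm_rep(cm_id, &rep);
 *
 * The REP is then retransmitted until an RTU (or REJ) arrives or the
 * timeout taken from the REQ expires.
 */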
1832
1833 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1834                           struct cm_id_private *cm_id_priv,
1835                           const void *private_data,
1836                           u8 private_data_len)
1837 {
1838         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1839         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1840         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1841
1842         if (private_data && private_data_len)
1843                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1844 }
1845
1846 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1847                    const void *private_data,
1848                    u8 private_data_len)
1849 {
1850         struct cm_id_private *cm_id_priv;
1851         struct ib_mad_send_buf *msg;
1852         unsigned long flags;
1853         void *data;
1854         int ret;
1855
1856         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1857                 return -EINVAL;
1858
1859         data = cm_copy_private_data(private_data, private_data_len);
1860         if (IS_ERR(data))
1861                 return PTR_ERR(data);
1862
1863         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1864         spin_lock_irqsave(&cm_id_priv->lock, flags);
1865         if (cm_id->state != IB_CM_REP_RCVD &&
1866             cm_id->state != IB_CM_MRA_REP_SENT) {
1867                 ret = -EINVAL;
1868                 goto error;
1869         }
1870
1871         ret = cm_alloc_msg(cm_id_priv, &msg);
1872         if (ret)
1873                 goto error;
1874
1875         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1876                       private_data, private_data_len);
1877
1878         ret = ib_post_send_mad(msg, NULL);
1879         if (ret) {
1880                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1881                 cm_free_msg(msg);
1882                 kfree(data);
1883                 return ret;
1884         }
1885
1886         cm_id->state = IB_CM_ESTABLISHED;
1887         cm_set_private_data(cm_id_priv, data, private_data_len);
1888         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1889         return 0;
1890
1891 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1892         kfree(data);
1893         return ret;
1894 }
1895 EXPORT_SYMBOL(ib_send_cm_rtu);
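/*
 * Called by the active side once the REP has been accepted; the RTU
 * completes the REQ/REP/RTU handshake and moves the id to
 * IB_CM_ESTABLISHED.
 */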
1896
1897 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1898 {
1899         struct cm_rep_msg *rep_msg;
1900         struct ib_cm_rep_event_param *param;
1901
1902         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1903         param = &work->cm_event.param.rep_rcvd;
1904         param->remote_ca_guid = rep_msg->local_ca_guid;
1905         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1906         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1907         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
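	/*
	 * As with the REQ, the REP's depth/resource fields swap roles
	 * when read on the receiving side.
	 */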
1908         param->responder_resources = rep_msg->initiator_depth;
1909         param->initiator_depth = rep_msg->resp_resources;
1910         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1911         param->failover_accepted = cm_rep_get_failover(rep_msg);
1912         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1913         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1914         param->srq = cm_rep_get_srq(rep_msg);
1915         work->cm_event.private_data = &rep_msg->private_data;
1916 }
1917
1918 static void cm_dup_rep_handler(struct cm_work *work)
1919 {
1920         struct cm_id_private *cm_id_priv;
1921         struct cm_rep_msg *rep_msg;
1922         struct ib_mad_send_buf *msg = NULL;
1923         int ret;
1924
1925         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1926         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1927                                    rep_msg->local_comm_id);
1928         if (!cm_id_priv)
1929                 return;
1930
1931         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1932                         counter[CM_REP_COUNTER]);
1933         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1934         if (ret)
1935                 goto deref;
1936
1937         spin_lock_irq(&cm_id_priv->lock);
1938         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1939                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1940                               cm_id_priv->private_data,
1941                               cm_id_priv->private_data_len);
1942         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1943                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1944                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1945                               cm_id_priv->private_data,
1946                               cm_id_priv->private_data_len);
1947         else
1948                 goto unlock;
1949         spin_unlock_irq(&cm_id_priv->lock);
1950
1951         ret = ib_post_send_mad(msg, NULL);
1952         if (ret)
1953                 goto free;
1954         goto deref;
1955
1956 unlock: spin_unlock_irq(&cm_id_priv->lock);
1957 free:   cm_free_msg(msg);
1958 deref:  cm_deref_id(cm_id_priv);
1959 }
1960
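/*
 * Active-side REP processing mirrors cm_match_req: insert into the
 * remote ID and QPN tables to catch duplicate REPs and stale
 * connections before the id moves to IB_CM_REP_RCVD.
 */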
1961 static int cm_rep_handler(struct cm_work *work)
1962 {
1963         struct cm_id_private *cm_id_priv;
1964         struct cm_rep_msg *rep_msg;
1965         int ret;
1966
1967         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1968         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1969         if (!cm_id_priv) {
1970                 cm_dup_rep_handler(work);
1971                 return -EINVAL;
1972         }
1973
1974         cm_format_rep_event(work, cm_id_priv->qp_type);
1975
1976         spin_lock_irq(&cm_id_priv->lock);
1977         switch (cm_id_priv->id.state) {
1978         case IB_CM_REQ_SENT:
1979         case IB_CM_MRA_REQ_RCVD:
1980                 break;
1981         default:
1982                 spin_unlock_irq(&cm_id_priv->lock);
1983                 ret = -EINVAL;
1984                 goto error;
1985         }
1986
1987         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1988         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1989         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1990
1991         spin_lock(&cm.lock);
1992         /* Check for duplicate REP. */
1993         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1994                 spin_unlock(&cm.lock);
1995                 spin_unlock_irq(&cm_id_priv->lock);
1996                 ret = -EINVAL;
1997                 goto error;
1998         }
1999         /* Check for a stale connection. */
2000         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
2001                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2002                          &cm.remote_id_table);
2003                 cm_id_priv->timewait_info->inserted_remote_id = 0;
2004                 spin_unlock(&cm.lock);
2005                 spin_unlock_irq(&cm_id_priv->lock);
2006                 cm_issue_rej(work->port, work->mad_recv_wc,
2007                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2008                              NULL, 0);
2009                 ret = -EINVAL;
2010                 goto error;
2011         }
2012         spin_unlock(&cm.lock);
2013
2014         cm_id_priv->id.state = IB_CM_REP_RCVD;
2015         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2016         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2017         cm_id_priv->initiator_depth = rep_msg->resp_resources;
2018         cm_id_priv->responder_resources = rep_msg->initiator_depth;
2019         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2020         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2021         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2022         cm_id_priv->av.timeout =
2023                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2024                                        cm_id_priv->av.timeout - 1);
2025         cm_id_priv->alt_av.timeout =
2026                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2027                                        cm_id_priv->alt_av.timeout - 1);
2028
2029         /* todo: handle peer_to_peer */
2030
2031         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2032         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2033         if (!ret)
2034                 list_add_tail(&work->list, &cm_id_priv->work_list);
2035         spin_unlock_irq(&cm_id_priv->lock);
2036
2037         if (ret)
2038                 cm_process_work(cm_id_priv, work);
2039         else
2040                 cm_deref_id(cm_id_priv);
2041         return 0;
2042
2043 error:
2044         cm_deref_id(cm_id_priv);
2045         return ret;
2046 }
2047
2048 static int cm_establish_handler(struct cm_work *work)
2049 {
2050         struct cm_id_private *cm_id_priv;
2051         int ret;
2052
2053         /* See comment in cm_establish about lookup. */
2054         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2055         if (!cm_id_priv)
2056                 return -EINVAL;
2057
2058         spin_lock_irq(&cm_id_priv->lock);
2059         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2060                 spin_unlock_irq(&cm_id_priv->lock);
2061                 goto out;
2062         }
2063
2064         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2065         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2066         if (!ret)
2067                 list_add_tail(&work->list, &cm_id_priv->work_list);
2068         spin_unlock_irq(&cm_id_priv->lock);
2069
2070         if (ret)
2071                 cm_process_work(cm_id_priv, work);
2072         else
2073                 cm_deref_id(cm_id_priv);
2074         return 0;
2075 out:
2076         cm_deref_id(cm_id_priv);
2077         return -EINVAL;
2078 }
2079
2080 static int cm_rtu_handler(struct cm_work *work)
2081 {
2082         struct cm_id_private *cm_id_priv;
2083         struct cm_rtu_msg *rtu_msg;
2084         int ret;
2085
2086         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2087         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2088                                    rtu_msg->local_comm_id);
2089         if (!cm_id_priv)
2090                 return -EINVAL;
2091
2092         work->cm_event.private_data = &rtu_msg->private_data;
2093
2094         spin_lock_irq(&cm_id_priv->lock);
2095         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2096             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2097                 spin_unlock_irq(&cm_id_priv->lock);
2098                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2099                                 counter[CM_RTU_COUNTER]);
2100                 goto out;
2101         }
2102         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2103
2104         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2105         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2106         if (!ret)
2107                 list_add_tail(&work->list, &cm_id_priv->work_list);
2108         spin_unlock_irq(&cm_id_priv->lock);
2109
2110         if (ret)
2111                 cm_process_work(cm_id_priv, work);
2112         else
2113                 cm_deref_id(cm_id_priv);
2114         return 0;
2115 out:
2116         cm_deref_id(cm_id_priv);
2117         return -EINVAL;
2118 }
2119
2120 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2121                           struct cm_id_private *cm_id_priv,
2122                           const void *private_data,
2123                           u8 private_data_len)
2124 {
2125         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2126                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
2127         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2128         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2129         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2130
2131         if (private_data && private_data_len)
2132                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2133 }
2134
2135 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2136                     const void *private_data,
2137                     u8 private_data_len)
2138 {
2139         struct cm_id_private *cm_id_priv;
2140         struct ib_mad_send_buf *msg;
2141         unsigned long flags;
2142         int ret;
2143
2144         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2145                 return -EINVAL;
2146
2147         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2148         spin_lock_irqsave(&cm_id_priv->lock, flags);
2149         if (cm_id->state != IB_CM_ESTABLISHED) {
2150                 ret = -EINVAL;
2151                 goto out;
2152         }
2153
2154         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2155             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2156                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2157
2158         ret = cm_alloc_msg(cm_id_priv, &msg);
2159         if (ret) {
2160                 cm_enter_timewait(cm_id_priv);
2161                 goto out;
2162         }
2163
2164         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2165                        private_data, private_data_len);
2166         msg->timeout_ms = cm_id_priv->timeout_ms;
2167         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2168
2169         ret = ib_post_send_mad(msg, NULL);
2170         if (ret) {
2171                 cm_enter_timewait(cm_id_priv);
2172                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2173                 cm_free_msg(msg);
2174                 return ret;
2175         }
2176
2177         cm_id->state = IB_CM_DREQ_SENT;
2178         cm_id_priv->msg = msg;
2179 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2180         return ret;
2181 }
2182 EXPORT_SYMBOL(ib_send_cm_dreq);
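/*
 * The DREQ is retried like any other timed MAD and is answered with a
 * DREP (see ib_send_cm_drep and cm_issue_drep below); both ends then
 * sit out the timewait period before the QPN and comm ID may be
 * reused.
 */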
2183
2184 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2185                           struct cm_id_private *cm_id_priv,
2186                           const void *private_data,
2187                           u8 private_data_len)
2188 {
2189         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2190         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2191         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2192
2193         if (private_data && private_data_len)
2194                 memcpy(drep_msg->private_data, private_data, private_data_len);
2195 }
2196
2197 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2198                     const void *private_data,
2199                     u8 private_data_len)
2200 {
2201         struct cm_id_private *cm_id_priv;
2202         struct ib_mad_send_buf *msg;
2203         unsigned long flags;
2204         void *data;
2205         int ret;
2206
2207         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2208                 return -EINVAL;
2209
2210         data = cm_copy_private_data(private_data, private_data_len);
2211         if (IS_ERR(data))
2212                 return PTR_ERR(data);
2213
2214         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2215         spin_lock_irqsave(&cm_id_priv->lock, flags);
2216         if (cm_id->state != IB_CM_DREQ_RCVD) {
2217                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2218                 kfree(data);
2219                 return -EINVAL;
2220         }
2221
2222         cm_set_private_data(cm_id_priv, data, private_data_len);
2223         cm_enter_timewait(cm_id_priv);
2224
2225         ret = cm_alloc_msg(cm_id_priv, &msg);
2226         if (ret)
2227                 goto out;
2228
2229         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2230                        private_data, private_data_len);
2231
2232         ret = ib_post_send_mad(msg, NULL);
2233         if (ret) {
2234                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2235                 cm_free_msg(msg);
2236                 return ret;
2237         }
2238
2239 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2240         return ret;
2241 }
2242 EXPORT_SYMBOL(ib_send_cm_drep);
2243
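/*
 * A DREQ that no longer matches any local id (for instance after our
 * side has already left timewait) is answered statelessly, letting
 * the peer finish its teardown instead of retrying until timeout.
 */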
2244 static int cm_issue_drep(struct cm_port *port,
2245                          struct ib_mad_recv_wc *mad_recv_wc)
2246 {
2247         struct ib_mad_send_buf *msg = NULL;
2248         struct cm_dreq_msg *dreq_msg;
2249         struct cm_drep_msg *drep_msg;
2250         int ret;
2251
2252         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2253         if (ret)
2254                 return ret;
2255
2256         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2257         drep_msg = (struct cm_drep_msg *) msg->mad;
2258
2259         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2260         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2261         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2262
2263         ret = ib_post_send_mad(msg, NULL);
2264         if (ret)
2265                 cm_free_msg(msg);
2266
2267         return ret;
2268 }
2269
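/*
 * Tear-down on the receiving side: a DREQ whose remote QPN does not
 * match our local QPN is dropped, any REQ/REP/LAP MAD still being
 * retried is cancelled, a DREQ retransmitted into timewait gets a
 * fresh DREP, and further duplicates are only counted.
 */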
2270 static int cm_dreq_handler(struct cm_work *work)
2271 {
2272         struct cm_id_private *cm_id_priv;
2273         struct cm_dreq_msg *dreq_msg;
2274         struct ib_mad_send_buf *msg = NULL;
2275         int ret;
2276
2277         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2278         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2279                                    dreq_msg->local_comm_id);
2280         if (!cm_id_priv) {
2281                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2282                                 counter[CM_DREQ_COUNTER]);
2283                 cm_issue_drep(work->port, work->mad_recv_wc);
2284                 return -EINVAL;
2285         }
2286
2287         work->cm_event.private_data = &dreq_msg->private_data;
2288
2289         spin_lock_irq(&cm_id_priv->lock);
2290         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2291                 goto unlock;
2292
2293         switch (cm_id_priv->id.state) {
2294         case IB_CM_REP_SENT:
2295         case IB_CM_DREQ_SENT:
2296                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2297                 break;
2298         case IB_CM_ESTABLISHED:
2299                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2300                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2301                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2302                 break;
2303         case IB_CM_MRA_REP_RCVD:
2304                 break;
2305         case IB_CM_TIMEWAIT:
2306                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2307                                 counter[CM_DREQ_COUNTER]);
2308                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2309                         goto unlock;
2310
2311                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2312                                cm_id_priv->private_data,
2313                                cm_id_priv->private_data_len);
2314                 spin_unlock_irq(&cm_id_priv->lock);
2315
2316                 if (ib_post_send_mad(msg, NULL))
2317                         cm_free_msg(msg);
2318                 goto deref;
2319         case IB_CM_DREQ_RCVD:
2320                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2321                                 counter[CM_DREQ_COUNTER]);
2322                 goto unlock;
2323         default:
2324                 goto unlock;
2325         }
2326         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2327         cm_id_priv->tid = dreq_msg->hdr.tid;
2328         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2329         if (!ret)
2330                 list_add_tail(&work->list, &cm_id_priv->work_list);
2331         spin_unlock_irq(&cm_id_priv->lock);
2332
2333         if (ret)
2334                 cm_process_work(cm_id_priv, work);
2335         else
2336                 cm_deref_id(cm_id_priv);
2337         return 0;
2338
2339 unlock: spin_unlock_irq(&cm_id_priv->lock);
2340 deref:  cm_deref_id(cm_id_priv);
2341         return -EINVAL;
2342 }
2343
2344 static int cm_drep_handler(struct cm_work *work)
2345 {
2346         struct cm_id_private *cm_id_priv;
2347         struct cm_drep_msg *drep_msg;
2348         int ret;
2349
2350         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2351         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2352                                    drep_msg->local_comm_id);
2353         if (!cm_id_priv)
2354                 return -EINVAL;
2355
2356         work->cm_event.private_data = &drep_msg->private_data;
2357
2358         spin_lock_irq(&cm_id_priv->lock);
2359         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2360             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2361                 spin_unlock_irq(&cm_id_priv->lock);
2362                 goto out;
2363         }
2364         cm_enter_timewait(cm_id_priv);
2365
2366         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2367         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2368         if (!ret)
2369                 list_add_tail(&work->list, &cm_id_priv->work_list);
2370         spin_unlock_irq(&cm_id_priv->lock);
2371
2372         if (ret)
2373                 cm_process_work(cm_id_priv, work);
2374         else
2375                 cm_deref_id(cm_id_priv);
2376         return 0;
2377 out:
2378         cm_deref_id(cm_id_priv);
2379         return -EINVAL;
2380 }
2381
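/*
 * Rejecting before our own REP has gone out lets the id drop back to
 * idle; once a REP has been sent the id must pass through timewait
 * instead, since the peer may already consider the connection
 * established.
 */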
2382 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2383                    enum ib_cm_rej_reason reason,
2384                    void *ari,
2385                    u8 ari_length,
2386                    const void *private_data,
2387                    u8 private_data_len)
2388 {
2389         struct cm_id_private *cm_id_priv;
2390         struct ib_mad_send_buf *msg;
2391         unsigned long flags;
2392         int ret;
2393
2394         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2395             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2396                 return -EINVAL;
2397
2398         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2399
2400         spin_lock_irqsave(&cm_id_priv->lock, flags);
2401         switch (cm_id->state) {
2402         case IB_CM_REQ_SENT:
2403         case IB_CM_MRA_REQ_RCVD:
2404         case IB_CM_REQ_RCVD:
2405         case IB_CM_MRA_REQ_SENT:
2406         case IB_CM_REP_RCVD:
2407         case IB_CM_MRA_REP_SENT:
2408                 ret = cm_alloc_msg(cm_id_priv, &msg);
2409                 if (!ret)
2410                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2411                                       cm_id_priv, reason, ari, ari_length,
2412                                       private_data, private_data_len);
2413
2414                 cm_reset_to_idle(cm_id_priv);
2415                 break;
2416         case IB_CM_REP_SENT:
2417         case IB_CM_MRA_REP_RCVD:
2418                 ret = cm_alloc_msg(cm_id_priv, &msg);
2419                 if (!ret)
2420                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2421                                       cm_id_priv, reason, ari, ari_length,
2422                                       private_data, private_data_len);
2423
2424                 cm_enter_timewait(cm_id_priv);
2425                 break;
2426         default:
2427                 ret = -EINVAL;
2428                 goto out;
2429         }
2430
2431         if (ret)
2432                 goto out;
2433
2434         ret = ib_post_send_mad(msg, NULL);
2435         if (ret)
2436                 cm_free_msg(msg);
2437
2438 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2439         return ret;
2440 }
2441 EXPORT_SYMBOL(ib_send_cm_rej);
2442
2443 static void cm_format_rej_event(struct cm_work *work)
2444 {
2445         struct cm_rej_msg *rej_msg;
2446         struct ib_cm_rej_event_param *param;
2447
2448         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2449         param = &work->cm_event.param.rej_rcvd;
2450         param->ari = rej_msg->ari;
2451         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2452         param->reason = __be16_to_cpu(rej_msg->reason);
2453         work->cm_event.private_data = &rej_msg->private_data;
2454 }
2455
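/*
 * For IB_CM_REJ_TIMEOUT the rejecting side may never have learned our
 * comm ID; the ARI is expected to carry its CA GUID instead, which
 * together with its comm ID locates our timewait entry.  All other
 * reasons resolve through the ordinary comm ID lookup.
 */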
2456 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2457 {
2458         struct cm_timewait_info *timewait_info;
2459         struct cm_id_private *cm_id_priv;
2460         __be32 remote_id;
2461
2462         remote_id = rej_msg->local_comm_id;
2463
2464         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2465                 spin_lock_irq(&cm.lock);
2466                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2467                                                   remote_id);
2468                 if (!timewait_info) {
2469                         spin_unlock_irq(&cm.lock);
2470                         return NULL;
2471                 }
2472                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2473                                       (timewait_info->work.local_id ^
2474                                        cm.random_id_operand));
2475                 if (cm_id_priv) {
2476                         if (cm_id_priv->id.remote_id == remote_id)
2477                                 atomic_inc(&cm_id_priv->refcount);
2478                         else
2479                                 cm_id_priv = NULL;
2480                 }
2481                 spin_unlock_irq(&cm.lock);
2482         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2483                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2484         else
2485                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2486
2487         return cm_id_priv;
2488 }
2489
2490 static int cm_rej_handler(struct cm_work *work)
2491 {
2492         struct cm_id_private *cm_id_priv;
2493         struct cm_rej_msg *rej_msg;
2494         int ret;
2495
2496         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2497         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2498         if (!cm_id_priv)
2499                 return -EINVAL;
2500
2501         cm_format_rej_event(work);
2502
2503         spin_lock_irq(&cm_id_priv->lock);
2504         switch (cm_id_priv->id.state) {
2505         case IB_CM_REQ_SENT:
2506         case IB_CM_MRA_REQ_RCVD:
2507         case IB_CM_REP_SENT:
2508         case IB_CM_MRA_REP_RCVD:
2509                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2510                 /* fall through */
2511         case IB_CM_REQ_RCVD:
2512         case IB_CM_MRA_REQ_SENT:
2513                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2514                         cm_enter_timewait(cm_id_priv);
2515                 else
2516                         cm_reset_to_idle(cm_id_priv);
2517                 break;
2518         case IB_CM_DREQ_SENT:
2519                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2520                 /* fall through */
2521         case IB_CM_REP_RCVD:
2522         case IB_CM_MRA_REP_SENT:
2523                 cm_enter_timewait(cm_id_priv);
2524                 break;
2525         case IB_CM_ESTABLISHED:
2526                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2527                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2528                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2529                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2530                                               cm_id_priv->msg);
2531                         cm_enter_timewait(cm_id_priv);
2532                         break;
2533                 }
2534                 /* fall through */
2535         default:
2536                 spin_unlock_irq(&cm_id_priv->lock);
2537                 ret = -EINVAL;
2538                 goto out;
2539         }
2540
2541         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2542         if (!ret)
2543                 list_add_tail(&work->list, &cm_id_priv->work_list);
2544         spin_unlock_irq(&cm_id_priv->lock);
2545
2546         if (ret)
2547                 cm_process_work(cm_id_priv, work);
2548         else
2549                 cm_deref_id(cm_id_priv);
2550         return 0;
2551 out:
2552         cm_deref_id(cm_id_priv);
2553         return -EINVAL;
2554 }
2555
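/*
 * Setting IB_CM_MRA_FLAG_DELAY in service_timeout suppresses the
 * immediate MRA: the state still advances and the timeout is
 * recorded, but the MRA only goes on the wire if a duplicate REQ or
 * REP arrives (see cm_dup_req_handler and cm_dup_rep_handler above).
 */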
2556 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2557                    u8 service_timeout,
2558                    const void *private_data,
2559                    u8 private_data_len)
2560 {
2561         struct cm_id_private *cm_id_priv;
2562         struct ib_mad_send_buf *msg;
2563         enum ib_cm_state cm_state;
2564         enum ib_cm_lap_state lap_state;
2565         enum cm_msg_response msg_response;
2566         void *data;
2567         unsigned long flags;
2568         int ret;
2569
2570         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2571                 return -EINVAL;
2572
2573         data = cm_copy_private_data(private_data, private_data_len);
2574         if (IS_ERR(data))
2575                 return PTR_ERR(data);
2576
2577         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2578
2579         spin_lock_irqsave(&cm_id_priv->lock, flags);
2580         switch (cm_id_priv->id.state) {
2581         case IB_CM_REQ_RCVD:
2582                 cm_state = IB_CM_MRA_REQ_SENT;
2583                 lap_state = cm_id->lap_state;
2584                 msg_response = CM_MSG_RESPONSE_REQ;
2585                 break;
2586         case IB_CM_REP_RCVD:
2587                 cm_state = IB_CM_MRA_REP_SENT;
2588                 lap_state = cm_id->lap_state;
2589                 msg_response = CM_MSG_RESPONSE_REP;
2590                 break;
2591         case IB_CM_ESTABLISHED:
2592                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2593                         cm_state = cm_id->state;
2594                         lap_state = IB_CM_MRA_LAP_SENT;
2595                         msg_response = CM_MSG_RESPONSE_OTHER;
2596                         break;
2597                 }
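                /* fall through */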
2598         default:
2599                 ret = -EINVAL;
2600                 goto error1;
2601         }
2602
2603         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2604                 ret = cm_alloc_msg(cm_id_priv, &msg);
2605                 if (ret)
2606                         goto error1;
2607
2608                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2609                               msg_response, service_timeout,
2610                               private_data, private_data_len);
2611                 ret = ib_post_send_mad(msg, NULL);
2612                 if (ret)
2613                         goto error2;
2614         }
2615
2616         cm_id->state = cm_state;
2617         cm_id->lap_state = lap_state;
2618         cm_id_priv->service_timeout = service_timeout;
2619         cm_set_private_data(cm_id_priv, data, private_data_len);
2620         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2621         return 0;
2622
2623 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2624         kfree(data);
2625         return ret;
2626
2627 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2628         kfree(data);
2629         cm_free_msg(msg);
2630         return ret;
2631 }
2632 EXPORT_SYMBOL(ib_send_cm_mra);
2633
2634 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2635 {
2636         switch (cm_mra_get_msg_mraed(mra_msg)) {
2637         case CM_MSG_RESPONSE_REQ:
2638                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2639         case CM_MSG_RESPONSE_REP:
2640         case CM_MSG_RESPONSE_OTHER:
2641                 return cm_acquire_id(mra_msg->remote_comm_id,
2642                                      mra_msg->local_comm_id);
2643         default:
2644                 return NULL;
2645         }
2646 }
2647
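/*
 * An MRA does not change the peer's answer, it only buys time: the
 * outstanding REQ/REP/LAP MAD is kept, but its retry timeout is
 * stretched to the service timeout the peer asked for plus our own
 * path timeout.
 */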
2648 static int cm_mra_handler(struct cm_work *work)
2649 {
2650         struct cm_id_private *cm_id_priv;
2651         struct cm_mra_msg *mra_msg;
2652         int timeout, ret;
2653
2654         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2655         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2656         if (!cm_id_priv)
2657                 return -EINVAL;
2658
2659         work->cm_event.private_data = &mra_msg->private_data;
2660         work->cm_event.param.mra_rcvd.service_timeout =
2661                                         cm_mra_get_service_timeout(mra_msg);
2662         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2663                   cm_convert_to_ms(cm_id_priv->av.timeout);
2664
2665         spin_lock_irq(&cm_id_priv->lock);
2666         switch (cm_id_priv->id.state) {
2667         case IB_CM_REQ_SENT:
2668                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2669                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2670                                   cm_id_priv->msg, timeout))
2671                         goto out;
2672                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2673                 break;
2674         case IB_CM_REP_SENT:
2675                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2676                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2677                                   cm_id_priv->msg, timeout))
2678                         goto out;
2679                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2680                 break;
2681         case IB_CM_ESTABLISHED:
2682                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2683                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2684                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2685                                   cm_id_priv->msg, timeout)) {
2686                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2687                                 atomic_long_inc(&work->port->
2688                                                 counter_group[CM_RECV_DUPLICATES].
2689                                                 counter[CM_MRA_COUNTER]);
2690                         goto out;
2691                 }
2692                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2693                 break;
2694         case IB_CM_MRA_REQ_RCVD:
2695         case IB_CM_MRA_REP_RCVD:
2696                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2697                                 counter[CM_MRA_COUNTER]);
2698                 /* fall through */
2699         default:
2700                 goto out;
2701         }
2702
2703         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2704                                       cm_id_priv->id.state;
2705         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2706         if (!ret)
2707                 list_add_tail(&work->list, &cm_id_priv->work_list);
2708         spin_unlock_irq(&cm_id_priv->lock);
2709
2710         if (ret)
2711                 cm_process_work(cm_id_priv, work);
2712         else
2713                 cm_deref_id(cm_id_priv);
2714         return 0;
2715 out:
2716         spin_unlock_irq(&cm_id_priv->lock);
2717         cm_deref_id(cm_id_priv);
2718         return -EINVAL;
2719 }
2720
2721 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2722                           struct cm_id_private *cm_id_priv,
2723                           struct ib_sa_path_rec *alternate_path,
2724                           const void *private_data,
2725                           u8 private_data_len)
2726 {
2727         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2728                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2729         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2730         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2731         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2732         /* todo: need remote CM response timeout */
2733         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2734         lap_msg->alt_local_lid = alternate_path->slid;
2735         lap_msg->alt_remote_lid = alternate_path->dlid;
2736         lap_msg->alt_local_gid = alternate_path->sgid;
2737         lap_msg->alt_remote_gid = alternate_path->dgid;
2738         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2739         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2740         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2741         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2742         cm_lap_set_sl(lap_msg, alternate_path->sl);
2743         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2744         cm_lap_set_local_ack_timeout(lap_msg,
2745                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2746                                alternate_path->packet_life_time));
2747
2748         if (private_data && private_data_len)
2749                 memcpy(lap_msg->private_data, private_data, private_data_len);
2750 }
2751
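/*
 * ib_send_cm_lap - send a load alternate path (LAP) request.  The
 * connection must be established, and no other LAP exchange may be in
 * flight: lap_state must be IB_CM_LAP_UNINIT or IB_CM_LAP_IDLE.
 */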
2752 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2753                    struct ib_sa_path_rec *alternate_path,
2754                    const void *private_data,
2755                    u8 private_data_len)
2756 {
2757         struct cm_id_private *cm_id_priv;
2758         struct ib_mad_send_buf *msg;
2759         unsigned long flags;
2760         int ret;
2761
2762         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2763                 return -EINVAL;
2764
2765         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2766         spin_lock_irqsave(&cm_id_priv->lock, flags);
2767         if (cm_id->state != IB_CM_ESTABLISHED ||
2768             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2769              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2770                 ret = -EINVAL;
2771                 goto out;
2772         }
2773
2774         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
2775                                  cm_id_priv);
2776         if (ret)
2777                 goto out;
2778         cm_id_priv->alt_av.timeout =
2779                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2780                                        cm_id_priv->alt_av.timeout - 1);
2781
2782         ret = cm_alloc_msg(cm_id_priv, &msg);
2783         if (ret)
2784                 goto out;
2785
2786         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2787                       alternate_path, private_data, private_data_len);
2788         msg->timeout_ms = cm_id_priv->timeout_ms;
2789         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2790
2791         ret = ib_post_send_mad(msg, NULL);
2792         if (ret) {
2793                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2794                 cm_free_msg(msg);
2795                 return ret;
2796         }
2797
2798         cm_id->lap_state = IB_CM_LAP_SENT;
2799         cm_id_priv->msg = msg;
2800
2801 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2802         return ret;
2803 }
2804 EXPORT_SYMBOL(ib_send_cm_lap);
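/*
 * Usage sketch (illustrative only, not called from this file): a client
 * holding an established cm_id and an alternate path record already
 * resolved through an SA query could request path migration as below;
 * "cm_id" and "alt_path" are assumptions of the example.
 *
 *	struct ib_sa_path_rec alt_path;
 *	int ret;
 *
 *	ret = ib_send_cm_lap(cm_id, &alt_path, NULL, 0);
 *	if (ret)
 *		pr_err("LAP send failed: %d\n", ret);
 */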
2805
2806 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2807                                     struct ib_sa_path_rec *path,
2808                                     struct cm_lap_msg *lap_msg)
2809 {
2810         memset(path, 0, sizeof *path);
2811         path->dgid = lap_msg->alt_local_gid;
2812         path->sgid = lap_msg->alt_remote_gid;
2813         path->dlid = lap_msg->alt_local_lid;
2814         path->slid = lap_msg->alt_remote_lid;
2815         path->flow_label = cm_lap_get_flow_label(lap_msg);
2816         path->hop_limit = lap_msg->alt_hop_limit;
2817         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2818         path->reversible = 1;
2819         path->pkey = cm_id_priv->pkey;
2820         path->sl = cm_lap_get_sl(lap_msg);
2821         path->mtu_selector = IB_SA_EQ;
2822         path->mtu = cm_id_priv->path_mtu;
2823         path->rate_selector = IB_SA_EQ;
2824         path->rate = cm_lap_get_packet_rate(lap_msg);
2825         path->packet_life_time_selector = IB_SA_EQ;
2826         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2827         path->packet_life_time -= (path->packet_life_time > 0);
2828 }
2829
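/*
 * Handle a received LAP.  A duplicate LAP that was already answered
 * with an MRA is re-MRAed; a duplicate still being processed is only
 * counted.  Otherwise the id moves to IB_CM_LAP_RCVD and the event is
 * queued to the client.
 */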
2830 static int cm_lap_handler(struct cm_work *work)
2831 {
2832         struct cm_id_private *cm_id_priv;
2833         struct cm_lap_msg *lap_msg;
2834         struct ib_cm_lap_event_param *param;
2835         struct ib_mad_send_buf *msg = NULL;
2836         int ret;
2837
2838         /* todo: verify LAP request and send reject APR if invalid. */
2839         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2840         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2841                                    lap_msg->local_comm_id);
2842         if (!cm_id_priv)
2843                 return -EINVAL;
2844
2845         param = &work->cm_event.param.lap_rcvd;
2846         param->alternate_path = &work->path[0];
2847         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2848         work->cm_event.private_data = &lap_msg->private_data;
2849
2850         spin_lock_irq(&cm_id_priv->lock);
2851         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2852                 goto unlock;
2853
2854         switch (cm_id_priv->id.lap_state) {
2855         case IB_CM_LAP_UNINIT:
2856         case IB_CM_LAP_IDLE:
2857                 break;
2858         case IB_CM_MRA_LAP_SENT:
2859                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2860                                 counter[CM_LAP_COUNTER]);
2861                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2862                         goto unlock;
2863
2864                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2865                               CM_MSG_RESPONSE_OTHER,
2866                               cm_id_priv->service_timeout,
2867                               cm_id_priv->private_data,
2868                               cm_id_priv->private_data_len);
2869                 spin_unlock_irq(&cm_id_priv->lock);
2870
2871                 if (ib_post_send_mad(msg, NULL))
2872                         cm_free_msg(msg);
2873                 goto deref;
2874         case IB_CM_LAP_RCVD:
2875                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2876                                 counter[CM_LAP_COUNTER]);
2877                 goto unlock;
2878         default:
2879                 goto unlock;
2880         }
2881
2882         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2883         cm_id_priv->tid = lap_msg->hdr.tid;
2884         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2885                                 work->mad_recv_wc->recv_buf.grh,
2886                                 &cm_id_priv->av);
2887         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
2888                            cm_id_priv);
2889         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2890         if (!ret)
2891                 list_add_tail(&work->list, &cm_id_priv->work_list);
2892         spin_unlock_irq(&cm_id_priv->lock);
2893
2894         if (ret)
2895                 cm_process_work(cm_id_priv, work);
2896         else
2897                 cm_deref_id(cm_id_priv);
2898         return 0;
2899
2900 unlock: spin_unlock_irq(&cm_id_priv->lock);
2901 deref:  cm_deref_id(cm_id_priv);
2902         return -EINVAL;
2903 }
2904
2905 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2906                           struct cm_id_private *cm_id_priv,
2907                           enum ib_cm_apr_status status,
2908                           void *info,
2909                           u8 info_length,
2910                           const void *private_data,
2911                           u8 private_data_len)
2912 {
2913         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2914         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2915         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2916         apr_msg->ap_status = (u8) status;
2917
2918         if (info && info_length) {
2919                 apr_msg->info_length = info_length;
2920                 memcpy(apr_msg->info, info, info_length);
2921         }
2922
2923         if (private_data && private_data_len)
2924                 memcpy(apr_msg->private_data, private_data, private_data_len);
2925 }
2926
2927 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2928                    enum ib_cm_apr_status status,
2929                    void *info,
2930                    u8 info_length,
2931                    const void *private_data,
2932                    u8 private_data_len)
2933 {
2934         struct cm_id_private *cm_id_priv;
2935         struct ib_mad_send_buf *msg;
2936         unsigned long flags;
2937         int ret;
2938
2939         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2940             (info && info_length > IB_CM_APR_INFO_LENGTH))
2941                 return -EINVAL;
2942
2943         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2944         spin_lock_irqsave(&cm_id_priv->lock, flags);
2945         if (cm_id->state != IB_CM_ESTABLISHED ||
2946             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2947              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2948                 ret = -EINVAL;
2949                 goto out;
2950         }
2951
2952         ret = cm_alloc_msg(cm_id_priv, &msg);
2953         if (ret)
2954                 goto out;
2955
2956         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2957                       info, info_length, private_data, private_data_len);
2958         ret = ib_post_send_mad(msg, NULL);
2959         if (ret) {
2960                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2961                 cm_free_msg(msg);
2962                 return ret;
2963         }
2964
2965         cm_id->lap_state = IB_CM_LAP_IDLE;
2966 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2967         return ret;
2968 }
2969 EXPORT_SYMBOL(ib_send_cm_apr);
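/*
 * Usage sketch (illustrative only): a client accepting the alternate
 * path proposed in an IB_CM_LAP_RECEIVED event could answer with an
 * APR; "cm_id" is an assumption of the example, and IB_CM_APR_SUCCESS
 * is the success status defined in <rdma/ib_cm.h>.
 *
 *	ret = ib_send_cm_apr(cm_id, IB_CM_APR_SUCCESS, NULL, 0, NULL, 0);
 */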
2970
2971 static int cm_apr_handler(struct cm_work *work)
2972 {
2973         struct cm_id_private *cm_id_priv;
2974         struct cm_apr_msg *apr_msg;
2975         int ret;
2976
2977         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2978         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2979                                    apr_msg->local_comm_id);
2980         if (!cm_id_priv)
2981                 return -EINVAL; /* Unmatched reply. */
2982
2983         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2984         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2985         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2986         work->cm_event.private_data = &apr_msg->private_data;
2987
2988         spin_lock_irq(&cm_id_priv->lock);
2989         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2990             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2991              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2992                 spin_unlock_irq(&cm_id_priv->lock);
2993                 goto out;
2994         }
2995         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2996         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2997         cm_id_priv->msg = NULL;
2998
2999         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3000         if (!ret)
3001                 list_add_tail(&work->list, &cm_id_priv->work_list);
3002         spin_unlock_irq(&cm_id_priv->lock);
3003
3004         if (ret)
3005                 cm_process_work(cm_id_priv, work);
3006         else
3007                 cm_deref_id(cm_id_priv);
3008         return 0;
3009 out:
3010         cm_deref_id(cm_id_priv);
3011         return -EINVAL;
3012 }
3013
3014 static int cm_timewait_handler(struct cm_work *work)
3015 {
3016         struct cm_timewait_info *timewait_info;
3017         struct cm_id_private *cm_id_priv;
3018         int ret;
3019
3020         timewait_info = (struct cm_timewait_info *)work;
3021         spin_lock_irq(&cm.lock);
3022         list_del(&timewait_info->list);
3023         spin_unlock_irq(&cm.lock);
3024
3025         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3026                                    timewait_info->work.remote_id);
3027         if (!cm_id_priv)
3028                 return -EINVAL;
3029
3030         spin_lock_irq(&cm_id_priv->lock);
3031         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3032             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3033                 spin_unlock_irq(&cm_id_priv->lock);
3034                 goto out;
3035         }
3036         cm_id_priv->id.state = IB_CM_IDLE;
3037         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3038         if (!ret)
3039                 list_add_tail(&work->list, &cm_id_priv->work_list);
3040         spin_unlock_irq(&cm_id_priv->lock);
3041
3042         if (ret)
3043                 cm_process_work(cm_id_priv, work);
3044         else
3045                 cm_deref_id(cm_id_priv);
3046         return 0;
3047 out:
3048         cm_deref_id(cm_id_priv);
3049         return -EINVAL;
3050 }
3051
3052 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3053                                struct cm_id_private *cm_id_priv,
3054                                struct ib_cm_sidr_req_param *param)
3055 {
3056         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3057                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
3058         sidr_req_msg->request_id = cm_id_priv->id.local_id;
3059         sidr_req_msg->pkey = param->path->pkey;
3060         sidr_req_msg->service_id = param->service_id;
3061
3062         if (param->private_data && param->private_data_len)
3063                 memcpy(sidr_req_msg->private_data, param->private_data,
3064                        param->private_data_len);
3065 }
3066
3067 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3068                         struct ib_cm_sidr_req_param *param)
3069 {
3070         struct cm_id_private *cm_id_priv;
3071         struct ib_mad_send_buf *msg;
3072         unsigned long flags;
3073         int ret;
3074
3075         if (!param->path || (param->private_data &&
3076              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3077                 return -EINVAL;
3078
3079         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3080         ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
3081         if (ret)
3082                 goto out;
3083
3084         cm_id->service_id = param->service_id;
3085         cm_id->service_mask = ~cpu_to_be64(0);
3086         cm_id_priv->timeout_ms = param->timeout_ms;
3087         cm_id_priv->max_cm_retries = param->max_cm_retries;
3088         ret = cm_alloc_msg(cm_id_priv, &msg);
3089         if (ret)
3090                 goto out;
3091
3092         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3093                            param);
3094         msg->timeout_ms = cm_id_priv->timeout_ms;
3095         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3096
3097         spin_lock_irqsave(&cm_id_priv->lock, flags);
3098         if (cm_id->state == IB_CM_IDLE)
3099                 ret = ib_post_send_mad(msg, NULL);
3100         else
3101                 ret = -EINVAL;
3102
3103         if (ret) {
3104                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3105                 cm_free_msg(msg);
3106                 goto out;
3107         }
3108         cm_id->state = IB_CM_SIDR_REQ_SENT;
3109         cm_id_priv->msg = msg;
3110         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3111 out:
3112         return ret;
3113 }
3114 EXPORT_SYMBOL(ib_send_cm_sidr_req);
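/*
 * Usage sketch (illustrative only): resolving a service ID to a remote
 * QPN/Q_Key over unreliable datagrams; "path_rec", "svc_id" and
 * "cm_id" are assumptions of the example.
 *
 *	struct ib_cm_sidr_req_param param = {
 *		.path		= &path_rec,
 *		.service_id	= svc_id,
 *		.timeout_ms	= 1000,
 *		.max_cm_retries	= 3,
 *	};
 *
 *	ret = ib_send_cm_sidr_req(cm_id, &param);
 */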
3115
3116 static void cm_format_sidr_req_event(struct cm_work *work,
3117                                      struct ib_cm_id *listen_id)
3118 {
3119         struct cm_sidr_req_msg *sidr_req_msg;
3120         struct ib_cm_sidr_req_event_param *param;
3121
3122         sidr_req_msg = (struct cm_sidr_req_msg *)
3123                                 work->mad_recv_wc->recv_buf.mad;
3124         param = &work->cm_event.param.sidr_req_rcvd;
3125         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3126         param->listen_id = listen_id;
3127         param->service_id = sidr_req_msg->service_id;
3128         param->bth_pkey = cm_get_bth_pkey(work);
3129         param->port = work->port->port_num;
3130         work->cm_event.private_data = &sidr_req_msg->private_data;
3131 }
3132
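/*
 * Handle a received SIDR REQ: create a cm_id for the request, insert
 * it into the remote SIDR table to filter duplicates, match the
 * service ID against the listen table, and deliver the event through
 * the listener's handler.  Unmatched requests are rejected with
 * IB_SIDR_UNSUPPORTED.
 */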
3133 static int cm_sidr_req_handler(struct cm_work *work)
3134 {
3135         struct ib_cm_id *cm_id;
3136         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3137         struct cm_sidr_req_msg *sidr_req_msg;
3138         struct ib_wc *wc;
3139
3140         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3141         if (IS_ERR(cm_id))
3142                 return PTR_ERR(cm_id);
3143         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3144
3145         /* Record SGID/SLID and request ID for lookup. */
3146         sidr_req_msg = (struct cm_sidr_req_msg *)
3147                                 work->mad_recv_wc->recv_buf.mad;
3148         wc = work->mad_recv_wc->wc;
3149         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3150         cm_id_priv->av.dgid.global.interface_id = 0;
3151         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3152                                 work->mad_recv_wc->recv_buf.grh,
3153                                 &cm_id_priv->av);
3154         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3155         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3156         atomic_inc(&cm_id_priv->work_count);
3157
3158         spin_lock_irq(&cm.lock);
3159         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3160         if (cur_cm_id_priv) {
3161                 spin_unlock_irq(&cm.lock);
3162                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3163                                 counter[CM_SIDR_REQ_COUNTER]);
3164                 goto out; /* Duplicate message. */
3165         }
3166         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3167         cur_cm_id_priv = cm_find_listen(cm_id->device,
3168                                         sidr_req_msg->service_id);
3169         if (!cur_cm_id_priv) {
3170                 spin_unlock_irq(&cm.lock);
3171                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3172                 goto out; /* No match. */
3173         }
3174         atomic_inc(&cur_cm_id_priv->refcount);
3175         atomic_inc(&cm_id_priv->refcount);
3176         spin_unlock_irq(&cm.lock);
3177
3178         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3179         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3180         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3181         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3182
3183         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3184         cm_process_work(cm_id_priv, work);
3185         cm_deref_id(cur_cm_id_priv);
3186         return 0;
3187 out:
3188         ib_destroy_cm_id(&cm_id_priv->id);
3189         return -EINVAL;
3190 }
3191
3192 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3193                                struct cm_id_private *cm_id_priv,
3194                                struct ib_cm_sidr_rep_param *param)
3195 {
3196         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3197                           cm_id_priv->tid);
3198         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3199         sidr_rep_msg->status = param->status;
3200         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3201         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3202         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3203
3204         if (param->info && param->info_length)
3205                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3206
3207         if (param->private_data && param->private_data_len)
3208                 memcpy(sidr_rep_msg->private_data, param->private_data,
3209                        param->private_data_len);
3210 }
3211
3212 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3213                         struct ib_cm_sidr_rep_param *param)
3214 {
3215         struct cm_id_private *cm_id_priv;
3216         struct ib_mad_send_buf *msg;
3217         unsigned long flags;
3218         int ret;
3219
3220         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3221             (param->private_data &&
3222              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3223                 return -EINVAL;
3224
3225         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3226         spin_lock_irqsave(&cm_id_priv->lock, flags);
3227         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3228                 ret = -EINVAL;
3229                 goto error;
3230         }
3231
3232         ret = cm_alloc_msg(cm_id_priv, &msg);
3233         if (ret)
3234                 goto error;
3235
3236         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3237                            param);
3238         ret = ib_post_send_mad(msg, NULL);
3239         if (ret) {
3240                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3241                 cm_free_msg(msg);
3242                 return ret;
3243         }
3244         cm_id->state = IB_CM_IDLE;
3245         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3246
3247         spin_lock_irqsave(&cm.lock, flags);
3248         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3249                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3250                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3251         }
3252         spin_unlock_irqrestore(&cm.lock, flags);
3253         return 0;
3254
3255 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3256         return ret;
3257 }
3258 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
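/*
 * Usage sketch (illustrative only): a listener answering an
 * IB_CM_SIDR_REQ_RECEIVED event with the QPN and Q_Key of the service
 * QP; "qp" and "my_qkey" are assumptions of the example.
 *
 *	struct ib_cm_sidr_rep_param rep = {
 *		.qp_num	= qp->qp_num,
 *		.qkey	= my_qkey,
 *		.status	= IB_SIDR_SUCCESS,
 *	};
 *
 *	ret = ib_send_cm_sidr_rep(cm_id, &rep);
 */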
3259
3260 static void cm_format_sidr_rep_event(struct cm_work *work)
3261 {
3262         struct cm_sidr_rep_msg *sidr_rep_msg;
3263         struct ib_cm_sidr_rep_event_param *param;
3264
3265         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3266                                 work->mad_recv_wc->recv_buf.mad;
3267         param = &work->cm_event.param.sidr_rep_rcvd;
3268         param->status = sidr_rep_msg->status;
3269         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3270         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3271         param->info = &sidr_rep_msg->info;
3272         param->info_len = sidr_rep_msg->info_length;
3273         work->cm_event.private_data = &sidr_rep_msg->private_data;
3274 }
3275
3276 static int cm_sidr_rep_handler(struct cm_work *work)
3277 {
3278         struct cm_sidr_rep_msg *sidr_rep_msg;
3279         struct cm_id_private *cm_id_priv;
3280
3281         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3282                                 work->mad_recv_wc->recv_buf.mad;
3283         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3284         if (!cm_id_priv)
3285                 return -EINVAL; /* Unmatched reply. */
3286
3287         spin_lock_irq(&cm_id_priv->lock);
3288         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3289                 spin_unlock_irq(&cm_id_priv->lock);
3290                 goto out;
3291         }
3292         cm_id_priv->id.state = IB_CM_IDLE;
3293         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3294         spin_unlock_irq(&cm_id_priv->lock);
3295
3296         cm_format_sidr_rep_event(work);
3297         cm_process_work(cm_id_priv, work);
3298         return 0;
3299 out:
3300         cm_deref_id(cm_id_priv);
3301         return -EINVAL;
3302 }
3303
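/*
 * A send completed in error (typically it timed out and exhausted its
 * retries).  If the message is still the active send for this cm_id
 * and the id's state is unchanged, map the state to the corresponding
 * *_ERROR event and report it to the client; stale completions are
 * discarded.
 */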
3304 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3305                                   enum ib_wc_status wc_status)
3306 {
3307         struct cm_id_private *cm_id_priv;
3308         struct ib_cm_event cm_event;
3309         enum ib_cm_state state;
3310         int ret;
3311
3312         memset(&cm_event, 0, sizeof cm_event);
3313         cm_id_priv = msg->context[0];
3314
3315         /* Discard old sends or ones without a response. */
3316         spin_lock_irq(&cm_id_priv->lock);
3317         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3318         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3319                 goto discard;
3320
3321         switch (state) {
3322         case IB_CM_REQ_SENT:
3323         case IB_CM_MRA_REQ_RCVD:
3324                 cm_reset_to_idle(cm_id_priv);
3325                 cm_event.event = IB_CM_REQ_ERROR;
3326                 break;
3327         case IB_CM_REP_SENT:
3328         case IB_CM_MRA_REP_RCVD:
3329                 cm_reset_to_idle(cm_id_priv);
3330                 cm_event.event = IB_CM_REP_ERROR;
3331                 break;
3332         case IB_CM_DREQ_SENT:
3333                 cm_enter_timewait(cm_id_priv);
3334                 cm_event.event = IB_CM_DREQ_ERROR;
3335                 break;
3336         case IB_CM_SIDR_REQ_SENT:
3337                 cm_id_priv->id.state = IB_CM_IDLE;
3338                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3339                 break;
3340         default:
3341                 goto discard;
3342         }
3343         spin_unlock_irq(&cm_id_priv->lock);
3344         cm_event.param.send_status = wc_status;
3345
3346         /* No other events can occur on the cm_id at this point. */
3347         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3348         cm_free_msg(msg);
3349         if (ret)
3350                 ib_destroy_cm_id(&cm_id_priv->id);
3351         return;
3352 discard:
3353         spin_unlock_irq(&cm_id_priv->lock);
3354         cm_free_msg(msg);
3355 }
3356
3357 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3358                             struct ib_mad_send_wc *mad_send_wc)
3359 {
3360         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3361         struct cm_port *port;
3362         u16 attr_index;
3363
3364         port = mad_agent->context;
3365         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3366                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3367
3368         /*
3369          * If the send was in response to a received message (context[0] is not
3370          * set to a cm_id), and is not a REJ, then it is a send that was
3371          * manually retried.
3372          */
3373         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3374                 msg->retries = 1;
3375
3376         atomic_long_add(1 + msg->retries,
3377                         &port->counter_group[CM_XMIT].counter[attr_index]);
3378         if (msg->retries)
3379                 atomic_long_add(msg->retries,
3380                                 &port->counter_group[CM_XMIT_RETRIES].
3381                                 counter[attr_index]);
3382
3383         switch (mad_send_wc->status) {
3384         case IB_WC_SUCCESS:
3385         case IB_WC_WR_FLUSH_ERR:
3386                 cm_free_msg(msg);
3387                 break;
3388         default:
3389                 if (msg->context[0] && msg->context[1])
3390                         cm_process_send_error(msg, mad_send_wc->status);
3391                 else
3392                         cm_free_msg(msg);
3393                 break;
3394         }
3395 }
3396
3397 static void cm_work_handler(struct work_struct *_work)
3398 {
3399         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3400         int ret;
3401
3402         switch (work->cm_event.event) {
3403         case IB_CM_REQ_RECEIVED:
3404                 ret = cm_req_handler(work);
3405                 break;
3406         case IB_CM_MRA_RECEIVED:
3407                 ret = cm_mra_handler(work);
3408                 break;
3409         case IB_CM_REJ_RECEIVED:
3410                 ret = cm_rej_handler(work);
3411                 break;
3412         case IB_CM_REP_RECEIVED:
3413                 ret = cm_rep_handler(work);
3414                 break;
3415         case IB_CM_RTU_RECEIVED:
3416                 ret = cm_rtu_handler(work);
3417                 break;
3418         case IB_CM_USER_ESTABLISHED:
3419                 ret = cm_establish_handler(work);
3420                 break;
3421         case IB_CM_DREQ_RECEIVED:
3422                 ret = cm_dreq_handler(work);
3423                 break;
3424         case IB_CM_DREP_RECEIVED:
3425                 ret = cm_drep_handler(work);
3426                 break;
3427         case IB_CM_SIDR_REQ_RECEIVED:
3428                 ret = cm_sidr_req_handler(work);
3429                 break;
3430         case IB_CM_SIDR_REP_RECEIVED:
3431                 ret = cm_sidr_rep_handler(work);
3432                 break;
3433         case IB_CM_LAP_RECEIVED:
3434                 ret = cm_lap_handler(work);
3435                 break;
3436         case IB_CM_APR_RECEIVED:
3437                 ret = cm_apr_handler(work);
3438                 break;
3439         case IB_CM_TIMEWAIT_EXIT:
3440                 ret = cm_timewait_handler(work);
3441                 break;
3442         default:
3443                 ret = -EINVAL;
3444                 break;
3445         }
3446         if (ret)
3447                 cm_free_work(work);
3448 }
3449
3450 static int cm_establish(struct ib_cm_id *cm_id)
3451 {
3452         struct cm_id_private *cm_id_priv;
3453         struct cm_work *work;
3454         unsigned long flags;
3455         int ret = 0;
3456         struct cm_device *cm_dev;
3457
3458         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3459         if (!cm_dev)
3460                 return -ENODEV;
3461
3462         work = kmalloc(sizeof *work, GFP_ATOMIC);
3463         if (!work)
3464                 return -ENOMEM;
3465
3466         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3467         spin_lock_irqsave(&cm_id_priv->lock, flags);
3468         switch (cm_id->state) {
3470         case IB_CM_REP_SENT:
3471         case IB_CM_MRA_REP_RCVD:
3472                 cm_id->state = IB_CM_ESTABLISHED;
3473                 break;
3474         case IB_CM_ESTABLISHED:
3475                 ret = -EISCONN;
3476                 break;
3477         default:
3478                 ret = -EINVAL;
3479                 break;
3480         }
3481         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3482
3483         if (ret) {
3484                 kfree(work);
3485                 goto out;
3486         }
3487
3488         /*
3489          * The CM worker thread may try to destroy the cm_id before it
3490          * can execute this work item.  To prevent potential deadlock,
3491          * we need to find the cm_id once we're in the context of the
3492          * worker thread, rather than holding a reference on it.
3493          */
3494         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3495         work->local_id = cm_id->local_id;
3496         work->remote_id = cm_id->remote_id;
3497         work->mad_recv_wc = NULL;
3498         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3499
3500         /* Check if the device started its remove_one */
3501         spin_lock_irqsave(&cm.lock, flags);
3502         if (!cm_dev->going_down) {
3503                 queue_delayed_work(cm.wq, &work->work, 0);
3504         } else {
3505                 kfree(work);
3506                 ret = -ENODEV;
3507         }
3508         spin_unlock_irqrestore(&cm.lock, flags);
3509
3510 out:
3511         return ret;
3512 }
3513
3514 static int cm_migrate(struct ib_cm_id *cm_id)
3515 {
3516         struct cm_id_private *cm_id_priv;
3517         struct cm_av tmp_av;
3518         unsigned long flags;
3519         int tmp_send_port_not_ready;
3520         int ret = 0;
3521
3522         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3523         spin_lock_irqsave(&cm_id_priv->lock, flags);
3524         if (cm_id->state == IB_CM_ESTABLISHED &&
3525             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3526              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3527                 cm_id->lap_state = IB_CM_LAP_IDLE;
3528                 /* Swap address vector */
3529                 tmp_av = cm_id_priv->av;
3530                 cm_id_priv->av = cm_id_priv->alt_av;
3531                 cm_id_priv->alt_av = tmp_av;
3532                 /* Swap port send ready state */
3533                 tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3534                 cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3535                 cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3536         } else
3537                 ret = -EINVAL;
3538         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3539
3540         return ret;
3541 }
3542
3543 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3544 {
3545         int ret;
3546
3547         switch (event) {
3548         case IB_EVENT_COMM_EST:
3549                 ret = cm_establish(cm_id);
3550                 break;
3551         case IB_EVENT_PATH_MIG:
3552                 ret = cm_migrate(cm_id);
3553                 break;
3554         default:
3555                 ret = -EINVAL;
3556         }
3557         return ret;
3558 }
3559 EXPORT_SYMBOL(ib_cm_notify);
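/*
 * Usage sketch (illustrative only): a QP event handler forwarding the
 * communication-established hardware event to the CM, assuming the
 * cm_id was stored as the QP's event context when the QP was created.
 *
 *	static void qp_event_handler(struct ib_event *event, void *context)
 *	{
 *		struct ib_cm_id *cm_id = context;
 *
 *		if (event->event == IB_EVENT_COMM_EST)
 *			ib_cm_notify(cm_id, event->event);
 *	}
 */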
3560
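/*
 * MAD receive path: map the CM attribute ID to a cm_event, bump the
 * per-port receive counter, and queue a cm_work item (sized to carry
 * any path records in the message) unless the device has started its
 * remove_one.
 */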
3561 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3562                             struct ib_mad_recv_wc *mad_recv_wc)
3563 {
3564         struct cm_port *port = mad_agent->context;
3565         struct cm_work *work;
3566         enum ib_cm_event_type event;
3567         u16 attr_id;
3568         int paths = 0;
3569         int going_down = 0;
3570
3571         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3572         case CM_REQ_ATTR_ID:
3573                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3574                                                     alt_local_lid != 0);
3575                 event = IB_CM_REQ_RECEIVED;
3576                 break;
3577         case CM_MRA_ATTR_ID:
3578                 event = IB_CM_MRA_RECEIVED;
3579                 break;
3580         case CM_REJ_ATTR_ID:
3581                 event = IB_CM_REJ_RECEIVED;
3582                 break;
3583         case CM_REP_ATTR_ID:
3584                 event = IB_CM_REP_RECEIVED;
3585                 break;
3586         case CM_RTU_ATTR_ID:
3587                 event = IB_CM_RTU_RECEIVED;
3588                 break;
3589         case CM_DREQ_ATTR_ID:
3590                 event = IB_CM_DREQ_RECEIVED;
3591                 break;
3592         case CM_DREP_ATTR_ID:
3593                 event = IB_CM_DREP_RECEIVED;
3594                 break;
3595         case CM_SIDR_REQ_ATTR_ID:
3596                 event = IB_CM_SIDR_REQ_RECEIVED;
3597                 break;
3598         case CM_SIDR_REP_ATTR_ID:
3599                 event = IB_CM_SIDR_REP_RECEIVED;
3600                 break;
3601         case CM_LAP_ATTR_ID:
3602                 paths = 1;
3603                 event = IB_CM_LAP_RECEIVED;
3604                 break;
3605         case CM_APR_ATTR_ID:
3606                 event = IB_CM_APR_RECEIVED;
3607                 break;
3608         default:
3609                 ib_free_recv_mad(mad_recv_wc);
3610                 return;
3611         }
3612
3613         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3614         atomic_long_inc(&port->counter_group[CM_RECV].
3615                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3616
3617         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3618                        GFP_KERNEL);
3619         if (!work) {
3620                 ib_free_recv_mad(mad_recv_wc);
3621                 return;
3622         }
3623
3624         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3625         work->cm_event.event = event;
3626         work->mad_recv_wc = mad_recv_wc;
3627         work->port = port;
3628
3629         /* Check if the device started its remove_one */
3630         spin_lock_irq(&cm.lock);
3631         if (!port->cm_dev->going_down)
3632                 queue_delayed_work(cm.wq, &work->work, 0);
3633         else
3634                 going_down = 1;
3635         spin_unlock_irq(&cm.lock);
3636
3637         if (going_down) {
3638                 kfree(work);
3639                 ib_free_recv_mad(mad_recv_wc);
3640         }
3641 }
3642
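/*
 * The three helpers below fill in the QP attributes a client needs to
 * move its QP through INIT, RTR and RTS as the connection is
 * established; ib_cm_init_qp_attr() dispatches on the requested
 * qp_state.
 */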
3643 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3644                                 struct ib_qp_attr *qp_attr,
3645                                 int *qp_attr_mask)
3646 {
3647         unsigned long flags;
3648         int ret;
3649
3650         spin_lock_irqsave(&cm_id_priv->lock, flags);
3651         switch (cm_id_priv->id.state) {
3652         case IB_CM_REQ_SENT:
3653         case IB_CM_MRA_REQ_RCVD:
3654         case IB_CM_REQ_RCVD:
3655         case IB_CM_MRA_REQ_SENT:
3656         case IB_CM_REP_RCVD:
3657         case IB_CM_MRA_REP_SENT:
3658         case IB_CM_REP_SENT:
3659         case IB_CM_MRA_REP_RCVD:
3660         case IB_CM_ESTABLISHED:
3661                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3662                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3663                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3664                 if (cm_id_priv->responder_resources)
3665                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3666                                                     IB_ACCESS_REMOTE_ATOMIC;
3667                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3668                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3669                 ret = 0;
3670                 break;
3671         default:
3672                 ret = -EINVAL;
3673                 break;
3674         }
3675         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3676         return ret;
3677 }
3678
3679 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3680                                struct ib_qp_attr *qp_attr,
3681                                int *qp_attr_mask)
3682 {
3683         unsigned long flags;
3684         int ret;
3685
3686         spin_lock_irqsave(&cm_id_priv->lock, flags);
3687         switch (cm_id_priv->id.state) {
3688         case IB_CM_REQ_RCVD:
3689         case IB_CM_MRA_REQ_SENT:
3690         case IB_CM_REP_RCVD:
3691         case IB_CM_MRA_REP_SENT:
3692         case IB_CM_REP_SENT:
3693         case IB_CM_MRA_REP_RCVD:
3694         case IB_CM_ESTABLISHED:
3695                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3696                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3697                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3698                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3699                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3700                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3701                 if (cm_id_priv->qp_type == IB_QPT_RC ||
3702                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3703                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3704                                          IB_QP_MIN_RNR_TIMER;
3705                         qp_attr->max_dest_rd_atomic =
3706                                         cm_id_priv->responder_resources;
3707                         qp_attr->min_rnr_timer = 0;
3708                 }
3709                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3710                         *qp_attr_mask |= IB_QP_ALT_PATH;
3711                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3712                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3713                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3714                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3715                 }
3716                 ret = 0;
3717                 break;
3718         default:
3719                 ret = -EINVAL;
3720                 break;
3721         }
3722         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3723         return ret;
3724 }
3725
3726 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3727                                struct ib_qp_attr *qp_attr,
3728                                int *qp_attr_mask)
3729 {
3730         unsigned long flags;
3731         int ret;
3732
3733         spin_lock_irqsave(&cm_id_priv->lock, flags);
3734         switch (cm_id_priv->id.state) {
3735         /* Allow transition to RTS before sending REP */
3736         case IB_CM_REQ_RCVD:
3737         case IB_CM_MRA_REQ_SENT:
3738
3739         case IB_CM_REP_RCVD:
3740         case IB_CM_MRA_REP_SENT:
3741         case IB_CM_REP_SENT:
3742         case IB_CM_MRA_REP_RCVD:
3743         case IB_CM_ESTABLISHED:
3744                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3745                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3746                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3747                         switch (cm_id_priv->qp_type) {
3748                         case IB_QPT_RC:
3749                         case IB_QPT_XRC_INI:
3750                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3751                                                  IB_QP_MAX_QP_RD_ATOMIC;
3752                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3753                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3754                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3755                                 /* fall through */
3756                         case IB_QPT_XRC_TGT:
3757                                 *qp_attr_mask |= IB_QP_TIMEOUT;
3758                                 qp_attr->timeout = cm_id_priv->av.timeout;
3759                                 break;
3760                         default:
3761                                 break;
3762                         }
3763                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3764                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3765                                 qp_attr->path_mig_state = IB_MIG_REARM;
3766                         }
3767                 } else {
3768                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3769                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3770                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3771                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3772                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3773                         qp_attr->path_mig_state = IB_MIG_REARM;
3774                 }
3775                 ret = 0;
3776                 break;
3777         default:
3778                 ret = -EINVAL;
3779                 break;
3780         }
3781         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3782         return ret;
3783 }
3784
3785 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3786                        struct ib_qp_attr *qp_attr,
3787                        int *qp_attr_mask)
3788 {
3789         struct cm_id_private *cm_id_priv;
3790         int ret;
3791
3792         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3793         switch (qp_attr->qp_state) {
3794         case IB_QPS_INIT:
3795                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3796                 break;
3797         case IB_QPS_RTR:
3798                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3799                 break;
3800         case IB_QPS_RTS:
3801                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3802                 break;
3803         default:
3804                 ret = -EINVAL;
3805                 break;
3806         }
3807         return ret;
3808 }
3809 EXPORT_SYMBOL(ib_cm_init_qp_attr);
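/*
 * Usage sketch (illustrative only): moving a QP to RTR once connection
 * establishment has progressed far enough; "cm_id" and "qp" are
 * assumptions of the example.
 *
 *	struct ib_qp_attr qp_attr;
 *	int qp_attr_mask;
 *	int ret;
 *
 *	qp_attr.qp_state = IB_QPS_RTR;
 *	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */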
3810
3811 static void cm_get_ack_delay(struct cm_device *cm_dev)
3812 {
3813         struct ib_device_attr attr;
3814
3815         if (ib_query_device(cm_dev->ib_device, &attr))
3816                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3817         else
3818                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3819 }
3820
3821 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3822                                char *buf)
3823 {
3824         struct cm_counter_group *group;
3825         struct cm_counter_attribute *cm_attr;
3826
3827         group = container_of(obj, struct cm_counter_group, obj);
3828         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3829
3830         return sprintf(buf, "%ld\n",
3831                        atomic_long_read(&group->counter[cm_attr->index]));
3832 }
3833
3834 static const struct sysfs_ops cm_counter_ops = {
3835         .show = cm_show_counter
3836 };
3837
3838 static struct kobj_type cm_counter_obj_type = {
3839         .sysfs_ops = &cm_counter_ops,
3840         .default_attrs = cm_counter_default_attrs
3841 };
3842
3843 static void cm_release_port_obj(struct kobject *obj)
3844 {
3845         struct cm_port *cm_port;
3846
3847         cm_port = container_of(obj, struct cm_port, port_obj);
3848         kfree(cm_port);
3849 }
3850
3851 static struct kobj_type cm_port_obj_type = {
3852         .release = cm_release_port_obj
3853 };
3854
3855 static char *cm_devnode(struct device *dev, umode_t *mode)
3856 {
3857         if (mode)
3858                 *mode = 0666;
3859         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3860 }
3861
3862 struct class cm_class = {
3863         .owner   = THIS_MODULE,
3864         .name    = "infiniband_cm",
3865         .devnode = cm_devnode,
3866 };
3867 EXPORT_SYMBOL(cm_class);
3868
3869 static int cm_create_port_fs(struct cm_port *port)
3870 {
3871         int i, ret;
3872
3873         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3874                                    &port->cm_dev->device->kobj,
3875                                    "%d", port->port_num);
3876         if (ret) {
3877                 kfree(port);
3878                 return ret;
3879         }
3880
3881         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3882                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3883                                            &cm_counter_obj_type,
3884                                            &port->port_obj,
3885                                            "%s", counter_group_names[i]);
3886                 if (ret)
3887                         goto error;
3888         }
3889
3890         return 0;
3891
3892 error:
3893         while (i--)
3894                 kobject_put(&port->counter_group[i].obj);
3895         kobject_put(&port->port_obj);
3896         return ret;
3898 }
3899
3900 static void cm_remove_port_fs(struct cm_port *port)
3901 {
3902         int i;
3903
3904         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3905                 kobject_put(&port->counter_group[i].obj);
3906
3907         kobject_put(&port->port_obj);
3908 }
3909
3910 static void cm_add_one(struct ib_device *ib_device)
3911 {
3912         struct cm_device *cm_dev;
3913         struct cm_port *port;
3914         struct ib_mad_reg_req reg_req = {
3915                 .mgmt_class = IB_MGMT_CLASS_CM,
3916                 .mgmt_class_version = IB_CM_CLASS_VERSION,
3917         };
3918         struct ib_port_modify port_modify = {
3919                 .set_port_cap_mask = IB_PORT_CM_SUP
3920         };
3921         unsigned long flags;
3922         int ret;
3923         int count = 0;
3924         u8 i;
3925
3926         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3927                          ib_device->phys_port_cnt, GFP_KERNEL);
3928         if (!cm_dev)
3929                 return;
3930
3931         cm_dev->ib_device = ib_device;
3932         cm_get_ack_delay(cm_dev);
3933         cm_dev->going_down = 0;
3934         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3935                                        MKDEV(0, 0), NULL,
3936                                        "%s", ib_device->name);
3937         if (IS_ERR(cm_dev->device)) {
3938                 kfree(cm_dev);
3939                 return;
3940         }
3941
3942         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3943         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3944                 if (!rdma_cap_ib_cm(ib_device, i))
3945                         continue;
3946
3947                 port = kzalloc(sizeof *port, GFP_KERNEL);
3948                 if (!port)
3949                         goto error1;
3950
3951                 cm_dev->port[i-1] = port;
3952                 port->cm_dev = cm_dev;
3953                 port->port_num = i;
3954
3955                 INIT_LIST_HEAD(&port->cm_priv_prim_list);
3956                 INIT_LIST_HEAD(&port->cm_priv_altr_list);
3957
3958                 ret = cm_create_port_fs(port);
3959                 if (ret)
3960                         goto error1;
3961
3962                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3963                                                         IB_QPT_GSI,
3964                                                         &reg_req,
3965                                                         0,
3966                                                         cm_send_handler,
3967                                                         cm_recv_handler,
3968                                                         port,
3969                                                         0);
3970                 if (IS_ERR(port->mad_agent))
3971                         goto error2;
3972
3973                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3974                 if (ret)
3975                         goto error3;
3976
3977                 count++;
3978         }
3979
3980         if (!count)
3981                 goto free;
3982
3983         ib_set_client_data(ib_device, &cm_client, cm_dev);
3984
3985         write_lock_irqsave(&cm.device_lock, flags);
3986         list_add_tail(&cm_dev->list, &cm.device_list);
3987         write_unlock_irqrestore(&cm.device_lock, flags);
3988         return;
3989
3990 error3:
3991         ib_unregister_mad_agent(port->mad_agent);
3992 error2:
3993         cm_remove_port_fs(port);
3994 error1:
3995         port_modify.set_port_cap_mask = 0;
3996         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3997         while (--i) {
3998                 if (!rdma_cap_ib_cm(ib_device, i))
3999                         continue;
4000
4001                 port = cm_dev->port[i-1];
4002                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4003                 ib_unregister_mad_agent(port->mad_agent);
4004                 cm_remove_port_fs(port);
4005         }
4006 free:
4007         device_unregister(cm_dev->device);
4008         kfree(cm_dev);
4009 }
4010
4011 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4012 {
4013         struct cm_device *cm_dev = client_data;
4014         struct cm_port *port;
4015         struct cm_id_private *cm_id_priv;
4016         struct ib_mad_agent *cur_mad_agent;
4017         struct ib_port_modify port_modify = {
4018                 .clr_port_cap_mask = IB_PORT_CM_SUP
4019         };
4020         unsigned long flags;
4021         int i;
4022
4023         if (!cm_dev)
4024                 return;
4025
4026         write_lock_irqsave(&cm.device_lock, flags);
4027         list_del(&cm_dev->list);
4028         write_unlock_irqrestore(&cm.device_lock, flags);
4029
4030         spin_lock_irq(&cm.lock);
4031         cm_dev->going_down = 1;
4032         spin_unlock_irq(&cm.lock);
4033
4034         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4035                 if (!rdma_cap_ib_cm(ib_device, i))
4036                         continue;
4037
4038                 port = cm_dev->port[i-1];
4039                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4040                 /* Mark all cm_ids bound to this port as not ready to send */
4041                 spin_lock_irq(&cm.lock);
4042                 list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4043                         cm_id_priv->altr_send_port_not_ready = 1;
4044                 list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4045                         cm_id_priv->prim_send_port_not_ready = 1;
4046                 spin_unlock_irq(&cm.lock);
4047                 /*
4048                  * Flush the workqueue after setting going_down so that no new
4049                  * work can be queued by the receive handler; only then is it
4050                  * safe to unregister the MAD agent.
4051                  */
4052                 flush_workqueue(cm.wq);
4053                 spin_lock_irq(&cm.state_lock);
4054                 cur_mad_agent = port->mad_agent;
4055                 port->mad_agent = NULL;
4056                 spin_unlock_irq(&cm.state_lock);
4057                 ib_unregister_mad_agent(cur_mad_agent);
4058                 cm_remove_port_fs(port);
4059         }
4060
4061         device_unregister(cm_dev->device);
4062         kfree(cm_dev);
4063 }
4064
4065 static int __init ib_cm_init(void)
4066 {
4067         int ret;
4068
4069         memset(&cm, 0, sizeof cm);
4070         INIT_LIST_HEAD(&cm.device_list);
4071         rwlock_init(&cm.device_lock);
4072         spin_lock_init(&cm.lock);
4073         spin_lock_init(&cm.state_lock);
4074         cm.listen_service_table = RB_ROOT;
4075         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4076         cm.remote_id_table = RB_ROOT;
4077         cm.remote_qp_table = RB_ROOT;
4078         cm.remote_sidr_table = RB_ROOT;
4079         idr_init(&cm.local_id_table);
4080         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4081         INIT_LIST_HEAD(&cm.timewait_list);
4082
4083         ret = class_register(&cm_class);
4084         if (ret)
4086                 goto error1;
4088
4089         cm.wq = create_workqueue("ib_cm");
4090         if (!cm.wq) {
4091                 ret = -ENOMEM;
4092                 goto error2;
4093         }
4094
4095         ret = ib_register_client(&cm_client);
4096         if (ret)
4097                 goto error3;
4098
4099         return 0;
4100 error3:
4101         destroy_workqueue(cm.wq);
4102 error2:
4103         class_unregister(&cm_class);
4104 error1:
4105         idr_destroy(&cm.local_id_table);
4106         return ret;
4107 }
4108
4109 static void __exit ib_cm_cleanup(void)
4110 {
4111         struct cm_timewait_info *timewait_info, *tmp;
4112
4113         spin_lock_irq(&cm.lock);
4114         list_for_each_entry(timewait_info, &cm.timewait_list, list)
4115                 cancel_delayed_work(&timewait_info->work.work);
4116         spin_unlock_irq(&cm.lock);
4117
4118         ib_unregister_client(&cm_client);
4119         destroy_workqueue(cm.wq);
4120
4121         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4122                 list_del(&timewait_info->list);
4123                 kfree(timewait_info);
4124         }
4125
4126         class_unregister(&cm_class);
4127         idr_destroy(&cm.local_id_table);
4128 }
4129
4130 module_init(ib_cm_init);
4131 module_exit(ib_cm_cleanup);
4132