// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
12 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
14 if (!ldev->pf[0].dev || !ldev->pf[1].dev)
17 return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
20 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
22 return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
25 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
27 struct mlx5_lag *ldev;
30 ldev = mlx5_lag_dev_get(dev);
31 res = ldev && __mlx5_lag_is_multipath(ldev);
37 * Set lag port affinity
41 * 0 - set normal affinity.
42 * 1 - set affinity to port 1.
43 * 2 - set affinity to port 2.
46 static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
48 struct lag_tracker tracker;
50 if (!__mlx5_lag_is_multipath(ldev))
55 tracker.netdev_state[0].tx_enabled = true;
56 tracker.netdev_state[1].tx_enabled = true;
57 tracker.netdev_state[0].link_up = true;
58 tracker.netdev_state[1].link_up = true;
61 tracker.netdev_state[0].tx_enabled = true;
62 tracker.netdev_state[0].link_up = true;
63 tracker.netdev_state[1].tx_enabled = false;
64 tracker.netdev_state[1].link_up = false;
67 tracker.netdev_state[0].tx_enabled = false;
68 tracker.netdev_state[0].link_up = false;
69 tracker.netdev_state[1].tx_enabled = true;
70 tracker.netdev_state[1].link_up = true;
73 mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
78 if (tracker.netdev_state[0].tx_enabled)
79 mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
80 MLX5_DEV_EVENT_PORT_AFFINITY,
83 if (tracker.netdev_state[1].tx_enabled)
84 mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
85 MLX5_DEV_EVENT_PORT_AFFINITY,
88 mlx5_modify_lag(ldev, &tracker);
91 static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
93 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
94 struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
96 flush_workqueue(ldev->wq);
99 struct mlx5_fib_event_work {
100 struct work_struct work;
101 struct mlx5_lag *ldev;
104 struct fib_entry_notifier_info fen_info;
105 struct fib_nh_notifier_info fnh_info;
109 static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
113 struct lag_mp *mp = &ldev->lag_mp;
114 struct fib_nh *fib_nh0, *fib_nh1;
117 /* Handle delete event */
118 if (event == FIB_EVENT_ENTRY_DEL) {
125 /* Handle add/replace event */
126 nhs = fib_info_num_path(fi);
128 if (__mlx5_lag_is_active(ldev)) {
129 struct fib_nh *nh = fib_info_nh(fi, 0);
130 struct net_device *nh_dev = nh->fib_nh_dev;
131 int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
133 mlx5_lag_set_port_affinity(ldev, ++i);
141 /* Verify next hops are ports of the same hca */
142 fib_nh0 = fib_info_nh(fi, 0);
143 fib_nh1 = fib_info_nh(fi, 1);
144 if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
145 fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
146 !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
147 fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
148 mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
152 /* First time we see multipath route */
153 if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
154 struct lag_tracker tracker;
156 tracker = ldev->tracker;
157 mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
160 mlx5_lag_set_port_affinity(ldev, 0);
164 static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
166 struct fib_nh *fib_nh,
169 struct lag_mp *mp = &ldev->lag_mp;
171 /* Check the nh event is related to the route */
172 if (!mp->mfi || mp->mfi != fi)
175 /* nh added/removed */
176 if (event == FIB_EVENT_NH_DEL) {
177 int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
180 i = (i + 1) % 2 + 1; /* peer port */
181 mlx5_lag_set_port_affinity(ldev, i);
183 } else if (event == FIB_EVENT_NH_ADD &&
184 fib_info_num_path(fi) == 2) {
185 mlx5_lag_set_port_affinity(ldev, 0);
189 static void mlx5_lag_fib_update(struct work_struct *work)
191 struct mlx5_fib_event_work *fib_work =
192 container_of(work, struct mlx5_fib_event_work, work);
193 struct mlx5_lag *ldev = fib_work->ldev;
194 struct fib_nh *fib_nh;
196 /* Protect internal structures from changes */
198 switch (fib_work->event) {
199 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
200 case FIB_EVENT_ENTRY_APPEND: /* fall through */
201 case FIB_EVENT_ENTRY_ADD: /* fall through */
202 case FIB_EVENT_ENTRY_DEL:
203 mlx5_lag_fib_route_event(ldev, fib_work->event,
204 fib_work->fen_info.fi);
205 fib_info_put(fib_work->fen_info.fi);
207 case FIB_EVENT_NH_ADD: /* fall through */
208 case FIB_EVENT_NH_DEL:
209 fib_nh = fib_work->fnh_info.fib_nh;
210 mlx5_lag_fib_nexthop_event(ldev,
212 fib_work->fnh_info.fib_nh,
214 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
222 static struct mlx5_fib_event_work *
223 mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
225 struct mlx5_fib_event_work *fib_work;
227 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
228 if (WARN_ON(!fib_work))
231 INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
232 fib_work->ldev = ldev;
233 fib_work->event = event;
238 static int mlx5_lag_fib_event(struct notifier_block *nb,
242 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
243 struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
244 struct fib_notifier_info *info = ptr;
245 struct mlx5_fib_event_work *fib_work;
246 struct fib_entry_notifier_info *fen_info;
247 struct fib_nh_notifier_info *fnh_info;
248 struct net_device *fib_dev;
251 if (!net_eq(info->net, &init_net))
254 if (info->family != AF_INET)
257 if (!mlx5_lag_multipath_check_prereq(ldev))
261 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
262 case FIB_EVENT_ENTRY_APPEND: /* fall through */
263 case FIB_EVENT_ENTRY_ADD: /* fall through */
264 case FIB_EVENT_ENTRY_DEL:
265 fen_info = container_of(info, struct fib_entry_notifier_info,
269 NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
270 return notifier_from_errno(-EINVAL);
272 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
273 if (fib_dev != ldev->pf[0].netdev &&
274 fib_dev != ldev->pf[1].netdev) {
277 fib_work = mlx5_lag_init_fib_work(ldev, event);
280 fib_work->fen_info = *fen_info;
281 /* Take reference on fib_info to prevent it from being
282 * freed while work is queued. Release it afterwards.
284 fib_info_hold(fib_work->fen_info.fi);
286 case FIB_EVENT_NH_ADD: /* fall through */
287 case FIB_EVENT_NH_DEL:
288 fnh_info = container_of(info, struct fib_nh_notifier_info,
290 fib_work = mlx5_lag_init_fib_work(ldev, event);
293 fib_work->fnh_info = *fnh_info;
294 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
300 queue_work(ldev->wq, &fib_work->work);
305 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
307 struct lag_mp *mp = &ldev->lag_mp;
310 if (mp->fib_nb.notifier_call)
313 mp->fib_nb.notifier_call = mlx5_lag_fib_event;
314 err = register_fib_notifier(&mp->fib_nb,
315 mlx5_lag_fib_event_flush);
317 mp->fib_nb.notifier_call = NULL;
322 void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
324 struct lag_mp *mp = &ldev->lag_mp;
326 if (!mp->fib_nb.notifier_call)
329 unregister_fib_notifier(&mp->fib_nb);
330 mp->fib_nb.notifier_call = NULL;